def policy_fn(name, ob_space, ac_space):
    """Construct the mirror-loss MLP policy for the current environment.

    The policy class depends on ``env.env.env.state_self_standardize``:
    ``MlpMirrorNormsPolicy`` when it is truthy, ``MlpMirrorPolicy`` otherwise.
    Both are called with exactly the same keyword arguments.

    ``env``, ``args`` and ``obs_per`` are not defined here; they are resolved
    from the surrounding scope when the function is called.

    Args:
        name: Forwarded unchanged as ``name``.
        ob_space: Forwarded unchanged as ``ob_space``.
        ac_space: Forwarded unchanged as ``ac_space``.

    Returns:
        The object returned by the selected policy constructor.
    """
    if env.env.env.state_self_standardize:
        policy_cls = mlp_mirror_norms_policy.MlpMirrorNormsPolicy
    else:
        policy_cls = mlp_mirror_policy.MlpMirrorPolicy

    # Signed index table with 21 entries.
    # NOTE(review): 0.0001 presumably stands in for index 0 so the entry can
    # still carry a sign -- confirm against the policy implementation.
    mirrored_actions = np.array([
        5, 6, 7, 8, 9,
        0.0001, 1, 2, 3, 4,
        -10, 11, -12,
        17, 18, 19, 20,
        13, 14, 15, 16,
    ])

    return policy_cls(
        name=name,
        ob_space=ob_space,
        ac_space=ac_space,
        hid_size=args.hsize,
        num_hid_layers=args.layers,
        gmm_comp=1,
        mirror_loss=True,
        observation_permutation=obs_per,
        action_permutation=mirrored_actions,
    )
def policy_fn(name, ob_space, ac_space):
    """Construct the mirror-loss MLP policy with a 97-entry action permutation.

    The action permutation is the concatenation of three parts:

    * indices 43..85,
    * indices 0..42, with the first entry replaced by ``0.001``,
    * the fixed signed list ``old_act_permute`` (indices 86..96).

    The policy class depends on ``env.env.env.state_self_standardize``:
    ``MlpMirrorNormsPolicy`` when it is truthy, ``MlpMirrorPolicy`` otherwise.
    Both are called with exactly the same keyword arguments.

    ``env``, ``args`` and ``obs_per`` are not defined here; they are resolved
    from the surrounding scope when the function is called.

    Args:
        name: Forwarded unchanged as ``name``.
        ob_space: Forwarded unchanged as ``ob_space``.
        ac_space: Forwarded unchanged as ``ac_space``.

    Returns:
        The object returned by the selected policy constructor.
    """
    old_act_permute = [-86, 87, -88, 93, 94, 95, 96, 89, 90, 91, 92]

    # The index arrays must be floating point: with the default integer dtype
    # of ``np.arange`` the assignment of 0.001 below is silently truncated to
    # 0, which discards the non-zero marker for index 0.
    # NOTE(review): the marker presumably lets the policy read a sign for
    # index 0 -- confirm against the policy implementation.
    mus_act_l = np.arange(43, dtype=np.float64)
    # Computed before the marker is written so it holds exactly 43..85.
    mus_act_r = mus_act_l + 43
    mus_act_l[0] = 0.001

    act_permute = np.concatenate([mus_act_r, mus_act_l, old_act_permute])

    if env.env.env.state_self_standardize:
        policy_cls = mlp_mirror_norms_policy.MlpMirrorNormsPolicy
    else:
        policy_cls = mlp_mirror_policy.MlpMirrorPolicy

    return policy_cls(
        name=name,
        ob_space=ob_space,
        ac_space=ac_space,
        hid_size=args.hsize,
        num_hid_layers=args.layers,
        gmm_comp=1,
        mirror_loss=True,
        observation_permutation=obs_per,
        action_permutation=act_permute,
    )
def policy_fn(name, ob_space, ac_space):
    """Construct an ``MlpMirrorPolicy`` with hard-coded permutation tables.

    The network uses 3 hidden layers of width 64, ``gmm_comp=1`` and
    ``mirror_loss=True``. The observation table has 60 signed entries and the
    action table has 23.

    Args:
        name: Forwarded unchanged as ``name``.
        ob_space: Forwarded unchanged as ``ob_space``.
        ac_space: Forwarded unchanged as ``ac_space``.

    Returns:
        The object returned by ``mlp_mirror_policy.MlpMirrorPolicy``.
    """
    # NOTE(review): 0.0001 / -0.001 presumably stand in for a signed index 0
    # -- confirm against the policy implementation.
    obs_table = np.array([
        0.0001, -1, 2, -3, -4,
        -11, 12, -13, 14, 15, 16,
        -5, 6, -7, 8, 9, 10,
        -17, 18,
        -19,
        -24, 25, -26, 27,
        -20, 21, -22, 23,
        28, 29, -30, 31, -32, -33,
        -40, 41, -42, 43, 44, 45,
        -34, 35, -36, 37, 38, 39,
        -46, 47,
        -48,
        -53, 54, -55, 56,
        -49, 50, -51, 52,
        58, 57,
        59,
    ])
    act_table = np.array([
        -6, 7, -8, 9, 10, 11,
        -0.001, 1, -2, 3, 4, 5,
        -12, 13,
        -14,
        -19, 20, -21, 22,
        -15, 16, -17, 18,
    ])

    return mlp_mirror_policy.MlpMirrorPolicy(
        name=name,
        ob_space=ob_space,
        ac_space=ac_space,
        hid_size=64,
        num_hid_layers=3,
        gmm_comp=1,
        mirror_loss=True,
        observation_permutation=obs_table,
        action_permutation=act_table,
    )
def policy_fn(name, ob_space, ac_space):
    """Construct an ``MlpMirrorPolicy`` with minimal permutation tables.

    The network uses 3 hidden layers of width 64, ``gmm_comp=1`` and
    ``mirror_loss=True``. The observation table is ``[1, 1]`` and the action
    table is ``[0.001]``.

    Args:
        name: Forwarded unchanged as ``name``.
        ob_space: Forwarded unchanged as ``ob_space``.
        ac_space: Forwarded unchanged as ``ac_space``.

    Returns:
        The object returned by ``mlp_mirror_policy.MlpMirrorPolicy``.
    """
    # NOTE(review): [1, 1] names index 1 twice and never index 0, so it is
    # not a permutation of 0..1 -- looks like a placeholder; confirm intent.
    obs_table = np.array([1, 1])
    act_table = np.array([0.001])

    return mlp_mirror_policy.MlpMirrorPolicy(
        name=name,
        ob_space=ob_space,
        ac_space=ac_space,
        hid_size=64,
        num_hid_layers=3,
        gmm_comp=1,
        mirror_loss=True,
        observation_permutation=obs_table,
        action_permutation=act_table,
    )
def policy_fn(name, ob_space, ac_space):
    """Construct an ``MlpMirrorPolicy`` with hard-coded permutation tables.

    The network uses 3 hidden layers of width 64, ``gmm_comp=1`` and
    ``mirror_loss=True``. The observation table has 44 signed entries and the
    action table has 15.

    Args:
        name: Forwarded unchanged as ``name``.
        ob_space: Forwarded unchanged as ``ob_space``.
        ac_space: Forwarded unchanged as ``ac_space``.

    Returns:
        The object returned by ``mlp_mirror_policy.MlpMirrorPolicy``.
    """
    # NOTE(review): 0.0001 / -0.0001 presumably stand in for a signed index 0
    # -- confirm against the policy implementation.
    obs_table = np.array([
        0.0001, -1, 2, -3, -4, -5, -6, 7,
        14, -15, -16, 17, 18, -19,
        8, -9, -10, 11, 12, -13,
        20, 21, -22, 23, -24, -25, -26, -27, 28,
        35, -36, -37, 38, 39, -40,
        29, -30, -31, 32, 33, -34,
        42, 41,
        43,
    ])
    act_table = np.array([
        -0.0001, -1, 2,
        9, -10, -11, 12, 13, -14,
        3, -4, -5, 6, 7, -8,
    ])

    return mlp_mirror_policy.MlpMirrorPolicy(
        name=name,
        ob_space=ob_space,
        ac_space=ac_space,
        hid_size=64,
        num_hid_layers=3,
        gmm_comp=1,
        mirror_loss=True,
        observation_permutation=obs_table,
        action_permutation=act_table,
    )
def policy_fn(name, ob_space, ac_space):
    """Construct an ``MlpMirrorPolicy`` with hard-coded permutation tables.

    The network uses 3 hidden layers of width 64, ``gmm_comp=1`` and
    ``mirror_loss=True``. The observation table has 48 signed entries and the
    action table has 16.

    Args:
        name: Forwarded unchanged as ``name``.
        ob_space: Forwarded unchanged as ``ob_space``.
        ac_space: Forwarded unchanged as ``ac_space``.

    Returns:
        The object returned by ``mlp_mirror_policy.MlpMirrorPolicy``.
    """
    # NOTE(review): 0.0001 presumably stands in for index 0 so the entry can
    # still carry a sign -- confirm against the policy implementation.
    obs_table = np.array([
        0.0001, -1, 2, -3, -4,
        9, 10, 11, 12,
        5, 6, 7, 8,
        17, 18, 19, 20,
        13, 14, 15, 16,
        21, 22, -23, 24, -25, -26,
        31, 32, 33, 34,
        27, 28, 29, 30,
        39, 40, 41, 42,
        35, 36, 37, 38,
        44, 43,
        46, 45,
        47,
    ])
    act_table = np.array([
        4, 5, 6, 7,
        0.0001, 1, 2, 3,
        12, 13, 14, 15,
        8, 9, 10, 11,
    ])

    return mlp_mirror_policy.MlpMirrorPolicy(
        name=name,
        ob_space=ob_space,
        ac_space=ac_space,
        hid_size=64,
        num_hid_layers=3,
        gmm_comp=1,
        mirror_loss=True,
        observation_permutation=obs_table,
        action_permutation=act_table,
    )