Example #1
0
 def policy_fn(name, ob_space, ac_space):
     """Build a mirror-loss MLP policy for the given spaces.

     The policy class is picked from the environment's
     ``state_self_standardize`` flag: ``MlpMirrorNormsPolicy`` when it is
     truthy, ``MlpMirrorPolicy`` otherwise. Both classes are called with
     exactly the same keyword arguments.

     Args:
         name: Passed through to the policy constructor as ``name``.
         ob_space: Passed through as ``ob_space``.
         ac_space: Passed through as ``ac_space``.

     Returns:
         The constructed policy instance.
     """
     if env.env.env.state_self_standardize:
         policy_cls = mlp_mirror_norms_policy.MlpMirrorNormsPolicy
     else:
         policy_cls = mlp_mirror_policy.MlpMirrorPolicy

     policy_kwargs = dict(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         hid_size=args.hsize,
         num_hid_layers=args.layers,
         gmm_comp=1,
         mirror_loss=True,
         observation_permutation=obs_per,
         # 21-entry permutation. The 0.0001 entry makes the array float-typed.
         # NOTE(review): it presumably stands in for a positively-signed
         # index 0 -- confirm against the policy implementation.
         action_permutation=np.array([
             5, 6, 7, 8, 9, 0.0001, 1, 2, 3, 4, -10, 11, -12, 17, 18,
             19, 20, 13, 14, 15, 16
         ]),
     )
     return policy_cls(**policy_kwargs)
 def policy_fn(name, ob_space, ac_space):
     """Build a mirror-loss MLP policy with a 97-entry action permutation.

     The action permutation is the concatenation of three parts:
     the values 43..85, then 0.001 followed by 1..42, then an explicit
     11-entry signed list. The policy class is picked from the
     environment's ``state_self_standardize`` flag: ``MlpMirrorNormsPolicy``
     when it is truthy, ``MlpMirrorPolicy`` otherwise.

     Args:
         name: Passed through to the policy constructor as ``name``.
         ob_space: Passed through as ``ob_space``.
         ac_space: Passed through as ``ac_space``.

     Returns:
         The constructed policy instance.
     """
     old_act_permute = [-86, 87, -88, 93, 94, 95, 96, 89, 90, 91, 92]

     # The array must be float-typed: writing 0.001 into an integer array
     # truncates it to 0, silently discarding the assignment below.
     mus_act_l = np.arange(43, dtype=float)
     # Computed before the overwrite so the first entry here stays 43.
     mus_act_r = mus_act_l + 43
     # NOTE(review): 0.001 presumably marks index 0 with a positive sign
     # (a plain 0 carries no sign) -- confirm against the policy class.
     mus_act_l[0] = 0.001
     act_permute = np.concatenate([mus_act_r, mus_act_l, old_act_permute])

     if env.env.env.state_self_standardize:
         policy_cls = mlp_mirror_norms_policy.MlpMirrorNormsPolicy
     else:
         policy_cls = mlp_mirror_policy.MlpMirrorPolicy

     return policy_cls(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         hid_size=args.hsize,
         num_hid_layers=args.layers,
         gmm_comp=1,
         mirror_loss=True,
         observation_permutation=obs_per,
         action_permutation=act_permute)