Beispiel #1
0
 def policy_fn(name, ob_space, ac_space):
     """Create the mirror policy for the given observation/action spaces.

     The policy class is chosen from ``env.env.env.state_self_standardize``:
     ``MlpMirrorNormsPolicy`` when it is truthy, ``MlpMirrorPolicy``
     otherwise. Both receive exactly the same keyword arguments.

     Args:
         name: Forwarded unchanged as the policy's ``name``.
         ob_space: Forwarded unchanged as the policy's ``ob_space``.
         ac_space: Forwarded unchanged as the policy's ``ac_space``.

     Returns:
         The constructed policy instance.
     """
     # Resolve the class first so the shared argument list is written once.
     if env.env.env.state_self_standardize:
         policy_cls = mlp_mirror_norms_policy.MlpMirrorNormsPolicy
     else:
         policy_cls = mlp_mirror_policy.MlpMirrorPolicy

     return policy_cls(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         hid_size=args.hsize,
         num_hid_layers=args.layers,
         gmm_comp=1,
         mirror_loss=True,
         observation_permutation=obs_per,
         # NOTE(review): 0.0001 presumably stands in for index 0 so the entry
         # keeps a non-zero sign -- confirm against the policy implementation.
         action_permutation=np.array([
             5, 6, 7, 8, 9, 0.0001, 1, 2, 3, 4,
             -10, 11, -12,
             17, 18, 19, 20, 13, 14, 15, 16,
         ]))
 def policy_fn(name, ob_space, ac_space):
     """Create the mirror policy with a left/right-swapped action permutation.

     The action permutation is the concatenation of three pieces: indices
     43..85, then indices 0..42 (with index 0 written as 0.001), then a fixed
     11-entry tail. The policy class is chosen from
     ``env.env.env.state_self_standardize``: ``MlpMirrorNormsPolicy`` when it
     is truthy, ``MlpMirrorPolicy`` otherwise.

     Args:
         name: Forwarded unchanged as the policy's ``name``.
         ob_space: Forwarded unchanged as the policy's ``ob_space``.
         ac_space: Forwarded unchanged as the policy's ``ac_space``.

     Returns:
         The constructed policy instance.
     """
     # Number of entries in each of the two index ranges being swapped.
     n_per_side = 43
     old_act_permute = [-86, 87, -88, 93, 94, 95, 96, 89, 90, 91, 92]

     # Build the index ranges as floats: assigning 0.001 into an integer
     # array would silently truncate it to 0 and lose the non-zero stand-in
     # for index 0 that the other permutations in this file rely on.
     mus_act_l = np.arange(n_per_side, dtype=float)
     # Derive the second range before index 0 is overwritten below.
     mus_act_r = mus_act_l + n_per_side
     # NOTE(review): the small positive value presumably keeps a usable sign
     # for index 0 -- confirm against the policy implementation.
     mus_act_l[0] = 0.001
     act_permute = np.concatenate([mus_act_r, mus_act_l, old_act_permute])

     # Both policy classes take identical arguments; only the class differs.
     if env.env.env.state_self_standardize:
         policy_cls = mlp_mirror_norms_policy.MlpMirrorNormsPolicy
     else:
         policy_cls = mlp_mirror_policy.MlpMirrorPolicy

     return policy_cls(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         hid_size=args.hsize,
         num_hid_layers=args.layers,
         gmm_comp=1,
         mirror_loss=True,
         observation_permutation=obs_per,
         action_permutation=act_permute)
Beispiel #3
0
 def policy_fn(name, ob_space, ac_space):
     """Create an ``MlpMirrorPolicy`` with fixed size and permutations.

     Uses 3 hidden layers of size 64, ``gmm_comp=1`` and ``mirror_loss=True``,
     with a 60-entry observation permutation and a 23-entry action
     permutation.

     Args:
         name: Forwarded unchanged as the policy's ``name``.
         ob_space: Forwarded unchanged as the policy's ``ob_space``.
         ac_space: Forwarded unchanged as the policy's ``ac_space``.

     Returns:
         The constructed ``MlpMirrorPolicy`` instance.
     """
     policy_cls = mlp_mirror_policy.MlpMirrorPolicy

     # NOTE(review): the 0.0001 / -0.001 entries presumably stand in for
     # index 0 so it keeps a sign -- confirm against MlpMirrorPolicy.
     obs_permutation = np.array([
         0.0001, -1, 2, -3, -4,
         -11, 12, -13, 14, 15, 16,
         -5, 6, -7, 8, 9, 10,
         -17, 18,
         -19,
         -24, 25, -26, 27,
         -20, 21, -22, 23,
         28, 29, -30, 31, -32, -33,
         -40, 41, -42, 43, 44, 45,
         -34, 35, -36, 37, 38, 39,
         -46, 47,
         -48,
         -53, 54, -55, 56,
         -49, 50, -51, 52,
         58, 57, 59,
     ])
     act_permutation = np.array([
         -6, 7, -8, 9, 10, 11,
         -0.001, 1, -2, 3, 4, 5,
         -12, 13,
         -14,
         -19, 20, -21, 22,
         -15, 16, -17, 18,
     ])

     return policy_cls(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         hid_size=64,
         num_hid_layers=3,
         gmm_comp=1,
         mirror_loss=True,
         observation_permutation=obs_permutation,
         action_permutation=act_permutation)
 def policy_fn(name, ob_space, ac_space):
     """Create an ``MlpMirrorPolicy`` with minimal permutations.

     Uses 3 hidden layers of size 64, ``gmm_comp=1`` and ``mirror_loss=True``.
     The observation permutation is the two-entry array ``[1, 1]`` and the
     action permutation is the single-entry array ``[0.001]``.

     Args:
         name: Forwarded unchanged as the policy's ``name``.
         ob_space: Forwarded unchanged as the policy's ``ob_space``.
         ac_space: Forwarded unchanged as the policy's ``ac_space``.

     Returns:
         The constructed ``MlpMirrorPolicy`` instance.
     """
     policy_cls = mlp_mirror_policy.MlpMirrorPolicy
     obs_permutation = np.array([1, 1])
     act_permutation = np.array([0.001])
     return policy_cls(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         hid_size=64,
         num_hid_layers=3,
         gmm_comp=1,
         mirror_loss=True,
         observation_permutation=obs_permutation,
         action_permutation=act_permutation)
Beispiel #5
0
 def policy_fn(name, ob_space, ac_space):
     """Create an ``MlpMirrorPolicy`` with fixed size and permutations.

     Uses 3 hidden layers of size 64, ``gmm_comp=1`` and ``mirror_loss=True``,
     with a 44-entry observation permutation and a 15-entry action
     permutation.

     Args:
         name: Forwarded unchanged as the policy's ``name``.
         ob_space: Forwarded unchanged as the policy's ``ob_space``.
         ac_space: Forwarded unchanged as the policy's ``ac_space``.

     Returns:
         The constructed ``MlpMirrorPolicy`` instance.
     """
     policy_cls = mlp_mirror_policy.MlpMirrorPolicy

     # NOTE(review): the +/-0.0001 entries presumably stand in for index 0
     # so it keeps a sign -- confirm against MlpMirrorPolicy.
     obs_permutation = np.array([
         0.0001, -1, 2, -3, -4, -5, -6, 7,
         14, -15, -16, 17, 18, -19,
         8, -9, -10, 11, 12, -13,
         20, 21, -22, 23, -24, -25, -26, -27, 28,
         35, -36, -37, 38, 39, -40,
         29, -30, -31, 32, 33, -34,
         42, 41, 43,
     ])
     act_permutation = np.array([
         -0.0001, -1, 2,
         9, -10, -11, 12, 13, -14,
         3, -4, -5, 6, 7, -8,
     ])

     return policy_cls(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         hid_size=64,
         num_hid_layers=3,
         gmm_comp=1,
         mirror_loss=True,
         observation_permutation=obs_permutation,
         action_permutation=act_permutation)
 def policy_fn(name, ob_space, ac_space):
     """Create an ``MlpMirrorPolicy`` with fixed size and permutations.

     Uses 3 hidden layers of size 64, ``gmm_comp=1`` and ``mirror_loss=True``,
     with a 48-entry observation permutation and a 16-entry action
     permutation.

     Args:
         name: Forwarded unchanged as the policy's ``name``.
         ob_space: Forwarded unchanged as the policy's ``ob_space``.
         ac_space: Forwarded unchanged as the policy's ``ac_space``.

     Returns:
         The constructed ``MlpMirrorPolicy`` instance.
     """
     policy_cls = mlp_mirror_policy.MlpMirrorPolicy

     # NOTE(review): the 0.0001 entries presumably stand in for index 0 so
     # it keeps a sign -- confirm against MlpMirrorPolicy.
     obs_permutation = np.array([
         0.0001, -1, 2, -3, -4,
         9, 10, 11, 12, 5, 6, 7, 8,
         17, 18, 19, 20, 13, 14, 15, 16,
         21, 22, -23, 24, -25, -26,
         31, 32, 33, 34, 27, 28, 29, 30,
         39, 40, 41, 42, 35, 36, 37, 38,
         44, 43, 46, 45, 47,
     ])
     act_permutation = np.array([
         4, 5, 6, 7, 0.0001, 1, 2, 3,
         12, 13, 14, 15, 8, 9, 10, 11,
     ])

     return policy_cls(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         hid_size=64,
         num_hid_layers=3,
         gmm_comp=1,
         mirror_loss=True,
         observation_permutation=obs_permutation,
         action_permutation=act_permutation)