Python MlpMirrorNormsPolicy Examples

Programming Language: Python

Namespace/Package Name: baselines.ppo1.mlp_mirror_norms_policy

Method/Function: MlpMirrorNormsPolicy

Examples at hotexamples.com: 2

Python MlpMirrorNormsPolicy - 2 examples found. These are the top-rated real-world Python examples of baselines.ppo1.mlp_mirror_norms_policy.MlpMirrorNormsPolicy extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

 def policy_fn(name, ob_space, ac_space):
     """Build the mirror-symmetric MLP policy for the current environment.

     Uses ``MlpMirrorNormsPolicy`` when the wrapped environment reports
     ``state_self_standardize`` and ``MlpMirrorPolicy`` otherwise. Both
     classes receive exactly the same keyword arguments.

     Args:
         name: Forwarded as the policy's ``name``.
         ob_space: Forwarded as the policy's ``ob_space``.
         ac_space: Forwarded as the policy's ``ac_space``.

     Returns:
         The policy object constructed by the selected class.
     """
     # Only the class differs between the two cases, so pick it up front.
     if env.env.env.state_self_standardize:
         policy_cls = mlp_mirror_norms_policy.MlpMirrorNormsPolicy
     else:
         policy_cls = mlp_mirror_policy.MlpMirrorPolicy

     # NOTE(review): entries look like signed action indices, with 0.0001
     # presumably standing in for index 0 so it keeps a nonzero sign --
     # confirm against the policy implementation.
     mirrored_actions = [
         5, 6, 7, 8, 9,
         0.0001, 1, 2, 3, 4,
         -10, 11, -12,
         17, 18, 19, 20,
         13, 14, 15, 16,
     ]

     return policy_cls(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         hid_size=args.hsize,
         num_hid_layers=args.layers,
         gmm_comp=1,
         mirror_loss=True,
         observation_permutation=obs_per,
         action_permutation=np.array(mirrored_actions),
     )

Example #2

Show file

File: run_humanoid_wtoe_MD_running.py Project: a7b23/lrle-rl-examples

 def policy_fn(name, ob_space, ac_space):
     """Build the mirror-symmetric MLP policy for the current environment.

     Assembles the action permutation from two 43-entry halves (swapped with
     each other) followed by a fixed 11-entry tail, then constructs
     ``MlpMirrorNormsPolicy`` when the wrapped environment reports
     ``state_self_standardize`` and ``MlpMirrorPolicy`` otherwise. Both
     classes receive exactly the same keyword arguments.

     Args:
         name: Forwarded as the policy's ``name``.
         ob_space: Forwarded as the policy's ``ob_space``.
         ac_space: Forwarded as the policy's ``ac_space``.

     Returns:
         The policy object constructed by the selected class.
     """
     old_act_permute = [-86, 87, -88, 93, 94, 95, 96, 89, 90, 91, 92]

     # A float dtype is required here: np.arange(43) yields an integer array,
     # and writing 0.001 into an integer array is silently truncated to 0,
     # which would discard the sentinel assigned below.
     mus_act_l = np.arange(43, dtype=np.float64)
     mus_act_r = mus_act_l + 43  # computed before entry 0 is overwritten
     # NOTE(review): 0.001 presumably stands in for index 0 so the entry keeps
     # a nonzero sign -- confirm against the policy implementation.
     mus_act_l[0] = 0.001
     act_permute = np.concatenate([mus_act_r, mus_act_l, old_act_permute])

     # Only the class differs between the two cases, so pick it up front.
     if env.env.env.state_self_standardize:
         policy_cls = mlp_mirror_norms_policy.MlpMirrorNormsPolicy
     else:
         policy_cls = mlp_mirror_policy.MlpMirrorPolicy

     return policy_cls(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         hid_size=args.hsize,
         num_hid_layers=args.layers,
         gmm_comp=1,
         mirror_loss=True,
         observation_permutation=obs_per,
         action_permutation=act_permute,
     )