return targets if __name__=="__main__": T.set_num_threads(1) ID = ''.join(random.choices(string.ascii_uppercase + string.digits, k=3)) params = {"iters": 500000, "batchsize": 20, "gamma": 0.99, "lambda_init" : 1., "lambda_min" : 0.3, "policy_lr": 0.0005, "latent_predictor_lr": 0.0005, "weight_decay" : 0.0001, "ppo": True, "ppo_update_iters": 6, "animate": False, "train" : True, "note" : "HP, active latent estimation", "ID" : ID} if socket.gethostname() == "goedel": params["animate"] = False params["train"] = True from src.envs.cartpole_pbt.hangpole import HangPoleBulletEnv env = HangPoleBulletEnv(animate=params["animate"], latent_input=False, action_input=False) # Test if params["train"]: print("Training") policy = policies.NN_PG(env, 16, obs_dim=env.obs_dim + env.latent_dim, tanh=False, std_fixed=True) latent_predictor = policies.RNN_PG(env, hid_dim=8, memory_dim=8, n_temp=2, obs_dim = env.obs_dim + env.act_dim, act_dim=env.latent_dim) print(params, env.obs_dim, env.act_dim, env.__class__.__name__, policy.__class__.__name__) train(env, policy, latent_predictor, params) else: print("Testing") policy = T.load('agents/HangPoleBulletEnv_NN_PG_GO5_pg.p') env.test(policy)
# perlin: P92

# Current experts w/ orientation rew:
# flat: KYH
# holes: 2CW
# tiles: YI7
# triangles: M3X
# Stairs: H1Y
# pipe: W01
# perlin: H03

# NOTE(review): this chunk was collapsed onto a single line; the structure
# below is reconstructed.  It reads as part of a script main block --
# `params`, `env`, `policies`, `train` and `T` are defined above this chunk
# (not visible here) -- TODO confirm against the full file.

# Test
if params["train"]:
    print("Training")
    policy = policies.NN_PG(env, 96)
    print(params, env.obs_dim, env.act_dim, env.__class__.__name__, policy.__class__.__name__)
    train(env, policy, params)
else:
    print("Testing")
    policy_name = "MQ7"  # LX3: joints + contacts + yaw
    # Saved-agent path is keyed on the env class name and the 3-char run ID.
    policy_path = 'agents/{}_NN_PG_{}_pg.p'.format(env.__class__.__name__, policy_name)
    policy = policies.NN_PG(env, 96)
    # Restores weights only (state_dict), not a whole pickled module.
    policy.load_state_dict(T.load(policy_path))
    env.test(policy, N=10)
    print(policy_path)
"weight_decay": 0.001, "ppo": True, "ppo_update_iters": 6, "animate": False, "train": False, "env_list": env_list, "note": "Score, 2E1", "ID": ID } if socket.gethostname() == "goedel": params["animate"] = False params["train"] = True from src.envs.hexapod_trossen_terrain_all import hexapod_trossen_terrain_all as hex_env env = hex_env.Hexapod(env_list=env_list, max_n_envs=1) r_lists = [] for i in range(5): # Test print("Training: {}/N".format(i + 1)) policy = policies.NN_PG(env, 64, tanh=False, std_fixed=True) print(params, env.obs_dim, env.act_dim, env.__class__.__name__, policy.__class__.__name__) r_list = train(env, policy, params) r_lists.append(np.array(r_list)) r_lists = np.stack(r_lists, 0) print(r_lists.shape) np.save("R_{}_{}".format(env_list, params["ID"]), r_lists)
"animate": True, "train": False, "note": "Supervised model learning", "ID": ID } if socket.gethostname() == "goedel": params["animate"] = False params["train"] = True from src.envs.cartpole_pbt.hangpole import HangPoleBulletEnv env = HangPoleBulletEnv(animate=params["animate"], latent_input=True, action_input=False) # Test if params["train"]: print("Training") model = policies.NN_PG(env, 16, obs_dim=env.obs_dim, act_dim=env.obs_dim) print(params, env.obs_dim, env.act_dim, env.__class__.__name__, model.__class__.__name__) train(env, model, params) else: print("Testing") policy_path = 'agents/HangPoleBulletEnv_NN_PG_ETX_pg.p' policy = T.load(policy_path) env.test(policy, slow=params["animate"], seed=1338) print(policy_path)