import gym
import numpy as np
import statistics
import time

from baselines.common.policies import build_policy
from baselines.ppo2 import ppo2


def make_leg_model(leg, env):
    """Build a per-leg PPO2 model and restore its trained weights.

    `defaults`, `alg_kwargs`, `nenvs` and `nbatch_train` are module-level
    globals set up by the surrounding training script.
    """
    leg_env = gym.make('PhantomXLeg-v0')
    leg_env.set_info(env.info)
    leg_env.leg_name = leg
    policy = build_policy(leg_env, defaults['network'], **alg_kwargs)
    model = ppo2.Model(policy=policy,
                       ob_space=leg_env.observation_space,
                       ac_space=leg_env.action_space,
                       nbatch_act=nenvs,
                       nbatch_train=nbatch_train,
                       nsteps=defaults['nsteps'],
                       ent_coef=defaults['ent_coef'],
                       vf_coef=defaults['vf_coef'],
                       max_grad_norm=defaults['max_grad_norm'])
    # Load the per-leg checkpoint (directory prefix left empty here).
    model.load('' + leg + '/checkpoints/05000')
    return model
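# Usage sketch (not part of the original script): restore one trained model
# per leg with make_leg_model(). The six leg names below are hypothetical
# placeholders; the real names must match whatever `PhantomXLeg-v0` expects
# in its `leg_name` attribute.
LEG_NAMES = ['lf', 'lm', 'lr', 'rf', 'rm', 'rr']  # hypothetical leg names


def make_all_leg_models(env):
    # One independent PPO2 model per leg, keyed by leg name.
    return {leg: make_leg_model(leg, env) for leg in LEG_NAMES}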
def runner(leg, env):
    """Evaluate a trained per-leg policy for 100 episodes and log statistics."""
    leg_env = gym.make('PhantomXLeg-v0')
    leg_env.set_info(env.info)
    leg_env.leg_name = leg
    policy = build_policy(leg_env, defaults['network'], **alg_kwargs)
    model = ppo2.Model(policy=policy,
                       ob_space=leg_env.observation_space,
                       ac_space=leg_env.action_space,
                       nbatch_act=nenvs,
                       nbatch_train=nbatch_train,
                       nsteps=defaults['nsteps'],
                       ent_coef=defaults['ent_coef'],
                       vf_coef=defaults['vf_coef'],
                       max_grad_norm=defaults['max_grad_norm'])
    # Load the per-leg checkpoint (directory prefix left empty here).
    model.load('' + leg + '/checkpoints/05000')

    obs = leg_env.reset()
    ep_reward = 0
    rewards = []
    episode = 0
    step = 0
    while True:
        step += 1
        action, value_estimate, next_state, neglogp = model.step(obs)
        obs, reward, done, _ = leg_env.step(action[0])
        ep_reward += reward
        if done:
            obs = leg_env.reset()  # reassign so the next step uses a fresh observation
            episode += 1
            print(step)
            print(ep_reward)
            rewards.append(ep_reward)
            step = 0
            ep_reward = 0
            if episode >= 100:
                break

    # `filename` is a module-level global in the original script.
    rewards = np.array(rewards, dtype=float)
    with open(filename, "w+") as f:
        f.write("Variance: " + str(np.var(rewards)))
        f.write(",Median: " + str(statistics.median(rewards)))
        f.write(",Mean: " + str(np.mean(rewards)))

    # Keep the process alive so the simulation is not torn down.
    while True:
        time.sleep(2)
        print("DONE")
# A second `runner` variant (presumably from a separate script): plays back
# a fixed checkpoint indefinitely instead of collecting statistics.
def runner(leg, env):
    leg_env = gym.make('PhantomXLeg-v0')
    leg_env.set_info(env.info)
    leg_env.leg_name = leg
    policy = build_policy(leg_env, defaults['network'], **alg_kwargs)
    model = ppo2.Model(policy=policy,
                       ob_space=leg_env.observation_space,
                       ac_space=leg_env.action_space,
                       nbatch_act=nenvs,
                       nbatch_train=nbatch_train,
                       nsteps=defaults['nsteps'],
                       ent_coef=defaults['ent_coef'],
                       vf_coef=defaults['vf_coef'],
                       max_grad_norm=defaults['max_grad_norm'])
    model.load('/tmp/training_data/dockerv1.3/PhantomX-v0/dppo2_mlp/'
               '2019-12-03_17h05min/' + leg + '/checkpoints/07000')

    obs = leg_env.reset()
    while True:
        action, value_estimate, next_state, neglogp = model.step(obs)
        obs, reward, done, _ = leg_env.step(action[0])
        time.sleep(1 / 1000)  # throttle the playback loop (~1 ms per step)
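# Sketch under an assumption: runner(leg, env) takes a single leg, which
# suggests one runner per leg running concurrently. A thread per leg is one
# way to do that; LEG_NAMES reuses the hypothetical names defined above.
import threading


def launch_runners(env):
    threads = []
    for leg in LEG_NAMES:
        # daemon=True lets the process exit even though runner() never returns.
        t = threading.Thread(target=runner, args=(leg, env), daemon=True)
        t.start()
        threads.append(t)
    return threads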
alg_kwargs = {
    'num_layers': defaults['num_layers'],
    'num_hidden': defaults['num_hidden']
}
policy = build_policy(env, defaults['network'], **alg_kwargs)
nenvs = env.num_envs
ob_space = env.observation_space
ac_space = env.action_space
nbatch = nenvs * defaults['nsteps']
nbatch_train = nbatch // defaults['nminibatches']
make_model = lambda: ppo2.Model(policy=policy,
                                ob_space=ob_space,
                                ac_space=ac_space,
                                nbatch_act=nenvs,
                                nbatch_train=nbatch_train,
                                nsteps=defaults['nsteps'],
                                ent_coef=defaults['ent_coef'],
                                vf_coef=defaults['vf_coef'],
                                max_grad_norm=defaults['max_grad_norm'])
model = make_model()
if defaults['trained_path'] is not None:
    model.load(defaults['trained_path'])

obs = env.reset()
loop = True
while loop:
    # `step_deterministic` and `step_runtime` are project-specific extensions
    # of the baselines Model and gym env interfaces.
    actions = model.step_deterministic(obs)[0]
    obs, reward, done, _ = env.step_runtime(actions)