Example No. 1
def make_leg_model(leg, env):
    # Build a single-leg environment that shares state with the full-robot env
    leg_env = gym.make('PhantomXLeg-v0')
    leg_env.set_info(env.info)
    leg_env.leg_name = leg
    policy = build_policy(leg_env, defaults['network'], **alg_kwargs)

    # PPO2 model sized with the same batch settings used during training
    model = ppo2.Model(policy=policy,
                       ob_space=leg_env.observation_space,
                       ac_space=leg_env.action_space,
                       nbatch_act=nenvs,
                       nbatch_train=nbatch_train,
                       nsteps=defaults['nsteps'],
                       ent_coef=defaults['ent_coef'],
                       vf_coef=defaults['vf_coef'],
                       max_grad_norm=defaults['max_grad_norm'])
    # Load the pre-trained per-leg checkpoint (the path prefix is empty here)
    model.load('' + leg + '/checkpoints/05000')
    return model
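A minimal usage sketch for make_leg_model, assuming a full-robot PhantomX-v0 environment as in Example No. 4; the leg identifiers below are placeholders, not names taken from the original code:

legs = ['lf', 'lm', 'lr', 'rf', 'rm', 'rr']   # hypothetical leg names
env = gym.make('PhantomX-v0')                 # full-robot environment
leg_models = {leg: make_leg_model(leg, env) for leg in legs}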
Example No. 2
def runner(leg, env):
    # Build a single-leg environment that shares state with the full-robot env
    leg_env = gym.make('PhantomXLeg-v0')
    leg_env.set_info(env.info)
    leg_env.leg_name = leg
    policy = build_policy(leg_env, defaults['network'], **alg_kwargs)

    model = ppo2.Model(policy=policy,
                       ob_space=leg_env.observation_space,
                       ac_space=leg_env.action_space,
                       nbatch_act=nenvs,
                       nbatch_train=nbatch_train,
                       nsteps=defaults['nsteps'],
                       ent_coef=defaults['ent_coef'],
                       vf_coef=defaults['vf_coef'],
                       max_grad_norm=defaults['max_grad_norm'])
    # Load the pre-trained per-leg checkpoint (the path prefix is empty here)
    model.load('' + leg + '/checkpoints/05000')
    obs = leg_env.reset()
    ep_reward = 0
    rewards = []
    episode = 0
    step = 0
    # Evaluate the trained policy for 100 episodes, accumulating per-episode returns
    while True:
        step += 1
        action, value_estimate, next_state, neglogp = model.step(obs)
        obs, reward, done, _ = leg_env.step(action[0])
        ep_reward += reward
        if done:
            obs = leg_env.reset()  # start the next episode from a fresh observation
            episode += 1
            # Log episode length and return
            print(step)
            print(ep_reward)
            rewards.append(ep_reward)
            step = 0
            ep_reward = 0
        if episode >= 100:
            break
    # Write summary statistics for the evaluation episodes to `filename`
    # (a module-level variable in the original script)
    rewards = np.array(rewards, dtype=float)
    f = open(filename, "w+")
    f.write("Variance: " + str(np.var(rewards)))
    f.write(",Median: " + str(statistics.median(rewards)))
    f.write(",Mean: " + str(np.mean(rewards)))
    f.close()
    # Keep the process alive after the evaluation has finished
    while True:
        time.sleep(2)
        print("DONE")
Example No. 3
def runner(leg, env):
    # Build a single-leg environment that shares state with the full-robot env
    leg_env = gym.make('PhantomXLeg-v0')
    leg_env.set_info(env.info)
    leg_env.leg_name = leg
    policy = build_policy(leg_env, defaults['network'], **alg_kwargs)

    model = ppo2.Model(policy=policy,
                       ob_space=leg_env.observation_space,
                       ac_space=leg_env.action_space,
                       nbatch_act=nenvs,
                       nbatch_train=nbatch_train,
                       nsteps=defaults['nsteps'],
                       ent_coef=defaults['ent_coef'],
                       vf_coef=defaults['vf_coef'],
                       max_grad_norm=defaults['max_grad_norm'])
    # Load the pre-trained per-leg checkpoint from the training run directory
    model.load(
        '/tmp/training_data/dockerv1.3/PhantomX-v0/dppo2_mlp/2019-12-03_17h05min/'
        + leg + '/checkpoints/07000')
    obs = leg_env.reset()
    # Roll the trained policy out indefinitely, throttled by a short sleep
    while True:
        action, value_estimate, next_state, neglogp = model.step(obs)
        obs, reward, done, _ = leg_env.step(action[0])
        time.sleep(1 / 1000)
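A small alternative sketch for the throttling above, assuming the goal is a roughly constant loop period rather than a fixed extra delay: it subtracts the time spent in model.step() and leg_env.step() from the sleep.

import time

def run_at_fixed_rate(model, leg_env, period=0.001):
    obs = leg_env.reset()
    while True:
        start = time.time()
        action, value_estimate, next_state, neglogp = model.step(obs)
        obs, reward, done, _ = leg_env.step(action[0])
        # Sleep only for the remainder of the period, if any is left
        time.sleep(max(0.0, period - (time.time() - start)))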
Example No. 4
    'num_layers': defaults['num_layers'],
    'num_hidden': defaults['num_hidden']
}
policy = build_policy(env, defaults['network'], **alg_kwargs)

# Batch sizing: one rollout collects nenvs * nsteps samples, which are split
# into nminibatches minibatches for each training update
nenvs = env.num_envs
ob_space = env.observation_space
ac_space = env.action_space
nbatch = nenvs * defaults['nsteps']
nbatch_train = nbatch // defaults['nminibatches']

make_model = lambda: ppo2.Model(policy=policy,
                                ob_space=ob_space,
                                ac_space=ac_space,
                                nbatch_act=nenvs,
                                nbatch_train=nbatch_train,
                                nsteps=defaults['nsteps'],
                                ent_coef=defaults['ent_coef'],
                                vf_coef=defaults['vf_coef'],
                                max_grad_norm=defaults['max_grad_norm'])

model = make_model()

# Optionally restore pre-trained weights before running
if defaults['trained_path'] is not None:
    model.load(defaults['trained_path'])

obs = env.reset()
# Run the trained policy at runtime using deterministic actions
loop = True
while loop:
    actions = model.step_deterministic(obs)[0]
    obs, reward, done, _ = env.step_runtime(actions)
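A quick sanity check of the batch-size arithmetic above, with hypothetical values standing in for the defaults dict:

nenvs_example = 8                               # number of parallel environments
nsteps_example = 2048                           # defaults['nsteps']
nminibatches_example = 4                        # defaults['nminibatches']
nbatch_example = nenvs_example * nsteps_example                 # 8 * 2048 = 16384 samples per update
nbatch_train_example = nbatch_example // nminibatches_example   # 16384 // 4 = 4096 samples per minibatch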