Example #1

import argparse

import matplotlib

# Project-local pieces assumed to come from the surrounding repository:
# envs.logger provides TrajectoryLogger; FocalPointTaskUsEnv, env_fn,
# cnn_actor_critic, AC_KWARGS, N_STEPS_PER_EPOCH, EPOCHS, and
# N_STEPS_PER_EPISODE are defined alongside this script.
import envs.logger
from spinup import vpg


def main():
    matplotlib.use('agg')

    parser = argparse.ArgumentParser(description="Train agent in env: %s" %
                                     FocalPointTaskUsEnv.__name__)
    parser.add_argument(
        "--exp_dir",
        dest="exp_dir",
        help="Where to put all information about the experiment",
        required=True)

    args = parser.parse_args()

    # Pass the CLI-provided experiment directory to both loggers; in the
    # original snippet args.exp_dir was parsed but never used.
    trajectory_logger = envs.logger.TrajectoryLogger(log_dir=args.exp_dir,
                                                     log_action_csv_freq=1,
                                                     log_state_csv_freq=1,
                                                     log_state_render_freq=500)
    spinup_logger_kwargs = dict(output_dir=args.exp_dir, exp_name='log_files')
    env_builder = lambda: env_fn(trajectory_logger)
    vpg(env_fn=env_builder,
        actor_critic=cnn_actor_critic,
        ac_kwargs=AC_KWARGS,
        steps_per_epoch=N_STEPS_PER_EPOCH,
        epochs=EPOCHS,
        max_ep_len=N_STEPS_PER_EPISODE,
        logger_kwargs=spinup_logger_kwargs,
        save_freq=200,
        lam=0.97)


if __name__ == '__main__':
    main()
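Assuming the script above is saved as train_focal_point.py (a hypothetical file name), it is launched with the required --exp_dir flag:

# python train_focal_point.py --exp_dir experiments/focal_point_run1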
Example #2

from spinup import vpg
import tensorflow as tf
import roboschool
import gym

env_fn = lambda: gym.make('RoboschoolAnt-v1')

ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)

logger_kwargs = dict(output_dir='data/vpg_bench/seed20', exp_name='vpg_ant')

vpg(env_fn=env_fn,
    ac_kwargs=ac_kwargs,
    seed=20,
    steps_per_epoch=5000,
    epochs=250,
    logger_kwargs=logger_kwargs)

# Test with seeds 10 and 20.

# python spinningup/spinup/utils/plot.py src/data/ppo_bench
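By analogy with the plotting command noted above, the VPG run written to data/vpg_bench can be plotted with the same utility (the path is assumed to match the output_dir used in this example):

# python spinningup/spinup/utils/plot.py data/vpg_bench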
Example #3

import sys

from spinup import vpg, ppo

# env_fn, ac_kwargs, seed, epochs, and steps_per_epoch are assumed to be
# defined earlier in the original script; this excerpt omits them.

# String specifying the policy gradient algorithm: vpg, ppo, trpo.
algorithm = sys.argv[1]

# train with vanilla policy gradient
if algorithm == 'vpg':
    lam = sys.argv[2]
    exp_name = 'll_vpg_seed' + str(seed) + '_epochs' + str(
        epochs) + '_lam' + lam
    logger_kwargs = dict(output_dir='data_spinning_up/' + exp_name + '/',
                         exp_name=exp_name)
    vpg(env_fn=env_fn,
        ac_kwargs=ac_kwargs,
        max_ep_len=1000,
        gamma=0.99,
        seed=seed,
        steps_per_epoch=steps_per_epoch,
        pi_lr=0.005,
        vf_lr=0.005,
        epochs=epochs,
        logger_kwargs=logger_kwargs,
        lam=float(lam))

# train with PPO
if algorithm == 'ppo':
    clip_ratio = sys.argv[2]
    target_kl = sys.argv[3]
    exp_name = 'll_ppo_seed' + str(seed) + '_epochs' + str(epochs)
    exp_name += '_cr' + clip_ratio + '_tk' + target_kl
    logger_kwargs = dict(output_dir='data_spinning_up/' + exp_name + '/',
                         exp_name=exp_name)
    # The original excerpt breaks off mid-call; the remaining keyword
    # arguments below mirror the vpg branch above and are an assumption,
    # not recovered source.
    ppo(env_fn=env_fn,
        ac_kwargs=ac_kwargs,
        max_ep_len=1000,
        gamma=0.99,
        seed=seed,
        steps_per_epoch=steps_per_epoch,
        pi_lr=0.005,
        vf_lr=0.005,
        epochs=epochs,
        logger_kwargs=logger_kwargs,
        clip_ratio=float(clip_ratio),
        target_kl=float(target_kl))
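Given the sys.argv handling above, the script (saved under a hypothetical name such as train_ll.py) would be launched with the algorithm name followed by its hyperparameters:

# python train_ll.py vpg 0.97
# python train_ll.py ppo 0.2 0.01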
Example #4

import spinup
# gridworld_env is a project-local module; the star import is assumed to
# provide the env_fn used below.
from gridworld_env import *
from spinup.utils.test_policy import load_policy, run_policy


class VpgAgent(object):
    def __init__(self):
        _, self.get_action = load_policy('log2/')

    def act(self, obs, *argv):
        action = self.get_action(obs)
        return action


if __name__ == '__main__':

    spinup.vpg(env_fn)
    '''
    _, get_action = load_policy('log2/')
    env = env_fn()
    obs = env.reset()
    env.render()

    n_steps = 20
    for step in range(n_steps):
        print("Step {}".format(step + 1))
        action = get_action(obs)
        obs, reward, done, info = env.step(action)
        print('action=', action, 'obs=', obs, 'reward=', reward, 'done=', done)
        env.render()
        if done:
            print("Goal reached!", "reward=", reward)
Example #5

import spinup
from spinup import vpg_pytorch as vpg
import torch
import gym
import gridworlds

env_fn = lambda: gym.make('gridworld-v0')

ac_kwargs = dict(hidden_sizes=[32], activation=torch.nn.ReLU)

logger_kwargs = dict(output_dir='vpg_results', exp_name='experiment_name')

vpg(env_fn=env_fn,
    ac_kwargs=ac_kwargs,
    steps_per_epoch=5000,
    epochs=250,
    gamma=0.9,
    logger_kwargs=logger_kwargs)
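Spinning Up also ships a command-line policy tester; assuming a standard installation, the run saved in vpg_results above can be replayed with:

# python -m spinup.run test_policy vpg_results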
Example #6

import os
from functools import partial

import spinup
from simple_env import simpleEnv
from datetime import datetime as dt
import time
from spinup.utils.mpi_tools import mpi_fork

if __name__ == '__main__':
    exp_string = f"vpg-{dt.now().strftime('%d%m%y-%H%M')}"
    env_fn = partial(simpleEnv, rm_size=5)

    mpi_fork(4)

    spinup.vpg(env_fn=env_fn,
               seed=int(time.time()),
               steps_per_epoch=1000,
               epochs=250,
               gamma=0.99,
               pi_lr=0.1e-3,
               vf_lr=0.8e-3,
               train_v_iters=80,
               lam=0.97,
               max_ep_len=1000,
               logger_kwargs={
                   "output_dir": os.path.join("results", exp_string),
                   "exp_name": exp_string
               },
               save_freq=50)
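A note on seeding the forked run: mpi_fork(4) relaunches the script under mpirun with four processes, so seed=int(time.time()) gives every worker the same base value. Spinup's algorithms offset the seed per process internally, but an explicit per-worker offset can be written with the proc_id helper from the same mpi_tools module (a sketch, not the original code):

import time

from spinup.utils.mpi_tools import proc_id

# Explicit per-worker seed offset; spinup's vpg already applies a similar
# internal offset (seed += 10000 * proc_id()), so this is illustrative only.
seed = int(time.time()) + 10000 * proc_id()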