import argparse

import matplotlib

# FocalPointTaskUsEnv, envs.logger, env_fn, cnn_actor_critic, AC_KWARGS, the
# N_* constants and vpg are defined elsewhere in this project.


def main():
    matplotlib.use('agg')
    parser = argparse.ArgumentParser(
        description="Train agent in env: %s" % FocalPointTaskUsEnv.__name__)
    parser.add_argument(
        "--exp_dir", dest="exp_dir",
        help="Where to put all information about the experiment",
        required=True)
    args = parser.parse_args()
    # Log actions and states every step, render the state every 500 steps.
    trajectory_logger = envs.logger.TrajectoryLogger(
        log_dir=".",
        log_action_csv_freq=1,
        log_state_csv_freq=1,
        log_state_render_freq=500)
    spinup_logger_kwargs = dict(output_dir=".", exp_name='log_files')
    env_builder = lambda: env_fn(trajectory_logger)
    vpg(env_fn=env_builder,
        actor_critic=cnn_actor_critic,
        ac_kwargs=AC_KWARGS,
        steps_per_epoch=N_STEPS_PER_EPOCH,
        epochs=EPOCHS,
        max_ep_len=N_STEPS_PER_EPISODE,
        logger_kwargs=spinup_logger_kwargs,
        save_freq=200,
        lam=0.97)
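# Usage sketch (the script name is hypothetical; --exp_dir is the flag defined
# by the parser above):
#   python train_focal_point_vpg.py --exp_dir experiments/run1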
from spinup import vpg
import tensorflow as tf
import roboschool
import gym

env_fn = lambda: gym.make('RoboschoolAnt-v1')

ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)
logger_kwargs = dict(output_dir='data/vpg_bench/seed20', exp_name='vpg_ant')

vpg(env_fn=env_fn,
    ac_kwargs=ac_kwargs,
    seed=20,
    steps_per_epoch=5000,
    epochs=250,
    logger_kwargs=logger_kwargs)

# test on seed 10, 20
# python spinningup/spinup/utils/plot.py src/data/ppo_bench
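# Hedged evaluation sketch (assumes the run above saved its policy and
# environment under 'data/vpg_bench/seed20'; load_policy and run_policy come
# from spinup.utils.test_policy):
from spinup.utils.test_policy import load_policy, run_policy

env, get_action = load_policy('data/vpg_bench/seed20')
run_policy(env, get_action, num_episodes=5, render=True)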
# string specifying the policy gradient algorithm: vpg, ppo, trpo
# (sys, seed, epochs, steps_per_epoch, env_fn, ac_kwargs, vpg and ppo are set up
# earlier in this script)
algorithm = sys.argv[1]

# train with vanilla policy gradient
if algorithm == 'vpg':
    lam = sys.argv[2]
    exp_name = 'll_vpg_seed' + str(seed) + '_epochs' + str(epochs) + '_lam' + lam
    logger_kwargs = dict(output_dir='data_spinning_up/' + exp_name + '/',
                         exp_name=exp_name)
    vpg(env_fn=env_fn,
        ac_kwargs=ac_kwargs,
        max_ep_len=1000,
        gamma=0.99,
        seed=seed,
        steps_per_epoch=steps_per_epoch,
        pi_lr=0.005,
        vf_lr=0.005,
        epochs=epochs,
        logger_kwargs=logger_kwargs,
        lam=float(lam))

# train with PPO
if algorithm == 'ppo':
    clip_ratio = sys.argv[2]
    target_kl = sys.argv[3]
    exp_name = 'll_ppo_seed' + str(seed) + '_epochs' + str(epochs)
    exp_name += '_cr' + clip_ratio + '_tk' + target_kl
    logger_kwargs = dict(output_dir='data_spinning_up/' + exp_name + '/',
                         exp_name=exp_name)
    ppo(env_fn=env_fn,
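# Usage sketch (the script name is hypothetical; the positional arguments match
# the sys.argv parsing above):
#   python train_ll.py vpg 0.97
#   python train_ll.py ppo 0.2 0.01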
from gridworld_env import *
import spinup
from spinup.utils.test_policy import load_policy, run_policy


class VpgAgent(object):
    """Thin wrapper exposing a saved spinup policy through an act() interface."""

    def __init__(self):
        _, self.get_action = load_policy('log2/')

    def act(self, obs, *argv):
        action = self.get_action(obs)
        return action


if __name__ == '__main__':
    spinup.vpg(env_fn)

# Commented-out manual rollout of the saved policy:
'''
_, get_action = load_policy('log2/')
env = env_fn()
obs = env.reset()
env.render()
n_steps = 20
for step in range(n_steps):
    print("Step {}".format(step + 1))
    action = get_action(obs)
    obs, reward, done, info = env.step(action)
    print('action=', action, 'obs=', obs, 'reward=', reward, 'done=', done)
    env.render()
    if done:
        print("Goal reached!", "reward=", reward)
'''
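# Hedged alternative to the manual loop above: spinup's run_policy helper
# (already imported) can replay the saved policy, assuming 'log2/' also contains
# the saved environment; otherwise pass env_fn() as the first argument instead.
# env, get_action = load_policy('log2/')
# run_policy(env, get_action, num_episodes=5, render=True)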
import spinup
from spinup import vpg_pytorch as vpg
import torch
import gym
import gridworlds

env_fn = lambda: gym.make('gridworld-v0')

ac_kwargs = dict(hidden_sizes=[32], activation=torch.nn.ReLU)
logger_kwargs = dict(output_dir='vpg_results', exp_name='experiment_name')

vpg(env_fn=env_fn,
    ac_kwargs=ac_kwargs,
    steps_per_epoch=5000,
    epochs=250,
    gamma=0.9,
    logger_kwargs=logger_kwargs)
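# Hedged evaluation sketch for the PyTorch run above (assumes spinup's default
# PyTorch save layout, i.e. the trained actor-critic at
# 'vpg_results/pyt_save/model.pt'):
import torch
import gym
import gridworlds

ac = torch.load('vpg_results/pyt_save/model.pt')
env = gym.make('gridworld-v0')
obs = env.reset()
for _ in range(200):
    action = ac.act(torch.as_tensor(obs, dtype=torch.float32))
    obs, reward, done, _ = env.step(action)
    env.render()
    if done:
        obs = env.reset()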
from functools import partial
import os
import time
from datetime import datetime as dt

import spinup
from simple_env import simpleEnv
from spinup.utils.mpi_tools import mpi_fork

if __name__ == '__main__':
    exp_string = f"vpg-{dt.now().strftime('%d%m%y-%H%M')}"
    env_fn = partial(simpleEnv, rm_size=5)
    # Re-launch this script across 4 MPI processes before training starts.
    mpi_fork(4)
    spinup.vpg(env_fn=env_fn,
               seed=int(time.time()),
               steps_per_epoch=1000,
               epochs=250,
               gamma=0.99,
               pi_lr=0.1e-3,
               vf_lr=0.8e-3,
               train_v_iters=80,
               lam=0.97,
               max_ep_len=1000,
               logger_kwargs={
                   "output_dir": os.path.join("results", exp_string),
                   "exp_name": exp_string
               },
               save_freq=50)
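# Hedged follow-up (assumes the spinup CLI is available; the results directory
# is the logger output_dir constructed above):
#   python -m spinup.run test_policy results/<exp_string>
#   python -m spinup.run plot results/<exp_string>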