def main():
    """Resume (or start) TRPO training on the UR5 gripper env, then replay the agent.

    Loads an existing checkpoint from ``/tmp/gym/trpo_mpi/trpo.pkl``, continues
    training for 1e5 timesteps, saves the checkpoint back, and finally renders
    100 evaluation episodes of up to 200 steps each.

    NOTE(review): the checkpoint file must already exist at ``model_path`` —
    loading is unconditional; confirm this is the intended resume-only workflow.
    """
    env_id = 'UR5Gripper-v0'
    model_path = '/tmp/gym/trpo_mpi/'
    # args = mujoco_arg_parser().parse_args()
    # train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
    # train(env_id=env_id, num_timesteps=int(1e7), seed=0, model_path=model_path)
    env = gym.make(env_id)
    # Monitor wraps the env to record episode stats alongside the checkpoint.
    env = Monitor(env, model_path, allow_early_resets=True)
    # `load` is a classmethod that returns a NEW model; calling it on a fresh
    # instance discarded that instance and left the loaded model without an
    # env, which would make `.learn()` fail.  Load via the class and attach
    # the env explicitly so training can continue.
    model = TRPO.load(model_path + "trpo.pkl", env=env,
                      tensorboard_log=model_path, verbose=1)
    model.learn(total_timesteps=int(1e5), callback=callback)
    model.save(model_path + "trpo.pkl")
    # tf_util.save_state(model_path)

    # Enjoy trained agent: 100 episodes, up to 200 steps each.
    for episode in range(100):
        obs = env.reset()
        env.render()
        for _step in range(200):  # distinct name — original shadowed the episode index
            action, _states = model.predict(obs)
            obs, _reward, _done, _info = env.step(action)
            env.render()
# Directory used by the callback to drop intermediate models; create it up
# front so Monitor and the callback can write into it.
os.makedirs("./temp_models/", exist_ok=True)
env = Monitor(env, "./temp_models/", allow_early_resets=True)

if args.train:
    ##### TRAIN #####
    # Refuse to silently clobber an existing model file.
    check_overwrite(args.model)
    agent = SAC(MlpPolicy, env, verbose=1, tensorboard_log="./tensorboard_log/")
    agent.learn(
        total_timesteps=int(args.step),
        log_interval=10,
        tb_log_name="log",
        callback=callback.callback,
    )
    agent.save(MODELS_FOLDER_PATH)
else:
    #### TEST #####
    # Load the previously trained policy and run it forever, resetting on
    # episode end.  Actions are rescaled from [-1, 1] to the env's [0, 1].
    agent = SAC.load(MODELS_FOLDER_PATH)
    observation = env.reset()
    while True:
        act, _hidden = agent.predict(observation)
        observation, _rew, episode_over, _info = env.step(
            scale_range(act, -1, 1, 0, 1)
        )
        env.render()
        if episode_over:
            observation = env.reset()