def main(args):
    """Parse command-line arguments and launch COPOS training.

    Args:
        args: raw argv-style list of command-line tokens.
    """
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    # Extra key=value options are parsed here but not forwarded to
    # train_copos — presumably kept for parity with the other run scripts.
    extra_args = parse_cmdline_kwargs(unknown_args)
    print(args)
    # Removed commented-out env override (dead code):
    # it silently documented a debugging hack, not intended behavior.
    train_copos(args)
def main():
    """Entry point: split argv into known options and extra key=value pairs,
    then hand both to the trainer.
    """
    parser = common_arg_parser()
    parsed_args, leftover = parser.parse_known_args()
    # Unknown --key=value flags become a dict of typed values via run.parse.
    extra = {}
    for key, raw_value in parse_unknown_args(leftover).items():
        extra[key] = run.parse(raw_value)
    train(parsed_args, extra)
def main():
    """Train a model, optionally save it, and optionally replay it visually.

    Logging is configured only in the MPI rank-0 process; child processes
    (rank > 0) have logging disabled.
    """
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Configure logger, disable logging in child MPI processes (rank > 0).
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
    env.close()

    # Only rank 0 persists the trained model.
    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()

        def initialize_placeholders(nlstm=128, **kwargs):
            # Zero-filled recurrent state plus a "not done" mask.
            # NOTE(review): shape assumes a recurrent (LSTM) policy whose
            # state is 2*nlstm wide — confirm for non-recurrent policies.
            return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1,))

        state, dones = initialize_placeholders(**extra_args)
        # Replay loop runs until interrupted (Ctrl-C). The original trailing
        # env.close() after this loop was unreachable dead code and was removed.
        while True:
            actions, _, state, _ = model.step(obs, S=state, M=dones)
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                obs = env.reset()
def main(args):
    """Parse arguments through three chained parsers, train, optionally save
    and replay the trained model.

    Logging and config snapshotting happen only in the MPI rank-0 process;
    child processes (rank > 0) are silent.

    Args:
        args: raw argv-style list of command-line tokens.

    Returns:
        The trained model.
    """
    arg_parser = common_arg_parser()
    ppo_parser = ppo_arg_parser()
    network_parser = network_arg_parser()
    # Each parser consumes the options it knows and forwards the remainder.
    args, ppo_args = arg_parser.parse_known_args(args)
    ppo_args, network_args = ppo_parser.parse_known_args(ppo_args)
    network_args, extra_args = network_parser.parse_known_args(network_args)
    extra_args = parse_cmdline_kwargs(extra_args)

    dt = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    # NOTE(review): os.getenv returns None if OPENAI_LOGDIR is unset, and
    # osp.join(None, ...) raises — the env var is effectively required here.
    save_dir = osp.join(os.getenv('OPENAI_LOGDIR'), args.env, dt)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure(dir=save_dir)
        # Snapshot the full run configuration for reproducibility.
        with open(osp.join(logger.get_dir(), 'run.conf'), 'wt') as fh:
            print(datetime.now().isoformat(), file=fh)
            print(json.dumps(vars(args), indent=2), file=fh)
            print(json.dumps(vars(ppo_args), indent=2), file=fh)
            print(json.dumps(vars(network_args), indent=2), file=fh)
            if extra_args:
                # BUG FIX: parse_cmdline_kwargs returns a plain dict, so the
                # original vars(extra_args) raised TypeError whenever extra
                # args were supplied; dump the dict directly instead.
                print(json.dumps(extra_args, indent=2), file=fh)
    else:
        logger.configure(format_strs=[])  # disable logging
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, ppo_args, network_args, extra_args)

    # Only rank 0 persists the trained model.
    if args.save_path is not None and rank == 0:
        save_path = osp.join(save_dir, '{}_{}'.format(args.env, args.num_timesteps))
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        # Recurrent policies expose initial_state; others get no state arg.
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        # Replay loop runs until interrupted (Ctrl-C).
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            # Vectorized envs return per-env reward arrays; take env 0.
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()
    return model
def main(args):
    """Entry point: parse CLI options from *args* and run COPOS training."""
    parser = common_arg_parser()
    known, leftover = parser.parse_known_args(args)
    # Leftover --key=value flags are parsed into a dict (currently unused).
    extra = parse_cmdline_kwargs(leftover)
    print(known)
    train_copos(known)