def configure_logger(log_path=None, **kwargs):
    """Configure the logger.

    Args:
        log_path: (str) path to the log directory
        **kwargs: additional keyword arguments forwarded to logger.configure
            when log_path is not given
    """
    if log_path is not None:
        logger.configure(log_path)
    else:
        logger.configure(**kwargs)
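# A minimal usage sketch for configure_logger above (hypothetical call sites; assumes a
# baselines-style logger whose configure() accepts a format_strs keyword argument):
configure_logger('/tmp/experiment_logs')                # explicit log directory
configure_logger(format_strs=['stdout', 'csv'])         # no path: kwargs forwarded to logger.configure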
def __init__(self, path):
    self.step = 0
    self.episode = 0
    # Configure the log file outputs.
    configlist = ["stdout", "log", "tensorboard"]
    logger.configure(path, configlist)
    self.csvwritter = CSVOutputFormat(path + "record_trajectory.csv")
    # Grab the TensorBoard output format and keep a handle on its underlying writer.
    tb_output = logger.get_current().output_formats[configlist.index("tensorboard")]
    self.writer = tb_output.writer
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--gpu', action='store_true', help='enable GPU mode', default=False)
    parser.add_argument('--log', help='log directory', type=str, default='')
    parser.add_argument('--load', help='load path of model', type=str, default='')
    parser.add_argument('--test', action='store_true', help='test mode', default=False)
    parser.add_argument('--n_step', help='num steps per rollout', type=int, default=300)
    parser.add_argument('--n_roll', help='num rollouts', type=int, default=1)
    args = parser.parse_args()

    pp = pprint.PrettyPrinter(indent=1)
    print(pp.pformat(args))

    logger.configure(args.log)
    config = Config()
    env = config.env(frame_skip=config.frame_skip,
                     max_timestep=config.timestep_per_episode,
                     log_dir=args.log,
                     seed=args.seed)
    if args.test:
        test(env, args.gpu,
             policy=config.policy,
             load_path=args.load,
             num_hid_layers=config.num_hid_layers,
             hid_size=config.hid_size,
             n_steps=args.n_step,
             n=args.n_roll)
    else:
        train(env, args.gpu,
              num_timesteps=config.num_timesteps,
              seed=args.seed,
              config=config,
              log_dir=args.log,
              load_path=args.load)
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='PongNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--gpu', action='store_true', help='enable GPU mode', default=False)
    parser.add_argument('--log', help='log directory', type=str, default='logs')
    args = parser.parse_args()

    logger.configure(args.log)
    config = Config()
    train(args.env, args.gpu,
          num_timesteps=config.num_timesteps,
          seed=args.seed,
          config=config)
parser = argparse.ArgumentParser()
parser.add_argument(
    '-c', '--checkpoint', help='Checkpoint file.'
)  # e.g. "models/mevea/mantsinen/ppo/model_checkpoints/rl_model_5001216_steps.zip"
# NOTE: argparse's type=bool does not parse strings such as "False"; any non-empty value
# evaluates to True. See the str2bool sketch below for a common workaround.
parser.add_argument('-s', '--save', type=bool, help='Save new training steps?', default=True)
args = parser.parse_args()

# configure logger (the environment variable name appears to be missing here,
# so the default format string 'stdout,log,csv' is always used)
format_strs = os.getenv('', 'stdout,log,csv').split(',')
log_dir = osp.join(os.path.abspath(model_output), 'sac')
logger.configure(log_dir, format_strs)

# check that the server is running
while not is_server_running(server):
    print('Start the server: python3 env_server.py')
    sleep(sleep_interval)

# prepare training data
trajectory_files = [
    osp.join(trajectory_dir, fpath)
    for fpath in os.listdir(trajectory_dir) if fpath.endswith('csv')
]
bc_train, bc_val, waypoints = prepare_trajectories(
    signal_dir,
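# A minimal sketch (not part of the original script) of a common workaround for the
# type=bool pitfall noted above: a small converter lets "--save False" actually parse
# to False instead of bool('False') == True. Names here are illustrative.
def str2bool(value):
    """Parse common textual representations of a boolean."""
    if isinstance(value, bool):
        return value
    if value.lower() in ('yes', 'true', 't', '1'):
        return True
    if value.lower() in ('no', 'false', 'f', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got %r' % value)

# Usage (illustrative):
# parser.add_argument('-s', '--save', type=str2bool, default=True,
#                     help='Save new training steps?')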
def configure_logger(log_path, **kwargs):
    # Only the MPI rank-0 process gets the full logger configuration;
    # all other ranks configure with the path alone.
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        logger.configure(log_path, **kwargs)
    else:
        logger.configure(log_path)
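# A usage sketch (illustrative path and format list) for the MPI-aware helper above:
# every rank points the logger at the same directory, but only rank 0 forwards the
# extra keyword arguments such as format_strs.
configure_logger('logs/run0', format_strs=['stdout', 'log', 'csv'])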
def run(mode, render, render_eval, verbose_eval, sanity_run, env_kwargs,
        model_kwargs, train_kwargs):
    if sanity_run:
        # Mode to sanity check the basic code: fixed seed and logging dir.
        # Dynamic setting of nb_rollout_steps and nb_train_steps in training.train() is disabled.
        print('SANITY CHECK MODE!!!')

    # Configure MPI, logging, random seeds, etc.
    mpi_rank = MPI.COMM_WORLD.Get_rank()
    mpi_size = MPI.COMM_WORLD.Get_size()
    if mpi_rank == 0:
        logger.configure(dir='logs' if sanity_run else
                         datetime.datetime.now().strftime("train_%m%d_%H%M"))
        logdir = logger.get_dir()
    else:
        logger.set_level(logger.DISABLED)
        logdir = None
    logdir = MPI.COMM_WORLD.bcast(logdir, root=0)

    start_time = time.time()
    # Fixed seed when running the sanity check; otherwise the same seed hourly for training.
    seed = 1000000 * mpi_rank
    seed += int(start_time) // 3600 if not sanity_run else 0
    seed_list = MPI.COMM_WORLD.gather(seed, root=0)
    logger.info('mpi_size {}: seeds={}, logdir={}'.format(
        mpi_size, seed_list, logger.get_dir()))

    # Create envs.
    envs = []
    if mode in [MODE_TRAIN]:
        train_env = cust_env.ProsEnvMon(
            visualize=render,
            seed=seed,
            fn_step=None,
            fn_epis=logdir and os.path.join(logdir, '%d' % mpi_rank),
            reset_dflt_interval=2,
            **env_kwargs)
        logger.info('action, observation space:',
                    train_env.action_space.shape,
                    train_env.observation_space.shape)
        envs.append(train_env)
    else:
        train_env = None

    # Always run eval_env: in evaluation mode during MODE_TRAIN, or in MODE_SAMPLE / MODE_TEST.
    # Reset to random states (reset_dflt_interval=0) in MODE_SAMPLE;
    # reset to the default state (reset_dflt_interval=1) in evaluation of MODE_TRAIN, or in MODE_TEST.
    reset_dflt_interval = 0 if mode in [MODE_SAMPLE] else 1
    eval_env = cust_env.ProsEnvMon(
        visualize=render_eval,
        seed=seed,
        fn_step=logdir and os.path.join(logdir, 'eval_step_%d.csv' % mpi_rank),
        fn_epis=logdir and os.path.join(logdir, 'eval_%d' % mpi_rank),
        reset_dflt_interval=reset_dflt_interval,
        verbose=verbose_eval,
        **env_kwargs)
    envs.append(eval_env)

    # Create the DDPG agent.
    tf.reset_default_graph()
    set_global_seeds(seed)
    assert eval_env is not None, 'Empty Eval Environment!'
    action_range = (min(eval_env.action_space.low),
                    max(eval_env.action_space.high))
    logger.info('\naction_range', action_range)
    nb_demo_kine, nb_key_states = eval_env.obs_cust_params
    agent = ddpg.DDPG(eval_env.observation_space.shape,
                      eval_env.action_space.shape,
                      nb_demo_kine,
                      nb_key_states,
                      action_range=action_range,
                      save_ckpt=mpi_rank == 0,
                      **model_kwargs)
    logger.debug('Using agent with the following configuration:')
    logger.debug(str(agent.__dict__.items()))

    # Set up the agent's mimic-reward interface for each environment.
    for env in envs:
        env.set_agent_intf_fp(agent.get_mimic_rwd)

    # Run.
    logger.info('\nEnv params:', env_kwargs)
    logger.info('Model params:', model_kwargs)
    if mode == MODE_TRAIN:
        logger.info('Start training', train_kwargs)
        training.train(train_env, eval_env, agent,
                       render=render,
                       render_eval=render_eval,
                       sanity_run=sanity_run,
                       **train_kwargs)
    elif mode == MODE_SAMPLE:
        sampling.sample(eval_env, agent, render=render_eval, **train_kwargs)
    else:
        training.test(eval_env, agent, render_eval=render_eval, **train_kwargs)

    # Close up.
    if train_env:
        train_env.close()
    if eval_env:
        eval_env.close()
    mpi_complete(start_time, mpi_rank, mpi_size, non_blocking_mpi=True)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', help='environment ID', type=str,
                        default='MontezumaRevengeNoFrameskip-v4')
    parser.add_argument('--env_type',
                        help='type of environment, used when the environment type '
                        'cannot be automatically determined',
                        type=str, default='atari')
    parser.add_argument('--seed', help='RNG seed', type=int, default=None)
    parser.add_argument('--num_timesteps', help='total number of training timesteps',
                        type=float, default=1e6)
    parser.add_argument('--pre_train_timesteps', help='number of pre-training timesteps',
                        type=float, default=750000)
    parser.add_argument('--max_episode_steps', help='maximum steps per episode',
                        type=int, default=10000)
    parser.add_argument('--network', help='network architecture', type=str, default='cnn')
    parser.add_argument('--save_path', help='Path to save trained model to',
                        default='data/temp', type=str)
    parser.add_argument('--load_path', help='Path to load trained model from',
                        default='data/temp', type=str)
    parser.add_argument('--save_video_interval',
                        help='Save video every x steps (0 = disabled)',
                        default=0, type=int)
    parser.add_argument('--save_video_length',
                        help='Length of recorded video. Default: 2000',
                        default=2000, type=int)
    parser.add_argument('--demo_path', help='Path to demonstration data file.',
                        default="data/demo/human.MontezumaRevengeNoFrameskip-v4.pkl",
                        type=str)
    parser.add_argument('--log_path', help='Path to save log to',
                        default='data/logs', type=str)
    parser.add_argument('--play', default=False, action='store_true')
    args = parser.parse_args()

    logger.configure(args.log_path)
    model, env = train(args)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        obs = np.expand_dims(np.array(obs), axis=0)
        state = model.initial_state if hasattr(model, 'initial_state') else None
        episode_rew = np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions.numpy())
            obs = np.expand_dims(np.array(obs), axis=0)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0
                env.reset()
        env.close()

    return model
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', help='environment ID', type=str,
                        default='BreakoutNoFrameskip-v4')
    parser.add_argument('--env_type',
                        help='type of environment, used when the environment type '
                        'cannot be automatically determined',
                        type=str, default='atari')
    parser.add_argument('--seed', help='RNG seed', type=int, default=None)
    parser.add_argument('--num_timesteps', help='total number of training timesteps',
                        type=float, default=2e6)
    parser.add_argument('--pre_train_timesteps', help='number of pre-training timesteps',
                        type=float, default=100000)
    parser.add_argument('--max_episode_steps', help='maximum steps per episode',
                        type=int, default=10000)
    parser.add_argument('--network', help='network architecture', type=str, default='cnn')
    parser.add_argument('--save_path', help='Path to save trained model to',
                        default='data/temp', type=str)
    parser.add_argument('--load_path', help='Path to load trained model from',
                        default='data/temp', type=str)
    parser.add_argument('--save_video_interval',
                        help='Save video every x episodes (0 = disabled)',
                        default=10, type=int)
    parser.add_argument('--save_video_length',
                        help='Length of recorded video. Default: 2000',
                        default=2000, type=int)
    parser.add_argument('--demo_path', help='Path to demonstration data file.',
                        default="data/demo/human.BreakoutNoFrameskip-v4.episodic.pkl",
                        type=str)
    parser.add_argument('--log_path', help='Path to save log to',
                        default='data/logs', type=str)
    parser.add_argument('--play', default=False, action='store_true')
    parser.add_argument('--batch_size',
                        help='batch size for both pretraining and training',
                        type=int, default=64)
    parser.add_argument('--buffer_size', help='experience replay buffer size',
                        type=float, default=5e5)
    parser.add_argument('--exploration_fraction',
                        help='anneal exploration epsilon for this fraction of total training steps',
                        type=float, default=0.1)
    parser.add_argument('--exploration_final_eps',
                        help='exploration epsilon after annealing',
                        type=float, default=0.1)
    parser.add_argument('--epsilon_schedule', help='linear or constant',
                        type=str, default='linear')
    parser.add_argument('--lr', help='learning rate', type=float, default=5e-4)
    parser.add_argument('--print_freq', help='print every x episodes',
                        type=int, default=100)
    args = parser.parse_args()

    logger.configure(args.log_path)
    model, env = train(args)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        obs = np.expand_dims(np.array(obs), axis=0)
        state = model.initial_state if hasattr(model, 'initial_state') else None
        episode_rew = np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions.numpy())
            obs = np.expand_dims(np.array(obs), axis=0)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0
                env.reset()
        env.close()

    return model