Example #1
def configure_logger(log_path=None, **kwargs):
    """
    Configure logger

    Args:
        log_path: (str) path to logger
        **kwargs: pointer to additional arguments if log_path is not given
    """
    if log_path is not None:
        logger.configure(log_path)
    else:
        logger.configure(**kwargs)
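
The helper above simply forwards its arguments to logger.configure(). A minimal usage sketch of the configure_logger defined above, assuming an OpenAI-Baselines-style logger (the directory name, format strings, and logged keys are illustrative):

from baselines import logger

# Configure with an explicit log directory ...
configure_logger("runs/experiment_1")
logger.logkv("loss", 0.5)
logger.dumpkvs()

# ... or fall back to keyword arguments accepted by logger.configure()
configure_logger(format_strs=["stdout", "csv"])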
Example #2
def __init__(self, path):
    self.step = 0
    self.episode = 0
    # configure the log file outputs
    configlist = ["stdout", "log", "tensorboard"]
    logger.configure(path, configlist)
    # note: assumes `path` already ends with a path separator
    self.csvwritter = CSVOutputFormat(path + "record_trajectory.csv")
    loggerCEN = logger.get_current().output_formats[configlist.index("tensorboard")]
    self.writer = loggerCEN.writer
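
The constructor above keeps two handles: a CSV writer for per-step trajectory records and the TensorBoard output format's underlying writer. A small sketch of driving the CSV writer on its own, assuming an OpenAI-Baselines-style logger in which each output format implements writekvs() (the file name and keys are illustrative):

from baselines.logger import CSVOutputFormat

csv_out = CSVOutputFormat("record_trajectory.csv")
# Each call appends one row; the columns are taken from the dict keys.
csv_out.writekvs({"step": 1, "reward": 0.0})
csv_out.writekvs({"step": 2, "reward": 1.5})
csv_out.close()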
Example #3
def main():
    import argparse
    import pprint
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--gpu',
                        action='store_true',
                        help='enable GPU mode',
                        default=False)
    parser.add_argument('--log', help='log directory', type=str, default='')
    parser.add_argument('--load',
                        help='load path of model',
                        type=str,
                        default='')
    parser.add_argument('--test',
                        action='store_true',
                        help='test mode',
                        default=False)
    parser.add_argument('--n_step', help='num steps per rollout', type=int, default=300)
    parser.add_argument('--n_roll', help='num rollouts', type=int, default=1)
    args = parser.parse_args()
    pp = pprint.PrettyPrinter(indent=1)
    print(pp.pformat(args))
    logger.configure(args.log)
    config = Config()
    env = config.env(frame_skip=config.frame_skip,
                     max_timestep=config.timestep_per_episode,
                     log_dir=args.log,
                     seed=args.seed)
    if args.test:
        test(env,
             args.gpu,
             policy=config.policy,
             load_path=args.load,
             num_hid_layers=config.num_hid_layers,
             hid_size=config.hid_size,
             n_steps=args.n_step,
             n=args.n_roll)
    else:
        train(env,
              args.gpu,
              num_timesteps=config.num_timesteps,
              seed=args.seed,
              config=config,
              log_dir=args.log,
              load_path=args.load)
Example #4
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env',
                        help='environment ID',
                        default='PongNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--gpu',
                        action='store_true',
                        help='enable GPU mode',
                        default=False)
    parser.add_argument('--log',
                        help='log directory',
                        type=str,
                        default='logs')
    args = parser.parse_args()
    logger.configure(args.log)
    config = Config()
    train(args.env,
          args.gpu,
          num_timesteps=config.num_timesteps,
          seed=args.seed,
          config=config)
Example #5
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--checkpoint', help='Checkpoint file.'
    )  # e.g. "models/mevea/mantsinen/ppo/model_checkpoints/rl_model_5001216_steps.zip"
    # caution: argparse's type=bool converts any non-empty string
    # (including 'False') to True, so this flag is effectively always True
    # unless an empty string is passed
    parser.add_argument('-s',
                        '--save',
                        type=bool,
                        help='Save new training steps?',
                        default=True)
    args = parser.parse_args()

    # configure logger

    # NOTE: the environment variable name is missing here; with an
    # OpenAI-Baselines-style logger this would typically be OPENAI_LOG_FORMAT
    format_strs = os.getenv('', 'stdout,log,csv').split(',')
    log_dir = osp.join(os.path.abspath(model_output), 'sac')
    logger.configure(log_dir, format_strs)

    # check that server is running

    while not is_server_running(server):
        print('Start the server: python3 env_server.py')
        sleep(sleep_interval)

    # prepare training data

    trajectory_files = [
        osp.join(trajectory_dir, fpath) for fpath in os.listdir(trajectory_dir)
        if fpath.endswith('csv')
    ]
    bc_train, bc_val, waypoints = prepare_trajectories(
        signal_dir,
Example #6
def configure_logger(log_path, **kwargs):
    # Only the MPI root process (rank 0) gets the extra logger options;
    # every other rank configures the same path with defaults.
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        logger.configure(log_path, **kwargs)
    else:
        logger.configure(log_path)
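
A closely related pattern in Baselines-style training scripts silences every rank except the root instead of giving the root extra options. A sketch under that assumption (same MPI and logger imports as above; the function name is hypothetical):

def configure_logger_root_only(log_path):
    # Rank 0 logs normally; all other ranks keep the path but write no output.
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        logger.configure(log_path)
    else:
        logger.configure(log_path, format_strs=[])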
Example #7
def run(mode, render, render_eval, verbose_eval, sanity_run, env_kwargs,
        model_kwargs, train_kwargs):
    if sanity_run:
        # Mode to sanity check the basic code.
        # Fixed seed and logging dir.
        # Dynamic setting of nb_rollout_steps and nb_train_steps in training.train() is disabled.
        print('SANITY CHECK MODE!!!')

    # Configure MPI, logging, random seeds, etc.
    mpi_rank = MPI.COMM_WORLD.Get_rank()
    mpi_size = MPI.COMM_WORLD.Get_size()

    if mpi_rank == 0:
        logger.configure(dir='logs' if sanity_run else
                         datetime.datetime.now().strftime("train_%m%d_%H%M"))
        logdir = logger.get_dir()
    else:
        logger.set_level(logger.DISABLED)
        logdir = None
    logdir = MPI.COMM_WORLD.bcast(logdir, root=0)

    start_time = time.time()
    # fixed seed when running sanity check, same seed hourly for training.
    seed = 1000000 * mpi_rank
    seed += int(start_time) // 3600 if not sanity_run else 0

    seed_list = MPI.COMM_WORLD.gather(seed, root=0)
    logger.info('mpi_size {}: seeds={}, logdir={}'.format(
        mpi_size, seed_list, logger.get_dir()))

    # Create envs.
    envs = []
    if mode in [MODE_TRAIN]:
        train_env = cust_env.ProsEnvMon(
            visualize=render,
            seed=seed,
            fn_step=None,
            fn_epis=logdir and os.path.join(logdir, '%d' % mpi_rank),
            reset_dflt_interval=2,
            **env_kwargs)
        logger.info('action, observation space:', train_env.action_space.shape,
                    train_env.observation_space.shape)
        envs.append(train_env)
    else:
        train_env = None

    # Always run eval_env: either in evaluation mode during MODE_TRAIN, or in MODE_SAMPLE / MODE_TEST.
    # Reset to random states (reset_dflt_interval=0) in MODE_SAMPLE,
    # reset to the default state (reset_dflt_interval=1) in evaluation of MODE_TRAIN or in MODE_TEST.
    reset_dflt_interval = 0 if mode in [MODE_SAMPLE] else 1
    eval_env = cust_env.ProsEnvMon(
        visualize=render_eval,
        seed=seed,
        fn_step=logdir and os.path.join(logdir, 'eval_step_%d.csv' % mpi_rank),
        fn_epis=logdir and os.path.join(logdir, 'eval_%d' % mpi_rank),
        reset_dflt_interval=reset_dflt_interval,
        verbose=verbose_eval,
        **env_kwargs)
    envs.append(eval_env)

    # Create DDPG agent
    tf.reset_default_graph()
    set_global_seeds(seed)
    assert (eval_env is not None), 'Empty Eval Environment!'

    action_range = (min(eval_env.action_space.low),
                    max(eval_env.action_space.high))
    logger.info('\naction_range', action_range)
    nb_demo_kine, nb_key_states = eval_env.obs_cust_params
    agent = ddpg.DDPG(eval_env.observation_space.shape,
                      eval_env.action_space.shape,
                      nb_demo_kine,
                      nb_key_states,
                      action_range=action_range,
                      save_ckpt=mpi_rank == 0,
                      **model_kwargs)
    logger.debug('Using agent with the following configuration:')
    logger.debug(str(agent.__dict__.items()))

    # Set up agent mimic reward interface, for environment
    for env in envs:
        env.set_agent_intf_fp(agent.get_mimic_rwd)

    # Run..
    logger.info('\nEnv params:', env_kwargs)
    logger.info('Model params:', model_kwargs)
    if mode == MODE_TRAIN:
        logger.info('Start training', train_kwargs)
        training.train(train_env,
                       eval_env,
                       agent,
                       render=render,
                       render_eval=render_eval,
                       sanity_run=sanity_run,
                       **train_kwargs)

    elif mode == MODE_SAMPLE:
        sampling.sample(eval_env, agent, render=render_eval, **train_kwargs)
    else:
        training.test(eval_env, agent, render_eval=render_eval, **train_kwargs)

    # Close up.
    if train_env:
        train_env.close()
    if eval_env:
        eval_env.close()

    mpi_complete(start_time, mpi_rank, mpi_size, non_blocking_mpi=True)
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env',
                        help='environment ID',
                        type=str,
                        default='MontezumaRevengeNoFrameskip-v4')
    parser.add_argument(
        '--env_type',
        help='type of environment, used when the environment type '
        'cannot be automatically determined',
        type=str,
        default='atari')
    parser.add_argument('--seed', help='RNG seed', type=int, default=None)
    parser.add_argument('--num_timesteps', help='', type=float, default=1e6)
    parser.add_argument('--pre_train_timesteps',
                        help='',
                        type=float,
                        default=750000)
    parser.add_argument('--max_episode_steps',
                        help='',
                        type=int,
                        default=10000)
    parser.add_argument('--network', help='', type=str, default='cnn')
    parser.add_argument('--save_path',
                        help='Path to save trained model to',
                        default='data/temp',
                        type=str)
    parser.add_argument('--load_path',
                        help='Path to load trained model from',
                        default='data/temp',
                        type=str)
    parser.add_argument('--save_video_interval',
                        help='Save video every x steps (0 = disabled)',
                        default=0,
                        type=int)
    parser.add_argument('--save_video_length',
                        help='Length of recorded video. Default: 2000',
                        default=2000,
                        type=int)
    parser.add_argument(
        '--demo_path',
        help='Path to the demonstration data file.',
        default="data/demo/human.MontezumaRevengeNoFrameskip-v4.pkl",
        type=str)
    parser.add_argument('--log_path',
                        help='Path to save log to',
                        default='data/logs',
                        type=str)
    parser.add_argument('--play', default=False, action='store_true')
    args = parser.parse_args()

    logger.configure(args.log_path)
    model, env = train(args)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        obs = np.expand_dims(np.array(obs), axis=0)

        state = model.initial_state if hasattr(model,
                                               'initial_state') else None

        episode_rew = np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions.numpy())
            obs = np.expand_dims(np.array(obs), axis=0)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0
                    env.reset()
    env.close()

    return model
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env',
                        help='environment ID',
                        type=str,
                        default='BreakoutNoFrameskip-v4')
    parser.add_argument(
        '--env_type',
        help='type of environment, used when the environment type '
        'cannot be automatically determined',
        type=str,
        default='atari')
    parser.add_argument('--seed', help='RNG seed', type=int, default=None)
    parser.add_argument('--num_timesteps', help='', type=float, default=2e6)
    parser.add_argument('--pre_train_timesteps',
                        help='',
                        type=float,
                        default=100000)
    parser.add_argument('--max_episode_steps',
                        help='',
                        type=int,
                        default=10000)
    parser.add_argument('--network', help='', type=str, default='cnn')
    parser.add_argument('--save_path',
                        help='Path to save trained model to',
                        default='data/temp',
                        type=str)
    parser.add_argument('--load_path',
                        help='Path to load trained model from',
                        default='data/temp',
                        type=str)
    parser.add_argument('--save_video_interval',
                        help='Save video every x episodes (0 = disabled)',
                        default=10,
                        type=int)
    parser.add_argument('--save_video_length',
                        help='Length of recorded video. Default: 2000',
                        default=2000,
                        type=int)
    parser.add_argument(
        '--demo_path',
        help='Path to the demonstration data file.',
        default="data/demo/human.BreakoutNoFrameskip-v4.episodic.pkl",
        type=str)
    parser.add_argument('--log_path',
                        help='Path to save log to',
                        default='data/logs',
                        type=str)
    parser.add_argument('--play', default=False, action='store_true')
    parser.add_argument('--batch_size',
                        help='batch size for both pretraining and training',
                        type=int,
                        default=64)
    parser.add_argument('--buffer_size',
                        help='experience replay buffer size',
                        type=float,
                        default=5e5)
    parser.add_argument(
        '--exploration_fraction',
        help='anneal exploration epsilon for this fraction of total '
        'training steps',
        type=float,
        default=0.1)
    parser.add_argument('--exploration_final_eps',
                        help='exploration epsilon after annealing',
                        type=float,
                        default=0.1)
    parser.add_argument('--epsilon_schedule',
                        help='linear or constant',
                        type=str,
                        default='linear')
    parser.add_argument('--lr', help='learning rate', type=float, default=5e-4)
    parser.add_argument('--print_freq',
                        help='print every x episodes',
                        type=int,
                        default=100)
    args = parser.parse_args()

    logger.configure(args.log_path)
    model, env = train(args)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        obs = np.expand_dims(np.array(obs), axis=0)

        state = model.initial_state if hasattr(model,
                                               'initial_state') else None

        episode_rew = np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions.numpy())
            obs = np.expand_dims(np.array(obs), axis=0)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0
                    env.reset()
    env.close()

    return model
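
All of the examples above ultimately funnel into logger.configure(). A minimal self-contained sketch of that call and the key/value logging that typically follows it, assuming an OpenAI-Baselines-style logger (the directory, formats, and keys are illustrative):

from baselines import logger

# Write stdout, CSV, and TensorBoard output under data/logs.
logger.configure(dir='data/logs', format_strs=['stdout', 'csv', 'tensorboard'])
for step in range(3):
    logger.logkv('step', step)
    logger.logkv('reward', 0.5 * step)
    logger.dumpkvs()  # flush one row/summary per iteration
logger.log('done')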