Example #1
# Imports assumed for this snippet (standard library plus rlpyt's logger module):
from contextlib import contextmanager
import json
import os.path as osp

from rlpyt.utils.logging import logger


@contextmanager  # required: the bare ``yield`` below only works under ``with`` when decorated
def logger_context(log_dir, run_ID, name, log_params=None, snapshot_mode="none", snapshot_gap=50):
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_snapshot_gap(snapshot_gap)
    logger.set_log_tabular_only(False)
    log_dir = osp.join(log_dir, f"run_{run_ID}")
    exp_dir = osp.abspath(log_dir)
    tabular_log_file = osp.join(exp_dir, "progress.csv")
    text_log_file = osp.join(exp_dir, "debug.log")
    params_log_file = osp.join(exp_dir, "params.json")

    logger.set_snapshot_dir(exp_dir)
    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.push_prefix(f"{name}_{run_ID} ")

    if log_params is None:
        log_params = dict()
    log_params["name"] = name
    log_params["run_ID"] = run_ID
    with open(params_log_file, "w") as f:
        json.dump(log_params, f)

    yield

    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
Example #2
def initialize_logging(self):
    self._opt_infos = {k: list() for k in self.algo.opt_info_fields}
    self._start_time = self._last_time = time.time()
    self._cum_time = 0.0
    if self.snapshot_gap_intervals is not None:
        logger.set_snapshot_gap(self.snapshot_gap_intervals *
                                self.log_interval_updates)
    self.pbar = ProgBarCounter(self.log_interval_updates)
Example #3
File: rlpyt.py Project: qxcv/mtil
def make_logger_ctx(out_dir,
                    algo,
                    orig_env_name,
                    custom_run_name=None,
                    snapshot_gap=10,
                    **kwargs):
    # for logging & model-saving
    if custom_run_name is None:
        run_name = make_unique_run_name(algo, orig_env_name)
    else:
        run_name = custom_run_name
    logger.set_snapshot_gap(snapshot_gap)
    log_dir = os.path.abspath(out_dir)
    # this is irrelevant so long as it's a prefix of log_dir
    # FIXME: update rlpyt so that I can remove this LOG_DIR kludge.
    log_ctx.LOG_DIR = log_dir
    os.makedirs(out_dir, exist_ok=True)
    return log_ctx.logger_context(out_dir,
                                  run_ID=run_name,
                                  name="mtil",
                                  snapshot_mode="gap",
                                  **kwargs)
Example #4
@contextmanager  # assumes ``from contextlib import contextmanager``; needed for ``with`` usage
def logger_context(log_dir,
                   run_ID,
                   name,
                   log_params=None,
                   snapshot_mode="none",
                   override_prefix=False,
                   use_summary_writer=False,
                   use_wandb=False,
                   log_gap=1,
                   tags=None):
    """Use as context manager around calls to the runner's ``train()`` method.
    Sets up the logger directory and filenames.  Unless override_prefix is
    True, this function automatically prepends ``log_dir`` with the rlpyt
    logging directory and the date: `path-to-rlpyt/data/yyyymmdd/hhmmss`
    (`data/` is in the gitignore), and appends with `/run_{run_ID}` to
    separate multiple runs of the same settings. Saves hyperparameters
    provided in ``log_params`` to `params.json`, along with experiment `name`
    and `run_ID`.

    The runner calls on the logger to save the snapshot (which may include
    agent parameters) at every iteration, but the input ``snapshot_mode``
    sets how often the logger actually saves one. Possible modes include
    (but check inside the logger itself):
        * "none": don't save at all
        * "last": always save and overwrite the previous
        * "all": always save and keep each iteration
        * "gap": save periodically and keep each (will also need to set the gap, not done here) 

    The cleanup operations after the ``yield`` close files but might not be
    strictly necessary if not launching another training session in the same
    python process.
    """
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_snapshot_gap(log_gap)
    logger.set_log_tabular_only(False)
    log_dir = osp.join(log_dir, f"run_{run_ID}")
    exp_dir = osp.abspath(log_dir)
    if LOG_DIR != osp.commonpath([exp_dir, LOG_DIR]) and not override_prefix:
        print(f"logger_context received log_dir outside of {LOG_DIR}: "
              f"prepending by {LOG_DIR}/local/<yyyymmdd>/<hhmmss>/")
        exp_dir = get_log_dir(log_dir)
    tabular_log_file = osp.join(exp_dir, "progress.csv")
    text_log_file = osp.join(exp_dir, "debug.log")
    params_log_file = osp.join(exp_dir, "params.json")

    logger.set_snapshot_dir(exp_dir)
    if use_summary_writer:
        logger.set_tf_summary_writer(SummaryWriter(exp_dir))

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.push_prefix(f"{name}_{run_ID} ")

    if log_params is None:
        log_params = dict()
    log_params["name"] = name
    log_params["run_ID"] = run_ID
    with open(params_log_file, "w") as f:
        json.dump(log_params, f, default=lambda o: type(o).__name__)
    if use_wandb:
        logger.use_wandb()
        wandb.init(name=name,
                   project='gfootball_p8',
                   config=log_params,
                   monitor_gym=True,
                   reinit=True,
                   tags=tags)
    yield

    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
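Per the docstring, this function wraps the runner's train() call as a context manager; the extra keyword arguments of this variant plug in the same way. A minimal usage sketch, where the directory, experiment name, hyperparameters, and the previously constructed runner are placeholders rather than values from the examples:

with logger_context("experiments/football",      # hypothetical output directory
                    run_ID=0,
                    name="ppo_baseline",          # hypothetical experiment name
                    log_params=dict(lr=2.5e-4),   # saved to params.json
                    snapshot_mode="gap",
                    log_gap=100,                  # with "gap" mode, keep a snapshot every 100 iterations
                    use_wandb=True,               # also mirror params/metrics to Weights & Biases
                    tags=["baseline"]):
    runner.train()                                # any rlpyt runner built beforehand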
Example #5
def build_and_train(env="Ant-v2",
                    run_ID=0,
                    cuda_idx=None,
                    sample_mode="serial",
                    n_parallel=2):
    affinity = dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel)))
    gpu_cpu = "CPU" if cuda_idx is None else f"GPU {cuda_idx}"
    if sample_mode == "serial":
        Sampler = IntrinsicSerialSampler  # (Ignores workers_cpus.)
        print(f"Using serial sampler, {gpu_cpu} for sampling and optimizing.")
    elif sample_mode == "gpu":
        Sampler = IntrinsicGpuSampler
        print(
            f"Using GPU parallel sampler (agent in master), {gpu_cpu} for sampling and optimizing."
        )

    env_kwargs = dict(id=env)

    sampler = Sampler(
        EnvCls=gym_make,
        env_kwargs=env_kwargs,
        batch_T=128,
        batch_B=64,
        obs_norm_steps=0,  #128*50,
        max_decorrelation_steps=0)

    algo = RndIntrinsicPPO(int_rew_coeff=1.,
                           ext_rew_coeff=0.,
                           ext_rew_clip=(-1, 1),
                           minibatches=4,
                           epochs=4,
                           entropy_loss_coeff=0.001,
                           learning_rate=0.0001,
                           gae_lambda=0.95,
                           discount=0.999,
                           int_discount=0.99)

    rnd_model_kwargs = dict(hidden_sizes=[64, 64],
                            output_size=10,
                            nonlinearity=torch.nn.ReLU)
    base_model_kwargs = dict(  # Same front-end architecture as RND model, different fc kwarg name
        hidden_sizes=[64, 64],
        normalize_observation=True)
    agent = RndMujocoFfAgent(rnd_model_kwargs=rnd_model_kwargs,
                             model_kwargs=base_model_kwargs)

    runner = MinibatchRlFlex(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=int(
            49152e4
        ),  # 60k rollouts per environment at the (T, B) = (128, 64) used above
        log_interval_steps=int(1e3),
        affinity=affinity)

    config = dict(game=env)
    name = "intrinsicPPO_" + env
    log_dir = "rnd_mujoco"
    set_snapshot_gap(
        1000)  # Save parameter checkpoint every 1000 training iterations
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="gap"):
        runner.train()
Example #6
File: main.py Project: hanegawa/DRIML
def build_and_train(args, game="", run_ID=0, config=None):
    """
    1. Parse the args object into dictionaries understood by rlpyt
    """
    config['env']['id'] = args.env_name
    config["eval_env"]["id"] = args.env_name

    config["eval_env"]["horizon"] = args.horizon
    config["env"]["horizon"] = args.horizon

    if 'procgen' in args.env_name:
        for k, v in vars(args).items():
            if args.env_name.split('-')[1] in k:
                config['env'][k] = v

    config['model']['frame_stack'] = args.frame_stack
    config['model']['nce_loss'] = args.nce_loss
    config['model']['algo'] = args.algo
    config['model']['env_name'] = args.env_name
    config['model']['dueling'] = args.dueling == 1
    config['algo']['double_dqn'] = args.double_dqn == 1
    config['algo']['prioritized_replay'] = args.prioritized_replay == 1
    config['algo']['n_step_return'] = args.n_step_return
    config['algo']['learning_rate'] = args.learning_rate

    config['runner']['log_interval_steps'] = args.log_interval_steps
    config['cmd_args'] = vars(args)
    """
    2. Create the CatDQN (C51) agent from custom implementation
    """

    agent = AtariCatDqnAgent(ModelCls=AtariCatDqnModel_nce,
                             model_kwargs=config["model"],
                             **config["agent"])
    algo = CategoricalDQN_nce(args=config['cmd_args'],
                              ReplayBufferCls=None,
                              optim_kwargs=config["optim"],
                              **config["algo"])

    if args.mode == 'parallel':
        affinity = make_affinity(n_cpu_core=args.n_cpus,
                                 n_gpu=args.n_gpus,
                                 n_socket=1
                                 # hyperthread_offset=0
                                 )
        """
        Some architectures require the following block to be uncommented. Try with and without.
        It is here to allow scheduling of non-sequential CPU IDs.
        """
        # import psutil
        # psutil.Process().cpu_affinity([])
        # cpus = tuple(psutil.Process().cpu_affinity())
        # affinity['all_cpus'] = affinity['master_cpus'] = cpus
        # affinity['workers_cpus'] = tuple([tuple([x]) for x in cpus+cpus])
        # env_kwargs = config['env']

        sampler = GpuSampler(EnvCls=make_env,
                             env_kwargs=config["env"],
                             CollectorCls=GpuWaitResetCollector,
                             TrajInfoCls=AtariTrajInfo,
                             eval_env_kwargs=config["eval_env"],
                             **config["sampler"])
        """
        If you don't have a GPU, use the CpuSampler
        """
        # sampler = CpuSampler(
        #             EnvCls=AtariEnv if args.game is not None else make_env,
        #             env_kwargs=config["env"],
        #             CollectorCls=CpuWaitResetCollector,
        #             TrajInfoCls=AtariTrajInfo,
        #             eval_env_kwargs=config["eval_env"],
        #             **config["sampler"]
        #         )

    elif args.mode == 'serial':
        affinity = make_affinity(
            n_cpu_core=1,  # Serial mode: use a single CPU core.
            n_gpu=args.n_gpus,  # Number of GPUs taken from the command line.
            n_socket=1,
        )
        """
        Some architectures require the following block to be uncommented. Try with and without.
        """
        # import psutil
        # psutil.Process().cpu_affinity([])
        # cpus = tuple(psutil.Process().cpu_affinity())
        # affinity['all_cpus'] = affinity['master_cpus'] = cpus
        # affinity['workers_cpus'] = tuple([tuple([x]) for x in cpus+cpus])
        # env_kwargs = config['env']

        sampler = SerialSampler(
            EnvCls=make_env,
            env_kwargs=config["env"],
            # CollectorCls=SerialEvalCollector,
            TrajInfoCls=AtariTrajInfo,
            eval_env_kwargs=config["eval_env"],
            **config["sampler"])
    """
    3. Bookkeeping, setting up Comet.ml experiments, etc
    """
    folders_name = [args.output_dir, args.env_name, 'run_' + args.run_ID]
    path = os.path.join(*folders_name)
    os.makedirs(path, exist_ok=True)

    experiment = Experiment(api_key='your_key',
                            auto_output_logging=False,
                            project_name='driml',
                            workspace="your_workspace",
                            disabled=True)
    experiment.add_tag('C51+DIM' if (
        args.lambda_LL > 0 or args.lambda_LG > 0 or args.lambda_GL > 0
        or args.lambda_GG > 0) else 'C51')
    experiment.set_name(args.experiment_name)
    experiment.log_parameters(config)

    MinibatchRlEval.TF_logger = Logger(path,
                                       use_TFX=True,
                                       params=config,
                                       comet_experiment=experiment,
                                       disable_local=True)
    MinibatchRlEval.log_diagnostics = log_diagnostics_custom
    MinibatchRlEval._log_infos = _log_infos
    MinibatchRlEval.evaluate_agent = evaluate_agent
    """
    4. Define the runner as minibatch
    """
    runner = MinibatchRlEval(algo=algo,
                             agent=agent,
                             sampler=sampler,
                             affinity=affinity,
                             **config["runner"])

    runner.algo.opt_info_fields = tuple(
        list(runner.algo.opt_info_fields) + ['lossNCE'] +
        ['action%d' % i for i in range(15)])
    name = args.mode + "_value_based_nce_" + args.env_name
    log_dir = os.path.join(args.output_dir, args.env_name)
    logger.set_snapshot_gap(args.weight_save_interval //
                            config['runner']['log_interval_steps'])
    """
    5. Run the experiment and optionally save network weights
    """

    with experiment.train():
        with logger_context(
                log_dir,
                run_ID,
                name,
                config,
                snapshot_mode=(
                    'last' if args.weight_save_interval == -1 else 'gap'
                )):  # 'all' saves every iteration, 'gap' saves every X iterations
            runner.train()
Example #7
def build_and_train(game="breakout",
                    run_ID=0,
                    cuda_idx=None,
                    sample_mode="serial",
                    n_parallel=2):
    affinity = dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel)))
    gpu_cpu = "CPU" if cuda_idx is None else f"GPU {cuda_idx}"
    if sample_mode == "serial":
        Sampler = IntrinsicSerialSampler  # (Ignores workers_cpus.)
        print(f"Using serial sampler, {gpu_cpu} for sampling and optimizing.")
    elif sample_mode == "gpu":
        Sampler = IntrinsicGpuSampler
        print(
            f"Using GPU parallel sampler (agent in master), {gpu_cpu} for sampling and optimizing."
        )

    env_cls, traj_info_cls = (
        MontezumaEnv,
        MontezumaTrajInfo) if game == "montezuma_revenge" else (AtariEnv,
                                                                AtariTrajInfo)
    env_kwargs = dict(game=game,
                      repeat_action_probability=0.25,
                      horizon=int(45e2))

    sampler = Sampler(EnvCls=env_cls,
                      TrajInfoCls=traj_info_cls,
                      env_kwargs=env_kwargs,
                      batch_T=128,
                      batch_B=64,
                      obs_norm_steps=128 * 50,
                      max_decorrelation_steps=0)

    algo = RndIntrinsicPPO(int_rew_coeff=1.,
                           ext_rew_coeff=0.,
                           ext_rew_clip=(-1, 1),
                           minibatches=4,
                           epochs=4,
                           entropy_loss_coeff=0.001,
                           learning_rate=0.0001,
                           gae_lambda=0.95,
                           discount=0.999,
                           int_discount=0.99)

    rnd_model_kwargs = dict(channels=[32, 64, 64],
                            kernel_sizes=[8, 4, 4],
                            strides=[(4, 4), (2, 2), (1, 1)],
                            hidden_sizes=[512],
                            conv_nonlinearity=torch.nn.ReLU)
    base_model_kwargs = dict(  # Same front-end architecture as RND model, different fc kwarg name
        channels=[32, 64, 64],
        kernel_sizes=[8, 4, 4],
        strides=[(4, 4), (2, 2), (1, 1)],
        paddings=[0, 0, 0],
        fc_sizes=[512]
        # Automatically applies nonlinearity=torch.nn.ReLU in this case,
        # but can't specify due to rlpyt limitations
    )
    agent = RndAtariFfAgent(rnd_model_kwargs=rnd_model_kwargs,
                            model_kwargs=base_model_kwargs)

    runner = MinibatchRlFlex(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=int(
            49152e4
        ),  # 60k rollouts per environment at the (T, B) = (128, 64) used above
        log_interval_steps=int(1e3),
        affinity=affinity,
        seed=314)

    config = dict(game=game)
    name = "intrinsicPPO_" + game
    log_dir = "rnd_atari"
    set_snapshot_gap(
        1000)  # Save parameter checkpoint every 1000 training iterations
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="gap"):
        runner.train()