@contextmanager  # from contextlib (assumed); lets the bare ``yield`` below back a with-statement
def logger_context(log_dir, run_ID, name, log_params=None, snapshot_mode="none",
                   snapshot_gap=50):
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_snapshot_gap(snapshot_gap)
    logger.set_log_tabular_only(False)
    log_dir = osp.join(log_dir, f"run_{run_ID}")
    exp_dir = osp.abspath(log_dir)
    tabular_log_file = osp.join(exp_dir, "progress.csv")
    text_log_file = osp.join(exp_dir, "debug.log")
    params_log_file = osp.join(exp_dir, "params.json")
    logger.set_snapshot_dir(exp_dir)
    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.push_prefix(f"{name}_{run_ID} ")
    if log_params is None:
        log_params = dict()
    log_params["name"] = name
    log_params["run_ID"] = run_ID
    with open(params_log_file, "w") as f:
        json.dump(log_params, f)
    yield  # Training runs inside the with-block; teardown below restores the logger.
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
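A minimal smoke-test sketch for the helper above: it exercises only the logging setup and teardown, assuming the rllab-style `logger.record_tabular` / `logger.dump_tabular` calls provided by rlpyt's logger; no RL components or runner are needed, and the directory and values are placeholders.

# Hypothetical quick check of the logging plumbing (no runner involved):
# files land in /tmp/logger_demo/run_0/ (progress.csv, debug.log, params.json).
with logger_context("/tmp/logger_demo", run_ID=0, name="smoke",
                    log_params=dict(lr=1e-3), snapshot_mode="gap", snapshot_gap=100):
    logger.record_tabular("Iteration", 0)
    logger.dump_tabular()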
def initialize_logging(self):
    self._opt_infos = {k: list() for k in self.algo.opt_info_fields}
    self._start_time = self._last_time = time.time()
    self._cum_time = 0.0
    if self.snapshot_gap_intervals is not None:
        logger.set_snapshot_gap(self.snapshot_gap_intervals * self.log_interval_updates)
    self.pbar = ProgBarCounter(self.log_interval_updates)
def make_logger_ctx(out_dir, algo, orig_env_name, custom_run_name=None,
                    snapshot_gap=10, **kwargs):
    # for logging & model-saving
    if custom_run_name is None:
        run_name = make_unique_run_name(algo, orig_env_name)
    else:
        run_name = custom_run_name
    logger.set_snapshot_gap(snapshot_gap)
    log_dir = os.path.abspath(out_dir)
    # this is irrelevant so long as it's a prefix of log_dir
    # FIXME: update rlpyt so that I can remove this LOG_DIR kludge.
    log_ctx.LOG_DIR = log_dir
    os.makedirs(out_dir, exist_ok=True)
    return log_ctx.logger_context(out_dir,
                                  run_ID=run_name,
                                  name="mtil",
                                  snapshot_mode="gap",
                                  **kwargs)
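An invocation sketch for `make_logger_ctx`; the string passed as `algo`, the environment name, and the `runner` object are placeholders (the snippet only forwards `algo` and `orig_env_name` to `make_unique_run_name`), and extra keyword arguments such as `log_params` fall through to `log_ctx.logger_context`.

# Hypothetical call; `runner` stands in for any rlpyt-style runner built elsewhere.
with make_logger_ctx("runs/bc_halfcheetah", algo="bc",
                     orig_env_name="HalfCheetah-v3", snapshot_gap=25,
                     log_params=dict(seed=42)):
    runner.train()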
@contextmanager  # from contextlib (assumed); required for the with-statement usage described below
def logger_context(log_dir, run_ID, name, log_params=None, snapshot_mode="none",
                   override_prefix=False, use_summary_writer=False,
                   use_wandb=False, log_gap=1, tags=None):
    """Use as context manager around calls to the runner's ``train()`` method.
    Sets up the logger directory and filenames.  Unless ``override_prefix`` is
    True, this function automatically prepends ``log_dir`` with the rlpyt
    logging directory and the date: `path-to-rlpyt/data/yyyymmdd/hhmmss`
    (`data/` is in the gitignore), and appends `/run_{run_ID}` to separate
    multiple runs of the same settings.  Saves hyperparameters provided in
    ``log_params`` to `params.json`, along with experiment `name` and `run_ID`.

    The runner calls on the logger to save the snapshot (which may include
    agent parameters) at every iteration, but the input ``snapshot_mode`` sets
    how often the logger actually saves it.  Possible modes include (but check
    inside the logger itself):

    * "none": don't save at all
    * "last": always save and overwrite the previous
    * "all": always save and keep each iteration
    * "gap": save periodically and keep each (the gap is set here via
      ``log_gap``)

    The cleanup operations after the ``yield`` close files but might not be
    strictly necessary if not launching another training session in the same
    python process.
    """
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_snapshot_gap(log_gap)
    logger.set_log_tabular_only(False)
    log_dir = osp.join(log_dir, f"run_{run_ID}")
    exp_dir = osp.abspath(log_dir)
    if LOG_DIR != osp.commonpath([exp_dir, LOG_DIR]) and not override_prefix:
        print(f"logger_context received log_dir outside of {LOG_DIR}: "
              f"prepending by {LOG_DIR}/local/<yyyymmdd>/<hhmmss>/")
        exp_dir = get_log_dir(log_dir)
    tabular_log_file = osp.join(exp_dir, "progress.csv")
    text_log_file = osp.join(exp_dir, "debug.log")
    params_log_file = osp.join(exp_dir, "params.json")

    logger.set_snapshot_dir(exp_dir)
    if use_summary_writer:
        logger.set_tf_summary_writer(SummaryWriter(exp_dir))
    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.push_prefix(f"{name}_{run_ID} ")

    if log_params is None:
        log_params = dict()
    log_params["name"] = name
    log_params["run_ID"] = run_ID
    with open(params_log_file, "w") as f:
        # Fall back to the type name for objects that aren't JSON-serializable.
        json.dump(log_params, f, default=lambda o: type(o).__name__)

    if use_wandb:
        logger.use_wandb()
        wandb.init(name=name, project='gfootball_p8', config=log_params,
                   monitor_gym=True, reinit=True, tags=tags)

    yield

    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
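A fuller usage sketch with stock rlpyt components (the import paths are assumed from the rlpyt examples, not from this snippet, and the environment and hyperparameter values are placeholders); it writes TensorBoard summaries next to `progress.csv` and keeps a parameter snapshot every 50 logging iterations via `log_gap`.

from rlpyt.samplers.serial.sampler import SerialSampler
from rlpyt.algos.pg.ppo import PPO
from rlpyt.agents.pg.mujoco import MujocoFfAgent
from rlpyt.runners.minibatch_rl import MinibatchRl
from rlpyt.envs.gym import make as gym_make

sampler = SerialSampler(EnvCls=gym_make, env_kwargs=dict(id="Hopper-v3"),
                        batch_T=256, batch_B=8)
runner = MinibatchRl(algo=PPO(), agent=MujocoFfAgent(), sampler=sampler,
                     n_steps=int(1e6), log_interval_steps=int(1e4),
                     affinity=dict(cuda_idx=None))
config = dict(env_id="Hopper-v3", batch_T=256, batch_B=8)
with logger_context("hopper_ppo", run_ID=0, name="ppo_hopper", log_params=config,
                    snapshot_mode="gap", log_gap=50, use_summary_writer=True):
    runner.train()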
def build_and_train(env="Ant-v2", run_ID=0, cuda_idx=None, sample_mode="serial",
                    n_parallel=2):
    affinity = dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel)))
    gpu_cpu = "CPU" if cuda_idx is None else f"GPU {cuda_idx}"
    if sample_mode == "serial":
        Sampler = IntrinsicSerialSampler  # (Ignores workers_cpus.)
        print(f"Using serial sampler, {gpu_cpu} for sampling and optimizing.")
    elif sample_mode == "gpu":
        Sampler = IntrinsicGpuSampler
        print(f"Using GPU parallel sampler (agent in master), {gpu_cpu} for "
              "sampling and optimizing.")

    env_kwargs = dict(id=env)
    sampler = Sampler(
        EnvCls=gym_make,
        env_kwargs=env_kwargs,
        batch_T=128,
        batch_B=64,
        obs_norm_steps=0,  # 128 * 50
        max_decorrelation_steps=0,
    )
    algo = RndIntrinsicPPO(
        int_rew_coeff=1.,
        ext_rew_coeff=0.,
        ext_rew_clip=(-1, 1),
        minibatches=4,
        epochs=4,
        entropy_loss_coeff=0.001,
        learning_rate=0.0001,
        gae_lambda=0.95,
        discount=0.999,
        int_discount=0.99,
    )
    rnd_model_kwargs = dict(
        hidden_sizes=[64, 64],
        output_size=10,
        nonlinearity=torch.nn.ReLU,
    )
    base_model_kwargs = dict(  # Same front-end architecture as RND model, different fc kwarg name.
        hidden_sizes=[64, 64],
        normalize_observation=True,
    )
    agent = RndMujocoFfAgent(rnd_model_kwargs=rnd_model_kwargs,
                             model_kwargs=base_model_kwargs)
    runner = MinibatchRlFlex(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=int(49152e4),  # this is 30k rollouts per environment at (T, B) = (128, 128)
        log_interval_steps=int(1e3),
        affinity=affinity,
    )
    config = dict(game=env)
    name = "intrinsicPPO_" + env
    log_dir = "rnd_mujoco"
    set_snapshot_gap(1000)  # Save parameter checkpoint every 1000 training iterations.
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="gap"):
        runner.train()
def build_and_train(args, game="", run_ID=0, config=None):
    """
    1. Parse the args object into dictionaries understood by rlpyt
    """
    config['env']['id'] = args.env_name
    config["eval_env"]["id"] = args.env_name
    config["eval_env"]["horizon"] = args.horizon
    config["env"]["horizon"] = args.horizon

    if 'procgen' in args.env_name:
        for k, v in vars(args).items():
            if args.env_name.split('-')[1] in k:
                config['env'][k] = v

    config['model']['frame_stack'] = args.frame_stack
    config['model']['nce_loss'] = args.nce_loss
    config['model']['algo'] = args.algo
    config['model']['env_name'] = args.env_name
    config['model']['dueling'] = args.dueling == 1
    config['algo']['double_dqn'] = args.double_dqn == 1
    config['algo']['prioritized_replay'] = args.prioritized_replay == 1
    config['algo']['n_step_return'] = args.n_step_return
    config['algo']['learning_rate'] = args.learning_rate
    config['runner']['log_interval_steps'] = args.log_interval_steps
    config['cmd_args'] = vars(args)

    """
    2. Create the CatDQN (C51) agent from custom implementation
    """
    agent = AtariCatDqnAgent(ModelCls=AtariCatDqnModel_nce,
                             model_kwargs=config["model"],
                             **config["agent"])
    algo = CategoricalDQN_nce(args=config['cmd_args'],
                              ReplayBufferCls=None,
                              optim_kwargs=config["optim"],
                              **config["algo"])

    if args.mode == 'parallel':
        affinity = make_affinity(n_cpu_core=args.n_cpus,
                                 n_gpu=args.n_gpus,
                                 n_socket=1,
                                 # hyperthread_offset=0
                                 )
        """
        Some architectures require the following block to be uncommented.
        Try with and without.  This is here to allow scheduling of
        non-sequential CPU IDs.
        """
        # import psutil
        # psutil.Process().cpu_affinity([])
        # cpus = tuple(psutil.Process().cpu_affinity())
        # affinity['all_cpus'] = affinity['master_cpus'] = cpus
        # affinity['workers_cpus'] = tuple([tuple([x]) for x in cpus + cpus])
        # env_kwargs = config['env']

        sampler = GpuSampler(EnvCls=make_env,
                             env_kwargs=config["env"],
                             CollectorCls=GpuWaitResetCollector,
                             TrajInfoCls=AtariTrajInfo,
                             eval_env_kwargs=config["eval_env"],
                             **config["sampler"])
        """
        If you don't have a GPU, use the CpuSampler
        """
        # sampler = CpuSampler(
        #     EnvCls=AtariEnv if args.game is not None else make_env,
        #     env_kwargs=config["env"],
        #     CollectorCls=CpuWaitResetCollector,
        #     TrajInfoCls=AtariTrajInfo,
        #     eval_env_kwargs=config["eval_env"],
        #     **config["sampler"]
        # )
    elif args.mode == 'serial':
        affinity = make_affinity(
            n_cpu_core=1,  # Use 16 cores across all experiments.
            n_gpu=args.n_gpus,  # Use 8 gpus across all experiments.
            n_socket=1,
        )
        """
        Some architectures require the following block to be uncommented.
        Try with and without.
        """
        # import psutil
        # psutil.Process().cpu_affinity([])
        # cpus = tuple(psutil.Process().cpu_affinity())
        # affinity['all_cpus'] = affinity['master_cpus'] = cpus
        # affinity['workers_cpus'] = tuple([tuple([x]) for x in cpus + cpus])
        # env_kwargs = config['env']

        sampler = SerialSampler(
            EnvCls=make_env,
            env_kwargs=config["env"],
            # CollectorCls=SerialEvalCollector,
            TrajInfoCls=AtariTrajInfo,
            eval_env_kwargs=config["eval_env"],
            **config["sampler"])

    """
    3. Bookkeeping, setting up Comet.ml experiments, etc
    """
    folders_name = [args.output_dir, args.env_name, 'run_' + args.run_ID]
    path = os.path.join(*folders_name)
    os.makedirs(path, exist_ok=True)

    experiment = Experiment(api_key='your_key',
                            auto_output_logging=False,
                            project_name='driml',
                            workspace="your_workspace",
                            disabled=True)
    experiment.add_tag('C51+DIM' if (args.lambda_LL > 0 or args.lambda_LG > 0
                                     or args.lambda_GL > 0 or args.lambda_GG > 0)
                       else 'C51')
    experiment.set_name(args.experiment_name)
    experiment.log_parameters(config)

    MinibatchRlEval.TF_logger = Logger(path,
                                       use_TFX=True,
                                       params=config,
                                       comet_experiment=experiment,
                                       disable_local=True)
    MinibatchRlEval.log_diagnostics = log_diagnostics_custom
    MinibatchRlEval._log_infos = _log_infos
    MinibatchRlEval.evaluate_agent = evaluate_agent

    """
    4. Define the runner as minibatch
    """
    runner = MinibatchRlEval(algo=algo,
                             agent=agent,
                             sampler=sampler,
                             affinity=affinity,
                             **config["runner"])
    runner.algo.opt_info_fields = tuple(list(runner.algo.opt_info_fields) +
                                        ['lossNCE'] +
                                        ['action%d' % i for i in range(15)])

    name = args.mode + "_value_based_nce_" + args.env_name
    log_dir = os.path.join(args.output_dir, args.env_name)
    logger.set_snapshot_gap(args.weight_save_interval //
                            config['runner']['log_interval_steps'])

    """
    6. Run the experiment and optionally save network weights
    """
    with experiment.train():
        # snapshot_mode: 'last' overwrites the previous snapshot, 'gap' keeps
        # one every X iterations, 'all' would save every iteration.
        with logger_context(log_dir, run_ID, name, config,
                            snapshot_mode=('last' if args.weight_save_interval == -1
                                           else 'gap')):
            runner.train()
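The build_and_train above indexes `config` by several nested sections before anything is created, so the caller is expected to pass a dict shaped roughly as below; the section keys come from the snippet itself, while every value shown is only an illustrative placeholder.

# Skeleton of the expected config layout (illustrative values only).
config = dict(
    env=dict(),            # filled in from args: id, horizon, procgen options
    eval_env=dict(),       # id and horizon copied from args
    model=dict(),          # frame_stack, nce_loss, algo, env_name, dueling
    agent=dict(),          # forwarded to AtariCatDqnAgent(**config["agent"])
    algo=dict(),           # double_dqn, prioritized_replay, n_step_return, learning_rate
    optim=dict(),          # optimizer kwargs for CategoricalDQN_nce
    sampler=dict(batch_T=1, batch_B=32),   # placeholder sampler sizes
    runner=dict(n_steps=int(5e6)),         # log_interval_steps is overwritten from args
)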
def build_and_train(game="breakout", run_ID=0, cuda_idx=None, sample_mode="serial",
                    n_parallel=2):
    affinity = dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel)))
    gpu_cpu = "CPU" if cuda_idx is None else f"GPU {cuda_idx}"
    if sample_mode == "serial":
        Sampler = IntrinsicSerialSampler  # (Ignores workers_cpus.)
        print(f"Using serial sampler, {gpu_cpu} for sampling and optimizing.")
    elif sample_mode == "gpu":
        Sampler = IntrinsicGpuSampler
        print(f"Using GPU parallel sampler (agent in master), {gpu_cpu} for "
              "sampling and optimizing.")

    env_cls, traj_info_cls = ((MontezumaEnv, MontezumaTrajInfo)
                              if game == "montezuma_revenge"
                              else (AtariEnv, AtariTrajInfo))
    env_kwargs = dict(game=game, repeat_action_probability=0.25, horizon=int(45e2))
    sampler = Sampler(
        EnvCls=env_cls,
        TrajInfoCls=traj_info_cls,
        env_kwargs=env_kwargs,
        batch_T=128,
        batch_B=64,
        obs_norm_steps=128 * 50,
        max_decorrelation_steps=0,
    )
    algo = RndIntrinsicPPO(
        int_rew_coeff=1.,
        ext_rew_coeff=0.,
        ext_rew_clip=(-1, 1),
        minibatches=4,
        epochs=4,
        entropy_loss_coeff=0.001,
        learning_rate=0.0001,
        gae_lambda=0.95,
        discount=0.999,
        int_discount=0.99,
    )
    rnd_model_kwargs = dict(channels=[32, 64, 64],
                            kernel_sizes=[8, 4, 4],
                            strides=[(4, 4), (2, 2), (1, 1)],
                            hidden_sizes=[512],
                            conv_nonlinearity=torch.nn.ReLU)
    base_model_kwargs = dict(  # Same front-end architecture as RND model, different fc kwarg name.
        channels=[32, 64, 64],
        kernel_sizes=[8, 4, 4],
        strides=[(4, 4), (2, 2), (1, 1)],
        paddings=[0, 0, 0],
        fc_sizes=[512],
        # Automatically applies nonlinearity=torch.nn.ReLU in this case,
        # but can't specify due to rlpyt limitations.
    )
    agent = RndAtariFfAgent(rnd_model_kwargs=rnd_model_kwargs,
                            model_kwargs=base_model_kwargs)
    runner = MinibatchRlFlex(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=int(49152e4),  # this is 30k rollouts per environment at (T, B) = (128, 128)
        log_interval_steps=int(1e3),
        affinity=affinity,
        seed=314,
    )
    config = dict(game=game)
    name = "intrinsicPPO_" + game
    log_dir = "rnd_atari"
    set_snapshot_gap(1000)  # Save parameter checkpoint every 1000 training iterations.
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="gap"):
        runner.train()
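The rlpyt example scripts usually pair a build_and_train like the one above with a small argparse entry point; below is a sketch of that pattern, with flag names mirroring the function's keyword arguments (the CLI itself is an assumption, not part of the snippet).

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", default="breakout")
    parser.add_argument("--run_ID", type=int, default=0)
    parser.add_argument("--cuda_idx", type=int, default=None)
    parser.add_argument("--sample_mode", default="serial", choices=["serial", "gpu"])
    parser.add_argument("--n_parallel", type=int, default=2)
    args = parser.parse_args()
    build_and_train(game=args.game, run_ID=args.run_ID, cuda_idx=args.cuda_idx,
                    sample_mode=args.sample_mode, n_parallel=args.n_parallel)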