def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="sac_with_ul", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) sampler = SerialSampler( EnvCls=make, env_kwargs=config["env"], CollectorCls=CpuResetCollector, # TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["env"], # Same args! **config["sampler"]) algo = SacWithUl(**config["algo"]) agent = SacAgent(conv_kwargs=config["conv"], fc1_kwargs=config["fc1"], pi_model_kwargs=config["pi_model"], q_model_kwargs=config["q_model"], **config["agent"]) runner = MinibatchRlEvalEnvStep(algo=algo, agent=agent, sampler=sampler, affinity=affinity, frame_skip=config["env"]["frame_skip"], **config["runner"]) name = config["env"]["domain_name"] + "_" + config["env"]["task_name"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    if slot_affinity_code == 'None':
        # affinity = affinity_from_code(run_slot_affinity_code)
        slot_affinity_code = prepend_run_slot(0, affinity_code)
        affinity = affinity_from_code(slot_affinity_code)
    else:
        affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    # Load variant of experiment (there may not be a variant, though).
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = CpuSampler(
        EnvCls=make_env,
        env_kwargs={},
        CollectorCls=CpuResetCollector,
        **config["sampler"],
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    print('Config', config)
    if 'pixel_wrapper_kwargs' in config['env']:
        info_keys = config.get('info_keys', None)
        state_keys = config.get('state_keys', None)
        init_namedtuples(info_keys=info_keys, state_keys=state_keys)
    sampler = CpuSampler(
        EnvCls=DMControlEnv,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["env"],
        **config["sampler"],
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = "sac_{}_{}".format(config['env']['domain'], config['env']['task'])
    with logger_context(log_dir, run_ID, name, log_params=config,
            snapshot_mode='last'):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    assert isinstance(affinity, list)  # One for each GPU.
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"],
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = SyncRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = get_affinity(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = GpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"],
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) eval_env_config = config["env"].copy() eval_env_config["start_level"] = config["env"]["num_levels"] + 100 eval_env_config["num_levels"] = 100 sampler = GpuSampler( EnvCls=make, env_kwargs=config["env"], CollectorCls=GpuResetCollector, eval_env_kwargs=eval_env_config, **config["sampler"] ) algo = PPO(optim_kwargs=config["optim"], **config["algo"]) agent = RADPgAgent(ModelCls=RADModel, model_kwargs=config["model"], **config["agent"]) runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = config["env"]["id"] with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) config["eval_env"]["game"] = config["env"]["game"] sampler = GpuSampler( EnvCls=AtariEnv, env_kwargs=config["env"], CollectorCls=GpuWaitResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["eval_env"], **config["sampler"] ) algo = CategoricalDQN(optim_kwargs=config["optim"], **config["algo"]) agent = AtariCatDqnAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = config["env"]["game"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) config["eval_env"]["game"] = config["env"]["game"] sampler = AsyncGpuSampler( EnvCls=AtariEnv, env_kwargs=config["env"], CollectorCls=DbGpuResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["eval_env"], **config["sampler"] ) algo = R2D1(optim_kwargs=config["optim"], **config["algo"]) agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"]) runner = AsyncRlEval( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = "async_gpu_" + config["env"]["game"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    print('Variant', variant)
    config = update_config(config, variant)
    sampler = SerialSampler(
        EnvCls=DMControlEnv,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"],
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = "sac_{}_{}".format(config['env']['domain'], config['env']['task'])
    with logger_context(log_dir, run_ID, name, log_params=config,
            snapshot_mode='last'):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = CpuSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        **config["sampler"],
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        seed=int(run_ID) * 1000,
        **config["runner"],
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="ppo_ul_16env", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) sampler = SerialSampler( EnvCls=AtariEnv84, env_kwargs=config["env"], CollectorCls=CpuResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["env"], # Same args! **config["sampler"]) algo = PpoUl(optim_kwargs=config["optim"], **config["algo"]) agent = AtariPgAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) name = config["env"]["game"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = CpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=CpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"],
    )
    algo = Dreamer(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDreamerAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = config["env"]["game"] + str(config["sampler"]["batch_T"])
    with logger_context(log_dir, run_ID, name, config):  # Might have to flatten config
        runner.train()

def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="dmlab_pc", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) algo = PixelControl( optim_kwargs=config["optim"], EncoderCls=DmlabAtcEncoderModel, encoder_kwargs=config["encoder"], pixel_control_model_kwargs=config["pixel_control_model"], **config["algo"] ) runner = UnsupervisedLearning( algo=algo, affinity=affinity, **config["runner"] ) name = config["name"] with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) eval_env_config = config["env"].copy() eval_env_config["start_level"] = config["env"]["num_levels"] + 100 eval_env_config["num_levels"] = 100 sampler = GpuSampler(EnvCls=make, env_kwargs=config["env"], CollectorCls=GpuResetCollector, eval_env_kwargs=eval_env_config, **config["sampler"]) if config["checkpoint"]: model_state_dict = torch.load(config["checkpoint"]) print("Loaded.") else: model_state_dict = None algo = PPO_AUG_VAE(optim_kwargs=config["optim"], **config["algo"]) agent = RADPgVaeAgent(ModelCls=RadVaePolicy, model_kwargs=config["model"], initial_model_state_dict=model_state_dict, **config["agent"]) runner = MinibatchRlEval(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) name = config["env"]["id"] with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    # Note: slot_affinity_code is unused here; the affinity is built directly.
    affinity = make_affinity(
        run_slot=0,
        n_cpu_core=os.cpu_count(),  # Use all available cores for this run.
        n_gpu=1,  # Single GPU for this run.
        gpu_per_run=1,
        sample_gpu_per_run=1,
        async_sample=True,
        optim_sample_share_gpu=True,
    )
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"],
    )
    algo = CategoricalDQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariCatDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = AsyncSerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        eval_env_kwargs=config["env"],
        CollectorCls=DbCpuResetCollector,
        **config["sampler"],
    )
    algo = DDPG(optim_kwargs=config["optim"], **config["algo"])
    agent = DdpgAgent(**config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = "async_ddpg_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) config["algo_name"] = 'A2OC' t_env = pomdp_interface(**config["env"]) config["algo"]["discount"] = t_env.discount sampler = GpuSampler( EnvCls=pomdp_interface, env_kwargs=config["env"], **config["sampler"] ) algo = A2OC(optim_kwargs=config["optim"], **config["algo"]) agent = PomdpOcFfAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = config["env"]["id"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) sampler = GpuSampler( EnvCls=gym.make, env_kwargs=config["env"], eval_env_kwargs=config["eval_env"], **config["sampler"] ) algo = DiscreteSACAE(optim_kwargs=config["optim"], ae_optim_kwargs=config["ae_optim"], **config["algo"]) agent = DiscreteSacAEAgent(**config["agent"], encoder_kwargs=config["encoder"], model_kwargs=config["actor"], critic_kwargs=config["critic"]) runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = config["env"]["id"] with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID):
    # (Or load from a central store of configs.)
    config = dict(
        env=dict(game="pong"),
        algo=dict(learning_rate=7e-4),
        sampler=dict(batch_B=16),
    )
    affinity = get_affinity(slot_affinity_code)
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = GpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=WaitResetCollector,
        batch_T=5,
        # batch_B=16,  # Get from config.
        max_decorrelation_steps=400,
        **config["sampler"],
    )
    algo = A2C(**config["algo"])  # Run with defaults.
    agent = AtariFfAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    name = "a2c_" + config["env"]["game"]
    log_dir = "example_6"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

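# Sketch of a companion launch script for a function like the one above, assuming
# rlpyt's standard launching utilities (encode_affinity, VariantLevel, make_variants,
# run_experiments). The script path, core/GPU counts, and hyperparameter values are
# illustrative; the launcher writes each variant into its log_dir, which
# load_variant() then picks up inside build_and_train.
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "path/to/train_script.py"  # Hypothetical: the file defining build_and_train.
affinity_code = encode_affinity(n_cpu_core=4, n_gpu=1)
experiment_title = "example_6"
runs_per_setting = 2

variant_levels = list()
values = list(zip([7e-4, 1e-3]))
dir_names = ["{}lr".format(*v) for v in values]
variant_levels.append(VariantLevel([("algo", "learning_rate")], values, dir_names))
values = list(zip(["pong", "seaquest"]))
dir_names = ["{}".format(*v) for v in values]
variant_levels.append(VariantLevel([("env", "game")], values, dir_names))
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
)
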
def build_and_train( slot_affinity_code="0slt_0gpu_1cpu_1cpr", log_dir="test", run_ID="0", config_key="LSTM", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) sampler = CpuSampler( EnvCls=safety_gym_make, env_kwargs=config["env"], TrajInfoCls=SafetyGymTrajInfo, **config["sampler"] ) algo = CppoPID(**config["algo"]) agent = CppoLstmAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"], ) name = "cppo_" + config["env"]["id"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="atari_ats", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) algo = AugmentedTemporalSimilarity( optim_kwargs=config["optim"], encoder_kwargs=config["encoder"], **config["algo"] ) runner = UnsupervisedLearning( algo=algo, affinity=affinity, **config["runner"] ) name = config["name"] with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="ppo_16env", experiment_title="exp", snapshot_mode="none", snapshot_gap=None, ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) # Hack that the first part of the log_dir matches the source of the model model_base_dir = config["pretrain"]["model_dir"] if model_base_dir is not None: raw_log_dir = log_dir.split(experiment_title)[-1].lstrip( "/") # get rid of ~/GitRepos/adam/rlpyt/data/local/<timestamp>/ model_sub_dir = raw_log_dir.split("/RlFromUl/")[ 0] # keep the UL part, which comes first config["agent"]["state_dict_filename"] = osp.join( model_base_dir, model_sub_dir, "run_0/params.pkl") pprint.pprint(config) sampler = AlternatingSampler( EnvCls=DmlabEnv, env_kwargs=config["env"], CollectorCls=GpuWaitResetCollector, # TrajInfoCls=AtariTrajInfo, # eval_env_kwargs=config["env"], # Same args! **config["sampler"]) algo = PPO(optim_kwargs=config["optim"], **config["algo"]) agent = DmlabPgLstmAlternatingAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) name = config["env"]["level"] if snapshot_gap is not None: snapshot_gap = int(snapshot_gap) with logger_context( log_dir, run_ID, name, config, snapshot_mode=snapshot_mode, snapshot_gap=snapshot_gap, ): runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="serial_radsac", experiment_title="exp", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) # Hack that the first part of the log_dir matches the source of the model model_base_dir = config["pretrain"]["model_dir"] if model_base_dir is not None: raw_log_dir = log_dir.split(experiment_title)[-1].lstrip( "/") # get rid of ~/GitRepos/adam/rlpyt/data/local/<timestamp>/ model_sub_dir = raw_log_dir.split("/RlFromUl/")[ 0] # keep the UL part, which comes first pretrain_ID = config["pretrain"]["run_ID"] config["agent"]["state_dict_filename"] = osp.join( model_base_dir, model_sub_dir, f"run_{pretrain_ID}/params.pkl") pprint.pprint(config) sampler = SerialSampler( EnvCls=make, env_kwargs=config["env"], CollectorCls=CpuResetCollector, eval_env_kwargs=config["env"], # Same args! **config["sampler"], ) algo = RadSacFromUl(**config["algo"]) agent = SacAgent( conv_kwargs=config["conv"], fc1_kwargs=config["fc1"], pi_model_kwargs=config["pi_model"], q_model_kwargs=config["q_model"], **config["agent"], ) runner = MinibatchRlEvalEnvStep( algo=algo, agent=agent, sampler=sampler, affinity=affinity, frame_skip=config["env"]["frame_skip"], **config["runner"], ) name = config["env"]["domain_name"] + "_" + config["env"]["task_name"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = AlternatingSampler(
        EnvCls=ProcgenEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuResetCollector,
        **config["sampler"],
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = ProcgenFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) config["algo_name"] = 'A2OC_D_RNN' env = BatchPOMDPEnv(batch_B=config["sampler"]["batch_B"], **config["env"]) config["algo"]["discount"] = env.discount sampler = BatchPOMDPSampler(env=env, **config["sampler"]) algo = A2OC(optim_kwargs=config["optim"], **config["algo"]) agent = PomdpOcRnnAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) name = config["env"]["id"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    env = IsaacGymEnv(config['env']['task'])  # Make env
    import torch.nn as nn
    config["model"]["hidden_nonlinearity"] = getattr(
        nn, config["model"]["hidden_nonlinearity"])  # Replace string with proper activation
    sampler = IsaacSampler(env, **config["sampler"])
    algo = PPOC(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfOcAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = "ppo_nv_" + config["env"]["task"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

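# Illustration of the string-to-activation swap above (hypothetical config value):
# with config["model"]["hidden_nonlinearity"] == "Tanh", the getattr call returns the
# class nn.Tanh, so the downstream model code can instantiate it as nn.Tanh().
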
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = get_affinity(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) config["eval_env"]["id"] = config["env"]["id"] sampler = SerialSampler(EnvCls=gym_make, env_kwargs=config["env"], CollectorCls=ResetCollector, eval_env_kwargs=config["eval_env"], **config["sampler"]) algo = TD3(optim_kwargs=config["optim"], **config["algo"]) agent = Td3Agent(**config["agent"]) runner = MinibatchRlEval(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) name = "td3_" + config["env"]["id"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="ppo_ul_16env", snapshot_mode="none", snapshot_gap=None, ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) sampler = AlternatingSampler( EnvCls=DmlabEnv, env_kwargs=config["env"], CollectorCls=GpuWaitResetCollector, **config["sampler"] ) algo = PpoUl(optim_kwargs=config["optim"], **config["algo"]) agent = DmlabPgLstmAlternatingAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = config["env"]["level"] if snapshot_gap is not None: snapshot_gap = int(snapshot_gap) with logger_context( log_dir, run_ID, name, config, snapshot_mode=snapshot_mode, snapshot_gap=snapshot_gap, ): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) sampler = GpuSampler( EnvCls=gym.make, env_kwargs=config["env"], CollectorCls=GpuResetCollector, eval_env_kwargs=config["eval_env"], **config["sampler"] ) if config["checkpoint"]: model_state_dict = torch.load(config["checkpoint"]) else: model_state_dict = None algo = PPO(optim_kwargs=config["optim"], **config["algo"]) agent = CategoricalPgAgent( ModelCls=BaselinePolicy, model_kwargs=config["model"], initial_model_state_dict=model_state_dict, **config["agent"] ) runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = config["env"]["id"] with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'): runner.train()
def build_and_train(slot_affinity_code=None, log_dir='./data', run_ID=0,
        serial_mode=True, snapshot: Dict = None, config_update: Dict = None):
    # Default configuration.
    config = dict(
        sac_kwargs=dict(learning_rate=3e-4, batch_size=512, replay_size=1e6,
            discount=0.95),
        ppo_kwargs=dict(minibatches=4, learning_rate=2e-1, discount=0.95,
            linear_lr_schedule=False, OptimCls=SGD,
            optim_kwargs=dict(momentum=0.9), gae_lambda=0.95, ratio_clip=0.02,
            entropy_loss_coeff=0, clip_grad_norm=100),
        td3_kwargs=dict(),
        sampler_kwargs=dict(batch_T=32, batch_B=5,
            env_kwargs=dict(id="TrackEnv-v0"), eval_n_envs=4,
            eval_max_steps=1e5, eval_max_trajectories=10),
        sac_agent_kwargs=dict(ModelCls=PiMCPModel, QModelCls=QofMCPModel,
            model_kwargs=dict(freeze_primitives=False)),
        ppo_agent_kwargs=dict(ModelCls=PPOMcpModel,
            model_kwargs=dict(freeze_primitives=False)),
        runner_kwargs=dict(n_steps=1e9, log_interval_steps=1e5),
        snapshot=snapshot,
        algo='sac',
    )

    # Try to update the default config from a variant file, else from config_update.
    try:
        variant = load_variant(log_dir)
        config = update_config(config, variant)
    except FileNotFoundError:
        if config_update is not None:
            config = update_config(config, config_update)

    # Select the correct affinity for the configuration.
    if slot_affinity_code is None:
        num_cpus = multiprocessing.cpu_count()
        num_gpus = len(GPUtil.getGPUs())
        if config['algo'] == 'sac' and not serial_mode:
            affinity = make_affinity(n_cpu_core=num_cpus, n_gpu=num_gpus,
                async_sample=True, set_affinity=False)
        elif config['algo'] == 'ppo' and not serial_mode:
            affinity = dict(alternating=True, cuda_idx=0,
                workers_cpus=2 * list(range(num_cpus)), async_sample=True)
        else:
            affinity = make_affinity(
                n_cpu_core=num_cpus // 2,  # Divide by two due to hyperthreading.
                n_gpu=num_gpus)
    else:
        affinity = affinity_from_code(slot_affinity_code)

    # Continue training from saved state_dicts if a snapshot is provided.
    agent_state_dict = optimizer_state_dict = None
    if config['snapshot'] is not None:
        agent_state_dict = config['snapshot']['agent_state_dict']
        optimizer_state_dict = config['snapshot']['optimizer_state_dict']

    if config['algo'] == 'ppo':
        AgentClass = McpPPOAgent
        AlgoClass = PPO
        RunnerClass = MinibatchRlEval
        SamplerClass = CpuSampler if serial_mode else AlternatingSampler
        algo_kwargs = config['ppo_kwargs']
        agent_kwargs = config['ppo_agent_kwargs']
    elif config['algo'] == 'sac':
        AgentClass = SacAgentSafeLoad
        AlgoClass = SAC
        algo_kwargs = config['sac_kwargs']
        agent_kwargs = config['sac_agent_kwargs']
        if serial_mode:
            SamplerClass = SerialSampler
            RunnerClass = MinibatchRlEval
        else:
            SamplerClass = AsyncCpuSampler
            RunnerClass = AsyncRlEval
            affinity['cuda_idx'] = 0
    else:
        raise NotImplementedError('algorithm not implemented')

    # Make debugging easier in serial mode.
    if serial_mode:
        config['runner_kwargs']['log_interval_steps'] = 1e3
        config['sac_kwargs']['min_steps_learn'] = 0

    sampler = SamplerClass(
        **config['sampler_kwargs'],
        EnvCls=make,
        eval_env_kwargs=config['sampler_kwargs']['env_kwargs'],
    )
    algo = AlgoClass(**algo_kwargs, initial_optim_state_dict=optimizer_state_dict)
    agent = AgentClass(initial_model_state_dict=agent_state_dict, **agent_kwargs)
    runner = RunnerClass(
        **config['runner_kwargs'],
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
    )

    config_logger(log_dir, name='parkour-training', snapshot_mode='best',
        log_params=config)
    # Start training.
    runner.train()

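# Example local-debug invocation of the function above. Values are illustrative;
# config_update is only applied when no variant file is found in log_dir, and its
# keys must already exist in the default config dict defined there.
if __name__ == "__main__":
    build_and_train(
        log_dir='./data/debug',
        run_ID=0,
        serial_mode=True,  # SerialSampler + MinibatchRlEval, short log interval.
        config_update=dict(algo='sac', runner_kwargs=dict(n_steps=1e6)),
    )
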