def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    if slot_affinity_code == 'None':
        # affinity = affinity_from_code(run_slot_affinity_code)
        slot_affinity_code = prepend_run_slot(0, affinity_code)
        affinity = affinity_from_code(slot_affinity_code)
    else:
        affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    # Load variant of experiment (there may not be a variant, though).
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = CpuSampler(
        EnvCls=make_env,
        env_kwargs={},
        CollectorCls=CpuResetCollector,
        **config["sampler"]
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    assert isinstance(affinity, list)  # One for each GPU.
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = SyncRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="atari_ats", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) algo = AugmentedTemporalSimilarity( optim_kwargs=config["optim"], encoder_kwargs=config["encoder"], **config["algo"] ) runner = UnsupervisedLearning( algo=algo, affinity=affinity, **config["runner"] ) name = config["name"] with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = CpuSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        **config["sampler"]
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        seed=int(run_ID) * 1000,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = AsyncSerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        eval_env_kwargs=config["env"],
        CollectorCls=DbCpuResetCollector,
        **config["sampler"]
    )
    algo = DDPG(optim_kwargs=config["optim"], **config["algo"])
    agent = DdpgAgent(**config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_ddpg_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    print('Config', config)

    if 'pixel_wrapper_kwargs' in config['env']:
        info_keys = config.get('info_keys', None)
        state_keys = config.get('state_keys', None)
        init_namedtuples(info_keys=info_keys, state_keys=state_keys)

    sampler = CpuSampler(
        EnvCls=DMControlEnv,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["env"],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_{}_{}".format(config['env']['domain'], config['env']['task'])
    with logger_context(log_dir, run_ID, name, log_params=config, snapshot_mode='last'):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID):
    # (Or load from a central store of configs.)
    config = dict(
        env=dict(game="pong"),
        algo=dict(learning_rate=7e-4),
        sampler=dict(batch_B=16),
    )

    affinity = affinity_from_code(slot_affinity_code)
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        batch_T=5,
        # batch_B=16,  # Get from config.
        max_decorrelation_steps=400,
        **config["sampler"])
    algo = A2C(**config["algo"])  # Run with defaults.
    agent = AtariFfAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    name = "a2c_" + config["env"]["game"]
    log_dir = "example_6"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="sac_with_ul", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) sampler = SerialSampler( EnvCls=make, env_kwargs=config["env"], CollectorCls=CpuResetCollector, # TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["env"], # Same args! **config["sampler"]) algo = SacWithUl(**config["algo"]) agent = SacAgent(conv_kwargs=config["conv"], fc1_kwargs=config["fc1"], pi_model_kwargs=config["pi_model"], q_model_kwargs=config["q_model"], **config["agent"]) runner = MinibatchRlEvalEnvStep(algo=algo, agent=agent, sampler=sampler, affinity=affinity, frame_skip=config["env"]["frame_skip"], **config["runner"]) name = config["env"]["domain_name"] + "_" + config["env"]["task_name"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    print('Variant', variant)
    config = update_config(config, variant)

    sampler = SerialSampler(
        EnvCls=DMControlEnv,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"])
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"])
    name = "sac_{}_{}".format(config['env']['domain'], config['env']['task'])
    with logger_context(log_dir, run_ID, name, log_params=config, snapshot_mode='last'):
        runner.train()
def build_and_train(game="TowerBuilding", run_ID=0, cuda_idx=None): # Either manually set the resources for the experiment: affinity_code = encode_affinity( n_cpu_core=2, n_gpu=1, # hyperthread_offset=8, # if auto-detect doesn't work, number of CPU cores # n_socket=1, # if auto-detect doesn't work, can force (or force to 1) run_slot=0, cpu_per_run=1, set_affinity=True, # it can help to restrict workers to individual CPUs ) affinity = affinity_from_code(affinity_code) config = configs["r2d1"] config["env"]["game"] = game config["eval_env"]["game"] = config["env"]["game"] sampler = AsyncGpuSampler(EnvCls=voxel_make, env_kwargs=config["env"], CollectorCls=DbGpuResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["eval_env"], **config["sampler"]) algo = R2D1(optim_kwargs=config["optim"], **config["algo"]) agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"]) runner = AsyncRlEval(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) config = dict(game=game) name = "r2d1_" + game log_dir = "tower_building" with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="ppo_ul_16env", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) sampler = SerialSampler( EnvCls=AtariEnv84, env_kwargs=config["env"], CollectorCls=CpuResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["env"], # Same args! **config["sampler"]) algo = PpoUl(optim_kwargs=config["optim"], **config["algo"]) agent = AtariPgAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) name = config["env"]["game"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = GpuSampler(
        EnvCls=gym.make,
        env_kwargs=config["env"],
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = DiscreteSACAE(
        optim_kwargs=config["optim"],
        ae_optim_kwargs=config["ae_optim"],
        **config["algo"])
    agent = DiscreteSacAEAgent(
        **config["agent"],
        encoder_kwargs=config["encoder"],
        model_kwargs=config["actor"],
        critic_kwargs=config["critic"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) eval_env_config = config["env"].copy() eval_env_config["start_level"] = config["env"]["num_levels"] + 100 eval_env_config["num_levels"] = 100 sampler = GpuSampler(EnvCls=make, env_kwargs=config["env"], CollectorCls=GpuResetCollector, eval_env_kwargs=eval_env_config, **config["sampler"]) if config["checkpoint"]: model_state_dict = torch.load(config["checkpoint"]) print("Loaded.") else: model_state_dict = None algo = PPO_AUG_VAE(optim_kwargs=config["optim"], **config["algo"]) agent = RADPgVaeAgent(ModelCls=RadVaePolicy, model_kwargs=config["model"], initial_model_state_dict=model_state_dict, **config["agent"]) runner = MinibatchRlEval(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) name = config["env"]["id"] with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = CpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=CpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = Dreamer(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDreamerAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"] + str(config["sampler"]["batch_T"])
    with logger_context(log_dir, run_ID, name, config):  # Might have to flatten config.
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) config["algo_name"] = 'A2OC' t_env = pomdp_interface(**config["env"]) config["algo"]["discount"] = t_env.discount sampler = GpuSampler( EnvCls=pomdp_interface, env_kwargs=config["env"], **config["sampler"] ) algo = A2OC(optim_kwargs=config["optim"], **config["algo"]) agent = PomdpOcFfAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = config["env"]["id"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) config["eval_env"]["game"] = config["env"]["game"] sampler = GpuSampler( EnvCls=AtariEnv, env_kwargs=config["env"], CollectorCls=GpuWaitResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["eval_env"], **config["sampler"] ) algo = CategoricalDQN(optim_kwargs=config["optim"], **config["algo"]) agent = AtariCatDqnAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = config["env"]["game"] with logger_context(log_dir, run_ID, name, config): runner.train()
def launch_experiment(script, run_slot, affinity_code, log_dir, variant,
        run_ID, args):
    """Launches one learning run using ``subprocess.Popen()`` to call the
    python script.

    Calls the script as:
    ``python {script} {slot_affinity_code} {log_dir} {run_ID} {*args}``

    If ``affinity_code["all_cpus"]`` is provided, then the call is prepended
    with ``taskset -c ..`` and the listed cpus (this is the surest way to
    keep the run limited to these CPU cores).  Also saves the `variant` file.
    Returns the process handle, which can be monitored.
    """
    slot_affinity_code = prepend_run_slot(run_slot, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)
    call_list = list()
    if isinstance(affinity, dict) and affinity.get("all_cpus", False):
        cpus = ",".join(str(c) for c in affinity["all_cpus"])
    elif isinstance(affinity, list) and affinity[0].get("all_cpus", False):
        cpus = ",".join(str(c) for aff in affinity for c in aff["all_cpus"])
    else:
        cpus = ()
    if cpus:
        call_list += ["taskset", "-c", cpus]  # PyTorch obeys better than just psutil.
    call_list += ["python", script, slot_affinity_code, log_dir, str(run_ID)]
    call_list += [str(a) for a in args]
    save_variant(variant, log_dir)
    print("\ncall string:\n", " ".join(call_list))
    p = subprocess.Popen(call_list)
    return p
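# Sketch of the receiving end that the call convention above assumes: the
# launched train script reads its positional arguments straight off the
# command line and forwards them to build_and_train().  The argument order
# (slot_affinity_code, log_dir, run_ID, then any extra args such as a config
# key) is illustrative, matching the ``python {script} {slot_affinity_code}
# {log_dir} {run_ID} {*args}`` call described in the docstring above.
if __name__ == "__main__":
    import sys
    build_and_train(*sys.argv[1:])  # e.g. slot_affinity_code, log_dir, run_ID, config_key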
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) eval_env_config = config["env"].copy() eval_env_config["start_level"] = config["env"]["num_levels"] + 100 eval_env_config["num_levels"] = 100 sampler = GpuSampler( EnvCls=make, env_kwargs=config["env"], CollectorCls=GpuResetCollector, eval_env_kwargs=eval_env_config, **config["sampler"] ) algo = PPO(optim_kwargs=config["optim"], **config["algo"]) agent = RADPgAgent(ModelCls=RADModel, model_kwargs=config["model"], **config["agent"]) runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = config["env"]["id"] with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) config["eval_env"]["game"] = config["env"]["game"] sampler = AsyncGpuSampler( EnvCls=AtariEnv, env_kwargs=config["env"], CollectorCls=DbGpuResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["eval_env"], **config["sampler"] ) algo = R2D1(optim_kwargs=config["optim"], **config["algo"]) agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"]) runner = AsyncRlEval( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"] ) name = "async_gpu_" + config["env"]["game"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="dmlab_pc", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) algo = PixelControl( optim_kwargs=config["optim"], EncoderCls=DmlabAtcEncoderModel, encoder_kwargs=config["encoder"], pixel_control_model_kwargs=config["pixel_control_model"], **config["algo"] ) runner = UnsupervisedLearning( algo=algo, affinity=affinity, **config["runner"] ) name = config["name"] with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train( slot_affinity_code="0slt_0gpu_1cpu_1cpr", log_dir="test", run_ID="0", config_key="LSTM", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) sampler = CpuSampler( EnvCls=safety_gym_make, env_kwargs=config["env"], TrajInfoCls=SafetyGymTrajInfo, **config["sampler"] ) algo = CppoPID(**config["algo"]) agent = CppoLstmAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"], ) name = "cppo_" + config["env"]["id"] with logger_context(log_dir, run_ID, name, config): runner.train()
def launch_experiment(
        script,
        run_slot,
        affinity_code,
        log_dir,
        variant,
        run_ID,
        args,
        python_executable=None,
        set_egl_device=False,
        ):
    """Launches one learning run using ``subprocess.Popen()`` to call the
    python script.

    Calls the script as:
    ``python {script} {slot_affinity_code} {log_dir} {run_ID} {*args}``

    If ``affinity_code["all_cpus"]`` is provided, then the call is prepended
    with ``taskset -c ..`` and the listed cpus (this is the surest way to
    keep the run limited to these CPU cores).  Also saves the `variant` file.
    Returns the process handle, which can be monitored.

    Use ``set_egl_device=True`` to set an environment variable
    ``EGL_DEVICE_ID`` equal to the same value as the cuda index for the
    algorithm.  For example, can use with DMControl environment modified to
    look for this environment variable when selecting a GPU for headless
    rendering.
    """
    slot_affinity_code = prepend_run_slot(run_slot, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)
    call_list = list()
    if isinstance(affinity, dict) and affinity.get("all_cpus", False):
        cpus = ",".join(str(c) for c in affinity["all_cpus"])
    elif isinstance(affinity, list) and affinity[0].get("all_cpus", False):
        cpus = ",".join(str(c) for aff in affinity for c in aff["all_cpus"])
    else:
        cpus = ()
    if cpus:
        call_list += ["taskset", "-c", cpus]  # PyTorch obeys better than just psutil.
    py = python_executable if python_executable else sys.executable or "python"
    call_list += [py, script, slot_affinity_code, log_dir, str(run_ID)]
    call_list += [str(a) for a in args]
    save_variant(variant, log_dir)
    print("\ncall string:\n", " ".join(call_list))
    print(os.getcwd())
    if set_egl_device and affinity.get("cuda_idx", None) is not None:
        egl_device_id = str(affinity["cuda_idx"])
        egl_env = os.environ.copy()
        egl_env["EGL_DEVICE_ID"] = egl_device_id
        print(f"Assigning EGL_DEVICE_ID={egl_device_id}")
        p = subprocess.Popen(call_list, env=egl_env)
    else:
        p = subprocess.Popen(call_list)
    return p
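# A minimal usage sketch for launch_experiment(), assuming a launch script
# that first builds an affinity code with encode_affinity() (as in the
# examples above).  The script path, log_dir, variant contents, and config
# key below are hypothetical placeholders, not values from any real project.
if __name__ == "__main__":
    affinity_code = encode_affinity(n_cpu_core=8, n_gpu=1, cpu_per_run=4)
    variant = dict(algo=dict(learning_rate=1e-4))  # hypothetical variant override
    p = launch_experiment(
        script="train_script.py",      # hypothetical path to a build_and_train script
        run_slot=0,
        affinity_code=affinity_code,
        log_dir="data/local/example",  # hypothetical log directory
        variant=variant,
        run_ID=0,
        args=("my_config_key",),       # forwarded as the config_key positional arg
    )
    p.wait()  # block until the launched run finishes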
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="ppo_16env", experiment_title="exp", snapshot_mode="none", snapshot_gap=None, ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) # Hack that the first part of the log_dir matches the source of the model model_base_dir = config["pretrain"]["model_dir"] if model_base_dir is not None: raw_log_dir = log_dir.split(experiment_title)[-1].lstrip( "/") # get rid of ~/GitRepos/adam/rlpyt/data/local/<timestamp>/ model_sub_dir = raw_log_dir.split("/RlFromUl/")[ 0] # keep the UL part, which comes first config["agent"]["state_dict_filename"] = osp.join( model_base_dir, model_sub_dir, "run_0/params.pkl") pprint.pprint(config) sampler = AlternatingSampler( EnvCls=DmlabEnv, env_kwargs=config["env"], CollectorCls=GpuWaitResetCollector, # TrajInfoCls=AtariTrajInfo, # eval_env_kwargs=config["env"], # Same args! **config["sampler"]) algo = PPO(optim_kwargs=config["optim"], **config["algo"]) agent = DmlabPgLstmAlternatingAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) name = config["env"]["level"] if snapshot_gap is not None: snapshot_gap = int(snapshot_gap) with logger_context( log_dir, run_ID, name, config, snapshot_mode=snapshot_mode, snapshot_gap=snapshot_gap, ): runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="serial_radsac", experiment_title="exp", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) # Hack that the first part of the log_dir matches the source of the model model_base_dir = config["pretrain"]["model_dir"] if model_base_dir is not None: raw_log_dir = log_dir.split(experiment_title)[-1].lstrip( "/") # get rid of ~/GitRepos/adam/rlpyt/data/local/<timestamp>/ model_sub_dir = raw_log_dir.split("/RlFromUl/")[ 0] # keep the UL part, which comes first pretrain_ID = config["pretrain"]["run_ID"] config["agent"]["state_dict_filename"] = osp.join( model_base_dir, model_sub_dir, f"run_{pretrain_ID}/params.pkl") pprint.pprint(config) sampler = SerialSampler( EnvCls=make, env_kwargs=config["env"], CollectorCls=CpuResetCollector, eval_env_kwargs=config["env"], # Same args! **config["sampler"], ) algo = RadSacFromUl(**config["algo"]) agent = SacAgent( conv_kwargs=config["conv"], fc1_kwargs=config["fc1"], pi_model_kwargs=config["pi_model"], q_model_kwargs=config["q_model"], **config["agent"], ) runner = MinibatchRlEvalEnvStep( algo=algo, agent=agent, sampler=sampler, affinity=affinity, frame_skip=config["env"]["frame_skip"], **config["runner"], ) name = config["env"]["domain_name"] + "_" + config["env"]["task_name"] with logger_context(log_dir, run_ID, name, config): runner.train()
def run_task(vv, log_dir, exp_name):
    vv = update_env_kwargs(vv)
    run_ID = vv['seed']
    config_key = vv['config_key']

    slot_affinity_code = encode_affinity(
        n_cpu_core=20,
        n_gpu=2,
        n_socket=2,
        run_slot=0,
        set_affinity=True,  # it can help to restrict workers to individual CPUs
    )
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    config.update(**vv)
    # config["env"] = env_arg_dict[config['env_name']]
    vv['env_kwargs']['headless'] = True

    sac_module = 'rlpyt.algos.qpg.{}'.format(config['sac_module'])
    sac_agent_module = 'rlpyt.agents.qpg.{}'.format(config['sac_agent_module'])
    sac_module = importlib.import_module(sac_module)
    sac_agent_module = importlib.import_module(sac_agent_module)
    SAC = sac_module.SAC
    SacAgent = sac_agent_module.SacAgent

    if 'pixel_wrapper_kwargs' in config['env']:
        info_keys = config.get('info_keys', None)
        state_keys = config.get('state_keys', None)
        init_namedtuples(info_keys=info_keys, state_keys=state_keys)

    sampler = CpuSampler(
        EnvCls=SOFTGYM_ENVS[vv['env_name']],
        env_kwargs=vv['env_kwargs'],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=vv['env_kwargs'],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_{}".format(vv['env_name'])
    with logger_context(log_dir, run_ID, name, log_params=config, snapshot_mode='last'):
        runner.train()
def build_and_train( slot_affinity_code="0slt_0gpu_4cpu_4cpr", log_dir="test", run_ID="0", config_key="sac_ul_compress", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] # variant = load_variant(log_dir) # config = update_config(config, variant) config["algo"]["min_steps_rl"] = 100 config["algo"]["min_steps_ul"] = 150 config["algo"]["replay_size"] = 1e4 config["algo"]["batch_size"] = 64 config["algo"]["ul_batch_size"] = 32 config["runner"]["n_steps"] = 1e3 config["runner"]["log_interval_steps"] = 1e2 config["sampler"]["eval_n_envs"] = 1 config["sampler"]["eval_max_steps"] = 500 config["algo"]["stop_rl_conv_grad"] = True config["algo"]["ul_update_schedule"] = "cosine_8" pprint.pprint(config) sampler = SerialSampler( EnvCls=make, env_kwargs=config["env"], CollectorCls=CpuResetCollector, # TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["env"], # Same args! **config["sampler"]) algo = SacUl(**config["algo"]) agent = SacWithUlAgent(conv_kwargs=config["conv"], fc1_kwargs=config["fc1"], pi_model_kwargs=config["pi_model"], q_model_kwargs=config["q_model"], **config["agent"]) runner = MinibatchRlEvalEnvStep(algo=algo, agent=agent, sampler=sampler, affinity=affinity, frame_skip=config["env"]["frame_skip"], **config["runner"]) name = config["env"]["domain_name"] + "_" + config["env"]["task_name"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code="0slt_0gpu_4cpu_4cpr", log_dir="test", run_ID="0", config_key="ppo_ul_16env"): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] # variant = load_variant(log_dir) # config = update_config(config, variant) # config["sampler"]["batch_B"] = 4 # config["sampler"]["batch_T"] = 5 # config["runner"]["log_interval_steps"] = 100 # config["runner"]["n_steps"] = 1000 config["algo"]["ul_update_schedule"] = "constant_1" config["algo"]["min_steps_rl"] = 1e3 config["algo"]["min_steps_ul"] = 200 config["algo"]["max_steps_ul"] = 20e6 config["model"]["stop_conv_grad"] = True config["sampler"]["max_decorrelation_steps"] = 0 config["sampler"]["batch_B"] = 3 config["sampler"]["batch_T"] = 20 config["algo"]["ul_pri_alpha"] = 1. config["algo"]["ul_pri_n_step_return"] = 10 config["algo"]["ul_replay_size"] = 900 pprint.pprint(config) sampler = SerialSampler( EnvCls=AtariEnv84, env_kwargs=config["env"], CollectorCls=CpuResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["env"], # Same args! **config["sampler"]) algo = PpoUl(optim_kwargs=config["optim"], **config["algo"]) agent = AtariPgRlWithUlAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) name = config["env"]["game"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = AlternatingSampler(
        EnvCls=ProcgenEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuResetCollector,
        **config["sampler"])
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = ProcgenFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"])
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def launch_experiment(script, run_slot, affinity_code, log_dir, variant,
        run_ID, args):
    slot_affinity_code = prepend_run_slot(run_slot, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)
    call_list = list()
    if isinstance(affinity, dict) and affinity.get("all_cpus", False):
        cpus = ",".join(str(c) for c in affinity["all_cpus"])
    elif isinstance(affinity, list) and affinity[0].get("all_cpus", False):
        cpus = ",".join(str(c) for aff in affinity for c in aff["all_cpus"])
    else:
        cpus = ()
    if cpus:
        call_list += ["taskset", "-c", cpus]  # PyTorch obeys better than just psutil.
    call_list += ["python", script, slot_affinity_code, log_dir, str(run_ID)]
    call_list += [str(a) for a in args]
    save_variant(variant, log_dir)
    print("\ncall string:\n", " ".join(call_list))
    p = subprocess.Popen(call_list)
    return p
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    # variant = load_variant(log_dir)
    # config = update_config(config, variant)

    sampler = CpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=EpisodicLivesWaitResetCollector,
        **config["sampler"])
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"])
    name = config["env"]["game"] + str(config["algo"]["entropy_loss_coeff"])
    with logger_context(log_dir, run_ID, name, config):
        runner.train()