def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]
    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_gpu_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    # config["eval_env"]["id"] = config["env"]["id"]
    sampler = SerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["env"],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(affinity_code, log_dir, run_ID, **kwargs):
    # All tunable default configs are kept in the launch file.
    # Acquire the affinity assigned by the launcher.
    # NOTE: If the affinity is a list, multiple resources (GPUs) are
    # assigned to the current experiment.
    affinity = affinity_from_code(affinity_code)
    # `config` is now a dictionary with the same structure as the
    # default configuration defined in the launch file.
    config = load_variant(log_dir)
    name = "demo_experiment"
    # This tells you which GPU is recommended for this experiment.
    gpu_idx = affinity["cuda_idx"]
    # Run the experiment under a logger context.
    with logger_context(log_dir, run_ID, name, config):
        logger.log("Start running experiment")
        for epoch_i in range(10):
            # Log a scalar value with this function, for example:
            logger.record_tabular("metric1", epoch_i, epoch_i)
            # Dump all logged values to the csv file (this is the function
            # that writes one line to progress.csv).
            logger.dump_tabular()

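# A typical entry point for these scripts (a minimal sketch, matching the
# `python {script} {affinity_code} {log_dir} {run_ID}` call convention used
# by launch_experiment() below): forward argv into build_and_train().
if __name__ == "__main__":
    import sys
    build_and_train(*sys.argv[1:])
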
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]
    CollectorCls = config["sampler"].pop("CollectorCls", None)
    sampler = CpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=CollectorCls or WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = DQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def launch_experiment(script, run_slot, affinity_code, log_dir, variant,
        run_ID, args, new_process: bool = True):
    """
    Parameters
    ----------
    log_dir: absolute path where the variant is saved and which is passed
        to the running script
    script: path of the experiment script to run
    variant: a dict-like object holding the experiment configuration
    new_process: whether to run the experiment in a new subprocess (True)
        or in the current process (False)
    """
    slot_affinity_code = prepend_run_slot(run_slot, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)
    call_list = list()
    if isinstance(affinity, dict) and affinity.get("all_cpus", False):
        cpus = ",".join(str(c) for c in affinity["all_cpus"])
    elif isinstance(affinity, list) and affinity[0].get("all_cpus", False):
        cpus = ",".join(str(c) for aff in affinity for c in aff["all_cpus"])
    else:
        cpus = ()
    if cpus:
        call_list += ["taskset", "-c", cpus]  # PyTorch obeys better than just psutil.
    call_command = ["python", script, slot_affinity_code, log_dir, str(run_ID)]
    call_command += [str(a) for a in args]
    save_variant(variant, log_dir)
    if new_process:
        print("\ncall string:\n", " ".join(call_list + call_command))
        # The script receives its arguments starting after its own name.
        p = subprocess.Popen(call_list + call_command)
    else:
        print("\nexperiment function:\n", " ".join(call_command[1:]))
        # Load the experiment script as a module by its path; see
        # https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
        file_abspath = os.path.abspath(script)
        module_name = file_abspath.split("/")[-1][:-3]  # Strip the ".py" extension.
        module_spec = importlib.util.spec_from_file_location(
            name=module_name, location=file_abspath)
        module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(module)
        # Call the experiment, passing the arguments that follow the script name.
        if hasattr(module, "main"):
            module.main(*call_command[2:])
        elif hasattr(module, "build_and_train"):
            module.build_and_train(*call_command[2:])
        p = None
    return p

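# A minimal sketch of the in-process path (new_process=False): the experiment
# script is imported as a module and its main() / build_and_train() is called
# directly.  `demo_script.py` and the literal affinity code are hypothetical;
# real codes come from the launcher's affinity-encoding utilities.
launch_experiment(
    script="demo_script.py",      # hypothetical script defining build_and_train()
    run_slot=0,
    affinity_code="8cpu_1gpu",    # placeholder affinity code
    log_dir="/tmp/demo",
    variant=dict(metric1=0),
    run_ID=0,
    args=(),
    new_process=False,            # import and run in the current process
)
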
def launch_experiment(
        script,
        run_slot,
        affinity_code,
        log_dir,
        variant,
        run_ID,
        args,
        python_executable=None,
        set_egl_device=False,
        ):
    """Launches one learning run using ``subprocess.Popen()`` to call the
    python script.  Calls the script as:
    ``python {script} {slot_affinity_code} {log_dir} {run_ID} {*args}``

    If ``affinity["all_cpus"]`` is provided, then the call is prepended with
    ``taskset -c ..`` and the listed cpus (this is the most reliable way to
    keep the run limited to these CPU cores).  Also saves the `variant` file.
    Returns the process handle, which can be monitored.

    Use ``set_egl_device=True`` to set an environment variable
    ``EGL_DEVICE_ID`` equal to the same value as the cuda index for the
    algorithm.  For example, can use with DMControl environment modified to
    look for this environment variable when selecting a GPU for headless
    rendering.
    """
    slot_affinity_code = prepend_run_slot(run_slot, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)
    call_list = list()
    if isinstance(affinity, dict) and affinity.get("all_cpus", False):
        cpus = ",".join(str(c) for c in affinity["all_cpus"])
    elif isinstance(affinity, list) and affinity[0].get("all_cpus", False):
        cpus = ",".join(str(c) for aff in affinity for c in aff["all_cpus"])
    else:
        cpus = ()
    if cpus:
        call_list += ["taskset", "-c", cpus]  # PyTorch obeys better than just psutil.
    call_list += [python_executable or "python", script, slot_affinity_code,
        log_dir, str(run_ID)]
    call_list += [str(a) for a in args]
    save_variant(variant, log_dir)
    print("\ncall string:\n", " ".join(call_list))
    if set_egl_device and affinity.get("cuda_idx", None) is not None:
        egl_device_id = str(affinity["cuda_idx"])
        egl_env = os.environ.copy()
        egl_env["EGL_DEVICE_ID"] = egl_device_id
        print(f"Assigning EGL_DEVICE_ID={egl_device_id}")
        p = subprocess.Popen(call_list, env=egl_env)
    else:
        p = subprocess.Popen(call_list)
    return p

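# A minimal launcher-side sketch for the subprocess path.  The script name,
# log_dir, and literal affinity code are hypothetical; real codes are produced
# by the launching utilities rather than written by hand.
p = launch_experiment(
    script="train_script.py",               # hypothetical experiment script
    run_slot=0,
    affinity_code="8cpu_1gpu",              # placeholder affinity code
    log_dir=os.path.abspath("./data/demo"),
    variant=dict(env=dict(game="pong")),
    run_ID=0,
    args=(),
)
p.wait()  # Block until the run finishes.
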
def main(affinity_code, log_dir, run_id, *args):
    # NOTE: Suppress all stdout output from this single experiment.
    sys.stdout = open(os.devnull, 'w')
    affinity = affinity_from_code(affinity_code)
    config = load_variant(log_dir)
    if config["solution"] == "eGreedy":
        agent_kwargs = {k: config["agent_kwargs"][k] for k in ('epsilon', )}
        agent = eGreedyAgent(**agent_kwargs)
        algo = eGreedyBandit()
    elif config["solution"] == "ucb":
        agent_kwargs = {k: config["agent_kwargs"].get(k, 1.0) for k in ('c', )}
        agent = ucbBanditAgent(**agent_kwargs)
        algo = eGreedyBandit()
    elif config["solution"] == "thompson":
        agent_kwargs = {
            k: config["agent_kwargs"].get(k, None)
            for k in ('prior', )
        }
        agent = ThompsonAgent(**agent_kwargs)
        algo = ThompsonAlgorithm()
    elif config["solution"] == "gradientBandit":
        agent_kwargs = {
            k: config["agent_kwargs"].get(k, False)
            for k in ('random_init', 'beta', 'b')
        }
        agent = GradientAgent(**agent_kwargs)
        algo = GradientBanditAlgo(**config["algo_kwargs"])
    else:
        raise NotImplementedError(
            "Solution {} has not been implemented".format(config["solution"]))
    sampler = SamplerBase(
        EnvCls=BanditEnv,
        env_kwargs=config["env_kwargs"],
        **config["sampler_kwargs"]
    )
    runner = RunnerBase(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner_kwargs"]
    )
    name = "Bandit"
    with logger_context(log_dir, run_id, name, config):
        runner.run()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = CpuParallelSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=ResetCollector,
        **config["sampler"]
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = AsyncCpuSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=DbCpuResetCollector,
        eval_env_kwargs=config["env"],
        **config["sampler"]
    )
    algo = TD3(optim_kwargs=config["optim"], **config["algo"])
    agent = Td3Agent(**config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_td3_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = CpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=EpisodicLivesWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"] + str(config["sampler"]["batch_T"])
    with logger_context(log_dir, run_ID, name, config):  # Might have to flatten config.
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    assert isinstance(affinity, list)  # One for each GPU.
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = SyncRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID):
    # (Or load from a central store of configs.)
    config = dict(
        env=dict(game="pong"),
        algo=dict(learning_rate=7e-4),
        sampler=dict(batch_B=16),
    )
    affinity = affinity_from_code(slot_affinity_code)
    variant = load_variant(log_dir)
    # Merge the variant into the global config.
    config = update_config(config, variant)
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        batch_T=5,
        # batch_B=16,  # Get from config.
        max_decorrelation_steps=400,
        **config["sampler"]
    )
    algo = A2C(**config["algo"])  # Run with defaults.
    agent = AtariFfAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    name = "a2c_" + config["env"]["game"]
    # log_dir = "example_6"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
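
# Sketch of the override flow above, assuming update_config() performs a deep
# merge of the variant into the defaults (illustrative only; see the launching
# utilities for the exact merge semantics).
defaults = dict(
    env=dict(game="pong"),
    algo=dict(learning_rate=7e-4),
    sampler=dict(batch_B=16),
)
variant = dict(algo=dict(learning_rate=1e-4))
merged = update_config(defaults, variant)
# Expected: merged["algo"]["learning_rate"] == 1e-4, merged["env"]["game"] == "pong".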