def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]
    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_gpu_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(affinity_code, log_dir, run_ID, **kwargs):
    # I prefer to put all tunable default configs into the launch file.
    # Acquire the affinity assigned by the launcher.
    # NOTE: If the affinity is a list, multiple resources (GPUs)
    # are assigned to the current experiment.
    affinity = affinity_from_code(affinity_code)
    # `config` is now a dictionary with the same structure as the
    # default configuration defined in the launch file.
    config = load_variant(log_dir)
    name = "demo_experiment"
    # This tells you which GPU is recommended for this experiment.
    gpu_idx = affinity["cuda_idx"]
    # Run the experiment under a logger context.
    with logger_context(log_dir, run_ID, name, config):
        logger.log("Start running experiment")
        for epoch_i in range(10):
            # Log a scalar with this function, for example.
            logger.record_tabular("metric1", epoch_i)
            # Dump all logs to csv (this is the exact function that
            # writes one line into the progress.csv file).
            logger.dump_tabular()

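# A script like this is launched as a subprocess with positional command-line
# arguments, so the file typically closes with a small entry point. A minimal
# sketch, assuming the launcher passes arguments in the same order as the
# signature above:
if __name__ == "__main__":
    import sys
    build_and_train(*sys.argv[1:])  # (affinity_code, log_dir, run_ID, ...)
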
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    # config["eval_env"]["id"] = config["env"]["id"]
    sampler = SerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["env"],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]
    CollectorCls = config["sampler"].pop("CollectorCls", None)
    sampler = CpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=CollectorCls or WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = DQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def main(affinity_code, log_dir, run_id, *args):
    # NOTE: Suppress all stdout output from this single experiment.
    sys.stdout = open(os.devnull, 'w')
    affinity = affinity_from_code(affinity_code)
    config = load_variant(log_dir)
    if config["solution"] == "eGreedy":
        agent_kwargs = {k: config["agent_kwargs"][k] for k in ('epsilon', )}
        agent = eGreedyAgent(**agent_kwargs)
        algo = eGreedyBandit()
    elif config["solution"] == "ucb":
        agent_kwargs = {k: config["agent_kwargs"].get(k, 1.0) for k in ('c', )}
        agent = ucbBanditAgent(**agent_kwargs)
        algo = eGreedyBandit()  # NOTE: reuses the eGreedy algorithm class.
    elif config["solution"] == "thompson":
        agent_kwargs = {
            k: config["agent_kwargs"].get(k, None)
            for k in ('prior', )
        }
        agent = ThompsonAgent(**agent_kwargs)
        algo = ThompsonAlgorithm()
    elif config["solution"] == "gradientBandit":
        agent_kwargs = {
            k: config["agent_kwargs"].get(k, False)
            for k in ('random_init', 'beta', 'b')
        }
        agent = GradientAgent(**agent_kwargs)
        algo = GradientBanditAlgo(**config["algo_kwargs"])
    else:
        raise NotImplementedError(
            "Solution {} has not been implemented".format(config["solution"]))
    sampler = SamplerBase(
        EnvCls=BanditEnv,
        env_kwargs=config["env_kwargs"],
        **config["sampler_kwargs"]
    )
    runner = RunnerBase(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner_kwargs"]
    )
    name = "Bandit"
    with logger_context(log_dir, run_id, name, config):
        runner.run()

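# For reference, a minimal sketch of the variant dictionary this script reads
# back via load_variant(log_dir). The keys mirror the lookups above; the
# values are hypothetical, for illustration only:
example_variant = dict(
    solution="ucb",  # One of: eGreedy, ucb, thompson, gradientBandit.
    agent_kwargs=dict(c=2.0),  # e.g., the exploration coefficient for ucb.
    algo_kwargs=dict(),  # Consumed only by the gradientBandit branch.
    env_kwargs=dict(),  # Forwarded to BanditEnv.
    sampler_kwargs=dict(),  # Forwarded to SamplerBase.
    runner_kwargs=dict(),  # Forwarded to RunnerBase.
)
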
def build_and_train(game="pong", run_ID=0):
    # Change these inputs to match local machine and desired parallelism.
    affinity = make_affinity(
        run_slot=0,
        n_cpu_core=8,  # Use 8 cores across all experiments.
        n_gpu=4,  # Use 4 GPUs across all experiments.
        gpu_per_run=1,
        sample_gpu_per_run=1,
        async_sample=True,
        optim_sample_share_gpu=False,
        # hyperthread_offset=24,  # If machine has 24 cores.
        n_socket=2,  # Presume CPU socket affinity to lower/upper half GPUs.
        # gpu_per_run=2,  # How many GPUs to parallelize one run across.
        # cpu_per_run=1,
    )
    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,
        env_kwargs=dict(game=game),
        batch_T=5,
        batch_B=36,
        max_decorrelation_steps=100,
        eval_env_kwargs=dict(game=game),
        eval_n_envs=2,
        eval_max_steps=int(10e3),
        eval_max_trajectories=4,
    )
    algo = DQN(replay_ratio=8, min_steps_learn=1e4, replay_size=int(1e5))
    agent = AtariDqnAgent()
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=5e7,
        log_interval_steps=1e4,
        affinity=affinity,
    )
    config = dict(game=game)
    name = "async_dqn_" + game
    log_dir = "async_dqn"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

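# Because this example builds its own affinity instead of receiving one from
# a launcher, it can be run directly from the command line. A minimal sketch
# of the entry point, with flag names assumed to mirror the function signature:
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", help="Atari game", default="pong")
    parser.add_argument("--run_ID", help="run identifier (logging)",
        type=int, default=0)
    args = parser.parse_args()
    build_and_train(game=args.game, run_ID=args.run_ID)
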
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = CpuParallelSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=ResetCollector,
        **config["sampler"]
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = AsyncCpuSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=DbCpuResetCollector,
        eval_env_kwargs=config["env"],
        **config["sampler"]
    )
    algo = TD3(optim_kwargs=config["optim"], **config["algo"])
    agent = Td3Agent(**config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_td3_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = CpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=EpisodicLivesWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"] + str(config["sampler"]["batch_T"])
    with logger_context(log_dir, run_ID, name, config):  # Might have to flatten config.
        runner.train()

def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    assert isinstance(affinity, list)  # One for each GPU.
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = SyncRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

def build_and_train(game="pong", run_ID=0):
    # It seems like the intermediate affinity-encoding step could be skipped,
    # but so far this has always been run this way.
    # Change these inputs to match local machine and desired parallelism.
    affinity = make_affinity(
        run_slot=0,
        n_cpu_core=16,  # Use 16 cores across all experiments.
        n_gpu=8,  # Use 8 GPUs across all experiments.
        hyperthread_offset=24,  # If machine has 24 cores.
        n_socket=2,  # Presume CPU socket affinity to lower/upper half GPUs.
        gpu_per_run=2,  # How many GPUs to parallelize one run across.
        # cpu_per_run=1,
    )
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,
        env_kwargs=dict(game=game),
        CollectorCls=GpuWaitResetCollector,
        batch_T=5,
        batch_B=16,
        max_decorrelation_steps=400,
    )
    algo = A2C()  # Run with defaults.
    agent = AtariFfAgent()
    runner = SyncRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    config = dict(game=game)
    name = "a2c_" + game
    log_dir = "example_7"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

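# This standalone script would close with the same argparse entry point as the
# async DQN example above (exposing --game and --run_ID). With gpu_per_run=2,
# SyncRl runs one sampler-plus-optimizer process per GPU and synchronizes
# gradients across the processes at each optimization step.
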
def build_and_train(slot_affinity_code, log_dir, run_ID):
    # (Or load from a central store of configs.)
    config = dict(
        env=dict(game="pong"),
        algo=dict(learning_rate=7e-4),
        sampler=dict(batch_B=16),
    )
    affinity = affinity_from_code(slot_affinity_code)
    variant = load_variant(log_dir)
    # global config
    config = update_config(config, variant)
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        batch_T=5,
        # batch_B=16,  # Get from config.
        max_decorrelation_steps=400,
        **config["sampler"]
    )
    algo = A2C(**config["algo"])  # Run with defaults.
    agent = AtariFfAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    name = "a2c_" + config["env"]["game"]
    # log_dir = "example_6"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()

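# For completeness, a minimal sketch of the launcher side that produces the
# slot_affinity_code, log_dir, and variant consumed above. Module paths and
# values follow rlpyt's launching utilities, but treat this as an assumed
# illustration rather than a verbatim launch file:
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=2,
    n_socket=1,
)
# One variant level sweeping the learning rate; the keys index into the
# config dictionary defined inside build_and_train above.
values = list(zip([7e-4, 1e-3]))
dir_names = ["lr_{}".format(*v) for v in values]
variant_levels = [VariantLevel([("algo", "learning_rate")], values, dir_names)]
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script="path/to/this_training_script.py",  # Hypothetical path.
    affinity_code=affinity_code,
    experiment_title="example_6",
    runs_per_setting=1,
    variants=variants,
    log_dirs=log_dirs,
)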