def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Run async-GPU R2D1 on an Atari game under a launcher-assigned affinity.

    Loads the base config by key, overlays the variant stored in ``log_dir``,
    mirrors the training game into the eval env, then builds the
    sampler/algo/agent/runner stack and trains inside a logger context.
    """
    run_affinity = affinity_from_code(slot_affinity_code)
    cfg = update_config(configs[config_key], load_variant(log_dir))
    # Evaluate on the same game that is being trained.
    cfg["eval_env"]["game"] = cfg["env"]["game"]
    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=cfg["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=cfg["eval_env"],
        **cfg["sampler"]
    )
    algo = R2D1(optim_kwargs=cfg["optim"], **cfg["algo"])
    agent = AtariR2d1Agent(model_kwargs=cfg["model"], **cfg["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=run_affinity,
        **cfg["runner"]
    )
    run_name = "async_gpu_" + cfg["env"]["game"]
    with logger_context(log_dir, run_ID, run_name, cfg):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Train recurrent PPO on an Atari game with a parallel GPU sampler.

    Config is looked up by key and overlaid with the variant saved in
    ``log_dir``; training runs under a logger context named after the game.
    """
    run_affinity = affinity_from_code(slot_affinity_code)
    cfg = update_config(configs[config_key], load_variant(log_dir))
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=cfg["env"],
        CollectorCls=WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **cfg["sampler"]
    )
    algo = PPO(optim_kwargs=cfg["optim"], **cfg["algo"])
    agent = AtariLstmAgent(model_kwargs=cfg["model"], **cfg["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=run_affinity,
        **cfg["runner"]
    )
    run_name = cfg["env"]["game"]
    with logger_context(log_dir, run_ID, run_name, cfg):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Train SAC on a gym env with a serial sampler and periodic evaluation.

    The same env kwargs are used for training and evaluation rollouts.
    """
    run_affinity = affinity_from_code(slot_affinity_code)
    cfg = update_config(configs[config_key], load_variant(log_dir))
    # config["eval_env"]["id"] = config["env"]["id"]
    sampler = SerialSampler(
        EnvCls=gym_make,
        env_kwargs=cfg["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=cfg["env"],  # eval uses the training env kwargs
        **cfg["sampler"]
    )
    algo = SAC(optim_kwargs=cfg["optim"], **cfg["algo"])
    agent = SacAgent(**cfg["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=run_affinity,
        **cfg["runner"]
    )
    run_name = "sac_" + cfg["env"]["id"]
    with logger_context(log_dir, run_ID, run_name, cfg):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Train DQN on an Atari game with a CPU-parallel sampler and eval.

    An optional ``CollectorCls`` entry in the sampler config overrides the
    default ``WaitResetCollector``; it is popped so the remaining sampler
    kwargs can be splatted cleanly.
    """
    run_affinity = affinity_from_code(slot_affinity_code)
    cfg = update_config(configs[config_key], load_variant(log_dir))
    # Evaluate on the same game that is being trained.
    cfg["eval_env"]["game"] = cfg["env"]["game"]
    collector_cls = cfg["sampler"].pop("CollectorCls", None)
    sampler = CpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=cfg["env"],
        CollectorCls=collector_cls or WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=cfg["eval_env"],
        **cfg["sampler"]
    )
    algo = DQN(optim_kwargs=cfg["optim"], **cfg["algo"])
    agent = AtariDqnAgent(model_kwargs=cfg["model"], **cfg["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=run_affinity,
        **cfg["runner"]
    )
    run_name = cfg["env"]["game"]
    with logger_context(log_dir, run_ID, run_name, cfg):
        runner.train()
def main(args):
    """Demo launcher: build a two-level variant grid and dispatch runs."""
    experiment_title = "demo_experiment"
    affinity_code = quick_affinity_code(n_parallel=8)
    # NOTE: encode_affinity gives finer control over how each experiment is
    # distributed across compute nodes.
    levels = []
    # First level: each key is the tuple-path into the config dict.
    values = [["one"], ["two"]]
    dir_names = ["{}".format(*v) for v in values]
    keys = [("optionA", "choiceB")]
    levels.append(VariantLevel(keys, values, dir_names))
    # NOTE(review): int(1e-3) evaluates to 0 — if a small float (1e-3) was
    # intended for the "good" setting, this is a bug; confirm intent.
    values = [["good", int(1e-3)], ["better", int(1e3)]]
    dir_names = ["{}".format(*v) for v in values]
    keys = [("optionA", "choiceB"), ("optionB", )]
    levels.append(VariantLevel(keys, values, dir_names))
    # Cross all levels into concrete variants, each with its own log dir.
    variants, log_dirs = make_variants(*levels)
    for i, variant in enumerate(variants):
        variants[i] = update_config(default_config, variant)
    run_experiments(
        script="demo.py",
        affinity_code=affinity_code,
        experiment_title=experiment_title,
        runs_per_setting=1,  # how many repeats of each variant
        variants=variants,
        log_dirs=log_dirs,
        debug_mode=args.debug,  # if > 0, run one variant in this process
    )
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Train A2C on a gym (Mujoco) env with a CPU-parallel sampler."""
    run_affinity = affinity_from_code(slot_affinity_code)
    cfg = update_config(configs[config_key], load_variant(log_dir))
    sampler = CpuParallelSampler(
        EnvCls=gym_make,
        env_kwargs=cfg["env"],
        CollectorCls=ResetCollector,
        **cfg["sampler"]
    )
    algo = A2C(optim_kwargs=cfg["optim"], **cfg["algo"])
    agent = MujocoFfAgent(model_kwargs=cfg["model"], **cfg["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=run_affinity,
        **cfg["runner"]
    )
    run_name = cfg["env"]["id"]
    with logger_context(log_dir, run_ID, run_name, cfg):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Train TD3 asynchronously on a gym env with CPU sampling and eval.

    Evaluation rollouts reuse the training env kwargs.
    """
    run_affinity = affinity_from_code(slot_affinity_code)
    cfg = update_config(configs[config_key], load_variant(log_dir))
    sampler = AsyncCpuSampler(
        EnvCls=gym_make,
        env_kwargs=cfg["env"],
        CollectorCls=DbCpuResetCollector,
        eval_env_kwargs=cfg["env"],  # eval uses the training env kwargs
        **cfg["sampler"]
    )
    algo = TD3(optim_kwargs=cfg["optim"], **cfg["algo"])
    agent = Td3Agent(**cfg["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=run_affinity,
        **cfg["runner"]
    )
    run_name = "async_td3_" + cfg["env"]["id"]
    with logger_context(log_dir, run_ID, run_name, cfg):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Train recurrent A2C on an Atari game with episodic-lives collection.

    The run name appends the sampler's ``batch_T`` to the game name so
    horizon sweeps land in distinct log entries.
    """
    run_affinity = affinity_from_code(slot_affinity_code)
    cfg = update_config(configs[config_key], load_variant(log_dir))
    sampler = CpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=cfg["env"],
        CollectorCls=EpisodicLivesWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **cfg["sampler"]
    )
    algo = A2C(optim_kwargs=cfg["optim"], **cfg["algo"])
    agent = AtariLstmAgent(model_kwargs=cfg["model"], **cfg["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=run_affinity,
        **cfg["runner"]
    )
    run_name = cfg["env"]["game"] + str(cfg["sampler"]["batch_T"])
    # Might have to flatten config.
    with logger_context(log_dir, run_ID, run_name, cfg):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Train A2C on an Atari game with synchronous multi-GPU sampling.

    The decoded affinity must be a list, one entry per GPU, as required
    by the ``SyncRl`` runner.
    """
    run_affinity = affinity_from_code(slot_affinity_code)
    assert isinstance(run_affinity, list)  # One for each GPU.
    cfg = update_config(configs[config_key], load_variant(log_dir))
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=cfg["env"],
        CollectorCls=GpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **cfg["sampler"]
    )
    algo = A2C(optim_kwargs=cfg["optim"], **cfg["algo"])
    agent = AtariFfAgent(model_kwargs=cfg["model"], **cfg["agent"])
    runner = SyncRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=run_affinity,
        **cfg["runner"]
    )
    run_name = cfg["env"]["game"]
    with logger_context(log_dir, run_ID, run_name, cfg):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID):
    """Example entry point: A2C on pong with a small inline config.

    The base config is defined here (it could instead be loaded from a
    central store of configs) and overlaid with the variant saved in
    ``log_dir`` by the launcher.
    """
    cfg = dict(
        env=dict(game="pong"),
        algo=dict(learning_rate=7e-4),
        sampler=dict(batch_B=16),
    )
    run_affinity = affinity_from_code(slot_affinity_code)
    cfg = update_config(cfg, load_variant(log_dir))  # overlay global config
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,
        env_kwargs=cfg["env"],
        CollectorCls=GpuWaitResetCollector,
        batch_T=5,
        # batch_B comes in via cfg["sampler"].
        max_decorrelation_steps=400,
        **cfg["sampler"]
    )
    algo = A2C(**cfg["algo"])  # remaining algo kwargs left at defaults
    agent = AtariFfAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=run_affinity,
    )
    run_name = "a2c_" + cfg["env"]["game"]
    with logger_context(log_dir, run_ID, run_name, cfg):
        runner.train()
def main(args):
    """Build bandit-experiment variants and launch them across the machine.

    ``variant_choice`` picks one experiment family (epsilon-greedy, UCB,
    Thompson sampling, or gradient bandit); its variant levels are crossed
    into concrete configs and dispatched via ``run_experiments``.
    """
    # Manually declare the resources for the experiment. Alternatively:
    # affinity_code = quick_affinity_code(n_parallel=None, use_gpu=True)
    # (automatic, but results may vary).
    affinity_code = encode_affinity(
        n_cpu_core=16,
        n_gpu=1,
        contexts_per_gpu=16,
        # hyperthread_offset=8,  # number of CPU cores, if auto-detect fails
        # n_socket=1,  # can force socket count (or force to 1) if needed
        cpu_per_run=1,
    )
    default_config = make_default_config()
    variant_levels = []
    variant_choice = 3  # selects which experiment family to run
    if variant_choice == 0:
        # ----- epsilon-greedy sweep ------------------------------------
        values = [["eGreedy", 0.1], ["eGreedy", 0.5], ["eGreedy", 0.9]]
        dir_names = ["eGreedy-e{}".format(v[1]) for v in values]
        # Each key is the tuple-path into the config dict.
        keys = [("solution", ), ("agent_kwargs", "epsilon")]
        variant_levels.append(VariantLevel(keys, values, dir_names))
    elif variant_choice == 1:
        # ----- UCB exploration-constant sweep --------------------------
        values = [["ucb", 1], ["ucb", 5], ["ucb", 10]]
        dir_names = ["{}-c{}".format(*v) for v in values]
        keys = [("solution", ), ("agent_kwargs", "c")]
        variant_levels.append(VariantLevel(keys, values, dir_names))
    elif variant_choice == 2:
        # ----- Thompson sampling with different priors -----------------
        values = [
            ["thompson", [[1, 1], [1, 1], [1, 1]]],
            ["thompson", [[601, 401], [401, 601], [2, 3]]],
        ]
        dir_names = ["{}-prior{}".format(v[0], v[1][0][0]) for v in values]
        keys = [("solution", ), ("agent_kwargs", "prior")]
        variant_levels.append(VariantLevel(keys, values, dir_names))
    elif variant_choice == 3:
        # ----- gradient bandit: cross step-size beta with baseline b ---
        values = [["gradientBandit"]]
        dir_names = ["{}".format(*v) for v in values]
        keys = [("solution", )]
        variant_levels.append(VariantLevel(keys, values, dir_names))
        values = [[0.2], [1.0], [2.0], [5.0]]
        dir_names = ["beta{}".format(*v) for v in values]
        keys = [("agent_kwargs", "beta")]
        variant_levels.append(VariantLevel(keys, values, dir_names))
        values = [[0.0], [0.8], [5.0], [20.0]]
        dir_names = ["b{}".format(*v) for v in values]
        keys = [("agent_kwargs", "b")]
        variant_levels.append(VariantLevel(keys, values, dir_names))
    else:
        raise ValueError("Wrong experiment choice {}".format(variant_choice))
    # Cross all levels into concrete variants, each with its own log dir.
    variants, log_dirs = make_variants(*variant_levels)
    for i, variant in enumerate(variants):
        variants[i] = update_config(default_config, variant)
    run_experiments(
        script="girl/experiments/bandit/bandit.py",
        affinity_code=affinity_code,
        experiment_title="Bandit",
        runs_per_setting=200,
        variants=variants,
        log_dirs=log_dirs,  # directories under "${experiment title}"
        debug_mode=args.debug,  # if > 0, run one variant in this process
    )