from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# Launch settings for the TD3 MuJoCo training script named below.
default_config_key = "td3_1M_serial"
script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_td3_serial.py"
experiment_title = "td3_mujoco_v3"
runs_per_setting = 2

# Hardware description handed to the launcher.
affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=4,
    hyperthread_offset=20,
    n_socket=1,
    cpu_per_run=1,
    contexts_per_gpu=1,
)

# Two independent stacks of variant levels, one per training length.
variant_levels_1M = []
variant_levels_3M = []

# 3M-step level: overrides config["runner"]["n_steps"].
n_steps = [3e6]
values = [(steps,) for steps in n_steps]
dir_names = ["3M"]
keys = [("runner", "n_steps")]
variant_levels_3M.append(VariantLevel(keys, values, dir_names))

# 1M-step level: same key, shorter run.
n_steps = [1e6]
values = [(steps,) for steps in n_steps]
dir_names = ["1M"]
keys = [("runner", "n_steps")]
variant_levels_1M.append(VariantLevel(keys, values, dir_names))
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# Launch settings for the LSTM A2C Atari training script named below.
script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_gpu.py"
# default_config_key = "0"
experiment_title = "lstm_4frame_test"
runs_per_setting = 1

# Hardware description handed to the launcher.
affinity_code = encode_affinity(
    n_cpu_core=6,
    n_gpu=2,
    hyperthread_offset=8,
    n_socket=1,
    # cpu_per_run=2,
)

variant_levels = []

# Level 1: learning rate paired element-wise with the entropy coefficient.
learning_rate = [1e-4] * 4
entropy_loss_coeff = [0.01, 0.4, 0.04, 0.1]
values = [(lr, ent) for lr, ent in zip(learning_rate, entropy_loss_coeff)]
dir_names = ["test_{}lr_{}ent".format(lr, ent) for lr, ent in values]
keys = [("algo", "learning_rate"), ("algo", "entropy_loss_coeff")]
variant_levels.append(VariantLevel(keys, values, dir_names))

# Level 2: which game to run; the directory is named after the game.
games = ["seaquest"]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

runs_per_setting = 5

# Hardware description handed to the launcher.
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=8,
    contexts_per_gpu=2,
    hyperthread_offset=24,
    n_socket=2,
    # cpu_per_run=2,
)

variant_levels = []

# Single level: one variant per environment id, logged under "env_<id>".
env_ids = ["Hopper-v3", "HalfCheetah-v3", "Walker2d-v3", "Ant-v3"]
values = [(env_id,) for env_id in env_ids]
dir_names = ["env_{}".format(env_id) for (env_id,) in values]
keys = [("env", "id")]
variant_levels.append(VariantLevel(keys, values, dir_names))

# Expand the levels into concrete variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)

# default_config_key = "ppo_1M_serial"
# script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ppo_serial.py"
# experiment_title = "ppo_mujoco"
# run_experiments(
#     script=script,
#     affinity_code=affinity_code,
#     experiment_title=experiment_title,
from exptools.launching.affinity import encode_affinity from exptools.launching.exp_launcher import run_experiments from exptools.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py" affinity_code = encode_affinity( n_cpu_core=16, n_gpu=4, hyperthread_offset=20, n_socket=2, # cpu_per_run=2, ) runs_per_setting = 2 experiment_title = "atari_dqn_basic_cpusamp_1of2" variant_levels = list() games = ["pong", "seaquest"] #, "qbert", "chopper_command"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "dqn" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title,
from exptools.launching.affinity import encode_affinity from exptools.launching.exp_launcher import run_experiments from exptools.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu_noeval.py" affinity_code = encode_affinity( n_cpu_core=16, n_gpu=8, hyperthread_offset=24, n_socket=2, ) runs_per_setting = 2 experiment_title = "atari_dqn_no_eval" variant_levels = list() games = ["pong", "seaquest", "qbert", "chopper_command"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "no_eval" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting,
from exptools.launching.affinity import encode_affinity from exptools.launching.exp_launcher import run_experiments from exptools.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" affinity_code = encode_affinity( n_cpu_core=40, n_gpu=4, async_sample=True, gpu_per_run=1, sample_gpu_per_run=2, # hyperthread_offset=24, # optim_sample_share_gpu=True, n_socket=1, # Force this. alternating=True, ) runs_per_setting = 2 experiment_title = "atari_r2d1_async_alt" variant_levels = list() games = ["pong", "seaquest", "qbert", "chopper_command"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "async_alt_dgx" run_experiments(
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# Launch settings for the PPO MuJoCo training script named below.
script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_cpu.py"
default_config_key = "ppo_1M"
experiment_title = "first_test_mujoco"
runs_per_setting = 1

# Hardware description handed to the launcher (no GPUs requested).
affinity_code = encode_affinity(
    n_cpu_core=2,
    n_gpu=0,
    hyperthread_offset=2,
    n_socket=1,
    cpu_per_run=2,
)

variant_levels = []

# Level 1: environment id, logged under "env_<id>".
env_ids = ["Hopper-v2"]  # , "Swimmer-v3"]
values = [(env_id,) for env_id in env_ids]
dir_names = ["env_{}".format(env_id) for (env_id,) in values]
keys = [("env", "id")]
variant_levels.append(VariantLevel(keys, values, dir_names))

# Level 2: sets config["model"]["normalize_observation"] to True.
norm_obs = [True]
values = [(flag,) for flag in norm_obs]
dir_names = ["TrueNormObs"]
keys = [("model", "normalize_observation")]
variant_levels.append(VariantLevel(keys, values, dir_names))

# Expand the levels into concrete variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# Launch settings for the asynchronous DQN Atari training script named below.
script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_async_gpu.py"
experiment_title = "atari_dqn_async_gpu"
runs_per_setting = 2

# Hardware description handed to the launcher (async sampling enabled).
affinity_code = encode_affinity(
    n_cpu_core=24,
    n_gpu=8,
    async_sample=True,
    sample_gpu_per_run=2,
    gpu_per_run=2,
    # hyperthread_offset=24,
    # optim_sample_share_gpu=True,
    # n_socket=2,
)

variant_levels = []

# Level 1: which game to run; the directory is named after the game.
games = ["pong"]  # , "seaquest", "qbert", "chopper_command"]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))

# Level 2: toggles config["algo"]["prioritized_replay"].
priorities = [False, True]
values = [(flag,) for flag in priorities]
dir_names = ["pri_{}".format(flag) for (flag,) in values]
keys = [("algo", "prioritized_replay")]
variant_levels.append(VariantLevel(keys, values, dir_names))
from exptools.launching.affinity import encode_affinity from exptools.launching.exp_launcher import run_experiments from exptools.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_async_cpu.py" affinity_code = encode_affinity( n_cpu_core=24, n_gpu=8, async_sample=True, sample_gpu_per_run=0, gpu_per_run=1, hyperthread_offset=24, # n_socket=2, ) runs_per_setting = 2 experiment_title = "atari_dqn_async_cpu" variant_levels = list() games = ["pong", "seaquest", "qbert", "chopper_command"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "cpu" run_experiments( script=script,
from exptools.launching.affinity import encode_affinity from exptools.launching.exp_launcher import run_experiments from exptools.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" affinity_code = encode_affinity( n_cpu_core=12, n_gpu=1, hyperthread_offset=20, n_socket=1, ) runs_per_setting = 2 experiment_title = "atari_r2d1_long_4tr" variant_levels = list() games = ["gravitar"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "r2d1_long_4tr" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting,
from exptools.launching.affinity import encode_affinity from exptools.launching.exp_launcher import run_experiments from exptools.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py" affinity_code = encode_affinity( n_cpu_core=24, n_gpu=6, # hyperthread_offset=24, n_socket=2, # cpu_per_run=2, ) runs_per_setting = 2 experiment_title = "atari_dqn_basic_cpu" variant_levels = list() games = ["pong", "qbert", "chopper_command"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "dqn" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title,
from exptools.launching.affinity import encode_affinity from exptools.launching.exp_launcher import run_experiments from exptools.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_async_serial.py" affinity_code = encode_affinity( n_cpu_core=16, n_gpu=4, # contexts_per_gpu=2, async_sample=True, # hyperthread_offset=2, # n_socket=1, # cpu_per_run=1, ) runs_per_setting = 2 default_config_key = "async_serial" experiment_title = "ddpg_mujoco_async" variant_levels = list() env_ids = ["Hopper-v3", "HalfCheetah-v3"] # , "Swimmer-v3"] values = list(zip(env_ids)) dir_names = ["env_{}".format(*v) for v in values] keys = [("env", "id")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title,
def main(args):
    """Assemble the bandit experiment variants and hand them to the launcher.

    The experiment family is selected by the hard-coded ``variant_choice``
    (0: eGreedy, 1: UCB, 2: Thompson sampling, 3: gradient bandit).

    Args:
        args: Object whose ``debug`` attribute is forwarded to
            ``run_experiments`` as ``debug_mode``.

    Raises:
        ValueError: If ``variant_choice`` is not one of 0, 1, 2 or 3.
    """
    # Either manually set the resources for the experiment:
    affinity_code = encode_affinity(
        n_cpu_core=16,
        n_gpu=1,
        contexts_per_gpu=16,
        # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
        # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
        cpu_per_run=1,
    )
    # Or try an automatic one, but results may vary:
    # affinity_code = quick_affinity_code(n_parallel=None, use_gpu=True)

    default_config = make_default_config()

    # Start building variants.
    variant_levels = []
    variant_choice = 3

    # Reject unknown choices up front instead of at the end of the chain.
    if variant_choice not in (0, 1, 2, 3):
        raise ValueError("Wrong experiment choice {}".format(variant_choice))

    # Each spec is (keys, values, dir_names); every entry of ``keys`` is the
    # path into the config that the matching column of ``values`` overrides.
    level_specs = []

    if variant_choice == 0:
        # Experiments for eGreedy.
        settings = [["eGreedy", eps] for eps in (0.1, 0.5, 0.9)]
        folders = ["eGreedy-e{}".format(s[1]) for s in settings]
        paths = [("solution", ), ("agent_kwargs", "epsilon")]
        level_specs.append((paths, settings, folders))
    elif variant_choice == 1:
        # Experiments for UCB.
        settings = [["ucb", c] for c in (1, 5, 10)]
        folders = ["{}-c{}".format(*s) for s in settings]
        paths = [("solution", ), ("agent_kwargs", "c")]
        level_specs.append((paths, settings, folders))
    elif variant_choice == 2:
        # Experiments for Thompson sampling.
        settings = [
            ["thompson", [[1, 1], [1, 1], [1, 1]]],
            ["thompson", [[601, 401], [401, 601], [2, 3]]],
        ]
        # Directory is tagged with the first number of the first prior pair.
        folders = ["{}-prior{}".format(s[0], s[1][0][0]) for s in settings]
        paths = [("solution", ), ("agent_kwargs", "prior")]
        level_specs.append((paths, settings, folders))
    else:
        # Experiments for gradient bandit: three stacked levels.
        settings = [["gradientBandit"]]
        folders = ["{}".format(*s) for s in settings]
        level_specs.append(([("solution", )], settings, folders))

        settings = [[beta] for beta in (0.2, 1.0, 2.0, 5.0)]
        folders = ["beta{}".format(*s) for s in settings]
        level_specs.append(([("agent_kwargs", "beta")], settings, folders))

        settings = [[b] for b in (0.0, 0.8, 5.0, 20.0)]
        folders = ["b{}".format(*s) for s in settings]
        level_specs.append(([("agent_kwargs", "b")], settings, folders))

    # Done setting hyper-parameters; materialise the levels in order.
    for paths, settings, folders in level_specs:
        variant_levels.append(VariantLevel(paths, settings, folders))

    # Get all variants and their own log directory.
    variants, log_dirs = make_variants(*variant_levels)
    # Replace each variant in place with the result of update_config.
    for idx, overrides in enumerate(variants):
        variants[idx] = update_config(default_config, overrides)

    run_experiments(
        script="girl/experiments/bandit/bandit.py",
        affinity_code=affinity_code,
        experiment_title="Bandit",
        runs_per_setting=200,
        variants=variants,
        log_dirs=log_dirs,  # the directory under "${experiment title}"
        debug_mode=args.debug,  # if greater than 0, the launcher will run one variant in this process
    )
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# Launch settings for the LSTM A2C Atari training script named below.
script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_cpu.py"
# default_config_key = "0"
experiment_title = "lstm_test_gpu"
runs_per_setting = 2

# Hardware description handed to the launcher.
affinity_code = encode_affinity(  # Let it be kwargs?
    n_cpu_core=6,
    n_gpu=2,
    hyperthread_offset=8,
    n_socket=1,
    # cpu_per_run=4,
)

variant_levels = []

# learning_rate = [7e-4] * 4
# entropy_loss_coeff = [0.01, 0.02, 0.04, 0.08]
# values = list(zip(learning_rate, entropy_loss_coeff))
# dir_names = ["test_{}lr_{}ent".format(*v) for v in values]
# keys = [("algo", "learning_rate"), ("algo", "entropy_loss_coeff")]
# variant_levels.append(VariantLevel(keys, values, dir_names))

# Active level: learning rate paired element-wise with the sampler's batch_T.
learning_rate = [7e-4] * 2
batch_T = [5, 20]
values = [(lr, horizon) for lr, horizon in zip(learning_rate, batch_T)]
dir_names = ["test_{}lr_{}T".format(lr, horizon) for lr, horizon in values]
keys = [("algo", "learning_rate"), ("sampler", "batch_T")]
variant_levels.append(VariantLevel(keys, values, dir_names))
with many different inputs to encode, and see what comes out. The results will be logged with a folder structure according to the variant levels constructed here. """ from exptools.launching.affinity import encode_affinity, quick_affinity_code from exptools.launching.exp_launcher import run_experiments from exptools.launching.variant import make_variants, VariantLevel # Either manually set the resources for the experiment: affinity_code = encode_affinity( n_cpu_core=4, n_gpu=2, # hyperthread_offset=8, # if auto-detect doesn't work, number of CPU cores # n_socket=1, # if auto-detect doesn't work, can force (or force to 1) cpu_per_run=1, set_affinity=True, # it can help to restrict workers to individual CPUs ) # Or try an automatic one, but results may vary: # affinity_code = quick_affinity_code(n_parallel=None, use_gpu=True) runs_per_setting = 2 experiment_title = "example_6" variant_levels = list() # Within a variant level, list each combination explicitly. learning_rate = [7e-4, 1e-3] batch_B = [16, 32] values = list(zip(learning_rate, batch_B)) dir_names = ["example6_{}lr_{}B".format(*v) for v in values]
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# Launch settings for the serial PPO MuJoCo training script named below.
script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ppo_serial.py"
default_config_key = "ppo_1M_serial"
experiment_title = "ppo_mujoco_v3_serial_hc_tl"
runs_per_setting = 4

# Hardware description handed to the launcher.
affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=4,
    # contexts_per_gpu=1,
    # hyperthread_offset=24,
    # n_socket=2,
    # cpu_per_run=2,
)

# variant_levels_1M = list()
variant_levels_3M = []

# n_steps = [1e6]
# values = list(zip(n_steps))
# dir_names = ["1M"]
# keys = [("runner", "n_steps")]
# variant_levels_1M.append(VariantLevel(keys, values, dir_names))

# Level: sets config["algo"]["bootstrap_timelimit"] to True.
bootstrap_tls = [True]
values = [(flag,) for flag in bootstrap_tls]
dir_names = ["bootstrap_timelimit"]
keys = [("algo", "bootstrap_timelimit")]
variant_levels_3M.append(VariantLevel(keys, values, dir_names))