# Launch script for the feed-forward A2C Atari experiment: builds one variant
# per game and hands everything to `run_experiments`.

# Scalar launch settings.
script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu.py"
experiment_title = "atari_ff_a2c_basic"
runs_per_setting = 2
default_config_key = "0"

# Hardware description passed through to the launcher.
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=4,
    hyperthread_offset=20,
    n_socket=2,
    # cpu_per_run=2,
)

# One variant level: each game overrides config["env"]["game"] and logs into
# a directory named after the game.
games = ["pong", "seaquest", "qbert", "chopper_command"]
keys = [("env", "game")]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]

variant_levels = [VariantLevel(keys, values, dir_names)]
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_config_key,),
)
"explore_goal_locations_large", ] entropies = [ # 0.0003, # I actually don't know if this is the right entropy... # 0.0003, # 0.001, 0.01, ] values = list(zip(levels, entropies)) dir_names = levels keys = [("env", "level"), ("algo", "entropy_loss_coeff")] variant_levels_1.append(VariantLevel(keys, values, dir_names)) # variant_levels_2.append(VariantLevel(keys, values, dir_names)) # variant_levels_3.append(VariantLevel(keys, values, dir_names)) variants_1, log_dirs_1 = make_variants(*variant_levels_1) # variants_2, log_dirs_2 = make_variants(*variant_levels_2) # variants_3, log_dirs_3 = make_variants(*variant_levels_3) variants = variants_1 # + variants_2 + variants_3 log_dirs = log_dirs_1 # + log_dirs_2 + log_dirs_3 num_variants = len(variants) variants_per = num_variants // num_computers my_start = my_computer * variants_per if my_computer == num_computers - 1: my_end = num_variants else: my_end = (my_computer + 1) * variants_per my_variants = variants[my_start:my_end]
] entropies = [ 0.0002, # 0.0003, # I actually don't know if this is the right entropy... # 0.0003, # 0.001, # 0.01, ] values = list(zip(levels, entropies)) dir_names = levels keys = [("env", "level"), ("algo", "entropy_loss_coeff")] variant_levels_1.append(VariantLevel(keys, values, dir_names)) variant_levels_2.append(VariantLevel(keys, values, dir_names)) variant_levels_3.append(VariantLevel(keys, values, dir_names)) variants_1, log_dirs_1 = make_variants(*variant_levels_1) variants_2, log_dirs_2 = make_variants(*variant_levels_2) variants_3, log_dirs_3 = make_variants(*variant_levels_3) variants = variants_1 + variants_2 + variants_3 log_dirs = log_dirs_1 + log_dirs_2 + log_dirs_3 num_variants = len(variants) variants_per = num_variants // num_computers my_start = my_computer * variants_per if my_computer == num_computers - 1: my_end = num_variants else: my_end = (my_computer + 1) * variants_per my_variants = variants[my_start:my_end]
# Disabled: environments for the "1M" variant group.
# env_ids = ["Hopper-v3", "Walker2d-v3"]
# values = list(zip(env_ids))
# dir_names = ["{}".format(*v) for v in values]
# keys = [("env", "id")]
# variant_levels_1M.append(VariantLevel(keys, values, dir_names))

# Active: environments for the "3M" variant group. Each id overrides
# config["env"]["id"] and names its own log directory.
# env_ids = ["Ant-v3", "HalfCheetah-v3"]
env_ids = ["HalfCheetah-v3"]
keys = [("env", "id")]
values = [(env_id,) for env_id in env_ids]
dir_names = ["{}".format(env_id) for (env_id,) in values]
variant_levels_3M.append(VariantLevel(keys, values, dir_names))

# variants_1M, log_dirs_1M = make_variants(*variant_levels_1M)
variants_3M, log_dirs_3M = make_variants(*variant_levels_3M)

# Only the "3M" group is launched while the "1M" group is disabled.
variants = variants_3M  # + variants_1M
log_dirs = log_dirs_3M  # + log_dirs_1M

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_config_key,),
)
# variant_levels_3M.append(VariantLevel(keys, values, dir_names)) env_ids = ["Walker2d-v2" ] #, "HalfCheetah-v2", "Humanoid-v2", "Swimmer-v2", "Hopper-v2"] values = list(zip(env_ids)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "id")] variant_levels_1M.append(VariantLevel(keys, values, dir_names)) # env_ids = ["Ant-v3", "HalfCheetah-v3"] # values = list(zip(env_ids)) # dir_names = ["{}".format(*v) for v in values] # keys = [("env", "id")] # variant_levels_3M.append(VariantLevel(keys, values, dir_names)) variants_1M, log_dirs_1M = make_variants(*variant_levels_1M) #variants_3M, log_dirs_3M = make_variants(*variant_levels_3M) variants = variants_1M #+ variants_3M log_dirs = log_dirs_1M #+ log_dirs_3M default_config_key = "ppo_1M_serial" script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_serial.py" experiment_title = "ppo_no_clip" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting, variants=variants, log_dirs=log_dirs,