Ejemplo n.º 1
0
def main(args):
    """Launch the demo experiment over a two-level grid of config variants.

    Two variant levels are declared and handed to ``make_variants``; every
    resulting variant is merged with ``default_config`` through
    ``update_config`` and the whole batch is passed to ``run_experiments``.

    Args:
        args: Object exposing a ``debug`` attribute, forwarded to the
            launcher as ``debug_mode``.
    """
    # Resources: ask for an affinity suited to 8 parallel runs.
    # NOTE: encode_affinity can be used instead to specify how each
    # experiment is distributed over your computing nodes.
    affinity_code = quick_affinity_code(n_parallel=8)
    experiment_title = "demo_experiment"

    # First level: a single config key with two candidate values.
    # Each entry of the keys list is the string path to a config entry.
    first_keys = [("optionA", "choiceB")]
    first_values = [["one"], ["two"]]
    first_dirs = ["{}".format(row[0]) for row in first_values]

    # Second level: two config keys varied together, one row per setting.
    # Only the first value of each row is used for the directory name.
    # NOTE(review): int(1e-3) truncates to 0 -- confirm this is intended.
    # NOTE(review): this level reuses the ("optionA", "choiceB") key of the
    # first level -- confirm the overlap is intended.
    second_keys = [("optionA", "choiceB"), ("optionB", )]
    second_values = [
        ["good", int(1e-3)],
        ["better", int(1e3)],
    ]
    second_dirs = ["{}".format(row[0]) for row in second_values]

    variant_levels = [
        VariantLevel(first_keys, first_values, first_dirs),
        VariantLevel(second_keys, second_values, second_dirs),
    ]

    # Get all variants and their own log directory, then replace each variant
    # in place with the result of merging it with the default config.
    variants, log_dirs = make_variants(*variant_levels)
    variants[:] = [
        update_config(default_config, variant) for variant in variants
    ]

    run_experiments(
        script="demo.py",
        affinity_code=affinity_code,
        experiment_title=experiment_title,
        runs_per_setting=1,  # how many times each setting is repeated
        variants=variants,
        log_dirs=log_dirs,
        # If greater than 0, the launcher runs one variant in this process.
        debug_mode=args.debug,
    )
Ejemplo n.º 2
0
# env_ids = ["Hopper-v2"]  # , "Swimmer-v3"]
# env_ids = ["Ant-v3", "HalfCheetah-v3"]

# Environments registered on the 1M variant levels.  Only one config key,
# ("env", "id"), is varied, so every row of values is a 1-tuple.
env_ids = ["Walker2d-v3", "Hopper-v3"]
values = [(env_id, ) for env_id in env_ids]
dir_names = ["env_" + env_id for env_id in env_ids]
keys = [("env", "id")]
variant_levels_1M.append(VariantLevel(keys, values, dir_names))

# Environments registered on the 3M variant levels (same key, same layout).
env_ids = ["Ant-v3", "HalfCheetah-v3"]
values = [(env_id, ) for env_id in env_ids]
dir_names = ["env_" + env_id for env_id in env_ids]
keys = [("env", "id")]
variant_levels_3M.append(VariantLevel(keys, values, dir_names))

# Expand each group on its own, then concatenate with the 3M group first so
# that variants and log_dirs stay index-aligned.
variants_3M, log_dirs_3M = make_variants(*variant_levels_3M)
variants_1M, log_dirs_1M = make_variants(*variant_levels_1M)
variants = variants_3M + variants_1M
log_dirs = log_dirs_3M + log_dirs_1M

# Hand the combined batch to the launcher.
run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_config_key, ),
)
Ejemplo n.º 3
0
    n_gpu=8,
    contexts_per_gpu=2,
    hyperthread_offset=24,
    n_socket=2,
    # cpu_per_run=2,
)
runs_per_setting = 5

# One variant per environment id.  Only the ("env", "id") config key is
# varied, so every row of values is a 1-tuple and each variant gets its own
# "env_<id>" directory name.
env_ids = ["Hopper-v3", "HalfCheetah-v3", "Walker2d-v3", "Ant-v3"]
values = [(env_id, ) for env_id in env_ids]
dir_names = ["env_" + env_id for env_id in env_ids]
keys = [("env", "id")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Expand the levels into the variants and their matching log directories.
variants, log_dirs = make_variants(*variant_levels)

# default_config_key = "ppo_1M_serial"
# script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ppo_serial.py"
# experiment_title = "ppo_mujoco"

# run_experiments(
#     script=script,
#     affinity_code=affinity_code,
#     experiment_title=experiment_title,
#     runs_per_setting=runs_per_setting,
#     variants=variants,
#     log_dirs=log_dirs,
#     common_args=(default_config_key,),
# )
Ejemplo n.º 4
0
def _bandit_variant_levels(variant_choice):
    """Build the variant levels for one family of bandit experiments.

    Each level is described by three parallel pieces: ``keys`` (every entry
    is the string path to a config entry), ``values`` (one row per setting,
    one value per key) and ``dir_names`` (one directory name per row).

    Args:
        variant_choice: Integer selector: 0 for "eGreedy", 1 for "ucb",
            2 for "thompson", 3 for "gradientBandit".

    Returns:
        A list of ``VariantLevel`` objects, in the order they are meant to be
        passed to ``make_variants``.

    Raises:
        ValueError: If ``variant_choice`` is not one of the selectors above.
    """
    ############ experiments for eGreedy ############################
    if variant_choice == 0:
        values = [["eGreedy", epsilon] for epsilon in (0.1, 0.5, 0.9)]
        dir_names = ["eGreedy-e{}".format(v[1]) for v in values]
        keys = [
            ("solution", ),
            ("agent_kwargs", "epsilon"),
        ]
        return [VariantLevel(keys, values, dir_names)]

    ############ experiments for UCB ################################
    if variant_choice == 1:
        values = [["ucb", c] for c in (1, 5, 10)]
        dir_names = ["{}-c{}".format(*v) for v in values]
        keys = [
            ("solution", ),
            ("agent_kwargs", "c"),
        ]
        return [VariantLevel(keys, values, dir_names)]

    ############ experiments for Thompson sampling ##################
    if variant_choice == 2:
        values = [
            ["thompson", [[1, 1], [1, 1], [1, 1]]],
            ["thompson", [[601, 401], [401, 601], [2, 3]]],
        ]
        # The directory name is tagged with the very first number of the
        # prior only, which is enough to tell the two settings apart.
        dir_names = ["{}-prior{}".format(v[0], v[1][0][0]) for v in values]
        keys = [
            ("solution", ),
            ("agent_kwargs", "prior"),
        ]
        return [VariantLevel(keys, values, dir_names)]

    ############ experiments for gradient bandit ####################
    if variant_choice == 3:
        levels = []

        # Level 1: fixes the solution name.
        values = [["gradientBandit"]]
        dir_names = ["{}".format(*v) for v in values]
        keys = [("solution", )]
        levels.append(VariantLevel(keys, values, dir_names))

        # Level 2: sweep over agent_kwargs.beta.
        values = [[beta] for beta in (0.2, 1.0, 2.0, 5.0)]
        dir_names = ["beta{}".format(*v) for v in values]
        keys = [("agent_kwargs", "beta")]
        levels.append(VariantLevel(keys, values, dir_names))

        # Level 3: sweep over agent_kwargs.b.
        values = [[b] for b in (0.0, 0.8, 5.0, 20.0)]
        dir_names = ["b{}".format(*v) for v in values]
        keys = [("agent_kwargs", "b")]
        levels.append(VariantLevel(keys, values, dir_names))

        return levels

    raise ValueError("Wrong experiment choice {}".format(variant_choice))


def main(args, variant_choice=3):
    """Launch the bandit experiments for one solution family.

    Args:
        args: Object exposing a ``debug`` attribute (presumably the parsed
            command-line arguments), forwarded to the launcher as
            ``debug_mode``.
        variant_choice: Which family of experiments to launch: 0 eGreedy,
            1 UCB, 2 Thompson sampling, 3 gradient bandit.  Defaults to 3,
            the value that was previously hard-coded in the body, so
            ``main(args)`` behaves as before.

    Raises:
        ValueError: If ``variant_choice`` is not one of 0, 1, 2 or 3.
    """
    # Either manually set the resources for the experiment:
    affinity_code = encode_affinity(
        n_cpu_core=16,
        n_gpu=1,
        contexts_per_gpu=16,
        # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
        # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
        cpu_per_run=1,
    )
    # Or try an automatic one, but results may vary:
    # affinity_code = quick_affinity_code(n_parallel=None, use_gpu=True)

    default_config = make_default_config()

    # Hyper-parameter grid for the selected family.
    variant_levels = _bandit_variant_levels(variant_choice)

    # Get all variants and their own log directory, then replace each variant
    # with the result of merging it with the default config.
    variants, log_dirs = make_variants(*variant_levels)
    for i, variant in enumerate(variants):
        variants[i] = update_config(default_config, variant)

    run_experiments(
        script="girl/experiments/bandit/bandit.py",
        affinity_code=affinity_code,
        experiment_title="Bandit",
        runs_per_setting=200,
        variants=variants,
        log_dirs=log_dirs,  # the directory under "${experiment title}"
        # If greater than 0, the launcher runs one variant in this process.
        debug_mode=args.debug,
    )