Example 1
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_gpu_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
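These build_and_train functions are typically run as standalone scripts spawned by rlpyt's experiment launcher, which passes the slot affinity code, log directory, run ID, and config key on the command line. A minimal entry-point stanza in that style (an assumption based on the rlpyt example scripts, not shown in the snippet above):

import sys

if __name__ == "__main__":
    # The launcher invokes: python script.py <slot_affinity_code> <log_dir> <run_ID> <config_key>
    build_and_train(*sys.argv[1:])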
Example 2
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example 3
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    # config["eval_env"]["id"] = config["env"]["id"]

    sampler = SerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["env"],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example 4
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]

    CollectorCls = config["sampler"].pop("CollectorCls", None)
    sampler = CpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=CollectorCls or WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = DQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
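The pop() in this example lets a variant override the collector class through config["sampler"] while ensuring the key is consumed before **config["sampler"] is unpacked, so the sampler constructor never receives it twice. A minimal sketch of the same pattern with illustrative names (make_sampler is hypothetical, not part of rlpyt):

def make_sampler(SamplerCls, DefaultCollectorCls, sampler_config):
    # Consume the optional override first so it is not also passed via **kwargs.
    CollectorCls = sampler_config.pop("CollectorCls", None)
    return SamplerCls(
        CollectorCls=CollectorCls or DefaultCollectorCls,
        **sampler_config,
    )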
Example 5
def main(args):
    experiment_title = "demo_experiment"
    affinity_code = quick_affinity_code(n_parallel=8)
    # NOTE: you can also use encode_affinity to specify how to distribute each
    # experiment across your computing nodes.

    # set up variants
    variant_levels = list()

    values = [
        ["one"],
        ["two"],
    ]
    dir_names = ["{}".format(*v) for v in values]
    keys = [("optionA", "choiceB")
            ]  # each entry in the list is the string path to your config
    variant_levels.append(VariantLevel(keys, values, dir_names))

    values = [
        ["good", int(1e-3)],
        ["better", int(1e3)],
    ]
    dir_names = ["{}".format(*v) for v in values]
    keys = [("optionA", "choiceB"), ("optionB", )]
    variant_levels.append(VariantLevel(keys, values, dir_names))

    # get all variants and their own log directory
    variants, log_dirs = make_variants(*variant_levels)
    for i, variant in enumerate(variants):
        variants[i] = update_config(default_config, variant)

    run_experiments(
        script="demo.py",
        affinity_code=affinity_code,
        experiment_title=experiment_title,
        runs_per_setting=1,  # how many times to run repeated experiments
        variants=variants,
        log_dirs=log_dirs,
        debug_mode=args.debug,  # if greater than 0, the launcher will run one variant in this process
    )
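For reference, make_variants takes the cross product of the given VariantLevels and joins each level's dir_names into nested log paths. A minimal sketch, assuming rlpyt's rlpyt.utils.launching.variant module (the printed paths are an expectation, worth verifying against your rlpyt version):

from rlpyt.utils.launching.variant import VariantLevel, make_variants

# One value in the first level and two in the second cross to 1 * 2 = 2 variants.
level_a = VariantLevel(keys=[("optionA", "choiceB")], values=[["one"]],
                       dir_names=["one"])
level_b = VariantLevel(keys=[("optionB",)], values=[["good"], ["better"]],
                       dir_names=["good", "better"])
variants, log_dirs = make_variants(level_a, level_b)
print(log_dirs)  # expected: ['one/good', 'one/better']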
Example 6
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = CpuParallelSampler(EnvCls=gym_make,
                                 env_kwargs=config["env"],
                                 CollectorCls=ResetCollector,
                                 **config["sampler"])
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example 7
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = AsyncCpuSampler(EnvCls=gym_make,
                              env_kwargs=config["env"],
                              CollectorCls=DbCpuResetCollector,
                              eval_env_kwargs=config["env"],
                              **config["sampler"])
    algo = TD3(optim_kwargs=config["optim"], **config["algo"])
    agent = Td3Agent(**config["agent"])
    runner = AsyncRlEval(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = "async_td3_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example 8
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = CpuParallelSampler(EnvCls=AtariEnv,
                                 env_kwargs=config["env"],
                                 CollectorCls=EpisodicLivesWaitResetCollector,
                                 TrajInfoCls=AtariTrajInfo,
                                 **config["sampler"])
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["game"] + str(config["sampler"]["batch_T"])
    with logger_context(log_dir, run_ID, name, config):  # Might have to flatten config.
        runner.train()
Example 9
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    assert isinstance(affinity, list)  # One for each GPU.
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = GpuSampler(EnvCls=AtariEnv,
                         env_kwargs=config["env"],
                         CollectorCls=GpuWaitResetCollector,
                         TrajInfoCls=AtariTrajInfo,
                         **config["sampler"])
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = SyncRl(algo=algo,
                    agent=agent,
                    sampler=sampler,
                    affinity=affinity,
                    **config["runner"])
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example 10
def build_and_train(slot_affinity_code, log_dir, run_ID):
    # (Or load from a central store of configs.)
    config = dict(
        env=dict(game="pong"),
        algo=dict(learning_rate=7e-4),
        sampler=dict(batch_B=16),
    )

    affinity = affinity_from_code(slot_affinity_code)
    variant = load_variant(log_dir)
    # global config
    config = update_config(config, variant)

    sampler = GpuSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        batch_T=5,
        # batch_B=16,  # Get from config.
        max_decorrelation_steps=400,
        **config["sampler"]
    )
    algo = A2C(**config["algo"])  # Run with defaults.
    agent = AtariFfAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    name = "a2c_" + config["env"]["game"]
    # log_dir = "example_6"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example 11
def main(args):
    # Either manually set the resources for the experiment:
    affinity_code = encode_affinity(
        n_cpu_core=16,
        n_gpu=1,
        contexts_per_gpu=16,
        # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
        # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
        cpu_per_run=1,
    )
    # Or try an automatic one, but results may vary:
    # affinity_code = quick_affinity_code(n_parallel=None, use_gpu=True)

    default_config = make_default_config()

    # start building variants
    variant_levels = list()

    variant_choice = 3
    ############ experiments for eGreedy ############################
    if variant_choice == 0:
        values = [
            ["eGreedy", 0.1],
            ["eGreedy", 0.5],
            ["eGreedy", 0.9],
        ]
        dir_names = ["eGreedy-e{}".format(v[1]) for v in values]
        keys = [
            ("solution", ),
            ("agent_kwargs", "epsilon"),
        ]  # each entry in the list is the string path to your config
        variant_levels.append(VariantLevel(keys, values, dir_names))

    ############ experiments for UCB ################################
    elif variant_choice == 1:
        values = [
            ["ucb", 1],
            ["ucb", 5],
            ["ucb", 10],
        ]
        dir_names = ["{}-c{}".format(*v) for v in values]
        keys = [
            ("solution", ),
            ("agent_kwargs", "c"),
        ]  # each entry in the list is the string path to your config
        variant_levels.append(VariantLevel(keys, values, dir_names))

    ############ experiments for Thompson sampling ##################
    elif variant_choice == 2:
        values = [
            ["thompson", [[1, 1], [1, 1], [1, 1]]],
            ["thompson", [[601, 401], [401, 601], [2, 3]]],
        ]
        dir_names = ["{}-prior{}".format(v[0], v[1][0][0]) for v in values]
        keys = [
            ("solution", ),
            ("agent_kwargs", "prior"),
        ]  # each entry in the list is the string path to your config
        variant_levels.append(VariantLevel(keys, values, dir_names))

    ########## experiments for gradient bandit ######################
    elif variant_choice == 3:
        values = [
            ["gradientBandit"],
        ]
        dir_names = ["{}".format(*v) for v in values]
        keys = [
            ("solution", ),
        ]
        variant_levels.append(VariantLevel(keys, values, dir_names))

        values = [
            [0.2],
            [1.0],
            [2.0],
            [5.0],
        ]
        dir_names = ["beta{}".format(*v) for v in values]
        keys = [
            ("agent_kwargs", "beta"),
        ]  # each entry in the list is the string path to your config
        variant_levels.append(VariantLevel(keys, values, dir_names))

        values = [
            [0.0],
            [0.8],
            [5.0],
            [20.0],
        ]
        dir_names = ["b{}".format(*v) for v in values]
        keys = [
            ("agent_kwargs", "b"),
        ]  # each entry in the list is the string path to your config
        variant_levels.append(VariantLevel(keys, values, dir_names))

    ######### Done setting hyper-parameters #########################
    else:
        raise ValueError("Wrong experiment choice {}".format(variant_choice))

    # get all variants and their own log directory
    variants, log_dirs = make_variants(*variant_levels)
    for i, variant in enumerate(variants):
        variants[i] = update_config(default_config, variant)

    run_experiments(
        script="girl/experiments/bandit/bandit.py",
        affinity_code=affinity_code,
        experiment_title="Bandit",
        runs_per_setting=200,
        variants=variants,
        log_dirs=log_dirs,  # the directory under "${experiment title}"
        debug_mode=args.debug,  # if greater than 0, the launcher will run one variant in this process
    )
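The contents of make_default_config() are not shown, but the VariantLevel key paths above imply a config with a top-level "solution" entry and an "agent_kwargs" sub-dict. A purely hypothetical sketch reconstructed from those key paths, not the actual source:

def make_default_config():
    # Hypothetical defaults; every field below is overridden by some VariantLevel.
    return dict(
        solution="eGreedy",                      # ("solution",)
        agent_kwargs=dict(
            epsilon=0.1,                         # ("agent_kwargs", "epsilon")
            c=1,                                 # ("agent_kwargs", "c")
            prior=[[1, 1], [1, 1], [1, 1]],      # ("agent_kwargs", "prior")
            beta=1.0,                            # ("agent_kwargs", "beta")
            b=0.0,                               # ("agent_kwargs", "b")
        ),
    )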