Code example #1
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_gpu_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
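These excerpts omit their import lines. A best-effort import sketch for code example #1 follows; the rlpyt module paths are assumed from the library's usual layout and may need adjusting for a specific rlpyt version.

# Assumed imports for code example #1 (paths follow rlpyt's usual layout
# and may differ between rlpyt versions):
from rlpyt.utils.launching.affinity import affinity_from_code
from rlpyt.utils.launching.variant import load_variant, update_config
from rlpyt.utils.logging.context import logger_context
from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo
from rlpyt.samplers.async_.gpu_sampler import AsyncGpuSampler
from rlpyt.samplers.async_.collectors import DbGpuResetCollector
from rlpyt.algos.dqn.r2d1 import R2D1
from rlpyt.agents.dqn.atari.atari_r2d1_agent import AtariR2d1Agent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.experiments.configs.atari.dqn.atari_r2d1 import configs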
Code example #2
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Code example #3
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    # config["eval_env"]["id"] = config["env"]["id"]

    sampler = SerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["env"],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Code example #4
def build_and_train(affinity_code, log_dir, run_ID, **kwargs):
    # I prefer to put all tunable default configs into the launch file.

    # Acquire the affinity assigned by the launcher.
    # NOTE: if the affinity is a list, it means multiple resources (GPUs)
    # are assigned to the current experiment.
    affinity = affinity_from_code(affinity_code)

    # `config` is now a dictionary with the same structure as the
    # default configuration you defined.
    config = load_variant(log_dir)

    name = "demo_experiment"
    # This tells you which GPU is recommended for this experiment.
    gpu_idx = affinity["cuda_idx"]

    # under a logger context, run your experiment.
    with logger_context(log_dir, run_ID, name, config):
        logger.log("Start running experiment")
        for epoch_i in range(10):
            # Log a scalar with this function, for example:
            logger.record_tabular("metric1", epoch_i, epoch_i)
            # Dump all logged values to the csv file (this is the exact
            # function that writes one line into the progress.csv file).
            logger.dump_tabular()
Code example #5
File: atari_dqn_cpu.py  Project: ZiwenZhuang/rlpyt
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]

    CollectorCls = config["sampler"].pop("CollectorCls", None)
    sampler = CpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=CollectorCls or WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = DQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Code example #6
File: exp_launcher.py  Project: ZiwenZhuang/exptools
def launch_experiment(script,
                      run_slot,
                      affinity_code,
                      log_dir,
                      variant,
                      run_ID,
                      args,
                      new_process: bool = True):
    """ 
    Parameters
    ----------
        log_dir: the abspath to save variant and feed to running script
        script: the name of experiment script you wish to run
        variant: a dict-like object that tells the experiment configuration
        new_process: a boolean showing whether to deploy a new process to run the experiment
    """
    slot_affinity_code = prepend_run_slot(run_slot, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)
    call_list = list()
    if isinstance(affinity, dict) and affinity.get("all_cpus", False):
        cpus = ",".join(str(c) for c in affinity["all_cpus"])
    elif isinstance(affinity, list) and affinity[0].get("all_cpus", False):
        cpus = ",".join(str(c) for aff in affinity for c in aff["all_cpus"])
    else:
        cpus = ()
    if cpus:
        call_list += ["taskset", "-c",
                      cpus]  # PyTorch obeys better than just psutil.
    call_command = ["python", script, slot_affinity_code, log_dir, str(run_ID)]
    call_command += [str(a) for a in args]
    save_variant(variant, log_dir)
    if new_process:
        print("\ncall string:\n", " ".join(call_list + call_command))
        p = subprocess.Popen(
            call_list +
            call_command)  # the script receives argv starting from its own name
    else:
        print("\nexperiment function:\n", " ".join(call_command[1:]))
        # Load the experiment script as a module from its path; see
        # https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
        file_abspath = os.path.abspath(script)
        module_name = file_abspath.split("/")[-1][:-3]  # strip the ".py" extension
        module_spec = importlib.util.spec_from_file_location(
            name=module_name, location=file_abspath)
        module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(module)
        # call experiment
        if hasattr(module, "main"):
            module.main(*call_command[2:])  # feed the arguments starting after the script name
        elif hasattr(module, "build_and_train"):
            module.build_and_train(*call_command[2:])
        p = None
    return p
Code example #7
def launch_experiment(
    script,
    run_slot,
    affinity_code,
    log_dir,
    variant,
    run_ID,
    args,
    python_executable=None,
    set_egl_device=False,
):
    """Launches one learning run using ``subprocess.Popen()`` to call the
    python script.  Calls the script as:
    ``python {script} {slot_affinity_code} {log_dir} {run_ID} {*args}``

    If ``affinity_code["all_cpus"]`` is provided, then the call is prepended
    with ``taskset -c ..`` and the listed cpus (this is the most reliable way
    to keep the run limited to these CPU cores).  Also saves the ``variant``
    file.  Returns the process handle, which can be monitored.

    Use ``set_egl_device=True`` to set an environment variable
    ``EGL_DEVICE_ID`` equal to the same value as the cuda index for the
    algorithm.  For example, this can be used with a DMControl environment
    modified to look for this environment variable when selecting a GPU for
    headless rendering.
    """
    slot_affinity_code = prepend_run_slot(run_slot, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)
    call_list = list()
    if isinstance(affinity, dict) and affinity.get("all_cpus", False):
        cpus = ",".join(str(c) for c in affinity["all_cpus"])
    elif isinstance(affinity, list) and affinity[0].get("all_cpus", False):
        cpus = ",".join(str(c) for aff in affinity for c in aff["all_cpus"])
    else:
        cpus = ()
    if cpus:
        call_list += ["taskset", "-c",
                      cpus]  # PyTorch obeys better than just psutil.
    call_list += ["python", script, slot_affinity_code, log_dir, str(run_ID)]
    call_list += [str(a) for a in args]
    save_variant(variant, log_dir)
    print("\ncall string:\n", " ".join(call_list))
    if set_egl_device and affinity.get("cuda_idx", None) is not None:
        egl_device_id = str(affinity["cuda_idx"])
        egl_env = os.environ.copy()
        egl_env["EGL_DEVICE_ID"] = egl_device_id
        print(f"Assigning EGL_DEVICE_ID={egl_device_id}")
        p = subprocess.Popen(call_list, env=egl_env)
    else:
        p = subprocess.Popen(call_list)
    return p
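A minimal usage sketch for launch_experiment() above, assuming rlpyt's encode_affinity() helper; the script name, log directory, variant, and extra args below are placeholders, not taken from the examples.

# Hypothetical caller of launch_experiment(); all names below are placeholders.
import os
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import launch_experiment

affinity_code = encode_affinity(n_cpu_core=8, n_gpu=1)  # resources available on this machine
log_dir = "./data/demo/run_0"
os.makedirs(log_dir, exist_ok=True)  # launch_experiment() saves the variant file here

p = launch_experiment(
    script="train_atari.py",              # experiment script exposing build_and_train()
    run_slot=0,                            # which slot of the encoded resources to occupy
    affinity_code=affinity_code,
    log_dir=log_dir,
    variant=dict(env=dict(game="pong")),   # overrides merged into the config by update_config()
    run_ID=0,
    args=("atari_dqn",),                   # extra CLI args, e.g. a config_key
)
p.wait()                                   # block until the training subprocess exits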
Code example #8
File: bandit.py  Project: ZiwenZhuang/girl
def main(affinity_code, log_dir, run_id, *args):
    # NOTE: suppress all stdout output from this single experiment.
    sys.stdout = open(os.devnull, 'w')

    affinity = affinity_from_code(affinity_code)
    config = load_variant(log_dir)

    if config["solution"] == "eGreedy":
        agent_kwargs = {k: config["agent_kwargs"][k] for k in ('epsilon', )}
        agent = eGreedyAgent(**agent_kwargs)
        algo = eGreedyBandit()
    elif config["solution"] == "ucb":
        agent_kwargs = {k: config["agent_kwargs"].get(k, 1.0) for k in ('c', )}
        agent = ucbBanditAgent(**agent_kwargs)
        algo = eGreedyBandit()
    elif config["solution"] == "thompson":
        agent_kwargs = {
            k: config["agent_kwargs"].get(k, None)
            for k in ('prior', )
        }
        agent = ThompsonAgent(**agent_kwargs)
        algo = ThompsonAlgorithm()
    elif config["solution"] == "gradientBandit":
        agent_kwargs = {
            k: config["agent_kwargs"].get(k, False)
            for k in (
                'random_init',
                'beta',
                'b',
            )
        }
        agent = GradientAgent(**agent_kwargs)
        algo = GradientBanditAlgo(**config["algo_kwargs"])
    else:
        raise NotImplementedError(
            "Solution {} has not been implemented".format(config["solution"]))

    sampler = SamplerBase(EnvCls=BanditEnv,
                          env_kwargs=config["env_kwargs"],
                          **config["sampler_kwargs"])
    runner = RunnerBase(algo=algo,
                        agent=agent,
                        sampler=sampler,
                        affinity=affinity,
                        **config["runner_kwargs"])

    name = "Bandit"
    with logger_context(log_dir, run_id, name, config):
        runner.run()
Code example #9
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = CpuParallelSampler(EnvCls=gym_make,
                                 env_kwargs=config["env"],
                                 CollectorCls=ResetCollector,
                                 **config["sampler"])
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Code example #10
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = AsyncCpuSampler(EnvCls=gym_make,
                              env_kwargs=config["env"],
                              CollectorCls=DbCpuResetCollector,
                              eval_env_kwargs=config["env"],
                              **config["sampler"])
    algo = TD3(optim_kwargs=config["optim"], **config["algo"])
    agent = Td3Agent(**config["agent"])
    runner = AsyncRlEval(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = "async_td3_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Code example #11
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = CpuParallelSampler(EnvCls=AtariEnv,
                                 env_kwargs=config["env"],
                                 CollectorCls=EpisodicLivesWaitResetCollector,
                                 TrajInfoCls=AtariTrajInfo,
                                 **config["sampler"])
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["game"] + str(config["sampler"]["batch_T"])
    with logger_context(log_dir, run_ID, name,
                        config):  # Might have to flatten config
        runner.train()
Code example #12
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    assert isinstance(affinity, list)  # One for each GPU.
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = GpuSampler(EnvCls=AtariEnv,
                         env_kwargs=config["env"],
                         CollectorCls=GpuWaitResetCollector,
                         TrajInfoCls=AtariTrajInfo,
                         **config["sampler"])
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = SyncRl(algo=algo,
                    agent=agent,
                    sampler=sampler,
                    affinity=affinity,
                    **config["runner"])
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Code example #13
def build_and_train(slot_affinity_code, log_dir, run_ID):
    # (Or load from a central store of configs.)
    config = dict(
        env=dict(game="pong"),
        algo=dict(learning_rate=7e-4),
        sampler=dict(batch_B=16),
    )

    affinity = affinity_from_code(slot_affinity_code)
    variant = load_variant(log_dir)
    # global config
    config = update_config(config, variant)

    sampler = GpuSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        batch_T=5,
        # batch_B=16,  # Get from config.
        max_decorrelation_steps=400,
        **config["sampler"]
    )
    algo = A2C(**config["algo"])  # Run with defaults.
    agent = AtariFfAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    name = "a2c_" + config["env"]["game"]
    # log_dir = "example_6"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
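For completeness, a sketch of the kind of launch file that produces the slot_affinity_code, log_dir, and run_ID consumed by a script like code example #13. The script path and experiment title are placeholders; make_variants(), VariantLevel, and run_experiments() are assumed from rlpyt's launching utilities.

# Hypothetical launch file for a build_and_train() script like code example #13.
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.variant import make_variants, VariantLevel
from rlpyt.utils.launching.exp_launcher import run_experiments

affinity_code = encode_affinity(n_cpu_core=8, n_gpu=2)

# One variant level: sweep over two Atari games.
variant_levels = [
    VariantLevel(
        keys=[("env", "game")],              # path into the config dict
        values=[("pong",), ("breakout",)],   # one tuple of values per variant
        dir_names=["pong", "breakout"],      # per-variant log sub-directories
    ),
]
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script="train/example_13_train.py",  # placeholder path to the training script
    affinity_code=affinity_code,
    experiment_title="a2c_atari_demo",
    runs_per_setting=2,                  # two run_IDs per variant
    variants=variants,
    log_dirs=log_dirs,
)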