예제 #1
0
def run_experiment(name,
                   config,
                   readme,
                   distributions=None,
                   durations=None,
                   alg=None,
                   task="grid",
                   name_variables=None,
                   env_kwargs=None):
    """Configure an experiment and either run it locally or submit it.

    A positional ``duration`` argument is parsed from the command line:
    the special value ``"local"`` runs ``training_loop`` in-process; any
    other value must be a key of ``durations`` and triggers a slurm
    submission via ``build_and_submit``.

    Args:
        name: Experiment name; sanitized before use.
        config: Overrides applied on top of ``DEFAULT_CONFIG``.
        readme: Unused in this body — presumably consumed elsewhere; confirm.
        distributions: Forwarded to ``build_and_submit`` (hyperparameter search).
        durations: Mapping duration-name -> run kwargs; an entry may embed
            extra config under its ``'config'`` key.
        alg: Name of an algorithm whose ``<alg>_config`` attribute is looked
            up on ``alg_module``.
        task: Task name injected into the env kwargs.
        name_variables: Comma-separated config keys appended to the env name.
        env_kwargs: Extra keyword args for ``get_env_config``.
    """
    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"])

    args, _ = parser.parse_known_args()

    _config = DEFAULT_CONFIG.copy()

    # Copy so that injecting 'task' does not mutate the caller's dict.
    env_kwargs = dict(env_kwargs) if env_kwargs else {}
    env_kwargs['task'] = task
    env_config = get_env_config(**env_kwargs)
    _config.update(env_config)

    if alg:
        alg_config = getattr(alg_module, "{}_config".format(alg))
        _config.update(alg_config)
        alg_name = sanitize(alg_config.alg_name)
    else:
        alg_name = ""

    _config.update(config)
    _config.update_from_command_line()

    _config.env_name = "{}_env={}".format(name, sanitize(env_config.env_name))

    if args.duration == "local":
        _config.exp_name = "alg={}".format(alg_name)
        with _config:
            return training_loop()
    else:
        run_kwargs = Config(
            kind="slurm",
            pmem=5000,
            ignore_gpu=False,
        )

        # Work on a copy so stripping the embedded 'config' entry does not
        # mutate the caller's ``durations`` mapping (the previous version
        # deleted the key in place, corrupting it for subsequent calls).
        duration_args = dict(durations[args.duration])
        if 'config' in duration_args:
            _config.update(duration_args.pop('config'))

        run_kwargs.update(duration_args)
        run_kwargs.update_from_command_line()

    if name_variables is not None:
        name_variables_str = "_".join("{}={}".format(
            sanitize(str(k)), sanitize(str(getattr(_config, k))))
                                      for k in name_variables.split(","))
        _config.env_name = "{}_{}".format(_config.env_name, name_variables_str)

    exp_name = "{}_alg={}_duration={}".format(_config.env_name, alg_name,
                                              args.duration)

    build_and_submit(name=exp_name,
                     config=_config,
                     distributions=distributions,
                     **run_kwargs)
예제 #2
0
파일: test_rl.py 프로젝트: alcinos/dps
def test_simple_add(test_config):
    """End-to-end test of A2C on the simple-addition environment.

    Trains through a 3-stage width curriculum, checks that repeated runs
    produce identical deterministic output, then reloads the stage-2
    checkpoint twice (once with further training, once with
    ``do_train=False``) and asserts the best 0-1 loss stays below a fixed
    threshold each time.
    """
    # Fully specify the config here so that this test is not affected by config changes external to this file.
    config = Config(
        env_name="test_simple_add_a2c",
        name="test_simple_add_a2c",
        get_updater=a2c.A2C,
        n_controller_units=32,
        batch_size=16,
        optimizer_spec="adam",
        opt_steps_per_update=20,
        sub_batch_size=0,
        epsilon=0.2,
        lr_schedule=1e-4,
        max_steps=501,
        build_policy=BuildEpsilonSoftmaxPolicy(),
        build_controller=BuildLstmController(),
        exploration_schedule=0.1,
        val_exploration_schedule=0.0,
        actor_exploration_schedule=None,
        policy_weight=1.0,
        value_weight=0.0,
        value_reg_weight=0.0,
        entropy_weight=0.01,
        split=False,
        q_lmbda=1.0,
        v_lmbda=1.0,
        policy_importance_c=0,
        q_importance_c=None,
        v_importance_c=None,
        max_grad_norm=None,
        gamma=1.0,
        use_differentiable_loss=False,
        use_gpu=False,
        display_step=500,
        seed=0,

        # env-specific
        build_env=simple_addition.build_env,
        T=30,
        curriculum=[
            dict(width=1),
            dict(width=2),
            dict(width=3),
        ],
        base=10,
        final_reward=True,
    )

    # Test harness overrides (e.g. shorter runs) take precedence.
    config.update(test_config)

    n_repeats = 1  # Haven't made it completely deterministic yet, so keep it at 1.

    # Maps deterministic-output string -> number of runs that produced it.
    results = defaultdict(int)

    # Upper bound on acceptable best 0-1 loss for every run below.
    threshold = 0.15

    for i in range(n_repeats):
        config = config.copy()
        output = _raw_run(config)
        stdout = output.path_for('stdout')
        result = _get_deterministic_output(stdout)
        results[result] += 1
        assert output.history[-1]['best_01_loss'] < threshold

    # All repeats must have produced identical deterministic output.
    if len(results) != 1:
        for r in sorted(results):
            print("\n" + "*" * 80)
            print("The following occurred {} times:\n".format(results[r]))
            print(r)
        raise Exception("Results were not deterministic.")

    # The full curriculum ran, and the stage-2 checkpoint files exist on disk.
    assert len(output.config.curriculum) == 3
    config.load_path = output.path_for('weights/best_of_stage_2')
    assert os.path.exists(config.load_path + ".index")
    assert os.path.exists(config.load_path + ".meta")

    # Load one of the hypotheses, train it for a bit, make sure the accuracy is still high.
    config.curriculum = [output.config.curriculum[-1]]
    config = config.copy()
    output = _raw_run(config)
    stdout = output.path_for('stdout')
    result = _get_deterministic_output(stdout)
    results[result] += 1
    assert output.history[-1]['best_01_loss'] < threshold

    # Load one of the hypotheses, don't train it at all, make sure the accuracy is still high.
    config.do_train = False
    config = config.copy()
    output = _raw_run(config)
    stdout = output.path_for('stdout')
    result = _get_deterministic_output(stdout)
    results[result] += 1
    assert output.history[-1]['best_01_loss'] < threshold
예제 #3
0
파일: test_config.py 프로젝트: alcinos/dps
def test_nested():
    """Check that Config.update merges nested mappings recursively.

    Updating with a nested mapping must merge into existing Config values
    (preserving untouched sibling keys), while a non-mapping value simply
    replaces whatever was there. Plain dict values behave like Configs.
    """
    cfg = Config(
        a=1,
        b=2,
        c=Config(z=2, d=1),
        e=Config(u=Config(y=3, f=2), x=4),
        g={'h': 10},
        i={'j': Config(k=11), 'r': 1},
        l=Config(m={'n': 12}),
    )

    # Scalar update via keyword argument.
    cfg.update(a=10)
    assert cfg.a == 10

    # Scalar update via a mapping.
    cfg.update({'a': 100})
    assert cfg.a == 100

    # Nested update merges; sibling key survives.
    cfg.update({'c': {'d': 100}})
    assert cfg.c.d == 100
    assert cfg.c.z == 2

    cfg.update({'e': {'x': 5}})
    assert cfg.e.x == 5
    assert cfg.e.u.y == 3
    assert cfg.e.u.f == 2

    # Two levels deep: only the targeted leaf changes.
    cfg.update({'e': {'u': {'y': 4}}})
    assert cfg.e.x == 5
    assert cfg.e.u.y == 4
    assert cfg.e.u.f == 2

    # A non-mapping value replaces the nested Config outright.
    cfg.update({'e': {'u': 'a'}})
    assert cfg.e.x == 5
    assert cfg.e.u == 'a'

    # Earlier updates are still intact.
    assert cfg.a == 100
    assert cfg.b == 2
    assert cfg.c.d == 100
    assert cfg.c.z == 2

    # Plain-dict values also merge recursively.
    cfg.update({'i': {'j': {'k': 120}}})
    assert cfg.i['j'].k == 120
    assert cfg.i['r'] == 1

    # A scalar entry is replaced by a mapping when updated with one.
    cfg.update({'i': {'r': {'k': 120}}})
    assert cfg.i['j'].k == 120
    assert cfg.i['r']['k'] == 120
예제 #4
0
파일: base.py 프로젝트: lqiang2003cn/dps
def run_experiment(name,
                   base_config,
                   readme,
                   distributions=None,
                   durations=None,
                   name_variables=None,
                   alg_configs=None,
                   env_configs=None,
                   late_config=None,
                   cl_mode='lax',
                   run_kwargs_base=None):
    """Assemble an experiment config from layered sources and launch it.

    The command line supplies optional ``env`` / ``alg`` positionals (only
    when the corresponding config mappings are given) plus an optional
    ``duration`` which defaults to ``"local"``. Config layers are applied
    in order: defaults, ``base_config``, env config, alg config,
    ``late_config``, any config embedded in the selected duration, and
    finally command-line overrides. ``"local"`` runs ``training_loop``
    in-process; any other duration submits via ``build_and_submit``.

    Args:
        name: Base experiment name (sanitized).
        base_config: First layer of overrides on the default config.
        readme: Unused in this body — presumably consumed elsewhere; confirm.
        distributions: Hyperparameter distributions; may be overridden by a
            'distributions' entry in the selected duration's run kwargs.
        durations: Mapping duration-name -> run kwargs for submission.
        name_variables: Comma-separated config keys appended to the env name.
        alg_configs: Mapping alg-name -> config, selected by the ``alg`` arg.
        env_configs: Mapping env-name -> config, selected by the ``env`` arg.
        late_config: Final config layer applied before command-line parsing.
        cl_mode: 'strict' or 'lax' command-line update, or None to skip it.
        run_kwargs_base: Base overrides for the submission kwargs.

    Raises:
        Exception: If ``cl_mode`` is neither None, 'strict', nor 'lax'.
    """
    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    if env_configs is not None:
        parser.add_argument('env')
    if alg_configs is not None:
        parser.add_argument('alg')
    parser.add_argument("duration",
                        choices=list(durations.keys()) + ["local"],
                        default="local",
                        nargs="?")

    args, _ = parser.parse_known_args()

    config = get_default_config()
    config.update(base_config)

    if env_configs is not None:
        config.update(env_configs[args.env])

    if alg_configs is not None:
        config.update(alg_configs[args.alg])

    if late_config is not None:
        config.update(late_config)

    env_name = sanitize(config.get('env_name', ''))
    alg_name = sanitize(config.get("alg_name", ""))

    run_kwargs = Config(
        kind="slurm",
        ignore_gpu=False,
    )
    if run_kwargs_base is not None:
        run_kwargs.update(run_kwargs_base)

    if args.duration == "local":
        run_kwargs.update(durations.get('local', {}))
    else:
        run_kwargs.update(durations[args.duration])

    # A duration entry may carry extra config under its 'config' key.
    if 'config' in run_kwargs:
        config.update(run_kwargs.config)
        del run_kwargs['config']

    if cl_mode is not None:
        if cl_mode == 'strict':
            config.update_from_command_line(strict=True)
        elif cl_mode == 'lax':
            config.update_from_command_line(strict=False)
        else:
            raise Exception("Unknown value for cl_mode: {}".format(cl_mode))

    if args.duration == "local":
        config.exp_name = "alg={}".format(alg_name)
        with config:
            return training_loop()
    else:
        if 'distributions' in run_kwargs:
            distributions = run_kwargs['distributions']
            del run_kwargs['distributions']

        if name_variables is not None:
            # str() before sanitize, matching the other run_experiment
            # variants in this file: config values need not be strings.
            name_variables_str = "_".join(
                "{}={}".format(sanitize(str(k)), sanitize(str(getattr(config, k))))
                for k in name_variables.split(","))
            env_name = "{}_{}".format(env_name, name_variables_str)

        config.env_name = env_name

        exp_name = "env={}_alg={}_duration={}".format(env_name, alg_name,
                                                      args.duration)

        init()

        build_and_submit(name,
                         exp_name,
                         config,
                         distributions=distributions,
                         **run_kwargs)
예제 #5
0
def run_experiment(
        name, base_config, readme, distributions=None, durations=None,
        name_variables=None, alg_configs=None, env_configs=None, late_config=None):
    """Build a layered experiment config and run it locally or submit it.

    Optional ``env`` / ``alg`` positionals are parsed only when the
    corresponding config mappings are supplied; ``duration`` defaults to
    ``"local"``, which runs ``training_loop`` in-process. Any other
    duration must be a key of ``durations`` and results in a slurm
    submission through ``build_and_submit``.

    Args:
        name: Base experiment name (sanitized); prefixed onto the env name.
        base_config: First layer of overrides on ``DEFAULT_CONFIG``.
        readme: Unused in this body — presumably consumed elsewhere; confirm.
        distributions: Forwarded to ``build_and_submit``.
        durations: Mapping duration-name -> run kwargs; an entry may embed
            extra config under its 'config' key.
        name_variables: Comma-separated config keys appended to the env name.
        alg_configs: Mapping alg-name -> config, selected by the ``alg`` arg.
        env_configs: Mapping env-name -> config, selected by the ``env`` arg.
        late_config: Final config layer applied before command-line parsing.
    """
    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    if env_configs is not None:
        parser.add_argument('env')
    if alg_configs is not None:
        parser.add_argument('alg')
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"], default="local", nargs="?")

    args, _ = parser.parse_known_args()

    config = DEFAULT_CONFIG.copy()

    config.update(base_config)

    if env_configs is not None:
        config.update(env_configs[args.env])

    if alg_configs is not None:
        config.update(alg_configs[args.alg])

    if late_config is not None:
        config.update(late_config)

    config.update_from_command_line()

    env_name = sanitize(config.get('env_name', ''))
    if name:
        config.env_name = "{}_env={}".format(name, env_name)
    else:
        config.env_name = "env={}".format(env_name)
    alg_name = sanitize(config.get("alg_name", ""))

    if args.duration == "local":
        config.exp_name = "alg={}".format(alg_name)
        with config:
            return training_loop()

    run_kwargs = Config(
        kind="slurm",
        pmem=5000,
        ignore_gpu=False,
    )

    # Work on a copy so stripping the embedded 'config' entry does not
    # mutate the caller's ``durations`` mapping (the previous version
    # deleted the key in place, corrupting it for subsequent calls).
    duration_args = dict(durations[args.duration])
    if 'config' in duration_args:
        config.update(duration_args.pop('config'))

    run_kwargs.update(duration_args)
    run_kwargs.update_from_command_line()

    if name_variables is not None:
        name_variables_str = "_".join(
            "{}={}".format(sanitize(str(k)), sanitize(str(getattr(config, k))))
            for k in name_variables.split(","))
        config.env_name = "{}_{}".format(config.env_name, name_variables_str)

    exp_name = "{}_alg={}_duration={}".format(config.env_name, alg_name, args.duration)

    build_and_submit(name=exp_name, config=config, distributions=distributions, **run_kwargs)