Example #1
    def __call__(self, inp):
        import os
        import datetime
        import dps
        from dps import cfg  # noqa
        from dps.config import DEFAULT_CONFIG
        from dps.utils import ExperimentStore
        os.nice(10)  # lower this worker's CPU priority

        print("Entered _BuildDataset at: ")
        print(datetime.datetime.now())

        idx, seed, n_examples = inp
        print("idx: {}, seed: {}, n_examples: {}".format(idx, seed, n_examples))

        dps.reset_config()
        params = self.params.copy()
        params.update(seed=seed, n_examples=n_examples)

        with DEFAULT_CONFIG.copy():
            cfg.update_from_command_line()
            print(cfg)

            experiment_store = ExperimentStore(os.path.join(cfg.local_experiments_dir, cfg.env_name))
            exp_dir = experiment_store.new_experiment("", seed, add_date=1, force_fresh=1, update_latest=False)
            params["data_dir"] = exp_dir.path

            print(params)

            self.cls(**params)

        print("Leaving _BuildDataset at: ")
        print(datetime.datetime.now())
Example #2
def test_hyper_bare(test_config):
    config = DEFAULT_CONFIG.copy()
    config.update(a2c.config)
    config.update(simple_addition.config)
    config.update(test_config)
    config['max_steps'] = 101
    config['checkpoint_step'] = 43
    config['n_train'] = 2**5

    _raw_run(config)
Example #3
def test_time_limit(test_config):
    config = DEFAULT_CONFIG.copy()
    config.update(simple_addition.config)
    config.update(reinforce_config)
    config.update(max_time=120, max_steps=10000, seed=100)
    config.update(test_config)

    start = time.time()
    with config:
        training_loop()
    elapsed = time.time() - start
    assert elapsed < config.max_time + 1
Example #4
def test_time_limit_between_stages(test_config):
    config = DEFAULT_CONFIG.copy()
    config.update(simple_addition.config)
    config.update(reinforce_config)
    config.update(max_time=120, max_steps=10, seed=100)
    config.update(hooks=[AlarmHook(False, 0)])
    config.update(test_config)

    start = time.time()
    with config:
        result = training_loop()
    print(result)
    elapsed = time.time() - start
    assert elapsed < 20
Example #5
File: run.py Project: alcinos/dps
def _run(env_str, alg_str, _config=None, **kwargs):
    env_config, alg_config = parse_env_alg(env_str, alg_str)

    config = DEFAULT_CONFIG.copy()
    config.update(alg_config)
    config.update(env_config)

    if _config is not None:
        config.update(_config)
    config.update(kwargs)

    with config:
        cfg.update_from_command_line()
        return training_loop()
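
The pattern shared by these examples: copy DEFAULT_CONFIG, layer overrides on top with update() (later updates win), then enter the config as a context manager so that cfg and training_loop() see it. A minimal sketch assembled only from calls that appear in the examples on this page; the override values are illustrative:

from dps import cfg
from dps.config import DEFAULT_CONFIG
from dps.train import training_loop

# Start from library defaults, then layer overrides on top;
# each update() takes precedence over what came before it.
config = DEFAULT_CONFIG.copy()
config.update(max_steps=101, seed=100)  # illustrative values

with config:  # makes this the active config, visible through `cfg`
    cfg.update_from_command_line()  # command-line flags are applied last
    training_loop()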
Example #6
def test_stage_hook(test_config):
    """ Test that we can safely use hooks to add new stages. """
    config = DEFAULT_CONFIG.copy()
    config.update(simple_addition.config)
    config.update(reinforce_config)
    config.update(
        max_steps=11,
        eval_step=10,
        n_train=100,
        seed=100,
        hooks=[DummyHook(3, dict(max_steps=21))],
        curriculum=[dict()],
        width=1,
    )
    config.update(test_config)

    with config:
        data = training_loop()
        assert data.n_stages == 3
        assert not data.history[0]["stage_config"]
        assert data.history[1]["stage_config"]["max_steps"] == 21
        assert data.history[2]["stage_config"]["max_steps"] == 21
Example #7
def test_hyper_time_limited(test_config):
    config = DEFAULT_CONFIG.copy()
    config.update(a2c.config)
    config.update(simple_addition.config)
    config.update(test_config)
    config['max_steps'] = 100000
    config['checkpoint_step'] = 43

    distributions = dict(n_train=2**np.array([5, 6, 7]))
    n_repeats = 2

    session = build_and_submit(
        name="test_hyper", config=config, distributions=distributions,
        n_repeats=n_repeats, kind='parallel', host_pool=':', wall_time='1min',
        cleanup_time='10seconds', slack_time='5seconds', ppn=2, load_avg_threshold=1e6)

    path = session.job_path
    files = os.listdir(path)
    assert set(files) == set(
        ['orig.zip', 'experiments', 'os_environ.txt', 'results.zip', 'pip_freeze.txt',
         'dps_git_summary.txt', 'nodefile.txt', 'results.txt', 'job_log.txt', 'uname.txt', 'lscpu.txt']
    )
    experiments = os.listdir(os.path.join(path, 'experiments'))
    for exp in experiments:
        assert exp.startswith('exp_')
        assert os.path.isdir(os.path.join(path, 'experiments', exp))
    assert len(experiments) == n_repeats * len(distributions['n_train'])

    with open(os.path.join(path, 'results.txt'), 'r') as f:
        results = f.read()

    assert "n_ops: 6" in results
    assert "n_completed_ops: 6" in results
    assert "n_partially_completed_ops: 0" in results
    assert "n_ready_incomplete_ops: 0" in results
    assert "n_not_ready_incomplete_ops: 0" in results
Example #8
    def __call__(self, new):
        start_time = time.time()

        print("Entered _RunTrainingLoop at: ")
        print(datetime.datetime.now())

        os.nice(10)  # lower this worker's CPU priority

        print("Sampled values: ")
        print(new)

        print("Base config: ")
        print(self.base_config)

        exp_name = '_'.join("{}={}".format(k, new[k]) for k in 'idx repeat'.split())

        dps.reset_config()

        config = DEFAULT_CONFIG.copy()
        config.update(self.base_config)
        config.update(new)
        config.update(
            start_tensorboard=False,
            show_plots=False,
        )

        with config:
            cfg.update_from_command_line()

            from dps.train import training_loop
            result = training_loop(exp_name=exp_name, start_time=start_time)

        print("Leaving _RunTrainingLoop at: ")
        print(datetime.datetime.now())

        return result
Example #9
import argparse

import numpy as np
from dps.config import DEFAULT_CONFIG

parser = argparse.ArgumentParser()
parser.add_argument("kind", choices="long med".split())
parser.add_argument("size", choices="14 21".split())
parser.add_argument("task", choices="addition arithmetic".split())
parser.add_argument("--c", action="store_true")
args, _ = parser.parse_known_args()
kind = args.kind


distributions = dict(
    math_weight=list(2**np.linspace(-2, 2, 8)),
)

env_config = envs.get_mnist_config(size=args.size, colour=args.c, task=args.task)

config = DEFAULT_CONFIG.copy()
config.update(alg_config)
config.update(env_config)
config.update(
    render_step=10000,
    eval_step=1000,
    per_process_gpu_memory_fraction=0.23,

    patience=1000000,
    max_experiences=100000000,
    max_steps=100000000,

    count_prior_decay_steps=1000,
    final_count_prior_log_odds=0.0125,
    hw_prior_std=0.5,
    kernel_size=1,
)
Example #10
def run_experiment(name,
                   config,
                   readme,
                   distributions=None,
                   durations=None,
                   alg=None,
                   task="grid",
                   name_variables=None,
                   env_kwargs=None):

    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"])

    args, _ = parser.parse_known_args()

    _config = DEFAULT_CONFIG.copy()

    env_kwargs = env_kwargs or {}

    env_kwargs['task'] = task
    env_config = get_env_config(**env_kwargs)
    _config.update(env_config)

    if alg:
        alg_config = getattr(alg_module, "{}_config".format(alg))
        _config.update(alg_config)
        alg_name = sanitize(alg_config.alg_name)
    else:
        alg_name = ""

    _config.update(config)
    _config.update_from_command_line()

    _config.env_name = "{}_env={}".format(name, sanitize(env_config.env_name))

    if args.duration == "local":
        _config.exp_name = "alg={}".format(alg_name)
        with _config:
            return training_loop()
    else:
        run_kwargs = Config(
            kind="slurm",
            pmem=5000,
            ignore_gpu=False,
        )

        duration_args = durations[args.duration]

        if 'config' in duration_args:
            _config.update(duration_args['config'])
            del duration_args['config']

        run_kwargs.update(durations[args.duration])
        run_kwargs.update_from_command_line()

    if name_variables is not None:
        name_variables_str = "_".join("{}={}".format(
            sanitize(str(k)), sanitize(str(getattr(_config, k))))
                                      for k in name_variables.split(","))
        _config.env_name = "{}_{}".format(_config.env_name, name_variables_str)

    exp_name = "{}_alg={}_duration={}".format(_config.env_name, alg_name,
                                              args.duration)

    build_and_submit(name=exp_name,
                     config=_config,
                     distributions=distributions,
                     **run_kwargs)
Example #11
        lambda inp, output_size: fully_connected(
            inp, output_size, activation_fn=None), 1)
    estimator = NeuralValueEstimator(controller, env.obs_shape)
    alg = cfg.alg_class(estimator, name="critic")
    updater = RLUpdater(env, policy, alg)
    return updater


config = DEFAULT_CONFIG.copy(
    get_updater=get_updater,
    build_env=build_env,
    log_name="policy_evaluation",
    max_steps=100000,
    display_step=100,
    T=3,
    reward_radius=0.2,
    max_step=0.1,
    restart_prob=0.0,
    l2l=False,
    n_val=200,
    threshold=1e-4,
    verbose=False,
)

x = int(sys.argv[1])

if x == 0:
    print("TRPE")
    config.update(name="TRPE",
                  delta_schedule='0.01',
                  max_cg_steps=10)
Example #12
File: train.py Project: alcinos/dps
    return ClassificationEnv(train, val, one_hot=True)


# For training networks on EMNIST datasets.
EMNIST_CONFIG = DEFAULT_CONFIG.copy(
    name="emnist",
    env_name='emnist_pretrained',
    get_updater=get_differentiable_updater,
    build_env=build_emnist_env,
    shape=(14, 14),
    batch_size=128,
    eval_step=100,
    max_steps=100000,
    patience=10000,
    lr_schedule="Exp(0.001, 0, 10000, 0.9)",
    optimizer_spec="adam",
    threshold=-np.inf,
    n_train=60000,
    n_val=100,
    include_blank=True,
    classes=list(range(10)),
    n_controller_units=100,
    use_gpu=True,
    gpu_allow_growth=True,
    seed=347405995,
    stopping_criteria="01_loss,min",
)

# OMNIGLOT ***************************************

Example #13
def run_experiment(
        name, base_config, readme, distributions=None, durations=None,
        name_variables=None, alg_configs=None, env_configs=None, late_config=None):

    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    if env_configs is not None:
        parser.add_argument('env')
    if alg_configs is not None:
        parser.add_argument('alg')
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"], default="local", nargs="?")

    args, _ = parser.parse_known_args()

    config = DEFAULT_CONFIG.copy()

    config.update(base_config)

    if env_configs is not None:
        env_config = env_configs[args.env]
        config.update(env_config)

    if alg_configs is not None:
        alg_config = alg_configs[args.alg]
        config.update(alg_config)

    if late_config is not None:
        config.update(late_config)

    config.update_from_command_line()

    env_name = sanitize(config.get('env_name', ''))
    if name:
        config.env_name = "{}_env={}".format(name, env_name)
    else:
        config.env_name = "env={}".format(env_name)
    alg_name = sanitize(config.get("alg_name", ""))

    if args.duration == "local":
        config.exp_name = "alg={}".format(alg_name)
        with config:
            return training_loop()

    run_kwargs = Config(
        kind="slurm",
        pmem=5000,
        ignore_gpu=False,
    )

    duration_args = durations[args.duration]

    if 'config' in duration_args:
        config.update(duration_args['config'])
        del duration_args['config']

    run_kwargs.update(durations[args.duration])
    run_kwargs.update_from_command_line()

    if name_variables is not None:
        name_variables_str = "_".join(
            "{}={}".format(sanitize(str(k)), sanitize(str(getattr(config, k))))
            for k in name_variables.split(","))
        config.env_name = "{}_{}".format(config.env_name, name_variables_str)

    exp_name = "{}_alg={}_duration={}".format(config.env_name, alg_name, args.duration)

    build_and_submit(name=exp_name, config=config, distributions=distributions, **run_kwargs)
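
Both run_experiment variants (this one and Example #10) dispatch on positional command-line arguments, parsed with parse_known_args so that unrecognized flags are left for update_from_command_line(). A hypothetical invocation, assuming env_configs has an "addition" entry, alg_configs has an "a2c" entry, and durations has a "short" entry (all three names are illustrative):

    python experiment.py addition a2c short   # submit through slurm with durations["short"] settings
    python experiment.py addition a2c local   # run training_loop() directly in this process
    python experiment.py addition a2c         # duration defaults to "local"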
Example #14
import numpy as np

from dps.utils import Config
from dps.env import simple_addition
from dps.rl.algorithms import a2c
from dps.rl.policy import BuildEpsilonSoftmaxPolicy, BuildLstmController
from dps.config import DEFAULT_CONFIG

config = DEFAULT_CONFIG.copy(
    name="SimpleAddition",
    n_train=10000,
    n_val=100,
    max_steps=1000000,
    display_step=10,
    eval_step=10,
    patience=np.inf,
    power_through=False,
    load_path=-1,
    start_tensorboard=True,
    verbose=False,
    show_plots=True,
    use_gpu=False,
    threshold=0.01,
    # render_hook=rl_render_hook,
    render_hook=None,
    cpu_ram_limit_mb=5 * 1024,
)

alg_config = Config(
    get_updater=a2c.A2C,
    build_policy=BuildEpsilonSoftmaxPolicy(),
    build_controller=BuildLstmController(),
    optimizer_spec="adam",