Example #1
alg_config = Config(  # assumed opening (name inferred from alg_config.copy below); earlier keys truncated
    reconstruction_weight=1.0,
    kl_weight=1.0,
    math_weight=0.0,
    train_math=False,
    math_A=None,
    noisy=True,
    build_background_encoder=lambda scope: MLP(n_units=[10, 10], scope=scope),
    build_background_decoder=IdentityFunction,
    max_possible_objects=None)

simple_config = alg_config.copy(
    alg_name="simple",
    build_network=simple.SimpleVAE,
    render_hook=simple.SimpleVAE_RenderHook(),
    build_encoder=networks.Backbone,
    build_decoder=networks.InverseBackbone,
    n_channels=128,
    n_final_layers=3,
    kernel_size=1,
    pixels_per_cell=(12, 12),
)

baseline_config = alg_config.copy(
    alg_name="baseline",
    build_network=baseline.Baseline_Network,
    render_hook=baseline.Baseline_RenderHook(),
    build_object_encoder=lambda scope: MLP(n_units=[512, 256], scope=scope),
    build_object_decoder=lambda scope: MLP(n_units=[256, 512], scope=scope),
    cc_threshold=1e-3,
    object_shape=(21, 21),
)
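
All three blocks above derive variants from a shared base configuration via Config.copy(...). As a rough, hypothetical sketch of that idiom (MiniConfig is illustrative only, not the dps Config class), the pattern reduces to a dict-like namespace whose copy() merges keyword overrides over the inherited values:

class MiniConfig(dict):
    """Toy stand-in for a copy-with-overrides config object."""

    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def copy(self, **overrides):
        new = MiniConfig(self)   # inherit all existing entries
        new.update(overrides)    # overrides win over inherited values
        return new


base = MiniConfig(alg_name="base", kl_weight=1.0, noisy=True)
variant = base.copy(alg_name="simple", n_channels=128)
assert variant.kl_weight == 1.0 and variant.alg_name == "simple"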
Example #2
import os
from collections import defaultdict

# Config, a2c, simple_addition, BuildEpsilonSoftmaxPolicy, BuildLstmController,
# _raw_run and _get_deterministic_output are provided by the dps package and the
# surrounding test module.


def test_simple_add(test_config):
    # Fully specify the config here so that this test is not affected by config changes external to this file.
    config = Config(
        env_name="test_simple_add_a2c",
        name="test_simple_add_a2c",
        get_updater=a2c.A2C,
        n_controller_units=32,
        batch_size=16,
        optimizer_spec="adam",
        opt_steps_per_update=20,
        sub_batch_size=0,
        epsilon=0.2,
        lr_schedule=1e-4,
        max_steps=501,
        build_policy=BuildEpsilonSoftmaxPolicy(),
        build_controller=BuildLstmController(),
        exploration_schedule=0.1,
        val_exploration_schedule=0.0,
        actor_exploration_schedule=None,
        policy_weight=1.0,
        value_weight=0.0,
        value_reg_weight=0.0,
        entropy_weight=0.01,
        split=False,
        q_lmbda=1.0,
        v_lmbda=1.0,
        policy_importance_c=0,
        q_importance_c=None,
        v_importance_c=None,
        max_grad_norm=None,
        gamma=1.0,
        use_differentiable_loss=False,
        use_gpu=False,
        display_step=500,
        seed=0,

        # env-specific
        build_env=simple_addition.build_env,
        T=30,
        curriculum=[
            dict(width=1),
            dict(width=2),
            dict(width=3),
        ],
        base=10,
        final_reward=True,
    )

    config.update(test_config)

    n_repeats = 1  # Haven't made it completely deterministic yet, so keep it at 1.

    # Count how many runs produce each deterministic stdout fingerprint.
    results = defaultdict(int)

    threshold = 0.15  # maximum acceptable best_01_loss

    for i in range(n_repeats):
        config = config.copy()
        output = _raw_run(config)
        stdout = output.path_for('stdout')
        result = _get_deterministic_output(stdout)
        results[result] += 1
        assert output.history[-1]['best_01_loss'] < threshold

    if len(results) != 1:
        for r in sorted(results):
            print("\n" + "*" * 80)
            print("The following occurred {} times:\n".format(results[r]))
            print(r)
        raise Exception("Results were not deterministic.")

    assert len(output.config.curriculum) == 3
    config.load_path = output.path_for('weights/best_of_stage_2')

    # TensorFlow checkpoints span several files; check the .index and .meta components.
    assert os.path.exists(config.load_path + ".index")
    assert os.path.exists(config.load_path + ".meta")

    # Load one of the hypotheses, train it for a bit, make sure the accuracy is still high.
    config.curriculum = [output.config.curriculum[-1]]
    config = config.copy()
    output = _raw_run(config)
    stdout = output.path_for('stdout')
    result = _get_deterministic_output(stdout)
    results[result] += 1
    assert output.history[-1]['best_01_loss'] < threshold

    # Load one of the hypotheses, don't train it at all, make sure the accuracy is still high.
    config.do_train = False
    config = config.copy()
    output = _raw_run(config)
    stdout = output.path_for('stdout')
    result = _get_deterministic_output(stdout)
    results[result] += 1
    assert output.history[-1]['best_01_loss'] < threshold
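
The curriculum entry above is a list of per-stage overrides (width 1, 2, 3), and the later assertions reload the weights saved for the final stage. A minimal, hypothetical sketch of layering such per-stage overrides onto a base config (not the actual dps training loop):

def iterate_curriculum(base_config, curriculum):
    """Yield (stage index, effective config) for each curriculum stage."""
    for stage_idx, stage_overrides in enumerate(curriculum):
        stage_config = dict(base_config)      # start from shared settings
        stage_config.update(stage_overrides)  # e.g. {"width": 2} for stage 1
        yield stage_idx, stage_config


base = dict(T=30, base=10, width=None)
stages = [dict(width=1), dict(width=2), dict(width=3)]
for idx, cfg in iterate_curriculum(base, stages):
    print(idx, cfg["width"])  # prints 0 1, then 1 2, then 2 3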
Example #3
grid_config = env_config.copy(
    env_name="nips_2018_grid",
    build_env=Nips2018Grid,

    # dataset params
    min_chars=16,
    max_chars=25,
    n_patch_examples=0,
    image_shape=(6 * 14, 6 * 14),
    patch_shape=(14, 14),
    characters=list(range(10)),
    patch_size_std=0.0,
    colours="white",
    grid_shape=(6, 6),
    spacing=(-3, -3),
    random_offset_range=(15, 15),
    n_distractors_per_image=0,
    backgrounds="",
    backgrounds_sample_every=False,
    background_colours="",
    background_cfg=dict(mode="colour", colour="black"),
    object_shape=(14, 14),
    postprocessing="",
    preserve_env=False,
    n_train=25000,
    n_val=1e3,
    eval_step=1000,
    display_step=1000,
    render_step=5000,
    patience=1000000,
    max_steps=110000,
)
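
The dataset geometry above is internally consistent: image_shape is grid_shape times patch_shape (6 * 14 = 84 per side), and the negative spacing makes adjacent 14x14 patches overlap by 3 pixels. A small illustrative check of that arithmetic (not code from the project):

grid_shape = (6, 6)
patch_shape = (14, 14)
spacing = (-3, -3)

image_shape = tuple(g * p for g, p in zip(grid_shape, patch_shape))
assert image_shape == (84, 84)  # matches image_shape=(6 * 14, 6 * 14)

# Extent actually covered by the patch grid; negative spacing overlaps cells.
# The 15-pixel slack per side plausibly corresponds to random_offset_range=(15, 15).
grid_extent = tuple(g * p + (g - 1) * s
                    for g, p, s in zip(grid_shape, patch_shape, spacing))
assert grid_extent == (69, 69)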
Example #4
File: a2c.py  Project: alcinos/dps
config = Config(  # assumed opening (name inferred from config.copy below); earlier keys truncated
    split=False,
    q_lmbda=1.0,
    v_lmbda=1.0,
    policy_importance_c=0,
    q_importance_c=None,
    v_importance_c=None,
    max_grad_norm=None,
    gamma=1.0,

    use_differentiable_loss=False,
    render_n_rollouts=4,
)


actor_critic_config = config.copy(
    exp_name="ActorCritic",
    split=True
)


ppo_config = config.copy(
    exp_name="PPO",
    opt_steps_per_update=10,
    epsilon=0.2,
    value_weight=0.0,
)


# Same config that is used in the test.
test_config = config.copy(
    exp_name="TestA2C",
    opt_steps_per_update=20,