Example #1
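All the snippets on this page assume treeano's usual example imports. A minimal sketch, assuming standard module locations (exact paths vary between treeano versions):

import numpy as np
import theano
import treeano
import treeano.nodes as tn
import canopy  # treeano's companion training library (Examples #4 and #11)

fX = theano.config.floatX  # treeano conventionally aliases floatX as fX

# sandbox extension nodes referenced below, under assumed paths; later
# examples alias similar sandbox modules as bn, st, and cp:
from treeano.sandbox.nodes import batch_normalization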
def test_batch_normalization_node():
    network = tn.AdamNode(
        "adam", {
            "subtree":
            tn.SequentialNode("seq", [
                tn.InputNode("x", shape=(None, 10)),
                batch_normalization.BatchNormalizationNode("bn"),
                tn.DenseNode("d", num_units=1),
            ]),
            "cost":
            tn.TotalCostNode(
                "cost", {
                    "target": tn.InputNode("y", shape=(None, 1)),
                    "pred": tn.ReferenceNode("pred_ref", reference="d"),
                },
                cost_function=treeano.utils.squared_error)
        }).network()

    fn = network.function(["x", "y"], ["cost"], include_updates=True)

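    # inputs deliberately have a large mean and variance (100 +/- 100), so the
    # cost should only decrease this reliably if BN standardizes them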
    x = 100 + 100 * np.random.randn(100, 10).astype(fX)
    y = np.random.randn(100, 1).astype(fX)
    prev_cost = fn(x, y)[0]
    for _ in range(3):
        cost = fn(x, y)[0]
        assert cost < prev_cost
        prev_cost = cost
Example #2
def test_total_cost_node():
    network = tn.TotalCostNode(
        "cost", {
            "pred": tn.InputNode("x", shape=(3, 4, 5)),
            "target": tn.InputNode("y", shape=(3, 4, 5))
        },
        cost_function=treeano.utils.squared_error).network()
    fn = network.function(["x", "y"], ["cost"])
    x = np.random.rand(3, 4, 5).astype(fX)
    y = np.random.rand(3, 4, 5).astype(fX)
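    # the compiled cost should match the mean squared error computed in
    # numpy, and should be exactly zero when pred and target coincide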
    np.testing.assert_allclose(fn(x, y)[0], ((x - y)**2).mean(), rtol=1e-5)
    np.testing.assert_allclose(fn(x, x)[0], 0)
    np.testing.assert_allclose(fn(y, y)[0], 0)
Example #3
def test_affine_spatial_transformer_node_build():
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DenseNode("loc_fc1", num_units=50),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.ZeroInit()])]),
        num_filters=32,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             spatial_transformer.AffineSpatialTransformerNode(
                 "st",
                 localization_network,
                 output_shape=(20, 20)),
             tn.DenseNode("fc1"),
             tn.ReLUNode("relu1"),
             tn.DropoutNode("do1"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeNormalInit()],
    )

    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode("cost", {
                 "pred": tn.ReferenceNode("pred_ref", reference="model"),
                 "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
    )
    network = with_updates.network()
    network.build()  # build eagerly to share weights
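Having built the network, one would then compile functions against it; a minimal sketch following the pattern of the other examples on this page (not part of the original test):

train_fn = network.function(["x", "y"], ["cost"], include_updates=True)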
Example #4
    pool_stride=(2, 2),
    pool_pad=(1, 1),
    inits=[treeano.inits.OrthogonalInit()],
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam", {
            "subtree":
            model,
            "cost":
            tn.TotalCostNode(
                "cost",
                {
                    "pred": tn.ReferenceNode("pred_ref", reference="model"),
                    "target": tn.InputNode("y", shape=(None, ), dtype="int32")
                },
            )
        }),
    cost_function=treeano.utils.categorical_crossentropy_i32,
)
network = with_updates.network()
network.build()  # build eagerly to share weights

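# validation handlers: disable dropout by overriding its probability to 0,
# then pad and chunk the inputs so any dataset size fits the batch size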
valid_fn = canopy.handled_fn(network, [
    canopy.handlers.time_call(key="valid_time"),
    canopy.handlers.override_hyperparameters(dropout_probability=0),
    canopy.handlers.batch_pad(BATCH_SIZE, keys=["x", "y"]),
    canopy.handlers.chunk_variables(batch_size=BATCH_SIZE,
                                    variables=["x", "y"])
Example #5
         # tn.DropoutNode("do1"),
         cp.AuxiliaryContractionPenaltyNode(
             "cp2",
             tn.SequentialNode(
                 "cp_seq2",
                 [tn.DenseNode("fc2"),
                  # the cost has NaNs when using ReLUs
                  # TODO: look into why
                  tn.AbsNode("abs2")]),
             cost_weight=1e1),
         tn.DropoutNode("do2"),
         tn.DenseNode("fc3", num_units=10),
         tn.SoftmaxNode("pred"),
         tn.TotalCostNode(
             "cost",
             {"pred": tn.IdentityNode("pred_id"),
              "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             cost_function=treeano.utils.categorical_crossentropy_i32),
         tn.InputElementwiseSumNode("total_cost")]),
    num_units=32,
    cost_reference="total_cost",
    dropout_probability=0.5,
    inits=[treeano.inits.XavierNormalInit()],
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam",
        {"subtree": model,
         "cost": tn.ReferenceNode("cost_ref", reference="total_cost")}),
Example #6
def test_total_cost_node_serialization():
    tn.check_serialization(
        tn.TotalCostNode("foo", {
            "pred": tn.IdentityNode("foo"),
            "target": tn.IdentityNode("bar")
        }))
Example #7
def SampleVariancePenalizationNode(*args, **kwargs):
    # TODO convert to node that takes in appropriate hyperparameters
    assert "aggregator" not in kwargs
    kwargs["aggregator"] = sample_variance_penalty_aggregator
    return tn.TotalCostNode(*args, **kwargs)
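A hypothetical usage, mirroring how TotalCostNode is constructed elsewhere on this page (the node names and cost function here are illustrative):

cost_node = SampleVariancePenalizationNode(
    "cost",
    {"pred": tn.ReferenceNode("pred_ref", reference="model"),
     "target": tn.InputNode("y", shape=(None,), dtype="int32")},
    cost_function=treeano.utils.categorical_crossentropy_i32)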
Example #8
         tn.AddBiasNode("y_bias", broadcastable_axes=(0, 1)),
         tn.SigmoidNode("sigmoid"),
         ]),
    inits=[treeano.inits.OrthogonalInit()],
    num_units=HIDDEN_STATE_SIZE,
    learn_init=True,
    grad_clip=1,
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam",
        {"subtree": model,
         "cost": tn.TotalCostNode("cost", {
             "pred": tn.ReferenceNode("pred_ref", reference="model"),
             "target": tn.InputNode("y", shape=(None, None, 1))},
         )}),
    cost_function=treeano.utils.squared_error,
)
network = with_updates.network()

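# train_fn applies the Adam updates as a side effect; valid_fn only
# evaluates the model's output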
train_fn = network.function(["x", "y"], ["cost"], include_updates=True)
valid_fn = network.function(["x"], ["model"])


# ################################# training #################################

print("Starting training...")

import time
st = time.time()
Example #9
def load_network(update_scale_factor):
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DnnMaxPoolNode("loc_pool1"),
             tn.DnnConv2DWithBiasNode("loc_conv1"),
             tn.DnnMaxPoolNode("loc_pool2"),
             bn.NoScaleBatchNormalizationNode("loc_bn1"),
             tn.ReLUNode("loc_relu1"),
             tn.DnnConv2DWithBiasNode("loc_conv2"),
             bn.NoScaleBatchNormalizationNode("loc_bn2"),
             tn.ReLUNode("loc_relu2"),
             tn.DenseNode("loc_fc1", num_units=50),
             bn.NoScaleBatchNormalizationNode("loc_bn3"),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.NormalWeightInit(std=0.001)])]),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    st_node = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))

    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             # scaling down the updates of the spatial transformer
             # seems to be very helpful: it gives the classification
             # net time to learn what to look for before the
             # transformer prematurely commits to a location
             tn.UpdateScaleNode(
                 "st_update_scale",
                 st_node,
                 update_scale_factor=update_scale_factor),
             tn.Conv2DWithBiasNode("conv1"),
             tn.MaxPool2DNode("mp1"),
             bn.NoScaleBatchNormalizationNode("bn1"),
             tn.ReLUNode("relu1"),
             tn.Conv2DWithBiasNode("conv2"),
             tn.MaxPool2DNode("mp2"),
             bn.NoScaleBatchNormalizationNode("bn2"),
             tn.ReLUNode("relu2"),
             tn.GaussianDropoutNode("do1"),
             tn.DenseNode("fc1"),
             bn.NoScaleBatchNormalizationNode("bn3"),
             tn.ReLUNode("relu3"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )

    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode("cost", {
                 "pred": tn.ReferenceNode("pred_ref", reference="model"),
                 "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )
    network = with_updates.network()
    network.build()  # build eagerly to share weights
    return network
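A hypothetical invocation (the scale factor value is illustrative, not from the source; UpdateScaleNode multiplies the spatial transformer's parameter updates by this factor):

network = load_network(update_scale_factor=0.1)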
Example #10
                  "z_vars",
                  [tn.DenseNode("fc_z", num_units=LATENT_SIZE),
                   tn.AuxiliaryCostNode(
                       "xcov_cost",
                       {"target": tn.ReferenceNode("y_ref",
                                                   reference="y_pred")},
                       cost_function=cross_covariance)])],
             axis=1),
         tn.DenseNode("fc3"),
         tn.ReLUNode("relu3"),
         tn.DenseNode("fc4"),
         tn.ReLUNode("relu4"),
         tn.DenseNode("reconstruction", num_units=28 * 28),
         tn.TotalCostNode(
             "cost",
             {"pred": tn.IdentityNode("recon_id"),
              "target": tn.ReferenceNode("in_ref", reference="x")},
             cost_function=treeano.utils.squared_error),
         tn.MultiplyConstantNode("mul_reconstruction_error", value=0.1),
         tn.InputElementwiseSumNode("total_cost")]),
    num_units=512,
    cost_reference="total_cost",
    dropout_probability=0.5,
    inits=[treeano.inits.XavierNormalInit()],
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam",
        {"subtree": model,
Example #11
    pool_size=(2, 2),
    num_units=256,
    inits=[treeano.inits.XavierNormalInit()],
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam", {
            "subtree":
            model,
            "cost":
            tn.TotalCostNode(
                "cost",
                {
                    "pred": tn.ReferenceNode("pred_ref", reference="model"),
                    "target": tn.InputNode("y", shape=(None, ), dtype="int32")
                },
                cost_function=treeano.utils.categorical_crossentropy_i32,
            )
        }),
)
network = with_updates.network()
network.build()  # build eagerly to share weights

BATCH_SIZE = 500

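# validation handlers: evaluate batch normalization with its moving
# statistics instead of per-batch statistics, and chunk inputs into
# fixed-size batches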
valid_fn = canopy.handled_fn(network, [
    canopy.handlers.time_call(key="valid_time"),
    canopy.handlers.override_hyperparameters(bn_use_moving_stats=True),
    canopy.handlers.chunk_variables(batch_size=BATCH_SIZE,
                                    variables=["x", "y"])