def test_batch_normalization_node():
    """Check that the cost drops while training through batch normalization.

    The network is input -> batch normalization -> dense(1), wrapped in an
    AdamNode whose cost is the squared error against input "y". The compiled
    function is created with ``include_updates=True``; the test asserts that
    the returned cost strictly decreases on three successive calls.
    """
    layers = tn.SequentialNode(
        "seq",
        [
            tn.InputNode("x", shape=(None, 10)),
            batch_normalization.BatchNormalizationNode("bn"),
            tn.DenseNode("d", num_units=1),
        ],
    )
    cost_node = tn.TotalCostNode(
        "cost",
        {
            "target": tn.InputNode("y", shape=(None, 1)),
            "pred": tn.ReferenceNode("pred_ref", reference="d"),
        },
        cost_function=treeano.utils.squared_error,
    )
    network = tn.AdamNode(
        "adam",
        {"subtree": layers, "cost": cost_node},
    ).network()
    train_step = network.function(["x", "y"],
                                  ["cost"],
                                  include_updates=True)

    # inputs are drawn with mean 100 and standard deviation 100
    inputs = 100 + 100 * np.random.randn(100, 10).astype(fX)
    targets = np.random.randn(100, 1).astype(fX)

    last_cost = train_step(inputs, targets)[0]
    for _ in range(3):
        new_cost = train_step(inputs, targets)[0]
        assert new_cost < last_cost
        last_cost = new_cost
def test_total_cost_node():
    """Compare TotalCostNode with squared error against a numpy reference.

    The node's output for random (x, y) must match ``((x - y) ** 2).mean()``
    and must be 0 when prediction and target are the same array.
    """
    dims = (3, 4, 5)
    cost_node = tn.TotalCostNode(
        "cost",
        {
            "pred": tn.InputNode("x", shape=dims),
            "target": tn.InputNode("y", shape=dims),
        },
        cost_function=treeano.utils.squared_error,
    )
    cost_fn = cost_node.network().function(["x", "y"], ["cost"])

    first = np.random.rand(*dims).astype(fX)
    second = np.random.rand(*dims).astype(fX)

    np.testing.assert_allclose(cost_fn(first, second)[0],
                               ((first - second) ** 2).mean(),
                               rtol=1e-5)
    # identical prediction and target give zero cost
    for same in (first, second):
        np.testing.assert_allclose(cost_fn(same, same)[0], 0)
def test_affine_spatial_transformer_node_build():
    """Smoke test: a network with an AffineSpatialTransformerNode builds.

    Assembles a localization sub-network, embeds it in a spatial transformer
    inside a small classifier, wraps everything in an AdamNode with a
    categorical cross-entropy cost, and calls ``build()`` on the result.
    """
    loc_layers = [
        tn.DenseNode("loc_fc1", num_units=50),
        tn.ReLUNode("loc_relu3"),
        tn.DenseNode("loc_fc2",
                     num_units=6,
                     inits=[treeano.inits.ZeroInit()]),
    ]
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode("loc_seq", loc_layers),
        num_filters=32,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    model_layers = [
        tn.InputNode("x", shape=(None, 1, 60, 60)),
        spatial_transformer.AffineSpatialTransformerNode(
            "st",
            localization_network,
            output_shape=(20, 20)),
        tn.DenseNode("fc1"),
        tn.ReLUNode("relu1"),
        tn.DropoutNode("do1"),
        tn.DenseNode("fc2", num_units=10),
        tn.SoftmaxNode("pred"),
    ]
    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode("seq", model_layers),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeNormalInit()],
    )

    cost_node = tn.TotalCostNode(
        "cost",
        {
            "pred": tn.ReferenceNode("pred_ref", reference="model"),
            "target": tn.InputNode("y", shape=(None,), dtype="int32"),
        },
    )
    optimizer = tn.AdamNode("adam", {"subtree": model, "cost": cost_node})
    with_updates = tn.HyperparameterNode(
        "with_updates",
        optimizer,
        cost_function=treeano.utils.categorical_crossentropy_i32,
    )

    network = with_updates.network()
    # build eagerly to share weights
    network.build()
pool_stride=(2, 2), pool_pad=(1, 1), inits=[treeano.inits.OrthogonalInit()], ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", { "subtree": model, "cost": tn.TotalCostNode( "cost", { "pred": tn.ReferenceNode("pred_ref", reference="model"), "target": tn.InputNode("y", shape=(None, ), dtype="int32") }, ) }), cost_function=treeano.utils.categorical_crossentropy_i32, ) network = with_updates.network() network.build() # build eagerly to share weights valid_fn = canopy.handled_fn(network, [ canopy.handlers.time_call(key="valid_time"), canopy.handlers.override_hyperparameters(dropout_probability=0), canopy.handlers.batch_pad(BATCH_SIZE, keys=["x", "y"]), canopy.handlers.chunk_variables(batch_size=BATCH_SIZE, variables=["x", "y"])
# tn.DropoutNode("do1"), cp.AuxiliaryContractionPenaltyNode( "cp2", tn.SequentialNode( "cp_seq2", [tn.DenseNode("fc2"), # the cost has nan's when using ReLU's # TODO look into why tn.AbsNode("abs2")]), cost_weight=1e1), tn.DropoutNode("do2"), tn.DenseNode("fc3", num_units=10), tn.SoftmaxNode("pred"), tn.TotalCostNode( "cost", {"pred": tn.IdentityNode("pred_id"), "target": tn.InputNode("y", shape=(None,), dtype="int32")}, cost_function=treeano.utils.categorical_crossentropy_i32), tn.InputElementwiseSumNode("total_cost")]), num_units=32, cost_reference="total_cost", dropout_probability=0.5, inits=[treeano.inits.XavierNormalInit()], ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", {"subtree": model, "cost": tn.ReferenceNode("cost_ref", reference="total_cost")}),
def test_total_cost_node_serialization():
    """Run ``tn.check_serialization`` on a TotalCostNode with two children."""
    children = {
        "pred": tn.IdentityNode("foo"),
        "target": tn.IdentityNode("bar"),
    }
    node = tn.TotalCostNode("foo", children)
    tn.check_serialization(node)
def SampleVariancePenalizationNode(*args, **kwargs):
    """Create a ``tn.TotalCostNode`` using the sample variance penalty.

    All positional and keyword arguments are forwarded unchanged to
    ``tn.TotalCostNode``. The ``aggregator`` keyword is supplied by this
    factory (``sample_variance_penalty_aggregator``) and therefore must not
    be passed by the caller.

    Raises:
        AssertionError: if ``aggregator`` is present in ``kwargs``.
    """
    # TODO convert to node that takes in appropriate hyperparameters
    if "aggregator" in kwargs:
        # Raised explicitly instead of via ``assert`` so the guard is not
        # stripped under ``python -O`` (which would silently overwrite the
        # caller's aggregator). AssertionError is kept so the exception type
        # seen by existing callers does not change.
        raise AssertionError(
            "SampleVariancePenalizationNode sets 'aggregator' itself; "
            "it must not be passed explicitly")
    kwargs["aggregator"] = sample_variance_penalty_aggregator
    return tn.TotalCostNode(*args, **kwargs)
tn.AddBiasNode("y_bias", broadcastable_axes=(0, 1)), tn.SigmoidNode("sigmoid"), ]), inits=[treeano.inits.OrthogonalInit()], num_units=HIDDEN_STATE_SIZE, learn_init=True, grad_clip=1, ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", {"subtree": model, "cost": tn.TotalCostNode("cost", { "pred": tn.ReferenceNode("pred_ref", reference="model"), "target": tn.InputNode("y", shape=(None, None, 1))}, )}), cost_function=treeano.utils.squared_error, ) network = with_updates.network() train_fn = network.function(["x", "y"], ["cost"], include_updates=True) valid_fn = network.function(["x"], ["model"]) # ################################# training ################################# print("Starting training...") import time st = time.time()
def load_network(update_scale_factor):
    """Construct and build the spatial-transformer classification network.

    Args:
        update_scale_factor: forwarded as ``update_scale_factor`` to the
            ``tn.UpdateScaleNode`` that wraps the spatial transformer.

    Returns:
        The network produced by ``with_updates.network()``, after
        ``build()`` has been called on it.
    """
    # localization sub-network of the spatial transformer
    loc_layers = [
        tn.DnnMaxPoolNode("loc_pool1"),
        tn.DnnConv2DWithBiasNode("loc_conv1"),
        tn.DnnMaxPoolNode("loc_pool2"),
        bn.NoScaleBatchNormalizationNode("loc_bn1"),
        tn.ReLUNode("loc_relu1"),
        tn.DnnConv2DWithBiasNode("loc_conv2"),
        bn.NoScaleBatchNormalizationNode("loc_bn2"),
        tn.ReLUNode("loc_relu2"),
        tn.DenseNode("loc_fc1", num_units=50),
        bn.NoScaleBatchNormalizationNode("loc_bn3"),
        tn.ReLUNode("loc_relu3"),
        tn.DenseNode("loc_fc2",
                     num_units=6,
                     inits=[treeano.inits.NormalWeightInit(std=0.001)]),
    ]
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode("loc_seq", loc_layers),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )
    transformer = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))

    model_layers = [
        tn.InputNode("x", shape=(None, 1, 60, 60)),
        # Scaling the updates of the spatial transformer seems to be very
        # helpful: it lets the classification net learn what to look for
        # before the transformer starts looking prematurely.
        tn.UpdateScaleNode("st_update_scale",
                           transformer,
                           update_scale_factor=update_scale_factor),
        tn.Conv2DWithBiasNode("conv1"),
        tn.MaxPool2DNode("mp1"),
        bn.NoScaleBatchNormalizationNode("bn1"),
        tn.ReLUNode("relu1"),
        tn.Conv2DWithBiasNode("conv2"),
        tn.MaxPool2DNode("mp2"),
        bn.NoScaleBatchNormalizationNode("bn2"),
        tn.ReLUNode("relu2"),
        tn.GaussianDropoutNode("do1"),
        tn.DenseNode("fc1"),
        bn.NoScaleBatchNormalizationNode("bn3"),
        tn.ReLUNode("relu3"),
        tn.DenseNode("fc2", num_units=10),
        tn.SoftmaxNode("pred"),
    ]
    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode("seq", model_layers),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )

    cost_node = tn.TotalCostNode(
        "cost",
        {
            "pred": tn.ReferenceNode("pred_ref", reference="model"),
            "target": tn.InputNode("y", shape=(None,), dtype="int32"),
        },
    )
    optimizer = tn.AdamNode("adam", {"subtree": model, "cost": cost_node})
    with_updates = tn.HyperparameterNode(
        "with_updates",
        optimizer,
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )

    built = with_updates.network()
    # build eagerly to share weights
    built.build()
    return built
"z_vars", [tn.DenseNode("fc_z", num_units=LATENT_SIZE), tn.AuxiliaryCostNode( "xcov_cost", {"target": tn.ReferenceNode("y_ref", reference="y_pred")}, cost_function=cross_covariance)])], axis=1), tn.DenseNode("fc3"), tn.ReLUNode("relu3"), tn.DenseNode("fc4"), tn.ReLUNode("relu4"), tn.DenseNode("reconstruction", num_units=28 * 28), tn.TotalCostNode( "cost", {"pred": tn.IdentityNode("recon_id"), "target": tn.ReferenceNode("in_ref", reference="x")}, cost_function=treeano.utils.squared_error), tn.MultiplyConstantNode("mul_reconstruction_error", value=0.1), tn.InputElementwiseSumNode("total_cost")]), num_units=512, cost_reference="total_cost", dropout_probability=0.5, inits=[treeano.inits.XavierNormalInit()], ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", {"subtree": model,
pool_size=(2, 2), num_units=256, inits=[treeano.inits.XavierNormalInit()], ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", { "subtree": model, "cost": tn.TotalCostNode( "cost", { "pred": tn.ReferenceNode("pred_ref", reference="model"), "target": tn.InputNode("y", shape=(None, ), dtype="int32") }, cost_function=treeano.utils.categorical_crossentropy_i32, ) }), ) network = with_updates.network() network.build() # build eagerly to share weights BATCH_SIZE = 500 valid_fn = canopy.handled_fn(network, [ canopy.handlers.time_call(key="valid_time"), canopy.handlers.override_hyperparameters(bn_use_moving_stats=True), canopy.handlers.chunk_variables(batch_size=BATCH_SIZE, variables=["x", "y"])