def test_dense_node_and_dense_combine_node1():
    # testing that dense node and dense combine node with identity child
    # return the same thing
    network1 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.DenseNode("fc1", num_units=6),
            tn.DenseNode("fc2", num_units=7),
            tn.DenseNode("fc3", num_units=8)
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()

    network2 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.DenseCombineNode("fc1", [tn.IdentityNode("i1")], num_units=6),
            tn.DenseCombineNode("fc2", [tn.IdentityNode("i2")], num_units=7),
            tn.DenseCombineNode("fc3", [tn.IdentityNode("i3")], num_units=8)
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()

    x = np.random.randn(3, 4, 5).astype(fX)
    fn1 = network1.function(["in"], ["fc3"])
    fn2 = network2.function(["in"], ["fc3"])
    np.testing.assert_allclose(fn1(x), fn2(x))
def test_dense_combine_node_uses_children():
    network1 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.MultiplyConstantNode("mul", value=2),
            tn.DenseCombineNode("fc",
                                [tn.IdentityNode("i1"),
                                 tn.IdentityNode("i2")],
                                num_units=6)
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()

    network2 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.DenseCombineNode("fc",
                                [tn.MultiplyConstantNode("mul1", value=2),
                                 tn.MultiplyConstantNode("mul2", value=2)],
                                num_units=6)
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()

    x = np.random.randn(3, 4, 5).astype(fX)
    fn1 = network1.function(["in"], ["hp"])
    fn2 = network2.function(["in"], ["hp"])
    np.testing.assert_allclose(fn1(x), fn2(x))
def test_postwalk_node():
    names = []

    def f(node):
        names.append(node.name)
        return node

    node = tn.HyperparameterNode(
        "1",
        tn.HyperparameterNode("2",
                              tn.IdentityNode("3")))
    canopy.node_utils.postwalk_node(node, f)
    nt.assert_equal(names, ["3", "2", "1"])
def test_remove_parents():
    network1 = tn.SequentialNode("seq", [
        tn.InputNode("i", shape=()),
        tn.HyperparameterNode(
            "hp1",
            tn.HyperparameterNode(
                "hp2",
                tn.AddConstantNode("ac"),
                value=1),
            value=2)
    ]).network()
    network2 = canopy.transforms.remove_parents(network1, "ac")
    nt.assert_equal(tn.AddConstantNode("ac"), network2.root_node)
def test_affine_spatial_transformer_node_build():
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DenseNode("loc_fc1", num_units=50),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.ZeroInit()])]),
        num_filters=32,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             spatial_transformer.AffineSpatialTransformerNode(
                 "st",
                 localization_network,
                 output_shape=(20, 20)),
             tn.DenseNode("fc1"),
             tn.ReLUNode("relu1"),
             tn.DropoutNode("do1"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeNormalInit()],
    )

    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode("cost", {
                 "pred": tn.ReferenceNode("pred_ref", reference="model"),
                 "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
    )
    network = with_updates.network()
    network.build()  # build eagerly to share weights
def test_auxiliary_cost_node():
    network = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("x", shape=(3, 4, 5)),
            tn.AuxiliaryCostNode(
                "cost1",
                {"target": tn.InputNode("y1", shape=(3, 4, 5))}),
            tn.AddConstantNode("a1", value=2),
            tn.AuxiliaryCostNode(
                "cost2",
                {"target": tn.InputNode("y2", shape=(3, 4, 5))}),
            tn.MultiplyConstantNode("m1", value=2),
            tn.AuxiliaryCostNode(
                "cost3",
                {"target": tn.InputNode("y3", shape=(3, 4, 5))}),
            tn.ConstantNode("const", value=0),
            tn.InputElementwiseSumNode("cost")
        ]),
        cost_reference="cost",
        cost_function=treeano.utils.squared_error,
    ).network()
    fn = network.function(["x", "y1", "y2", "y3"], ["cost"])
    x = np.random.rand(3, 4, 5).astype(fX)
    ys = [np.random.rand(3, 4, 5).astype(fX) for _ in range(3)]

    def mse(x, y):
        return ((x - y) ** 2).mean()

    expected_output = (mse(x, ys[0])
                       + mse(x + 2, ys[1])
                       + mse(2 * (x + 2), ys[2]))
    np.testing.assert_allclose(fn(x, *ys)[0],
                               expected_output,
                               rtol=1e-5)
def axes(ndim, pos, neg):
    network = tn.HyperparameterNode(
        "a",
        tn.InputNode("b", shape=()),
        pos=pos,
        neg=neg,
    ).network()["a"]
    return treeano.utils.find_axes(network, ndim, ["pos"], ["neg"])
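
# A usage sketch (values hypothetical, inferred from the helper above):
# find_axes presumably returns the axes named by the positive hyperparameter
# when one is set, and the complement of the negative hyperparameter otherwise:
#
#   axes(4, pos=(1,), neg=None)  # -> presumably (1,)
#   axes(4, pos=None, neg=(0,))  # -> presumably (1, 2, 3)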
def test_remove_nodes():
    network1 = tn.SequentialNode("seq", [
        tn.InputNode("i", shape=()),
        tn.HyperparameterNode(
            "hp1",
            tn.HyperparameterNode(
                "hp2",
                tn.AddConstantNode("ac"),
                value=1),
            value=2)
    ]).network()
    fn1 = network1.function(["i"], ["seq"])
    nt.assert_equal(1, fn1(0)[0])

    network2 = canopy.transforms.remove_nodes(network1, {"hp2"},
                                              keep_child=True)
    fn2 = network2.function(["i"], ["seq"])
    nt.assert_equal(2, fn2(0)[0])

    network3 = canopy.transforms.remove_nodes(network1, {"ac"})
    fn3 = network3.function(["i"], ["seq"])
    nt.assert_equal(0, fn3(0)[0])
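
# Worked reasoning for the assertions above: "ac" adds the innermost "value"
# hyperparameter visible to it. Initially that is hp2's value=1 (so 0 -> 1);
# with hp2 removed (child kept), "ac" falls back to hp1's value=2 (0 -> 2);
# with "ac" itself removed, the input passes through unchanged (0 -> 0).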
def GradualBatchNormalization(name, **kwargs):
    from treeano.sandbox.nodes import batch_normalization as bn
    return tn.HyperparameterNode(
        name,
        LinearInterpolationNode(
            name + "_interpolate",
            {"early": bn.BatchNormalizationNode(name + "_bn"),
             "late": tn.IdentityNode(name + "_identity")}),
        **kwargs)
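
# A hypothetical usage sketch: LinearInterpolationNode presumably blends its
# two children over the course of training, so the node behaves like batch
# normalization early on and fades toward a plain identity, e.g.
#
#   node = GradualBatchNormalization("gbn1", expected_batches=1000)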
def AverageSamplesDropoutDnnMaxPoolNode(name, *args, **kwargs):
    return tn.HyperparameterNode(
        name,
        AverageSamplesNode(
            name + "_samples",
            tn.SequentialNode(
                name + "_seq",
                [tn.DropoutNode(name + "_dropout"),
                 tn.DnnMaxPoolNode(name + "_maxpool")])),
        *args,
        **kwargs)
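
# A hypothetical usage sketch: AverageSamplesNode presumably evaluates its
# stochastic subtree (dropout followed by max pooling) multiple times and
# averages the samples, e.g.
#
#   pool = AverageSamplesDropoutDnnMaxPoolNode("mp1",
#                                              pool_size=(2, 2),
#                                              dropout_probability=0.5)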
def HighwayDenseNode(name, nonlinearity_node, **hyperparameters):
    return tn.HyperparameterNode(
        name,
        HighwayNode(
            name + "_highway",
            {"transform": tn.SequentialNode(
                name + "_transform",
                [tn.DenseNode(name + "_transformdense"),
                 nonlinearity_node]),
             "gate": tn.DenseNode(name + "_gatedense")}),
        **hyperparameters)
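
# A sketch of the intended combination (the standard highway formulation,
# assuming HighwayNode follows it): for gate output g, transform output h,
# and input x,
#
#   output = g * h + (1 - g) * x
#
# e.g. HighwayDenseNode("hw1", tn.ReLUNode("hw1_relu"), num_units=512)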
def test_dense_node_and_dense_combine_node2():
    # testing that summing the output of 2 dense nodes is the same as
    # applying a dense combine node with 2 identities (+ bias)
    # and the same as multiplying the output of 1 dense node by 2
    network0 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.DenseNode("dense1", num_units=6),
            tn.MultiplyConstantNode("mul", value=2)
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()

    network1 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.ElementwiseSumNode("sum", [
                tn.DenseNode("dense1", num_units=6),
                tn.DenseNode("dense2", num_units=6)
            ])
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()

    network2 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.DenseCombineNode("fc",
                                [tn.IdentityNode("i1"),
                                 tn.IdentityNode("i2")],
                                num_units=6),
            tn.AddBiasNode("bias")
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()

    x = np.random.randn(3, 4, 5).astype(fX)
    fn0 = network0.function(["in"], ["hp"])
    fn1 = network1.function(["in"], ["hp"])
    fn2 = network2.function(["in"], ["hp"])
    np.testing.assert_allclose(fn0(x), fn1(x))
    np.testing.assert_allclose(fn0(x), fn2(x))
def test_remove_parent():
    network1 = tn.SequentialNode("seq", [
        tn.InputNode("i", shape=()),
        tn.HyperparameterNode(
            "hp1",
            tn.HyperparameterNode(
                "hp2",
                tn.AddConstantNode("ac"),
                value=1),
            value=2)
    ]).network()
    fn1 = network1.function(["i"], ["seq"])
    nt.assert_equal(1, fn1(0)[0])

    network2 = canopy.transforms.remove_parent(network1, {"ac"})
    fn2 = network2.function(["i"], ["seq"])
    nt.assert_equal(2, fn2(0)[0])

    network3 = canopy.transforms.remove_parent(network1, {"i"})

    @nt.raises(Exception)
    def fails(name):
        network3.function(["i"], [name])

    # testing that these nodes are removed
    fails("ac")
    fails("seq")
    network3.function(["i"], ["i"])
def MultiPool2DNode(name, **kwargs):
    # TODO tests
    # TODO make a node that verifies hyperparameters
    return tn.HyperparameterNode(
        name,
        tn.ConcatenateNode(name + "_concat", [
            tn.SequentialNode(name + "_seq0", [
                PartitionAxisNode(name + "_part0",
                                  split_idx=0,
                                  num_splits=2),
                tn.MaxPool2DNode(name + "_max", ignore_border=True)
            ]),
            tn.SequentialNode(name + "_seq1", [
                PartitionAxisNode(name + "_part1",
                                  split_idx=1,
                                  num_splits=2),
                tn.MeanPool2DNode(name + "_mean")
            ])
        ]),
        **kwargs)
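
# A usage sketch (behavior inferred from split_idx/num_splits above):
# PartitionAxisNode presumably splits the feature maps into two halves, one
# max-pooled and one mean-pooled, which are then concatenated back together:
#
#   pool = MultiPool2DNode("pool1", pool_size=(2, 2))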
def test_scale_hyperparameter():
    network = tn.HyperparameterNode(
        "hp",
        eb.ScaleHyperparameterNode("scale", tn.ConstantNode("c")),
        value=42.0,
        hyperparameter="value",
        start_percent=0.,
        end_percent=1.0,
        start_scale=1.0,
        end_scale=0.1,
        expected_batches=2,
    ).network()
    fn = network.function([], ["c"], include_updates=True)
    np.testing.assert_allclose(42.0, fn()[0], rtol=1e-5)
    np.testing.assert_allclose(42.0 * 0.55, fn()[0], rtol=1e-5)
    np.testing.assert_allclose(42.0 * 0.1, fn()[0], rtol=1e-5)
    np.testing.assert_allclose(42.0 * 0.1, fn()[0], rtol=1e-5)
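
# Worked values for the assertions above: the scale is interpolated from
# start_scale=1.0 to end_scale=0.1 over expected_batches=2 updates, so after
# one update (50% of the way) the scale is 1.0 + 0.5 * (0.1 - 1.0) = 0.55,
# and from the second update onward it stays clamped at end_scale=0.1.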
def test_auxiliary_kl_sparsity_penalty_node():
    # testing that both sparsity penalty versions return the same thing
    network = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode(
            "s",
            [tn.InputNode("i", shape=(10, 3)),
             tn.DenseNode("d", num_units=9),
             sp.AuxiliaryKLSparsityPenaltyNode("scp", cost_reference="sum"),
             sp.ElementwiseKLSparsityPenaltyNode("sp"),
             tn.AggregatorNode("a"),
             # zero out the rest of the network, so that the value of sum is
             # just the value from the auxiliary sparsity penalty node
             tn.ConstantNode("foo", value=0),
             tn.InputElementwiseSumNode("sum")]),
        sparsity=0.1,
    ).network()
    fn = network.function(["i"], ["sum", "a"])
    x = np.random.rand(10, 3).astype(fX)
    res = fn(x)
    np.testing.assert_equal(res[0], res[1])
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode(
        "seq",
        [tn.InputNode("x", shape=(None, 1, 28, 28)),
         tn.Conv2DWithBiasNode("conv1"),
         # bn.BatchNormalizationNode("bn1"),
         timesout.IndexedTimesoutNode("to1"),
         tn.TanhNode("tanh1"),
         tn.MaxPool2DNode("mp1"),
         tn.Conv2DWithBiasNode("conv2"),
         # bn.BatchNormalizationNode("bn2"),
         timesout.IndexedTimesoutNode("to2"),
         tn.TanhNode("tanh2"),
         tn.MaxPool2DNode("mp2"),
         tn.DenseNode("fc1"),
         # bn.BatchNormalizationNode("bn3"),
         timesout.IndexedTimesoutNode("to3"),
         tn.TanhNode("tanh3"),
         tn.DropoutNode("do1"),
         tn.DenseNode("fc2", num_units=10),
         tn.SoftmaxNode("pred"),
         ]),
    num_filters=32,
    filter_size=(5, 5),
    pool_size=(2, 2),
    num_units=256,
    num_pieces=2,
    dropout_probability=0.5,
    inits=[treeano.inits.XavierNormalInit()],
)
def vgg_16_nodes(conv_only):
    """
    conv_only: whether or not to only return conv layers (before FC layers)
    """
    assert conv_only
    return tn.HyperparameterNode(
        "vgg16",
        tn.SequentialNode(
            "vgg16_seq",
            [tn.HyperparameterNode(
                "conv_group_1",
                tn.SequentialNode("conv_group_1_seq", [
                    tn.DnnConv2DWithBiasNode("conv1_1"),
                    tn.ReLUNode("relu1_1"),
                    tn.DnnConv2DWithBiasNode("conv1_2"),
                    tn.ReLUNode("relu1_2")
                ]),
                num_filters=64),
             tn.MaxPool2DNode("pool1"),
             tn.HyperparameterNode(
                 "conv_group_2",
                 tn.SequentialNode("conv_group_2_seq", [
                     tn.DnnConv2DWithBiasNode("conv2_1"),
                     tn.ReLUNode("relu2_1"),
                     tn.DnnConv2DWithBiasNode("conv2_2"),
                     tn.ReLUNode("relu2_2")
                 ]),
                 num_filters=128),
             tn.MaxPool2DNode("pool2"),
             tn.HyperparameterNode(
                 "conv_group_3",
                 tn.SequentialNode("conv_group_3_seq", [
                     tn.DnnConv2DWithBiasNode("conv3_1"),
                     tn.ReLUNode("relu3_1"),
                     tn.DnnConv2DWithBiasNode("conv3_2"),
                     tn.ReLUNode("relu3_2"),
                     tn.DnnConv2DWithBiasNode("conv3_3"),
                     tn.ReLUNode("relu3_3")
                 ]),
                 num_filters=256),
             tn.MaxPool2DNode("pool3"),
             tn.HyperparameterNode(
                 "conv_group_4",
                 tn.SequentialNode("conv_group_4_seq", [
                     tn.DnnConv2DWithBiasNode("conv4_1"),
                     tn.ReLUNode("relu4_1"),
                     tn.DnnConv2DWithBiasNode("conv4_2"),
                     tn.ReLUNode("relu4_2"),
                     tn.DnnConv2DWithBiasNode("conv4_3"),
                     tn.ReLUNode("relu4_3")
                 ]),
                 num_filters=512),
             tn.MaxPool2DNode("pool4"),
             tn.HyperparameterNode(
                 "conv_group_5",
                 tn.SequentialNode("conv_group_5_seq", [
                     tn.DnnConv2DWithBiasNode("conv5_1"),
                     tn.ReLUNode("relu5_1"),
                     tn.DnnConv2DWithBiasNode("conv5_2"),
                     tn.ReLUNode("relu5_2"),
                     tn.DnnConv2DWithBiasNode("conv5_3"),
                     tn.ReLUNode("relu5_3")
                 ]),
                 num_filters=512),
             tn.MaxPool2DNode("pool5"),
             # TODO add dense nodes
             ]),
        pad="same",
        filter_size=(3, 3),
        pool_size=(2, 2),
        # VGG net uses cross-correlation by default
        conv_mode="cross",
    )
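
# A hypothetical usage sketch: wrap the conv stack with an input node to get
# a buildable network (224x224 RGB input is the standard VGG-16 size):
#
#   network = tn.SequentialNode(
#       "vgg",
#       [tn.InputNode("x", shape=(None, 3, 224, 224)),
#        vgg_16_nodes(conv_only=True)]).network()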
# - ReLU
# - 50% dropout
# - fully connected 10 units
# - softmax
# - the batch size can be provided as `None` to make the network
#   work for multiple different batch sizes
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 1, 28, 28)),
        tn.DenseNode("fc1"),
        tn.ReLUNode("relu1"),
        tn.DropoutNode("do1"),
        tn.DenseNode("fc2"),
        tn.ReLUNode("relu2"),
        tn.DropoutNode("do2"),
        tn.DenseNode("fc3", num_units=10),
        tn.SoftmaxNode("pred"),
    ]),
    num_units=512,
    dropout_probability=0.5,
    inits=[treeano.inits.XavierNormalInit()],
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam",
        {"subtree": model,
def test_suffix_node():
    node1 = tn.HyperparameterNode(
        "1",
        tn.HyperparameterNode("2",
                              tn.IdentityNode("3")))
    node2 = tn.HyperparameterNode(
        "1_foo",
        tn.HyperparameterNode("2_foo",
                              tn.IdentityNode("3_foo")))
    nt.assert_equal(canopy.node_utils.suffix_node(node1, "_foo"), node2)
nodes.append(
    resnet.residual_block_conv_2d("resblock_%d_%d" % (group, block),
                                  num_filters=num_filters,
                                  num_layers=num_layers))

nodes += [
    tn.GlobalMeanPool2DNode("global_pool"),
    tn.DenseNode("logit", num_units=10),
    tn.SoftmaxNode("pred"),
]

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", nodes),
    filter_size=(3, 3),
    inits=[treeano.inits.OrthogonalInit()],
    pad="same",
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam",
        {"subtree": model,
         "cost": tn.TotalCostNode(
             "cost",
             {"pred": tn.ReferenceNode("pred_ref", reference="model"),
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode(
        "seq",
        [tn.InputNode("x", shape=(None, 28 * 28)),
         tn.DenseNode("fc1"),
         # nbn.GradualBatchToNoBatchNormalizationNode("bn1"),
         nbn.NoBatchNormalizationNode("bn1"),
         # bn.BatchNormalizationNode("bn1"),
         tn.ReLUNode("relu1"),
         # tn.DropoutNode("do2", p=0.5),
         tn.DenseNode("fc2"),
         # nbn.GradualBatchToNoBatchNormalizationNode("bn2"),
         nbn.NoBatchNormalizationNode("bn2"),
         # bn.BatchNormalizationNode("bn2"),
         tn.ReLUNode("relu2"),
         # tn.DropoutNode("do3", p=0.5),
         tn.DenseNode("fc3", num_units=10),
         # nbn.GradualBatchToNoBatchNormalizationNode("bn3"),
         # nbn.NoBatchNormalizationNode("bn3"),
         # bn.BatchNormalizationNode("bn3"),
         tn.SoftmaxNode("pred"),
         ]),
    num_units=512,
    inits=[treeano.inits.XavierNormalInit()],
    current_mean_weight=1. / 8,
    current_var_weight=1. / 8,
    rolling_mean_rate=0.99,
    rolling_var_rate=0.99,
    expected_batches=25 * len(X_train) / BATCH_SIZE,
)
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode(
        "seq",
        [tn.InputNode("x", shape=(None, 28 * 28)),
         cp.AuxiliaryContractionPenaltyNode(
             "cp1",
             tn.SequentialNode(
                 "cp_seq1",
                 [tn.DenseNode("fc1"),
                  # the cost has nan's when using ReLU's
                  # TODO look into why
                  tn.AbsNode("abs1")]),
             cost_weight=1e1),
         # the cost has nan's when this is enabled
         # TODO look into why
         # tn.DropoutNode("do1"),
         cp.AuxiliaryContractionPenaltyNode(
             "cp2",
             tn.SequentialNode(
                 "cp_seq2",
                 [tn.DenseNode("fc2"),
                  # the cost has nan's when using ReLU's
                  # TODO look into why
                  tn.AbsNode("abs2")]),
             cost_weight=1e1),
         tn.DropoutNode("do2"),
         tn.DenseNode("fc3", num_units=10),
         tn.SoftmaxNode("pred"),
         tn.TotalCostNode(
             "cost",
             {"pred": tn.IdentityNode("pred_id"),
              "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             cost_function=treeano.utils.categorical_crossentropy_i32),
         tn.InputElementwiseSumNode("total_cost")]),
    num_units=32,
    cost_reference="total_cost",
    dropout_probability=0.5,
    inits=[treeano.inits.XavierNormalInit()],
)
train, valid, test = canopy.sandbox.datasets.mnist()

# ############################## prepare model ##############################

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode(
        "seq",
        [tn.InputNode("x", shape=(None, 1, 28, 28)),
         tn.Conv2DWithBiasNode("conv1"),
         tn.ReLUNode("relu1"),
         dropout_max_pool.AverageSamplesDropoutDnnMaxPoolNode("mp1"),
         tn.Conv2DWithBiasNode("conv2"),
         tn.ReLUNode("relu2"),
         dropout_max_pool.AverageSamplesDropoutDnnMaxPoolNode("mp2"),
         tn.DenseNode("fc1"),
         tn.ReLUNode("relu3"),
         tn.DropoutNode("do1"),
         tn.DenseNode("fc2", num_units=10),
         tn.SoftmaxNode("pred"),
         ]),
    num_filters=32,
    filter_size=(5, 5),
    pool_size=(2, 2),
    num_units=256,
    dropout_probability=0.5,
    inits=[treeano.inits.XavierNormalInit()],
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
in_valid = {"x": X_valid, "y": y_valid}

# ############################## prepare model ##############################

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 1, 28, 28)),
        inception.InceptionNode("i1"),
        tn.DnnMaxPoolNode("mp1"),
        bn.BatchNormalizationNode("bn1"),
        inception.InceptionNode("i2"),
        tn.DnnMaxPoolNode("mp2"),
        bn.BatchNormalizationNode("bn2"),
        tn.DenseNode("fc1"),
        tn.ReLUNode("relu3"),
        tn.DenseNode("fc2", num_units=10),
        tn.SoftmaxNode("pred"),
    ]),
    num_filters_1x1=32,
    num_filters_3x3reduce=16,
    num_filters_3x3=32,
    num_filters_5x5reduce=16,
    num_filters_5x5=32,
    num_filters_poolproj=32,
    pool_size=(2, 2),
    num_units=32,
    inits=[treeano.inits.XavierNormalInit()],
)

with_updates = tn.HyperparameterNode(
    "with_updates",
# ############################## prepare model ##############################

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 28 * 28)),
        tn.DenseNode("fc1"),
        tn.SigmoidNode("sigmoid1"),
        sp.AuxiliaryKLSparsityPenaltyNode("sp1", cost_weight=1e1),
        tn.DropoutNode("do1"),
        tn.DenseNode("fc2"),
        tn.SigmoidNode("sigmoid2"),
        sp.AuxiliaryKLSparsityPenaltyNode("sp2", cost_weight=1e1),
        tn.DropoutNode("do2"),
        tn.DenseNode("fc3", num_units=10),
        tn.SoftmaxNode("pred"),
        tn.TotalCostNode(
            "cost",
            {"pred": tn.IdentityNode("pred_id"),
             "target": tn.InputNode("y", shape=(None,), dtype="int32")},
            cost_function=treeano.utils.categorical_crossentropy_i32),
        tn.InputElementwiseSumNode("total_cost")
    ]),
    num_units=512,
    sparsity=0.1,
    cost_reference="total_cost",
    dropout_probability=0.5,
    inits=[treeano.inits.XavierNormalInit()],
)

with_updates = tn.HyperparameterNode(
# ############################## prepare model ##############################

BATCH_SIZE = 500
NUM_EPOCHS = 25

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 1, 28, 28)),
        tn.DenseNode("fc1"),
        eb.GradualBatchNormalization("bn1"),
        tn.ReLUNode("relu1"),
        tn.DenseNode("fc2"),
        eb.GradualBatchNormalization("bn2"),
        tn.ReLUNode("relu2"),
        tn.DenseNode("fc3", num_units=10),
        eb.GradualBatchNormalization("bn3"),
        tn.SoftmaxNode("pred"),
    ]),
    num_units=512,
    dropout_probability=0.5,
    inits=[treeano.inits.XavierNormalInit()],
    expected_batches=NUM_EPOCHS * len(train["x"]) / BATCH_SIZE,
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam",
        {"subtree":
train, valid, _ = canopy.sandbox.datasets.cluttered_mnist()

# ############################## prepare model ##############################

localization_network = tn.HyperparameterNode(
    "loc",
    tn.SequentialNode("loc_seq", [
        tn.DnnMaxPoolNode("loc_pool1"),
        tn.DnnConv2DWithBiasNode("loc_conv1"),
        tn.DnnMaxPoolNode("loc_pool2"),
        bn.NoScaleBatchNormalizationNode("loc_bn1"),
        tn.ReLUNode("loc_relu1"),
        tn.DnnConv2DWithBiasNode("loc_conv2"),
        bn.SimpleBatchNormalizationNode("loc_bn2"),
        tn.SpatialSoftmaxNode("loc_spatial_softmax"),
        spatial_attention.SpatialFeaturePointNode("loc_feature_point"),
        tn.DenseNode("loc_fc1", num_units=50),
        bn.NoScaleBatchNormalizationNode("loc_bn3"),
        tn.ReLUNode("loc_relu3"),
        tn.DenseNode("loc_fc2",
                     num_units=6,
                     inits=[treeano.inits.NormalWeightInit(std=0.001)])
    ]),
    num_filters=20,
    filter_size=(5, 5),
    pool_size=(2, 2),
)

st_node = st.AffineSpatialTransformerNode("st",
                                          localization_network,
                                          output_shape=(20, 20))
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(BATCH_SIZE, 3, 32, 32)),
        tn.DnnConv2DWithBiasNode("conv1", num_filters=96),
        tn.ReLUNode("relu1"),
        tn.DnnConv2DWithBiasNode("conv2", num_filters=96),
        tn.ReLUNode("relu2"),
        tn.MaxPool2DNode("mp1"),
        tn.DropoutNode("do1", dropout_probability=0.1),
        tn.DnnConv2DWithBiasNode("conv3", num_filters=192),
        tn.ReLUNode("relu3"),
        tn.DnnConv2DWithBiasNode("conv4", num_filters=192),
        tn.ReLUNode("relu4"),
        tn.DnnConv2DWithBiasNode("conv5", num_filters=192),
        tn.ReLUNode("relu5"),
        tn.MaxPool2DNode("mp2"),
        tn.DropoutNode("do2", dropout_probability=0.5),
        tn.DnnConv2DWithBiasNode("conv6", num_filters=192),
        tn.ReLUNode("relu6"),
        tn.DnnConv2DWithBiasNode("conv7", num_filters=192,
                                 filter_size=(1, 1)),
        tn.ReLUNode("relu7"),
        tn.DnnConv2DWithBiasNode("conv8", num_filters=10,
                                 filter_size=(1, 1)),
        tn.GlobalMeanPool2DNode("mean_pool"),
        tn.SoftmaxNode("pred"),
    ]),
    filter_size=(3, 3),
    conv_pad="same",
    pool_size=(3, 3),
    pool_stride=(2, 2),
    pool_pad=(1, 1),
    inits=[treeano.inits.OrthogonalInit()],
)
        i, o = binary_toy_data(lag, length)
        inputs.append(i)
        outputs.append(o)
    return (np.array(inputs)[..., np.newaxis],
            np.array(outputs)[..., np.newaxis])

# ############################## prepare model ##############################

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode(
        "seq",
        [tn.InputNode("x", shape=(None, None, 1)),
         recurrent_hc.GRUNode("gru1"),
         tn.LinearMappingNode("y_linear", output_dim=1),
         tn.AddBiasNode("y_bias", broadcastable_axes=(0, 1)),
         tn.SigmoidNode("sigmoid"),
         ]),
    inits=[treeano.inits.OrthogonalInit()],
    num_units=HIDDEN_STATE_SIZE,
    learn_init=True,
    grad_clip=1,
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam",
        {"subtree": model,
         "cost": tn.TotalCostNode("cost", {
             "pred": tn.ReferenceNode("pred_ref", reference="model"),