def tmp(include_batch_pad):
    """Run a chunked network on 16 zero rows, optionally batch-padded.

    The network adds the number of rows it sees to its input. It is wrapped
    with ``chunk_variables(3, ["i"])`` and, when ``include_batch_pad`` is
    truthy, a ``batch_pad(3, ["x"])`` handler is placed in front of it.

    Returns the result of calling the handled function on
    ``{"x": zeros((16, 2))}``.
    """
    input_node = tn.InputNode("i", shape=(None, 2))
    apply_node = tn.ApplyNode("a",
                              fn=(lambda x: x.shape[0].astype(fX) + x),
                              shape_fn=(lambda s: s))
    network = tn.SequentialNode("seq", [input_node, apply_node]).network()

    chunk_handler = canopy.handlers.chunk_variables(3, ["i"])
    if include_batch_pad:
        # padding handler goes first so it wraps the chunking handler
        handlers = [canopy.handlers.batch_pad(3, ["x"]), chunk_handler]
    else:
        handlers = [chunk_handler]

    fn = canopy.handlers.handled_fn(network,
                                    handlers,
                                    {"x": "i"},
                                    {"out": "seq"})
    zeros = np.zeros((16, 2), dtype=fX)
    return fn({"x": zeros})
def forget_gate_conv_2d_node(name,
                             num_filters,
                             filter_size=(3, 3),
                             initial_bias=0):
    """Build a node that multiplies its input by a learned sigmoid gate.

    The gate branch is a "same"-padded, stride-1 Conv2DWithBiasNode followed
    by an added constant ``initial_bias`` and a sigmoid; the other branch is
    the identity. Both branches are combined with an ElementwiseProductNode
    called ``name``.
    """
    passthrough = tn.IdentityNode(name + "_identity")
    gate_layers = [
        tn.Conv2DWithBiasNode(name + "_conv",
                              num_filters=num_filters,
                              filter_size=filter_size,
                              stride=(1, 1),
                              pad="same"),
        tn.AddConstantNode(name + "_initial_bias", value=initial_bias),
        tn.SigmoidNode(name + "_sigmoid"),
    ]
    gate = tn.SequentialNode(name + "_forget", gate_layers)
    return tn.ElementwiseProductNode(name, [passthrough, gate])
def test_network_with_shape(shape):
    """Compare NoScaleBatchNormalizationNode against a NumPy reference.

    The reference normalizes a random input of the given ``shape`` over every
    axis except axis 1 (with 1e-8 added to the variance), and the normalized
    reference is additionally checked to have ~zero mean per axis-1 entry.
    """
    layers = [
        tn.InputNode("x", shape=shape),
        batch_normalization.NoScaleBatchNormalizationNode("bn"),
    ]
    network = tn.SequentialNode("seq", layers).network()
    fn = network.function(["x"], ["seq"])

    x = (100 * np.random.randn(*shape) + 3).astype(fX)
    # reduce over all axes but axis 1
    axis = tuple(dim for dim in range(len(shape)) if dim != 1)
    mean = x.mean(axis=axis, keepdims=True)
    variance = x.var(axis=axis, keepdims=True)
    std = np.sqrt(variance + 1e-8)
    ans = (x - mean) / std

    res = fn(x)[0]
    np.testing.assert_allclose(ans, res, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(np.zeros(shape[1]),
                               ans.mean(axis=axis),
                               atol=1e-6)
def test_simple_recurrent_node():
    """Smoke test: SimpleRecurrentNode compiles, runs and has the right shape."""
    # just testing that it runs
    # ---
    # the test may look dumb, but it's found a LOT of problems
    input_shape = (3, 4, 5)
    input_node = nodes.InputNode("in", shape=input_shape)
    recurrent_node = nodes.recurrent.SimpleRecurrentNode(
        "srn",
        nodes.ReLUNode("relu"),
        batch_size=4,
        num_units=35,
        scan_axis=0)
    network = nodes.SequentialNode("n", [input_node, recurrent_node]).network()
    fn = network.function(["in"], ["n"])

    x = np.random.rand(3, 4, 5).astype(fX)
    output = fn(x)[0]
    # 3 = scan axis, 4 = batch axis, 35 = num output units
    nt.assert_equal(output.shape, (3, 4, 35))
def test_spatial_feature_point_node():
    """Check SpatialFeaturePointNode on one-hot feature maps.

    For each (batch, channel) a single pixel of a 2x3 map is set to 1. The
    expected output is that pixel's (row, col) index divided elementwise by
    [1, 2].
    """
    network = tn.SequentialNode("s", [
        tn.InputNode("i", shape=(2, 2, 2, 3)),
        spatial_attention.SpatialFeaturePointNode("fp")
    ]).network()
    fn = network.function(["i"], ["s"])

    x = np.zeros((2, 2, 2, 3), dtype=fX)
    # Keep the pixel coordinates as integers: NumPy rejects float indices,
    # so the array used for indexing must not be of dtype fX.
    idxs = np.array([[[0, 0],
                      [1, 0]],
                     [[0, 1],
                      [1, 2]]])
    # only the expected value is converted to the network's float type
    ans = idxs.astype(fX) / np.array([1, 2], dtype=fX)[None, None]
    for batch in range(2):
        for channel in range(2):
            i, j = idxs[batch, channel]
            x[batch, channel, i, j] = 1

    np.testing.assert_allclose(ans, fn(x)[0], rtol=1e-5, atol=1e-8)
def test_remove_nodes():
    """Check canopy.transforms.remove_nodes on nested hyperparameter nodes.

    The original network adds the inner value (1). Removing "hp2" while
    keeping its child exposes the outer value (2). Removing "ac" leaves the
    input unchanged (0).
    """
    inner_hp = tn.HyperparameterNode("hp2",
                                     tn.AddConstantNode("ac"),
                                     value=1)
    outer_hp = tn.HyperparameterNode("hp1", inner_hp, value=2)
    network1 = tn.SequentialNode(
        "seq", [tn.InputNode("i", shape=()), outer_hp]).network()
    fn1 = network1.function(["i"], ["seq"])
    nt.assert_equal(1, fn1(0)[0])

    # drop only the inner hyperparameter node, keeping the node it wraps
    network2 = canopy.transforms.remove_nodes(network1,
                                              {"hp2"},
                                              keep_child=True)
    fn2 = network2.function(["i"], ["seq"])
    nt.assert_equal(2, fn2(0)[0])

    # drop the add-constant node itself
    network3 = canopy.transforms.remove_nodes(network1, {"ac"})
    fn3 = network3.function(["i"], ["seq"])
    nt.assert_equal(0, fn3(0)[0])
def test_pickle_unpickle_network():
    """Round-trip a network through canopy pickling and compare outputs.

    The network is pickled into a temporary directory, loaded back, and both
    copies must produce equal "lm" outputs for the same random input. The
    temporary directory is always removed.
    """
    temp_dir = tempfile.mkdtemp()
    dirname = os.path.join(temp_dir, "network")
    try:
        mapping = tn.LinearMappingNode(
            "lm",
            output_dim=15,
            inits=[treeano.inits.NormalWeightInit()])
        original = tn.SequentialNode(
            "seq", [tn.InputNode("i", shape=(10, 100)), mapping]).network()
        original_fn = original.function(["i"], ["lm"])
        x = np.random.randn(10, 100).astype(fX)

        canopy.serialization.pickle_network(original, dirname)
        restored = canopy.serialization.unpickle_network(dirname)
        restored_fn = restored.function(["i"], ["lm"])

        np.testing.assert_equal(original_fn(x), restored_fn(x))
    finally:
        shutil.rmtree(temp_dir)
def test_inverse_node():
    """Check InverseNode applied to a 2x2 max pool.

    A random scalar fed through the inverse of the pool must land only at the
    position of the pooled maximum (bottom-right of the 2x2 input).
    """
    layers = [
        tn.InputNode("i", shape=(1, 1, 2, 2)),
        tn.MaxPool2DNode("m", pool_size=(2, 2)),
        tn.InputNode("i2", shape=(1, 1, 1, 1)),
        inverse.InverseNode("in", reference="m"),
    ]
    network = tn.SequentialNode("s", layers).network()
    fn = network.function(["i", "i2"], ["in"])

    x = np.array([[[[1, 2],
                    [3, 4]]]], dtype=fX)
    x2 = np.array(np.random.randn(), dtype=fX)
    # mask selecting the location of the max element of x
    max_mask = np.array([[[[0, 0],
                           [0, 1]]]], dtype=fX)
    ans = x2 * max_mask

    res = fn(x, x2.reshape(1, 1, 1, 1))[0]
    np.testing.assert_equal(ans, res)
def test_irregular_length_attention_node():
    """Check the output shape of irregular_length_attention_node.

    The symbolic shape must be (None, 3). With 4 lengths summing to the 15
    input rows, the computed output must have shape (4, 3).
    """
    attention = irregular_length.irregular_length_attention_node(
        "foo",
        lengths_reference="l",
        num_units=3,
        output_units=None)
    network = tn.SequentialNode(
        "s",
        [tn.InputNode("l", shape=(None,)),
         tn.InputNode("i", shape=(None, 3)),
         attention]
    ).network()
    nt.assert_equal((None, 3), network["foo"].get_vw("default").shape)

    fn = network.function(["i", "l"], ["s"])
    x = np.random.randn(15, 3).astype(fX)
    lengths = np.array([2, 3, 7, 3], dtype=fX)
    output_shape = fn(x, lengths)[0].shape
    expected_shape = (4, 3)
    nt.assert_equal(expected_shape, output_shape)
def test_remove_dropout():
    """Check that canopy.transforms.remove_dropout strips DropoutNodes.

    The transformed network must not mention DropoutNode, and must pass its
    input through unchanged, while the original network must not.
    """
    with_dropout = tn.SequentialNode("seq", [
        tn.InputNode("i", shape=(3, 4, 5)),
        tn.DropoutNode("do", dropout_probability=0.5),
    ]).network()
    without_dropout = canopy.transforms.remove_dropout(with_dropout)

    assert "DropoutNode" in str(with_dropout.root_node)
    assert "DropoutNode" not in str(without_dropout.root_node)

    dropout_fn = with_dropout.function(["i"], ["do"])
    identity_fn = without_dropout.function(["i"], ["do"])
    x = np.random.randn(3, 4, 5).astype(fX)

    @nt.raises(AssertionError)
    def fails():
        # with dropout active the output is expected to differ from the input
        np.testing.assert_equal(x, dropout_fn(x)[0])

    fails()
    np.testing.assert_equal(x, identity_fn(x)[0])
def test_auxiliary_contraction_penalty_node():
    """Check that both contraction penalty variants agree.

    The auxiliary node sends its cost to "sum". The elementwise node's output
    is aggregated in "a". The two values must be equal.
    """
    # testing that both contraction penalty versions return the same thing
    auxiliary_penalty = cp.AuxiliaryContractionPenaltyNode(
        "acp",
        tn.DenseNode("d", num_units=9),
        cost_reference="sum")
    elementwise_penalty = cp.ElementwiseContractionPenaltyNode(
        "cp", input_reference="i")
    layers = [
        tn.InputNode("i", shape=(10, 3)),
        auxiliary_penalty,
        elementwise_penalty,
        tn.AggregatorNode("a"),
        # zero out rest of network, so that value of sum is just value from
        # auxiliary contraction penalty node
        tn.ConstantNode("foo", value=0),
        tn.InputElementwiseSumNode("sum"),
    ]
    network = tn.SequentialNode("s", layers).network()
    fn = network.function(["i"], ["sum", "a"])

    x = np.random.rand(10, 3).astype(fX)
    auxiliary_cost, aggregated_cost = fn(x)[0:2]
    np.testing.assert_equal(auxiliary_cost, aggregated_cost)
def test_monitor_update_ratio_node():
    """Check which variables MonitorUpdateRatioNode exposes for monitoring.

    Exactly one variable tagged "monitor" must exist below "mur". Its name
    must end in "_2-norm" and mention "linear" but not "bias".
    """
    subtree = tn.SequentialNode(
        "s",
        [tn.InputNode("i", shape=(None, 3)),
         tn.LinearMappingNode("linear", output_dim=10),
         tn.AddBiasNode("bias")])
    monitored = monitor_update_ratio.MonitorUpdateRatioNode("mur", subtree)
    network = tn.WeightDecayNode("decay", monitored, weight_decay=1).network()
    network.build()

    monitor_vws = network["mur"].find_vws_in_subtree(tags={"monitor"})
    assert len(monitor_vws) == 1
    (monitor_vw,) = monitor_vws
    vw_name = monitor_vw.name
    assert re.match(".*_2-norm$", vw_name)
    assert re.match(".*linear.*", vw_name)
    assert not re.match(".*bias.*", vw_name)
def test_evaluate_monitoring_variables():
    """Check the evaluate_monitoring_variables handler.

    A custom node publishes 42 * sum(input) as a variable tagged "monitor".
    With ``fmt="train_%s"`` the handled function must return it under the key
    "train_f:default".
    """
    class FooNode(treeano.NodeImpl):

        def compute_output(self, network, in_vw):
            monitored_value = 42 * in_vw.variable.sum()
            network.create_vw("default",
                              variable=monitored_value,
                              shape=(),
                              tags={"monitor"})

    network = tn.SequentialNode(
        "s",
        [tn.InputNode("i", shape=(3, 4, 5)), FooNode("f")]
    ).network()
    x = np.random.randn(3, 4, 5).astype(fX)

    monitor_handler = canopy.handlers.evaluate_monitoring_variables(
        fmt="train_%s")
    # no explicit outputs are requested: the handler supplies them
    fn = canopy.handlers.handled_fn(network,
                                    [monitor_handler],
                                    {"x": "i"},
                                    {})
    res = fn({"x": x})

    ans_key = "train_f:default"
    assert ans_key in res
    np.testing.assert_allclose(res[ans_key], 42 * x.sum(), rtol=1e-5)
def test_auxiliary_dense_softmax_cce_node():
    """Check AuxiliaryDenseSoftmaxCCENode's pass-through and reported cost.

    The main path must return the input unchanged. The cost collected in
    "foo" must equal the mean categorical cross-entropy of a uniform
    3-class prediction against identity targets.
    """
    target_node = tn.ConstantNode("target", value=np.eye(3).astype(fX))
    auxiliary = auxiliary_costs.AuxiliaryDenseSoftmaxCCENode(
        "aux",
        {"target": target_node},
        num_units=3,
        cost_reference="foo")
    network = tn.SequentialNode("seq", [
        tn.InputNode("in", shape=(3, 5)),
        auxiliary,
        tn.IdentityNode("i"),
        tn.InputElementwiseSumNode("foo", ignore_default_input=True),
    ]).network()

    x = np.random.randn(3, 5).astype(fX)
    fn = network.function(["in"], ["i", "foo", "aux_dense"])
    res = fn(x)
    np.testing.assert_equal(res[0], x)

    uniform_prediction = np.ones((3, 3), dtype=fX) / 3.0
    one_hot_targets = np.eye(3).astype(fX)
    loss = T.nnet.categorical_crossentropy(
        uniform_prediction,
        one_hot_targets,
    ).mean().eval()
    np.testing.assert_allclose(res[1], loss)
def pool_with_projection_2d(name,
                            projection_filters,
                            stride=(2, 2),
                            filter_size=(3, 3),
                            bn_node=bn.BatchNormalizationNode):
    """Build a node concatenating a max pool with a strided conv projection.

    The pooling branch uses ``stride`` as both pool size and stride. The
    projection branch is a "same"-padded Conv2DNode with
    ``projection_filters`` filters and the same stride, followed by
    ``bn_node``. Both branches are joined by a ConcatenateNode called
    ``name``.
    """
    projection_layers = [
        tn.Conv2DNode(name + "_projectionconv",
                      num_filters=projection_filters,
                      filter_size=filter_size,
                      stride=stride,
                      pad="same"),
        bn_node(name + "_projectionbn"),
    ]
    branches = [
        tn.MaxPool2DNode(name + "_pool", pool_size=stride, stride=stride),
        tn.SequentialNode(name + "_projection", projection_layers),
    ]
    return tn.ConcatenateNode(name, branches)
def test_save_last_inputs_and_networks():
    """Check the save_last_inputs_and_networks handler.

    After 10 updating calls with a handler that keeps the last 5, the saved
    inputs must be the last 5 inputs. Reloading each saved value dict into the
    network and re-running the matching input must reproduce the recorded
    output.
    """
    class StateDiffNode(treeano.NodeImpl):

        def compute_output(self, network, in_vw):
            foo_vw = network.create_vw("foo",
                                       shape=(),
                                       is_shared=True,
                                       tags={"parameter", "weight"},
                                       inits=[])
            difference = abs(in_vw.variable - foo_vw.variable)
            network.create_vw("default", variable=difference, shape=())

    subtree = tn.SequentialNode(
        "s",
        [tn.InputNode("i", shape=()), StateDiffNode("ss")])
    network = tn.AdamNode(
        "adam",
        {"subtree": subtree,
         "cost": tn.ReferenceNode("r", reference="s")}
    ).network()
    # eagerly create shared variables
    network.build()

    save_handler = canopy.handlers.save_last_inputs_and_networks(5)
    fn = canopy.handlers.handled_fn(network,
                                    [save_handler],
                                    {"x": "i"},
                                    {"out": "s"},
                                    include_updates=True)
    inputs = [{"x": treeano.utils.as_fX(np.random.randn())}
              for _ in range(10)]
    outputs = [fn(in_map) for in_map in inputs]
    nt.assert_equal(save_handler.inputs_, inputs[-5:])

    # PY3: calling list on zip to make it eager
    # otherwise, save_handler.value_dicts_ looks at the mutating
    # value dicts
    saved_states = list(
        zip(save_handler.value_dicts_, inputs[-5:], outputs[-5:]))
    for value_dict, in_map, expected_out in saved_states:
        canopy.network_utils.load_value_dict(network, value_dict)
        nt.assert_equal(expected_out, fn(in_map))
def test_add_remove_axis_node():
    """Check output shapes along a chain of AddAxisNode / RemoveAxisNode."""
    layers = [
        tn.InputNode("i", shape=(2, 3, 4)),
        bf.AddAxisNode("a1", axis=3),
        bf.AddAxisNode("a2", axis=1),
        bf.RemoveAxisNode("r1", axis=1),
        bf.AddAxisNode("a3", axis=0),
        bf.RemoveAxisNode("r2", axis=4),
        bf.RemoveAxisNode("r3", axis=0),
    ]
    network = tn.SequentialNode("s", layers).network()
    fn = network.function(["i"], ["a1", "a2", "r1", "a3", "r2", "r3"])

    x = np.zeros((2, 3, 4), dtype=fX)
    a1, a2, r1, a3, r2, r3 = fn(x)
    # (expected shape, actual output) after each node, in network order
    checks = [
        ((2, 3, 4, 1), a1),
        ((2, 1, 3, 4, 1), a2),
        ((2, 3, 4, 1), r1),
        ((1, 2, 3, 4, 1), a3),
        ((1, 2, 3, 4), r2),
        ((2, 3, 4), r3),
    ]
    for expected_shape, output in checks:
        nt.assert_equal(expected_shape, output.shape)
def test_irregular_length_attention_softmax_node():
    """Check _IrregularLengthAttentionSoftmaxNode normalizes per length.

    The input rows beyond each sequence's length are zeroed. The output must
    keep the input shape (4, 7, 3), and for every sequence the first
    ``length`` entries of channel 0 must sum to 1.
    """
    network = tn.SequentialNode(
        "s",
        [tn.InputNode("l", shape=(None,)),
         tn.InputNode("i", shape=(None, None, 3)),
         irregular_length._IrregularLengthAttentionSoftmaxNode(
             "foo",
             lengths_reference="l")]
    ).network()
    fn = network.function(["i", "l"], ["s"])

    x = np.random.randn(4, 7, 3).astype(fX)
    # Keep the lengths as integers: NumPy rejects float slice bounds, so only
    # the copy fed to the network is converted to fX.
    lengths = np.array([2, 3, 7, 3])
    for idx, length in enumerate(lengths):
        x[idx, length:] = 0

    res = fn(x, lengths.astype(fX))[0]
    nt.assert_equal((4, 7, 3), res.shape)
    for idx, length in enumerate(lengths):
        np.testing.assert_almost_equal(res[idx][:length, 0].sum(),
                                       desired=1.0,
                                       decimal=5)
def test_auxiliary_kl_sparsity_penalty_node():
    """Check that both KL sparsity penalty variants agree.

    The auxiliary node sends its cost to "sum". The elementwise node's output
    is aggregated in "a". The two values must be equal.
    """
    # testing that both sparsity penalty versions return the same thing
    layers = [
        tn.InputNode("i", shape=(10, 3)),
        tn.DenseNode("d", num_units=9),
        sp.AuxiliaryKLSparsityPenaltyNode("scp", cost_reference="sum"),
        sp.ElementwiseKLSparsityPenaltyNode("sp"),
        tn.AggregatorNode("a"),
        # zero out rest of network, so that value of sum is just the value
        # from auxiliary sparsity penalty node
        tn.ConstantNode("foo", value=0),
        tn.InputElementwiseSumNode("sum"),
    ]
    network = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("s", layers),
        sparsity=0.1,
    ).network()
    fn = network.function(["i"], ["sum", "a"])

    x = np.random.rand(10, 3).astype(fX)
    auxiliary_cost, aggregated_cost = fn(x)[0:2]
    np.testing.assert_equal(auxiliary_cost, aggregated_cost)
def test_chunk_variables():
    """Check that chunk_variables feeds the network in chunks of 3 rows.

    The network adds the number of rows it sees to its input. Without
    handlers 18 zero rows yield 18 everywhere; with ``chunk_variables(3,
    ["i"])`` they yield 3 everywhere.
    """
    network = tn.SequentialNode(
        "seq",
        [tn.InputNode("i", shape=(None, 2)),
         tn.ApplyNode("a",
                      fn=(lambda x: x.shape[0].astype(fX) + x),
                      shape_fn=(lambda s: s))]
    ).network()
    input_map = {"x": "i"}
    output_map = {"out": "seq"}

    # unchunked: the network sees all 18 rows at once
    plain_fn = canopy.handlers.handled_fn(network, [], input_map, output_map)
    plain_out = plain_fn({"x": np.zeros((18, 2), dtype=fX)})["out"]
    np.testing.assert_equal(plain_out, np.ones((18, 2), dtype=fX) * 18)

    # chunked: the network sees 3 rows per call
    chunked_fn = canopy.handlers.handled_fn(
        network,
        [canopy.handlers.chunk_variables(3, ["i"])],
        input_map,
        output_map)
    chunked_out = chunked_fn({"x": np.zeros((18, 2), dtype=fX)})["out"]
    np.testing.assert_equal(chunked_out, np.ones((18, 2), dtype=fX) * 3)
def test_transform_root_node_postwalk():
    """Check traversal order and purity of transform_root_node_postwalk.

    Nodes must be visited children-first ("i", "lm", "seq", "cun"). A
    transform that renames copies of the nodes must yield a renamed network
    while leaving the original network unmodified.
    """
    network1 = tn.toy.ConstantUpdaterNode(
        "cun",
        tn.SequentialNode("seq", [
            tn.InputNode("i", shape=(3, 4, 5)),
            tn.LinearMappingNode(
                "lm",
                output_dim=15,
                inits=[treeano.inits.NormalWeightInit(15.0)]),
        ]),
        value=-0.1,
    ).network()

    def visited_names(network):
        # run an identity transform that records node names in visit order
        names = []

        def log_name(node):
            names.append(node.name)
            return node

        canopy.transforms.transform_root_node_postwalk(network, log_name)
        return names

    def append_name(node):
        # NOTE: assumes NodeImpl subclass
        renamed = treeano.node_utils.copy_node(node)
        renamed._name += "_1"
        return renamed

    nt.assert_equal(visited_names(network1), ["i", "lm", "seq", "cun"])

    network2 = canopy.transforms.transform_root_node_postwalk(
        network1, append_name)
    nt.assert_equal(visited_names(network2),
                    ["i_1", "lm_1", "seq_1", "cun_1"])

    # assert unmodified
    nt.assert_equal(visited_names(network1), ["i", "lm", "seq", "cun"])
def test_remove_parent():
    """Check canopy.transforms.remove_parent.

    Removing the parent of "ac" exposes the outer hyperparameter value (2
    instead of 1). Removing the parent of "i" must leave a network in which
    "ac" and "seq" can no longer be compiled as outputs, but "i" still can.
    """
    inner_hp = tn.HyperparameterNode("hp2",
                                     tn.AddConstantNode("ac"),
                                     value=1)
    outer_hp = tn.HyperparameterNode("hp1", inner_hp, value=2)
    network1 = tn.SequentialNode(
        "seq", [tn.InputNode("i", shape=()), outer_hp]).network()
    fn1 = network1.function(["i"], ["seq"])
    nt.assert_equal(1, fn1(0)[0])

    network2 = canopy.transforms.remove_parent(network1, {"ac"})
    fn2 = network2.function(["i"], ["seq"])
    nt.assert_equal(2, fn2(0)[0])

    network3 = canopy.transforms.remove_parent(network1, {"i"})

    @nt.raises(Exception)
    def fails(name):
        network3.function(["i"], [name])

    # testing that these nodes are removed
    for removed_name in ("ac", "seq"):
        fails(removed_name)
    network3.function(["i"], ["i"])
def test_split_probabilities_to_leaf_probabilities_node():
    """Check both split-to-leaf probability node implementations.

    Three split probabilities (root, left child, right child) per column
    must map to four leaf probabilities, each the product of the
    probabilities along its path. The Theano and NumPy nodes are both
    compared against the same hand-computed answer.
    """
    x = np.array([[[0.9, 0.2],
                   [0.7, 0.6],
                   [0.4, 0.3]]],
                 dtype=fX)
    ans = np.array([[[0.9 * 0.7, 0.2 * 0.6],
                     [0.9 * (1 - 0.7), 0.2 * (1 - 0.6)],
                     [(1 - 0.9) * 0.4, (1 - 0.2) * 0.3],
                     [(1 - 0.9) * (1 - 0.4), (1 - 0.2) * (1 - 0.3)]]],
                   dtype=fX)
    node_classes = [
        dNDF.TheanoSplitProbabilitiesToLeafProbabilitiesNode,
        dNDF.NumpySplitProbabilitiesToLeafProbabilitiesNode,
    ]
    for node_cls in node_classes:
        layers = [tn.InputNode("i", shape=(1, 3, 2)), node_cls("p")]
        network = tn.SequentialNode("s", layers).network()
        fn = network.function(["i"], ["s"])
        np.testing.assert_allclose(ans, fn(x)[0], rtol=1e-5)
# based off of architecture from "Scalable Bayesian Optimization Using # Deep Neural Networks" http://arxiv.org/abs/1502.05700 model = tn.HyperparameterNode( "model", tn.SequentialNode("seq", [ tn.InputNode("x", shape=(BATCH_SIZE, 3, 32, 32)), tn.DnnConv2DWithBiasNode("conv1", num_filters=96), tn.ReLUNode("relu1"), tn.DnnConv2DWithBiasNode("conv2", num_filters=96), tn.ReLUNode("relu2"), tn.MaxPool2DNode("mp1"), tn.DropoutNode("do1", dropout_probability=0.1), tn.DnnConv2DWithBiasNode("conv3", num_filters=192), tn.ReLUNode("relu3"), tn.DnnConv2DWithBiasNode("conv4", num_filters=192), tn.ReLUNode("relu4"), tn.DnnConv2DWithBiasNode("conv5", num_filters=192), tn.ReLUNode("relu5"), tn.MaxPool2DNode("mp2"), tn.DropoutNode("do2", dropout_probability=0.5), tn.DnnConv2DWithBiasNode("conv6", num_filters=192), tn.ReLUNode("relu6"), tn.DnnConv2DWithBiasNode("conv7", num_filters=192, filter_size=(1, 1)), tn.ReLUNode("relu7"), tn.DnnConv2DWithBiasNode("conv8", num_filters=10, filter_size=(1, 1)), tn.GlobalMeanPool2DNode("mean_pool"), tn.SoftmaxNode("pred"), ]), filter_size=(3, 3), conv_pad="same", pool_size=(3, 3), pool_stride=(2, 2),
# Benchmark script for tn.SpatialRepeatNDNode on a 5-D input.
# It only builds the compiled function `fn` and a random input `x`;
# the timing itself is run by hand (see the recorded results below).
import numpy as np
import theano
import theano.tensor as T
import treeano.nodes as tn
fX = theano.config.floatX

# input of shape (32, 32, 32, 32, 32) followed by a repeat node with an
# upsample factor of 2 per entry of the 3-tuple
network = tn.SequentialNode("s", [
    tn.InputNode("i", shape=(32, 32, 32, 32, 32)),
    tn.SpatialRepeatNDNode("r", upsample_factor=(2, 2, 2))
]).network()
# compiled function mapping input "i" to the output of "s"
fn = network.function(["i"], ["s"])
# random input matching the declared input shape, in theano's float type
x = np.random.randn(32, 32, 32, 32, 32).astype(fX)

# recorded timings; this bare string is not used by the program
""" 20150922 results: %timeit fn(x) from axis 0 to 4: 596 ms from axis 4 to 0: 526 ms """
tn.SequentialNode( "seq", [tn.InputNode("x", shape=(None, 28 * 28)), cp.AuxiliaryContractionPenaltyNode( "cp1", tn.SequentialNode( "cp_seq1", [tn.DenseNode("fc1"), # the cost has nan's when using ReLU's # TODO look into why tn.AbsNode("abs1")]), cost_weight=1e1), # the cost has nan's when this is enabled # TODO look into why # tn.DropoutNode("do1"), cp.AuxiliaryContractionPenaltyNode( "cp2", tn.SequentialNode( "cp_seq2", [tn.DenseNode("fc2"), # the cost has nan's when using ReLU's # TODO look into why tn.AbsNode("abs2")]), cost_weight=1e1), tn.DropoutNode("do2"), tn.DenseNode("fc3", num_units=10), tn.SoftmaxNode("pred"), tn.TotalCostNode( "cost", {"pred": tn.IdentityNode("pred_id"), "target": tn.InputNode("y", shape=(None,), dtype="int32")}, cost_function=treeano.utils.categorical_crossentropy_i32), tn.InputElementwiseSumNode("total_cost")]),
# - fully connected 512 units # - ReLU # - 50% dropout # - fully connected 10 units # - softmax # - the batch size can be provided as `None` to make the network # work for multiple different batch sizes model = tn.HyperparameterNode( "model", tn.SequentialNode("seq", [ tn.InputNode("x", shape=(None, 1, 28, 28)), tn.DenseNode("fc1"), tn.ReLUNode("relu1"), tn.DropoutNode("do1"), tn.DenseNode("fc2"), tn.ReLUNode("relu2"), tn.DropoutNode("do2"), tn.DenseNode("fc3", num_units=10), tn.SoftmaxNode("pred"), ]), num_units=512, dropout_probability=0.5, inits=[treeano.inits.XavierNormalInit()], ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", { "subtree":
def vgg_16_nodes(conv_only):
    """
    Build the convolutional part of a VGG-16 network as a treeano node.

    conv_only: whether or not to only return conv layers (before FC layers);
    only the truthy case is implemented, anything else fails the assertion.

    The result is a HyperparameterNode "vgg16" wrapping five conv groups
    (2, 2, 3, 3, 3 conv + ReLU pairs with 64, 128, 256, 512, 512 filters),
    each followed by a max pool.
    """
    assert conv_only

    def conv_group(group_idx, num_convs, num_filters):
        # one "conv_group_<g>" node holding num_convs conv + ReLU pairs
        layers = []
        for conv_idx in range(1, num_convs + 1):
            suffix = "%d_%d" % (group_idx, conv_idx)
            layers.append(tn.DnnConv2DWithBiasNode("conv" + suffix))
            layers.append(tn.ReLUNode("relu" + suffix))
        group_name = "conv_group_%d" % group_idx
        return tn.HyperparameterNode(
            group_name,
            tn.SequentialNode(group_name + "_seq", layers),
            num_filters=num_filters)

    # (number of conv layers, number of filters) for groups 1..5
    group_specs = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]
    children = []
    for group_idx, (num_convs, num_filters) in enumerate(group_specs, 1):
        children.append(conv_group(group_idx, num_convs, num_filters))
        children.append(tn.MaxPool2DNode("pool%d" % group_idx))
    # TODO add dense nodes

    return tn.HyperparameterNode(
        "vgg16",
        tn.SequentialNode("vgg16_seq", children),
        pad="same",
        filter_size=(3, 3),
        pool_size=(2, 2),
        # VGG net uses cross-correlation by default
        conv_mode="cross",
    )
in_valid = {"x": X_valid, "y": y_valid} # ############################## prepare model ############################## model = tn.HyperparameterNode( "model", tn.SequentialNode( "seq", [ tn.InputNode("x", shape=(None, 28 * 28)), tn.DenseNode("fc1"), # nbn.GradualBatchToNoBatchNormalizationNode("bn1"), nbn.NoBatchNormalizationNode("bn1"), # bn.BatchNormalizationNode("bn1"), tn.ReLUNode("relu1"), # tn.DropoutNode("do2", p=0.5), tn.DenseNode("fc2"), # nbn.GradualBatchToNoBatchNormalizationNode("bn2"), nbn.NoBatchNormalizationNode("bn2"), # bn.BatchNormalizationNode("bn2"), tn.ReLUNode("relu2"), # tn.DropoutNode("do3", p=0.5), tn.DenseNode("fc3", num_units=10), # nbn.GradualBatchToNoBatchNormalizationNode("bn3"), # nbn.NoBatchNormalizationNode("bn3"), # bn.BatchNormalizationNode("bn3"), tn.SoftmaxNode("pred"), ]), num_units=512, inits=[treeano.inits.XavierNormalInit()], current_mean_weight=1. / 8, current_var_weight=1. / 8, rolling_mean_rate=0.99,
# ############################### prepare data ############################### train, valid, test = canopy.sandbox.datasets.mnist() # ############################## prepare model ############################## model = tn.HyperparameterNode( "model", tn.SequentialNode( "seq", [tn.InputNode("x", shape=(None, 1, 28, 28)), tn.Conv2DWithBiasNode("conv1"), tn.ReLUNode("relu1"), dropout_max_pool.AverageSamplesDropoutDnnMaxPoolNode("mp1"), tn.Conv2DWithBiasNode("conv2"), tn.ReLUNode("relu2"), dropout_max_pool.AverageSamplesDropoutDnnMaxPoolNode("mp2"), tn.DenseNode("fc1"), tn.ReLUNode("relu3"), tn.DropoutNode("do1"), tn.DenseNode("fc2", num_units=10), tn.SoftmaxNode("pred"), ]), num_filters=32, filter_size=(5, 5), pool_size=(2, 2), num_units=256, dropout_probability=0.5, inits=[treeano.inits.XavierNormalInit()], )