def architecture_children(self):
    """
    Build a gated pooling subtree.

    A learned per-location gate in (0, 1) blends mean pooling and max
    pooling:  out = gate * mean_pool(x) + (1 - gate) * max_pool(x).

    Returns a single-element list containing the elementwise-sum root node.
    """
    # gate: a 1-filter conv applied to each input channel independently
    # (fold channel axis into batch, conv, unfold), then a sigmoid.
    gate_node = tn.SequentialNode(
        self.name + "_gate_seq",
        [batch_fold.AddAxisNode(self.name + "_add_axis", axis=2),
         batch_fold.FoldUnfoldAxisIntoBatchNode(
             self.name + "_batch_fold",
             # NOTE: using dnn conv, since pooling is normally strided
             # and the normal conv is slow with strides
             tn.DnnConv2DWithBiasNode(self.name + "_conv", num_filters=1),
             axis=1),
         batch_fold.RemoveAxisNode(self.name + "_remove_axis", axis=2),
         tn.SigmoidNode(self.name + "_gate_sigmoid")])
    # inverse gate = 1 - gate, computed as (-1 * gate) + 1.
    # FIX: the multiply node was previously named with a bare "_" suffix
    # (self.name + "_"), which is uninformative and risks colliding with
    # other node names; give it a descriptive, unique suffix instead.
    inverse_gate_node = tn.SequentialNode(
        self.name + "_max_gate",
        [tn.ReferenceNode(self.name + "_gate_ref", reference=gate_node.name),
         tn.MultiplyConstantNode(self.name + "_mult_neg1", value=-1),
         tn.AddConstantNode(self.name + "_add1", value=1)])
    # gated mean branch: gate * mean_pool(x)
    mean_node = tn.ElementwiseProductNode(
        self.name + "_mean_product",
        [tn.MeanPool2DNode(self.name + "_mean_pool"), gate_node])
    # gated max branch: (1 - gate) * max_pool(x)
    max_node = tn.ElementwiseProductNode(
        self.name + "_max_product",
        [tn.MaxPool2DNode(self.name + "_max_pool"), inverse_gate_node])
    return [tn.ElementwiseSumNode(self.name + "_sum",
                                  [mean_node, max_node])]
def test_max_pool_2d_node():
    """2x2 max pooling of a 4x4 ramp picks each window's largest entry."""
    net = tn.SequentialNode("s", [
        tn.InputNode("i", shape=(1, 1, 4, 4)),
        tn.MaxPool2DNode("m", pool_size=(2, 2)),
    ]).network()
    pool_fn = net.function(["i"], ["m"])
    inp = np.arange(16).astype(fX).reshape(1, 1, 4, 4)
    expected = np.array([[[[5, 7],
                           [13, 15]]]], dtype=fX)
    np.testing.assert_equal(expected, pool_fn(inp)[0])
    # the symbolically-computed output shape must match the actual one
    nt.assert_equal(expected.shape, net["m"].get_vw("default").shape)
def MultiPool2DNode(name, **kwargs):
    """
    Split the channel axis in two and pool each half differently:
    one half gets max pooling, the other mean pooling, then the
    results are concatenated back together.
    """
    # TODO tests
    # TODO make a node that verifies hyperparameters
    max_branch = tn.SequentialNode(
        name + "_seq0",
        [PartitionAxisNode(name + "_part0", split_idx=0, num_splits=2),
         tn.MaxPool2DNode(name + "_max", ignore_border=True)])
    mean_branch = tn.SequentialNode(
        name + "_seq1",
        [PartitionAxisNode(name + "_part1", split_idx=1, num_splits=2),
         tn.MeanPool2DNode(name + "_mean")])
    return tn.HyperparameterNode(
        name,
        tn.ConcatenateNode(name + "_concat", [max_branch, mean_branch]),
        **kwargs)
def test_inverse_node():
    """InverseNode routes "i2" backward through max pool "m": the value is
    scattered to the argmax location of the pooled input (gradient of max)."""
    net = tn.SequentialNode(
        "s",
        [tn.InputNode("i", shape=(1, 1, 2, 2)),
         tn.MaxPool2DNode("m", pool_size=(2, 2)),
         tn.InputNode("i2", shape=(1, 1, 1, 1)),
         inverse.InverseNode("in", reference="m")]
    ).network()
    fn = net.function(["i", "i2"], ["in"])
    pooled_input = np.array([[[[1, 2], [3, 4]]]], dtype=fX)
    scalar = np.array(np.random.randn(), dtype=fX)
    # 4 (bottom-right) is the max, so only that position receives the value
    argmax_mask = np.array([[[[0, 0], [0, 1]]]], dtype=fX)
    expected = scalar * argmax_mask
    np.testing.assert_equal(expected,
                            fn(pooled_input, scalar.reshape(1, 1, 1, 1))[0])
def pool_with_projection_2d(name,
                            projection_filters,
                            stride=(2, 2),
                            filter_size=(3, 3),
                            bn_node=bn.BatchNormalizationNode):
    """
    Downsample with two parallel branches concatenated together:
    a strided max pool, and a strided conv "projection" (with batch
    normalization) that adds `projection_filters` learned channels.
    """
    branches = [
        tn.MaxPool2DNode(name + "_pool",
                         pool_size=stride,
                         stride=stride),
        tn.SequentialNode(
            name + "_projection",
            [tn.Conv2DNode(name + "_projectionconv",
                           num_filters=projection_filters,
                           filter_size=filter_size,
                           stride=stride,
                           pad="same"),
             bn_node(name + "_projectionbn")]),
    ]
    return tn.ConcatenateNode(name, branches)
fX = theano.config.floatX BATCH_SIZE = 256 train, valid, test = canopy.sandbox.datasets.cifar10() # based off of architecture from "Scalable Bayesian Optimization Using # Deep Neural Networks" http://arxiv.org/abs/1502.05700 model = tn.HyperparameterNode( "model", tn.SequentialNode("seq", [ tn.InputNode("x", shape=(BATCH_SIZE, 3, 32, 32)), tn.DnnConv2DWithBiasNode("conv1", num_filters=96), tn.ReLUNode("relu1"), tn.DnnConv2DWithBiasNode("conv2", num_filters=96), tn.ReLUNode("relu2"), tn.MaxPool2DNode("mp1"), tn.DropoutNode("do1", dropout_probability=0.1), tn.DnnConv2DWithBiasNode("conv3", num_filters=192), tn.ReLUNode("relu3"), tn.DnnConv2DWithBiasNode("conv4", num_filters=192), tn.ReLUNode("relu4"), tn.DnnConv2DWithBiasNode("conv5", num_filters=192), tn.ReLUNode("relu5"), tn.MaxPool2DNode("mp2"), tn.DropoutNode("do2", dropout_probability=0.5), tn.DnnConv2DWithBiasNode("conv6", num_filters=192), tn.ReLUNode("relu6"), tn.DnnConv2DWithBiasNode("conv7", num_filters=192, filter_size=(1, 1)), tn.ReLUNode("relu7"), tn.DnnConv2DWithBiasNode("conv8", num_filters=10, filter_size=(1, 1)), tn.GlobalMeanPool2DNode("mean_pool"),
def vgg_16_nodes(conv_only):
    """
    conv_only: whether or not to only return conv layers (before FC layers)
    """
    assert conv_only

    def conv_group(group_idx, num_convs, num_filters):
        # one VGG stage: num_convs (conv + relu) pairs sharing num_filters
        layers = []
        for conv_idx in range(1, num_convs + 1):
            layers.append(
                tn.DnnConv2DWithBiasNode("conv%d_%d" % (group_idx, conv_idx)))
            layers.append(tn.ReLUNode("relu%d_%d" % (group_idx, conv_idx)))
        return tn.HyperparameterNode(
            "conv_group_%d" % group_idx,
            tn.SequentialNode("conv_group_%d_seq" % group_idx, layers),
            num_filters=num_filters)

    # the 5 conv stages of VGG-16: (number of convs, number of filters),
    # each followed by a 2x2 max pool
    stages = []
    for group_idx, (num_convs, num_filters) in enumerate(
            [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)], start=1):
        stages.append(conv_group(group_idx, num_convs, num_filters))
        stages.append(tn.MaxPool2DNode("pool%d" % group_idx))
    # TODO add dense nodes

    return tn.HyperparameterNode(
        "vgg16",
        tn.SequentialNode("vgg16_seq", stages),
        pad="same",
        filter_size=(3, 3),
        pool_size=(2, 2),
        # VGG net uses cross-correlation by default
        conv_mode="cross",
    )
def load_network(update_scale_factor):
    """
    Build a spatial-transformer classification network.

    A small localization network predicts a 6-parameter affine transform;
    the spatial transformer applies it to the input image, and a conv net
    classifies the transformed 20x20 crop into 10 classes.

    update_scale_factor: multiplier applied to parameter updates of the
    spatial transformer subtree (see note below).

    Returns the built treeano network (with Adam update rules attached).
    """
    # localization net: predicts the 6 affine transform parameters.
    # shared hyperparameters (num_filters / filter_size / pool_size) are
    # set once on the enclosing HyperparameterNode.
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DnnMaxPoolNode("loc_pool1"),
             tn.DnnConv2DWithBiasNode("loc_conv1"),
             tn.DnnMaxPoolNode("loc_pool2"),
             bn.NoScaleBatchNormalizationNode("loc_bn1"),
             tn.ReLUNode("loc_relu1"),
             tn.DnnConv2DWithBiasNode("loc_conv2"),
             bn.NoScaleBatchNormalizationNode("loc_bn2"),
             tn.ReLUNode("loc_relu2"),
             tn.DenseNode("loc_fc1", num_units=50),
             bn.NoScaleBatchNormalizationNode("loc_bn3"),
             tn.ReLUNode("loc_relu3"),
             # 6 outputs = the affine transform; tiny init so the initial
             # transform stays near identity
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.NormalWeightInit(std=0.001)])]),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    # applies the predicted affine transform, resampling to 20x20
    st_node = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))

    # classification net operating on the transformed crop
    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             # scaling the updates of the spatial transformer
             # seems to be very helpful, to allow the classification
             # net to learn what to look for, before prematurely
             # looking
             tn.UpdateScaleNode(
                 "st_update_scale",
                 st_node,
                 update_scale_factor=update_scale_factor),
             tn.Conv2DWithBiasNode("conv1"),
             tn.MaxPool2DNode("mp1"),
             bn.NoScaleBatchNormalizationNode("bn1"),
             tn.ReLUNode("relu1"),
             tn.Conv2DWithBiasNode("conv2"),
             tn.MaxPool2DNode("mp2"),
             bn.NoScaleBatchNormalizationNode("bn2"),
             tn.ReLUNode("relu2"),
             tn.GaussianDropoutNode("do1"),
             tn.DenseNode("fc1"),
             bn.NoScaleBatchNormalizationNode("bn3"),
             tn.ReLUNode("relu3"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )

    # wrap the model with Adam updates on a categorical cross-entropy cost
    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode("cost", {
                 "pred": tn.ReferenceNode("pred_ref", reference="model"),
                 "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )
    network = with_updates.network()
    network.build()  # build eagerly to share weights
    return network
import numpy as np import theano import theano.tensor as T import treeano.nodes as tn from treeano.sandbox.nodes import fmp fX = theano.config.floatX # TODO change me node = "fmp2" compute_grad = True if node == "mp": n = tn.MaxPool2DNode("mp", pool_size=(2, 2)) elif node == "fmp": n = fmp.DisjointPseudorandomFractionalMaxPool2DNode("fmp1", fmp_alpha=1.414, fmp_u=0.5) elif node == "fmp2": n = fmp.OverlappingRandomFractionalMaxPool2DNode("fmp2", pool_size=(1.414, 1.414)) else: assert False network = tn.SequentialNode( "s", [tn.InputNode("i", shape=(1, 1, 32, 32)), n]).network() if compute_grad: i = network["i"].get_vw("default").variable s = network["s"].get_vw("default").variable fn = network.function(["i"], [T.grad(s.sum(), i)]) else: