def architecture_children(self):
    # TODO set LRN n = num_filters / 8 + 1
    nodes = [
        # NOTE: not explicitly giving the first conv a pad of "same",
        # since the first conv can have any output shape
        tn.DnnConv2DWithBiasNode(self.name + "_conv0"),
        tn.IdentityNode(self.name + "_z0"),
        tn.ReLUNode(self.name + "_z0_relu"),
        lrn.LocalResponseNormalizationNode(self.name + "_z0_lrn"),
        tn.IdentityNode(self.name + "_x0"),
    ]
    for t in range(1, self.steps + 1):
        nodes += [
            tn.DnnConv2DWithBiasNode(self.name + "_conv%d" % t,
                                     stride=(1, 1),
                                     pad="same"),
            tn.ElementwiseSumNode(
                self.name + "_sum%d" % t,
                [tn.ReferenceNode(self.name + "_sum%d_curr" % t,
                                  reference=self.name + "_conv%d" % t),
                 tn.ReferenceNode(self.name + "_sum%d_prev" % t,
                                  reference=self.name + "_z0")]),
            tn.IdentityNode(self.name + "_z%d" % t),
            tn.ReLUNode(self.name + "_z%d_relu" % t),
            lrn.LocalResponseNormalizationNode(self.name + "_z%d_lrn" % t),
            tn.IdentityNode(self.name + "_x%d" % t),
        ]
    return [tn.SequentialNode(self.name + "_sequential", nodes)]
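# A rough sketch (hypothetical stand-in callables, not the treeano graph above)
# of the recurrence the layer above unrolls: the feedforward response z0 is
# added back into every recurrent step before the ReLU and local response
# normalization.
def _recurrent_conv_sketch(x, conv0, recurrent_convs, lrn_fn, relu_fn):
    z0 = conv0(x)                # z_0 = conv_0(input)
    h = lrn_fn(relu_fn(z0))      # x_0 = lrn(relu(z_0))
    for conv_t in recurrent_convs:
        # x_t = lrn(relu(conv_t(x_{t-1}) + z_0))
        h = lrn_fn(relu_fn(conv_t(h) + z0))
    return h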
def architecture_children(self):
    gate_node = tn.SequentialNode(
        self.name + "_gate_seq",
        [batch_fold.AddAxisNode(self.name + "_add_axis", axis=2),
         batch_fold.FoldUnfoldAxisIntoBatchNode(
             self.name + "_batch_fold",
             # NOTE: using dnn conv, since pooling is normally strided
             # and the normal conv is slow with strides
             tn.DnnConv2DWithBiasNode(self.name + "_conv", num_filters=1),
             axis=1),
         batch_fold.RemoveAxisNode(self.name + "_remove_axis", axis=2),
         tn.SigmoidNode(self.name + "_gate_sigmoid")])

    inverse_gate_node = tn.SequentialNode(
        self.name + "_max_gate",
        [tn.ReferenceNode(self.name + "_gate_ref", reference=gate_node.name),
         tn.MultiplyConstantNode(self.name + "_", value=-1),
         tn.AddConstantNode(self.name + "_add1", value=1)])

    mean_node = tn.ElementwiseProductNode(
        self.name + "_mean_product",
        [tn.MeanPool2DNode(self.name + "_mean_pool"), gate_node])
    max_node = tn.ElementwiseProductNode(
        self.name + "_max_product",
        [tn.MaxPool2DNode(self.name + "_max_pool"), inverse_gate_node])

    return [tn.ElementwiseSumNode(self.name + "_sum", [mean_node, max_node])]
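# A toy numpy illustration (not the node graph above) of what gated pooling
# computes for a single 2x2 window: a learned gate in [0, 1] blends the mean
# pool and the max pool. The gate value here is made up; in the node above it
# comes from a sigmoid applied to a 1-filter convolution.
import numpy as np

window = np.array([[1.0, 2.0],
                   [3.0, 8.0]])
gate = 0.25  # hypothetical sigmoid output
pooled = gate * window.mean() + (1.0 - gate) * window.max()
# 0.25 * 3.5 + 0.75 * 8.0 = 6.875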
def architecture_children(self):
    children = self.raw_children()
    if "activation" in children:
        activation = children["activation"]
    else:
        activation = tn.ReLUNode(self.name + "_relu")

    path_1x1 = tn.SequentialNode(
        self.name + "_1x1",
        [tn.DnnConv2DWithBiasNode(self.name + "_1x1conv",
                                  filter_size=(1, 1),
                                  pad="same"),
         canopy.node_utils.format_node_name(activation,
                                            self.name + "_%s_1x1")])

    path_3x3 = tn.SequentialNode(
        self.name + "_3x3",
        [tn.DnnConv2DWithBiasNode(self.name + "_3x3reduce",
                                  filter_size=(1, 1),
                                  pad="same"),
         canopy.node_utils.format_node_name(activation,
                                            self.name + "_%s_3x3reduce"),
         tn.DnnConv2DWithBiasNode(self.name + "_3x3conv",
                                  filter_size=(3, 3),
                                  pad="same"),
         canopy.node_utils.format_node_name(activation,
                                            self.name + "_%s_3x3")])

    path_5x5 = tn.SequentialNode(
        self.name + "_5x5",
        [tn.DnnConv2DWithBiasNode(self.name + "_5x5reduce",
                                  filter_size=(1, 1),
                                  pad="same"),
         canopy.node_utils.format_node_name(activation,
                                            self.name + "_%s_5x5reduce"),
         tn.DnnConv2DWithBiasNode(self.name + "_5x5conv",
                                  filter_size=(5, 5),
                                  pad="same"),
         canopy.node_utils.format_node_name(activation,
                                            self.name + "_%s_5x5")])

    path_pool = tn.SequentialNode(
        self.name + "_poolproj",
        [tn.DnnMaxPoolNode(self.name + "_poolprojmax",
                           pool_stride=(1, 1),
                           # TODO parameterize
                           # also need to make padding be dependent on pool size
                           pool_size=(3, 3),
                           pad=(1, 1)),
         tn.DnnConv2DWithBiasNode(self.name + "_poolproj1x1",
                                  filter_size=(1, 1),
                                  pad="same"),
         canopy.node_utils.format_node_name(activation,
                                            self.name + "_%s_poolproj1x1")])

    return [tn.ConcatenateNode(self.name + "_concat",
                               [path_1x1, path_3x3, path_5x5, path_pool])]
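# A shape-only numpy sketch (filter counts are made up) of what the final
# ConcatenateNode produces: the four paths are joined along the channel axis,
# so the module's output channel count is the sum of the per-path filter counts.
import numpy as np

n, h, w = 32, 28, 28
out_1x1 = np.zeros((n, 64, h, w))
out_3x3 = np.zeros((n, 128, h, w))
out_5x5 = np.zeros((n, 32, h, w))
out_pool = np.zeros((n, 32, h, w))
out = np.concatenate([out_1x1, out_3x3, out_5x5, out_pool], axis=1)
assert out.shape == (n, 256, h, w)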
import theano
import treeano
import treeano.nodes as tn
import canopy
import canopy.sandbox.datasets

fX = theano.config.floatX

BATCH_SIZE = 256
train, valid, test = canopy.sandbox.datasets.cifar10()

# based on the architecture from "Scalable Bayesian Optimization Using
# Deep Neural Networks" http://arxiv.org/abs/1502.05700
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(BATCH_SIZE, 3, 32, 32)),
        tn.DnnConv2DWithBiasNode("conv1", num_filters=96),
        tn.ReLUNode("relu1"),
        tn.DnnConv2DWithBiasNode("conv2", num_filters=96),
        tn.ReLUNode("relu2"),
        tn.MaxPool2DNode("mp1"),
        tn.DropoutNode("do1", dropout_probability=0.1),
        tn.DnnConv2DWithBiasNode("conv3", num_filters=192),
        tn.ReLUNode("relu3"),
        tn.DnnConv2DWithBiasNode("conv4", num_filters=192),
        tn.ReLUNode("relu4"),
        tn.DnnConv2DWithBiasNode("conv5", num_filters=192),
        tn.ReLUNode("relu5"),
        tn.MaxPool2DNode("mp2"),
        tn.DropoutNode("do2", dropout_probability=0.5),
        tn.DnnConv2DWithBiasNode("conv6", num_filters=192),
        tn.ReLUNode("relu6"),
def vgg_16_nodes(conv_only):
    """
    conv_only: whether or not to only return conv layers (before FC layers)
    """
    assert conv_only
    return tn.HyperparameterNode(
        "vgg16",
        tn.SequentialNode(
            "vgg16_seq",
            [tn.HyperparameterNode(
                "conv_group_1",
                tn.SequentialNode("conv_group_1_seq",
                                  [tn.DnnConv2DWithBiasNode("conv1_1"),
                                   tn.ReLUNode("relu1_1"),
                                   tn.DnnConv2DWithBiasNode("conv1_2"),
                                   tn.ReLUNode("relu1_2")]),
                num_filters=64),
             tn.MaxPool2DNode("pool1"),
             tn.HyperparameterNode(
                 "conv_group_2",
                 tn.SequentialNode("conv_group_2_seq",
                                   [tn.DnnConv2DWithBiasNode("conv2_1"),
                                    tn.ReLUNode("relu2_1"),
                                    tn.DnnConv2DWithBiasNode("conv2_2"),
                                    tn.ReLUNode("relu2_2")]),
                 num_filters=128),
             tn.MaxPool2DNode("pool2"),
             tn.HyperparameterNode(
                 "conv_group_3",
                 tn.SequentialNode("conv_group_3_seq",
                                   [tn.DnnConv2DWithBiasNode("conv3_1"),
                                    tn.ReLUNode("relu3_1"),
                                    tn.DnnConv2DWithBiasNode("conv3_2"),
                                    tn.ReLUNode("relu3_2"),
                                    tn.DnnConv2DWithBiasNode("conv3_3"),
                                    tn.ReLUNode("relu3_3")]),
                 num_filters=256),
             tn.MaxPool2DNode("pool3"),
             tn.HyperparameterNode(
                 "conv_group_4",
                 tn.SequentialNode("conv_group_4_seq",
                                   [tn.DnnConv2DWithBiasNode("conv4_1"),
                                    tn.ReLUNode("relu4_1"),
                                    tn.DnnConv2DWithBiasNode("conv4_2"),
                                    tn.ReLUNode("relu4_2"),
                                    tn.DnnConv2DWithBiasNode("conv4_3"),
                                    tn.ReLUNode("relu4_3")]),
                 num_filters=512),
             tn.MaxPool2DNode("pool4"),
             tn.HyperparameterNode(
                 "conv_group_5",
                 tn.SequentialNode("conv_group_5_seq",
                                   [tn.DnnConv2DWithBiasNode("conv5_1"),
                                    tn.ReLUNode("relu5_1"),
                                    tn.DnnConv2DWithBiasNode("conv5_2"),
                                    tn.ReLUNode("relu5_2"),
                                    tn.DnnConv2DWithBiasNode("conv5_3"),
                                    tn.ReLUNode("relu5_3")]),
                 num_filters=512),
             tn.MaxPool2DNode("pool5"),
             # TODO add dense nodes
             ]),
        pad="same",
        filter_size=(3, 3),
        pool_size=(2, 2),
        # VGG net uses cross-correlation by default
        conv_mode="cross",
    )
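# A minimal usage sketch (input shape and wiring assumed, not from the source):
# wrap the conv-only VGG-16 stack with an input node, mirroring the pattern of
# the other model definitions in this section.
vgg_model = tn.SequentialNode(
    "vgg_model_seq",
    [tn.InputNode("x", shape=(None, 3, 224, 224)),
     vgg_16_nodes(conv_only=True)])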
# - 2x2 maxpool
# - 5x5 conv, 32 filters
# - ReLU
# - 2x2 maxpool
# - fully connected layer - 256 units
# - 50% dropout
# - fully connected layer - 10 units
# - softmax
# - the batch size can be provided as `None` to make the network
#   work for multiple different batch sizes
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 1, 28, 28)),
        tn.DnnConv2DWithBiasNode("conv1"),
        tn.ReLUNode("relu1"),
        tn.DnnMaxPoolNode("mp1"),
        tn.DnnConv2DWithBiasNode("conv2"),
        tn.ReLUNode("relu2"),
        tn.DnnMaxPoolNode("mp2"),
        tn.DenseNode("fc1"),
        tn.ReLUNode("relu3"),
        tn.DropoutNode("do1"),
        tn.DenseNode("fc2", num_units=10),
        tn.SoftmaxNode("pred"),
    ]),
    num_filters=32,
    filter_size=(5, 5),
    pool_size=(2, 2),
    num_units=256,
def load_network(update_scale_factor):
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DnnMaxPoolNode("loc_pool1"),
             tn.DnnConv2DWithBiasNode("loc_conv1"),
             tn.DnnMaxPoolNode("loc_pool2"),
             bn.NoScaleBatchNormalizationNode("loc_bn1"),
             tn.ReLUNode("loc_relu1"),
             tn.DnnConv2DWithBiasNode("loc_conv2"),
             bn.NoScaleBatchNormalizationNode("loc_bn2"),
             tn.ReLUNode("loc_relu2"),
             tn.DenseNode("loc_fc1", num_units=50),
             bn.NoScaleBatchNormalizationNode("loc_bn3"),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.NormalWeightInit(std=0.001)])]),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    st_node = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))

    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             # scaling the updates of the spatial transformer seems to be
             # very helpful: it lets the classification net learn what to
             # look for before the transformer prematurely starts looking
             tn.UpdateScaleNode(
                 "st_update_scale",
                 st_node,
                 update_scale_factor=update_scale_factor),
             tn.Conv2DWithBiasNode("conv1"),
             tn.MaxPool2DNode("mp1"),
             bn.NoScaleBatchNormalizationNode("bn1"),
             tn.ReLUNode("relu1"),
             tn.Conv2DWithBiasNode("conv2"),
             tn.MaxPool2DNode("mp2"),
             bn.NoScaleBatchNormalizationNode("bn2"),
             tn.ReLUNode("relu2"),
             tn.GaussianDropoutNode("do1"),
             tn.DenseNode("fc1"),
             bn.NoScaleBatchNormalizationNode("bn3"),
             tn.ReLUNode("relu3"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )

    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode("cost", {
                 "pred": tn.ReferenceNode("pred_ref", reference="model"),
                 "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )

    network = with_updates.network()
    network.build()  # build eagerly to share weights
    return network
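# For intuition, a small numpy sketch (independent of treeano/canopy) of what
# the affine spatial transformer does with the 6 values produced by "loc_fc2":
# they form a 2x3 matrix theta that maps output grid coordinates (here the
# 20x20 output_shape) to sampling coordinates in the input image.
import numpy as np

theta = np.array([[1.0, 0.0, 0.0],
                  [0.0, 1.0, 0.0]])  # identity transform as an example
h_out, w_out = 20, 20
ys, xs = np.meshgrid(np.linspace(-1, 1, h_out),
                     np.linspace(-1, 1, w_out),
                     indexing="ij")
grid = np.stack([xs.ravel(), ys.ravel(), np.ones(h_out * w_out)])  # (3, N)
src_coords = theta.dot(grid)  # (2, N) normalized sampling locations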