def AverageSamplesDropoutDnnMaxPoolNode(name, *args, **kwargs):
    # wraps dropout followed by cuDNN max pooling in a node that averages
    # over multiple stochastic samples of the subtree
    return tn.HyperparameterNode(
        name,
        AverageSamplesNode(
            name + "_samples",
            tn.SequentialNode(
                name + "_seq",
                [tn.DropoutNode(name + "_dropout"),
                 tn.DnnMaxPoolNode(name + "_maxpool")])),
        *args,
        **kwargs)
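A minimal construction sketch for this node follows; dropout_probability and pool_size are hyperparameter names that appear elsewhere in these examples, while the sample count used by AverageSamplesNode is not shown in this snippet and is therefore left out.

# hedged usage sketch: only hyperparameters whose names appear elsewhere in
# these examples are set; the AverageSamplesNode sample count is omitted
pool_node = AverageSamplesDropoutDnnMaxPoolNode(
    "avgsamples_pool",
    dropout_probability=0.5,
    pool_size=(2, 2))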
def architecture_children(self):
    children = self.raw_children()
    if "activation" in children:
        activation = children["activation"]
    else:
        activation = tn.ReLUNode(self.name + "_relu")
    # four parallel GoogLeNet-style Inception paths: 1x1, 3x3 (with 1x1
    # reduce), 5x5 (with 1x1 reduce), and max pooling + 1x1 projection,
    # concatenated along the channel axis
    path_1x1 = tn.SequentialNode(self.name + "_1x1", [
        tn.DnnConv2DWithBiasNode(
            self.name + "_1x1conv",
            filter_size=(1, 1),
            pad="same"),
        canopy.node_utils.format_node_name(activation,
                                           self.name + "_%s_1x1")
    ])
    path_3x3 = tn.SequentialNode(self.name + "_3x3", [
        tn.DnnConv2DWithBiasNode(
            self.name + "_3x3reduce",
            filter_size=(1, 1),
            pad="same"),
        canopy.node_utils.format_node_name(activation,
                                           self.name + "_%s_3x3reduce"),
        tn.DnnConv2DWithBiasNode(
            self.name + "_3x3conv",
            filter_size=(3, 3),
            pad="same"),
        canopy.node_utils.format_node_name(activation,
                                           self.name + "_%s_3x3")
    ])
    path_5x5 = tn.SequentialNode(self.name + "_5x5", [
        tn.DnnConv2DWithBiasNode(
            self.name + "_5x5reduce",
            filter_size=(1, 1),
            pad="same"),
        canopy.node_utils.format_node_name(activation,
                                           self.name + "_%s_5x5reduce"),
        tn.DnnConv2DWithBiasNode(
            self.name + "_5x5conv",
            filter_size=(5, 5),
            pad="same"),
        canopy.node_utils.format_node_name(activation,
                                           self.name + "_%s_5x5")
    ])
    path_pool = tn.SequentialNode(self.name + "_poolproj", [
        tn.DnnMaxPoolNode(
            self.name + "_poolprojmax",
            pool_stride=(1, 1),
            # TODO parameterize
            # also need to make padding be dependent on pool size
            pool_size=(3, 3),
            pad=(1, 1)),
        tn.DnnConv2DWithBiasNode(
            self.name + "_poolproj1x1",
            filter_size=(1, 1),
            pad="same"),
        canopy.node_utils.format_node_name(activation,
                                           self.name + "_%s_poolproj1x1")
    ])
    return [tn.ConcatenateNode(self.name + "_concat",
                               [path_1x1, path_3x3, path_5x5, path_pool])]
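For reference, the per-path filter counts are supplied as hyperparameters when the Inception node is used; the construction sketch below reuses the hyperparameter names from the MNIST example later in this section.

# construction sketch: hyperparameter names taken from the MNIST example below
inception_block = tn.HyperparameterNode(
    "i1_block",
    inception.InceptionNode("i1"),
    num_filters_1x1=32,
    num_filters_3x3reduce=16,
    num_filters_3x3=32,
    num_filters_5x5reduce=16,
    num_filters_5x5=32,
    num_filters_poolproj=32)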
def architecture_children(self):
    # weighted mix of cuDNN mean pooling and max pooling: each path is
    # scaled by a constant, then the two paths are summed elementwise
    mean_seq_node = tn.SequentialNode(self.name + "_mean_seq", [
        tn.DnnMeanPoolNode(self.name + "_mean_pool"),
        tn.MultiplyConstantNode(self.name + "_mean_const_mult")
    ])
    max_seq_node = tn.SequentialNode(self.name + "_max_seq", [
        tn.DnnMaxPoolNode(self.name + "_max_pool"),
        tn.MultiplyConstantNode(self.name + "_max_const_mult")
    ])
    return [tn.ElementwiseSumNode(self.name + "_sum_mixed",
                                  [max_seq_node, mean_seq_node])]
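The same mean/max mix can also be written out standalone; the sketch below wraps it in a HyperparameterNode so that pool_size applies to both pooling paths. The constants used by the MultiplyConstantNode instances are also hyperparameters, but their names are not shown in this snippet, so they are left unset here.

# standalone sketch of the mean/max pooling mix; pool_size is a hyperparameter
# name seen in the other examples, the MultiplyConstantNode constants are not set
mixed_pool = tn.HyperparameterNode(
    "mixed_pool",
    tn.ElementwiseSumNode(
        "sum_mixed",
        [tn.SequentialNode("max_seq",
                           [tn.DnnMaxPoolNode("max_pool"),
                            tn.MultiplyConstantNode("max_const_mult")]),
         tn.SequentialNode("mean_seq",
                           [tn.DnnMeanPoolNode("mean_pool"),
                            tn.MultiplyConstantNode("mean_const_mult")])]),
    pool_size=(2, 2))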
# theano has a constant float type that it uses (float32 for GPU)
# also rescaling to [0, 1] instead of [0, 255]
X = mnist['data'].reshape(-1, 1, 28, 28).astype(fX) / 255.0
y = mnist['target'].astype("int32")
X_train, X_valid, y_train, y_valid = sklearn.cross_validation.train_test_split(
    X, y, random_state=42)
in_train = {"x": X_train, "y": y_train}
in_valid = {"x": X_valid, "y": y_valid}

# ############################## prepare model ##############################

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 1, 28, 28)),
        inception.InceptionNode("i1"),
        tn.DnnMaxPoolNode("mp1"),
        bn.BatchNormalizationNode("bn1"),
        inception.InceptionNode("i2"),
        tn.DnnMaxPoolNode("mp2"),
        bn.BatchNormalizationNode("bn2"),
        tn.DenseNode("fc1"),
        tn.ReLUNode("relu3"),
        tn.DenseNode("fc2", num_units=10),
        tn.SoftmaxNode("pred"),
    ]),
    num_filters_1x1=32,
    num_filters_3x3reduce=16,
    num_filters_3x3=32,
    num_filters_5x5reduce=16,
    num_filters_5x5=32,
    num_filters_poolproj=32,
import canopy.sandbox.datasets

fX = theano.config.floatX

UPDATE_SCALE_FACTOR = 1.0
MAX_ITERS = 100
BATCH_SIZE = 500

train, valid, _ = canopy.sandbox.datasets.cluttered_mnist()

# ############################## prepare model ##############################

localization_network = tn.HyperparameterNode(
    "loc",
    tn.SequentialNode("loc_seq", [
        tn.DnnMaxPoolNode("loc_pool1"),
        tn.DnnConv2DWithBiasNode("loc_conv1"),
        tn.DnnMaxPoolNode("loc_pool2"),
        bn.NoScaleBatchNormalizationNode("loc_bn1"),
        tn.ReLUNode("loc_relu1"),
        tn.DnnConv2DWithBiasNode("loc_conv2"),
        bn.SimpleBatchNormalizationNode("loc_bn2"),
        tn.SpatialSoftmaxNode("loc_spatial_softmax"),
        spatial_attention.SpatialFeaturePointNode("loc_feature_point"),
        tn.DenseNode("loc_fc1", num_units=50),
        bn.NoScaleBatchNormalizationNode("loc_bn3"),
        tn.ReLUNode("loc_relu3"),
        tn.DenseNode("loc_fc2",
                     num_units=6,
                     inits=[treeano.inits.NormalWeightInit(std=0.001)])
    ]),
def load_network(update_scale_factor):
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DnnMaxPoolNode("loc_pool1"),
             tn.DnnConv2DWithBiasNode("loc_conv1"),
             tn.DnnMaxPoolNode("loc_pool2"),
             bn.NoScaleBatchNormalizationNode("loc_bn1"),
             tn.ReLUNode("loc_relu1"),
             tn.DnnConv2DWithBiasNode("loc_conv2"),
             bn.NoScaleBatchNormalizationNode("loc_bn2"),
             tn.ReLUNode("loc_relu2"),
             tn.DenseNode("loc_fc1", num_units=50),
             bn.NoScaleBatchNormalizationNode("loc_bn3"),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.NormalWeightInit(std=0.001)])]),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    st_node = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))

    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             # scaling the updates of the spatial transformer seems to be
             # very helpful: it lets the classification net learn what to
             # look for before the transformer starts looking prematurely
             tn.UpdateScaleNode(
                 "st_update_scale",
                 st_node,
                 update_scale_factor=update_scale_factor),
             tn.Conv2DWithBiasNode("conv1"),
             tn.MaxPool2DNode("mp1"),
             bn.NoScaleBatchNormalizationNode("bn1"),
             tn.ReLUNode("relu1"),
             tn.Conv2DWithBiasNode("conv2"),
             tn.MaxPool2DNode("mp2"),
             bn.NoScaleBatchNormalizationNode("bn2"),
             tn.ReLUNode("relu2"),
             tn.GaussianDropoutNode("do1"),
             tn.DenseNode("fc1"),
             bn.NoScaleBatchNormalizationNode("bn3"),
             tn.ReLUNode("relu3"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )

    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode(
                 "cost",
                 {"pred": tn.ReferenceNode("pred_ref", reference="model"),
                  "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )
    network = with_updates.network()
    network.build()  # build eagerly to share weights
    return network
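A brief usage sketch follows; the network.function calls for compiling Theano functions from node names are assumed from other treeano examples and are not part of this excerpt.

# hedged usage sketch: build the spatial-transformer network and compile
# train/validation functions; network.function(...) is an assumed interface
network = load_network(update_scale_factor=1.0)
train_fn = network.function(["x", "y"], ["cost"], include_updates=True)
valid_fn = network.function(["x"], ["pred"])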