def architecture_children(self):
    # TODO set LRN n = num_filters / 8 + 1
    prefix = self.name
    # step 0: plain feed-forward stage (conv -> relu -> LRN)
    # NOTE: not explicitly giving the first conv a pad of "same",
    # since the first conv can have any output shape
    all_nodes = [
        tn.DnnConv2DWithBiasNode(prefix + "_conv0"),
        tn.IdentityNode(prefix + "_z0"),
        tn.ReLUNode(prefix + "_z0_relu"),
        lrn.LocalResponseNormalizationNode(prefix + "_z0_lrn"),
        tn.IdentityNode(prefix + "_x0"),
    ]
    # recurrent steps: each sums its own conv output with the step-0
    # pre-activation (referenced via _z0)
    for step in range(1, self.steps + 1):
        all_nodes.extend([
            tn.DnnConv2DWithBiasNode(prefix + "_conv%d" % step,
                                     stride=(1, 1),
                                     pad="same"),
            tn.ElementwiseSumNode(
                prefix + "_sum%d" % step,
                [tn.ReferenceNode(prefix + "_sum%d_curr" % step,
                                  reference=prefix + "_conv%d" % step),
                 tn.ReferenceNode(prefix + "_sum%d_prev" % step,
                                  reference=prefix + "_z0")]),
            tn.IdentityNode(prefix + "_z%d" % step),
            tn.ReLUNode(prefix + "_z%d_relu" % step),
            lrn.LocalResponseNormalizationNode(prefix + "_z%d_lrn" % step),
            tn.IdentityNode(prefix + "_x%d" % step),
        ])
    return [tn.SequentialNode(prefix + "_sequential", all_nodes)]
def test_affine_spatial_transformer_node_build():
    """Smoke test: a network containing an AffineSpatialTransformerNode builds."""
    loc_layers = [tn.DenseNode("loc_fc1", num_units=50),
                  tn.ReLUNode("loc_relu3"),
                  tn.DenseNode("loc_fc2",
                               num_units=6,
                               inits=[treeano.inits.ZeroInit()])]
    loc_net = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode("loc_seq", loc_layers),
        num_filters=32,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )
    st_node = spatial_transformer.AffineSpatialTransformerNode(
        "st",
        loc_net,
        output_shape=(20, 20))
    classifier = tn.HyperparameterNode(
        "model",
        tn.SequentialNode("seq", [
            tn.InputNode("x", shape=(None, 1, 60, 60)),
            st_node,
            tn.DenseNode("fc1"),
            tn.ReLUNode("relu1"),
            tn.DropoutNode("do1"),
            tn.DenseNode("fc2", num_units=10),
            tn.SoftmaxNode("pred"),
        ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeNormalInit()],
    )
    cost = tn.TotalCostNode("cost", {
        "pred": tn.ReferenceNode("pred_ref", reference="model"),
        "target": tn.InputNode("y", shape=(None,), dtype="int32")},
    )
    trainable = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode("adam", {"subtree": classifier, "cost": cost}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
    )
    network = trainable.network()
    # build eagerly to share weights
    network.build()
def GeometricMeanOutNode(name, epsilon=1e-8, **kwargs):
    # Geometric-mean "out" activation: clips inputs to >= 0 (ReLU),
    # shifts them by epsilon so the product stays strictly positive,
    # multiplies via TimesoutNode, then takes the square root.
    # NOTE(review): the final correction subtracts epsilon**2, but the
    # sqrt of the shifted product is offset by roughly epsilon (two zero
    # inputs give sqrt(epsilon * epsilon) == epsilon, and epsilon**2 is
    # only 1e-16 at the default) — confirm whether -epsilon was intended.
    return tn.SequentialNode(name, [
        tn.ReLUNode(name + "_relu"),
        tn.AddConstantNode(name + "_add", value=epsilon),
        TimesoutNode(name + "_to", **kwargs),
        tn.SqrtNode(name + "_sqrt"),
        tn.AddConstantNode(name + "_sub", value=-(epsilon**2))
    ])
def architecture_children(self):
    """
    builds the 1x1, 3x3, 5x5 and pool-projection towers of an
    inception module and concatenates their outputs
    """
    children = self.raw_children()
    # default to a ReLU when no explicit activation child is given
    activation = (children["activation"]
                  if "activation" in children
                  else tn.ReLUNode(self.name + "_relu"))

    def conv(suffix, filter_size):
        # convenience wrapper: biased conv with "same" padding
        return tn.DnnConv2DWithBiasNode(self.name + suffix,
                                        filter_size=filter_size,
                                        pad="same")

    def act(name_format):
        # each tower gets its own uniquely-named copy of the activation
        return canopy.node_utils.format_node_name(activation,
                                                  self.name + name_format)

    tower_1x1 = tn.SequentialNode(self.name + "_1x1", [
        conv("_1x1conv", (1, 1)),
        act("_%s_1x1"),
    ])
    tower_3x3 = tn.SequentialNode(self.name + "_3x3", [
        conv("_3x3reduce", (1, 1)),
        act("_%s_3x3reduce"),
        conv("_3x3conv", (3, 3)),
        act("_%s_3x3"),
    ])
    tower_5x5 = tn.SequentialNode(self.name + "_5x5", [
        conv("_5x5reduce", (1, 1)),
        act("_%s_5x5reduce"),
        conv("_5x5conv", (5, 5)),
        act("_%s_5x5"),
    ])
    tower_pool = tn.SequentialNode(self.name + "_poolproj", [
        tn.DnnMaxPoolNode(self.name + "_poolprojmax",
                          pool_stride=(1, 1),
                          # TODO parameterize
                          # also need to make padding be dependent on
                          # pool size
                          pool_size=(3, 3),
                          pad=(1, 1)),
        conv("_poolproj1x1", (1, 1)),
        act("_%s_poolproj1x1"),
    ])
    return [
        tn.ConcatenateNode(self.name + "_concat",
                           [tower_1x1, tower_3x3, tower_5x5, tower_pool])
    ]
def test_grad_net_interpolation_node():
    """GradNetInterpolationNode blends early/late outputs by late_gate."""
    interp = gradnet.GradNetInterpolationNode(
        "gradnet",
        {"early": tn.ReLUNode("r"), "late": tn.TanhNode("t")},
        late_gate=0.5)
    network = tn.SequentialNode("s", [
        tn.InputNode("i", shape=(1, 10)),
        interp,
    ]).network()
    fn = network.function(["i"], ["s"])
    inputs = np.random.randn(1, 10).astype(fX)
    # expected: 50/50 mix of relu (clip at 0) and tanh
    expected = 0.5 * np.clip(inputs, 0, np.inf) + 0.5 * np.tanh(inputs)
    np.testing.assert_allclose(expected, fn(inputs)[0], rtol=1e-5)
def test_simple_recurrent_node():
    """SimpleRecurrentNode runs and yields the expected output shape."""
    # just testing that it runs
    # ---
    # the test may look dumb, but it's found a LOT of problems
    network = nodes.SequentialNode("n", [
        nodes.InputNode("in", shape=(3, 4, 5)),
        nodes.recurrent.SimpleRecurrentNode("srn",
                                            nodes.ReLUNode("relu"),
                                            batch_size=4,
                                            num_units=35,
                                            scan_axis=0)
    ]).network()
    fn = network.function(["in"], ["n"])
    data = np.random.rand(3, 4, 5).astype(fX)
    out = fn(data)[0]
    # axis 0 = scan (3), axis 1 = batch (4), axis 2 = output units (35)
    nt.assert_equal(out.shape, (3, 4, 35))
import treeano.nodes as tn
import canopy
import canopy.sandbox.datasets

# NOTE(review): assumes `theano` is imported above this excerpt
fX = theano.config.floatX
BATCH_SIZE = 256
# CIFAR-10 train/validation/test splits
train, valid, test = canopy.sandbox.datasets.cifar10()

# based off of architecture from "Scalable Bayesian Optimization Using
# Deep Neural Networks" http://arxiv.org/abs/1502.05700
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        # CIFAR-10 images: 3 channels, 32x32
        tn.InputNode("x", shape=(BATCH_SIZE, 3, 32, 32)),
        tn.DnnConv2DWithBiasNode("conv1", num_filters=96),
        tn.ReLUNode("relu1"),
        tn.DnnConv2DWithBiasNode("conv2", num_filters=96),
        tn.ReLUNode("relu2"),
        tn.MaxPool2DNode("mp1"),
        # light dropout early (0.1), heavier later (0.5)
        tn.DropoutNode("do1", dropout_probability=0.1),
        tn.DnnConv2DWithBiasNode("conv3", num_filters=192),
        tn.ReLUNode("relu3"),
        tn.DnnConv2DWithBiasNode("conv4", num_filters=192),
        tn.ReLUNode("relu4"),
        tn.DnnConv2DWithBiasNode("conv5", num_filters=192),
        tn.ReLUNode("relu5"),
        tn.MaxPool2DNode("mp2"),
        tn.DropoutNode("do2", dropout_probability=0.5),
        tn.DnnConv2DWithBiasNode("conv6", num_filters=192),
        tn.ReLUNode("relu6"),
        # 1x1 conv: per-location linear mixing of the 192 channels
        tn.DnnConv2DWithBiasNode("conv7",
                                 num_filters=192,
                                 filter_size=(1, 1)),
# - ReLU # - 50% dropout # - fully connected 512 units # - ReLU # - 50% dropout # - fully connected 10 units # - softmax # - the batch size can be provided as `None` to make the network # work for multiple different batch sizes model = tn.HyperparameterNode( "model", tn.SequentialNode("seq", [ tn.InputNode("x", shape=(None, 1, 28, 28)), tn.DenseNode("fc1"), tn.ReLUNode("relu1"), tn.DropoutNode("do1"), tn.DenseNode("fc2"), tn.ReLUNode("relu2"), tn.DropoutNode("do2"), tn.DenseNode("fc3", num_units=10), tn.SoftmaxNode("pred"), ]), num_units=512, dropout_probability=0.5, inits=[treeano.inits.XavierNormalInit()], ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode(
def vgg_16_nodes(conv_only):
    """
    returns the VGG-16 architecture as a treeano node tree

    conv_only:
    whether or not to only return conv layers (before FC layers)

    The five conv groups were previously written out by hand as five
    near-identical blocks; they are now generated from a spec table,
    producing byte-identical node names ("convG_I", "reluG_I", "poolG").
    """
    # only the convolutional portion is implemented so far
    assert conv_only

    def _conv_group(group_idx, num_convs, num_filters):
        # one VGG conv group: num_convs x (conv + ReLU); filter size,
        # padding and conv mode come from the root hyperparameters
        layers = []
        for conv_idx in range(1, num_convs + 1):
            layers += [
                tn.DnnConv2DWithBiasNode("conv%d_%d" % (group_idx, conv_idx)),
                tn.ReLUNode("relu%d_%d" % (group_idx, conv_idx)),
            ]
        return tn.HyperparameterNode(
            "conv_group_%d" % group_idx,
            tn.SequentialNode("conv_group_%d_seq" % group_idx, layers),
            num_filters=num_filters)

    # (group index, convs in group, num filters) — standard VGG-16
    group_specs = [
        (1, 2, 64),
        (2, 2, 128),
        (3, 3, 256),
        (4, 3, 512),
        (5, 3, 512),
    ]
    nodes = []
    for group_idx, num_convs, num_filters in group_specs:
        nodes.append(_conv_group(group_idx, num_convs, num_filters))
        # each group is followed by 2x2 max-pooling
        nodes.append(tn.MaxPool2DNode("pool%d" % group_idx))
    # TODO add dense nodes

    return tn.HyperparameterNode(
        "vgg16",
        tn.SequentialNode("vgg16_seq", nodes),
        pad="same",
        filter_size=(3, 3),
        pool_size=(2, 2),
        # VGG net uses cross-correlation by default
        conv_mode="cross",
    )
in_train = {"x": X_train, "y": y_train}
in_valid = {"x": X_valid, "y": y_valid}

# ############################## prepare model ##############################

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 1, 28, 28)),
        # two rounds of: inception module -> max pool -> batch norm
        inception.InceptionNode("i1"),
        tn.DnnMaxPoolNode("mp1"),
        bn.BatchNormalizationNode("bn1"),
        inception.InceptionNode("i2"),
        tn.DnnMaxPoolNode("mp2"),
        bn.BatchNormalizationNode("bn2"),
        tn.DenseNode("fc1"),
        tn.ReLUNode("relu3"),
        tn.DenseNode("fc2", num_units=10),
        tn.SoftmaxNode("pred"),
    ]),
    # filter counts for each tower of the inception modules
    num_filters_1x1=32,
    num_filters_3x3reduce=16,
    num_filters_3x3=32,
    num_filters_5x5reduce=16,
    num_filters_5x5=32,
    num_filters_poolproj=32,
    pool_size=(2, 2),
    num_units=32,
    inits=[treeano.inits.XavierNormalInit()],
)

with_updates = tn.HyperparameterNode(
UPDATE_SCALE_FACTOR = 1.0
MAX_ITERS = 100
BATCH_SIZE = 500

train, valid, _ = canopy.sandbox.datasets.cluttered_mnist()

# ############################## prepare model ##############################

# localization network ending in a 6-unit dense layer
# NOTE(review): presumably the 6 outputs parameterize an affine spatial
# transformer defined later in this file — confirm against the caller
localization_network = tn.HyperparameterNode(
    "loc",
    tn.SequentialNode("loc_seq", [
        tn.DnnMaxPoolNode("loc_pool1"),
        tn.DnnConv2DWithBiasNode("loc_conv1"),
        tn.DnnMaxPoolNode("loc_pool2"),
        bn.NoScaleBatchNormalizationNode("loc_bn1"),
        tn.ReLUNode("loc_relu1"),
        tn.DnnConv2DWithBiasNode("loc_conv2"),
        bn.SimpleBatchNormalizationNode("loc_bn2"),
        # spatial softmax + feature points: attention-style pooling of
        # the conv feature maps into coordinates
        tn.SpatialSoftmaxNode("loc_spatial_softmax"),
        spatial_attention.SpatialFeaturePointNode("loc_feature_point"),
        tn.DenseNode("loc_fc1", num_units=50),
        bn.NoScaleBatchNormalizationNode("loc_bn3"),
        tn.ReLUNode("loc_relu3"),
        # very small init (std=0.001) for the final layer
        tn.DenseNode("loc_fc2",
                     num_units=6,
                     inits=[treeano.inits.NormalWeightInit(std=0.001)])
    ]),
    num_filters=20,
    filter_size=(5, 5),
    pool_size=(2, 2),
)
# ############################### prepare data ############################### train, valid, test = canopy.sandbox.datasets.mnist() # ############################## prepare model ############################## groups = 3 blocks_per_group = 5 num_layers = 2 num_filters = 16 nodes = [ tn.InputNode("x", shape=(None, 1, 28, 28)), tn.Conv2DNode("conv1", num_filters=num_filters), bn.BatchNormalizationNode("bn1"), tn.ReLUNode("relu1"), ] for group in range(groups): for block in range(blocks_per_group): if group != 0 and block == 0: num_filters *= 2 nodes.append( resnet.residual_block_conv_2d("resblock_%d_%d" % (group, block), num_filters=num_filters, num_layers=num_layers, increase_dim="projection")) else: nodes.append( resnet.residual_block_conv_2d("resblock_%d_%d" %
def load_network(update_scale_factor):
    """
    Builds the spatial-transformer classification network (with Adam
    updates) and returns the built treeano network.

    update_scale_factor:
    scale applied to the spatial transformer's gradient updates via
    UpdateScaleNode
    """
    # localization network: convs + batch norm, ending in a 6-unit dense
    # layer (the affine transform parameters for the transformer below),
    # initialized with a very small std so it starts near its bias value
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DnnMaxPoolNode("loc_pool1"),
             tn.DnnConv2DWithBiasNode("loc_conv1"),
             tn.DnnMaxPoolNode("loc_pool2"),
             bn.NoScaleBatchNormalizationNode("loc_bn1"),
             tn.ReLUNode("loc_relu1"),
             tn.DnnConv2DWithBiasNode("loc_conv2"),
             bn.NoScaleBatchNormalizationNode("loc_bn2"),
             tn.ReLUNode("loc_relu2"),
             tn.DenseNode("loc_fc1", num_units=50),
             bn.NoScaleBatchNormalizationNode("loc_bn3"),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.NormalWeightInit(std=0.001)])]),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )
    # affine spatial transformer: warps the 60x60 input down to 20x20
    st_node = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))
    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             # scaling the updates of the spatial transformer
             # seems to be very helpful, to allow the classification
             # net to learn what to look for, before prematurely
             # looking
             tn.UpdateScaleNode(
                 "st_update_scale",
                 st_node,
                 update_scale_factor=update_scale_factor),
             tn.Conv2DWithBiasNode("conv1"),
             tn.MaxPool2DNode("mp1"),
             bn.NoScaleBatchNormalizationNode("bn1"),
             tn.ReLUNode("relu1"),
             tn.Conv2DWithBiasNode("conv2"),
             tn.MaxPool2DNode("mp2"),
             bn.NoScaleBatchNormalizationNode("bn2"),
             tn.ReLUNode("relu2"),
             tn.GaussianDropoutNode("do1"),
             tn.DenseNode("fc1"),
             bn.NoScaleBatchNormalizationNode("bn3"),
             tn.ReLUNode("relu3"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )
    # wrap the model with an Adam optimizer over a cross-entropy cost
    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode("cost", {
                 "pred": tn.ReferenceNode("pred_ref", reference="model"),
                 "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )
    network = with_updates.network()
    network.build()  # build eagerly to share weights
    return network
def test_relu_node_serialization():
    """ReLUNode round-trips through treeano's serialization check."""
    node = tn.ReLUNode("a")
    tn.check_serialization(node)
X_train, X_valid, y_train, y_valid = sklearn.cross_validation.train_test_split(
    X, y, random_state=42)
in_train = {"x": X_train, "y": y_train}
in_valid = {"x": X_valid, "y": y_valid}

# ############################## prepare model ##############################

# a deep stack of 50 highway layers (dropout variants left commented out)
highway_layers = []
for i in range(50):
    highway_layers.append(
        highway.HighwayDenseNode(
            "highway%d" % i,
            tn.SequentialNode(
                "seq%d" % i,
                [
                    tn.ReLUNode("relu%d" % i),
                    # tn.DropoutNode("do%d" % i)
                ])))

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode(
        "seq",
        [
            # flattened 28x28 input
            tn.InputNode("x", shape=(None, 28 * 28)),
            tn.DenseNode("in_dense"),
            tn.ReLUNode("in_relu"),
            # tn.DropoutNode("in_do")
        ] + highway_layers +
        [tn.DenseNode("out_dense", num_units=10),
         tn.SoftmaxNode("pred")]),
# ############################### prepare data ############################### import du train, valid = du.tasks.image_tasks.svhn(fX) # ############################## prepare model ############################## # - the batch size can be provided as `None` to make the network # work for multiple different batch sizes model = tn.HyperparameterNode( "model", tn.SequentialNode("seq", [ tn.InputNode("x", shape=(None, 3, 32, 32)), tn.Conv2DWithBiasNode("conv1a"), bn.BatchNormalizationNode("bn1a"), tn.ReLUNode("relu1a"), tn.Conv2DWithBiasNode("conv1"), bn.BatchNormalizationNode("bn1"), tn.MaxPool2DNode("mp1"), tn.ReLUNode("relu1"), tn.Conv2DWithBiasNode("conv2a"), bn.BatchNormalizationNode("bn2a"), tn.ReLUNode("relu2a"), tn.Conv2DWithBiasNode("conv2"), bn.BatchNormalizationNode("bn2"), tn.ReLUNode("relu2"), tn.MaxPool2DNode("mp2"), tn.DenseNode("fc1"), bn.BatchNormalizationNode("bn3"), tn.ReLUNode("relu3"), tn.DenseNode("fc2", num_units=10),