def architecture_children(self):
    """Return the auxiliary-cost child wrapping this node's target.

    Constructs a dense layer followed by a softmax as the pre-cost
    transform, then pairs it with the node's "target" child under a
    categorical cross-entropy cost.
    """
    # the raw "target" child supplied to this node
    target_child = self.raw_children()["target"]
    # dense -> softmax applied to predictions before computing the cost
    pre_cost_node = tn.SequentialNode(
        self.name + "_sequential",
        [tn.DenseNode(self.name + "_dense"),
         tn.SoftmaxNode(self.name + "_softmax")])
    aux_cost = tn.AuxiliaryCostNode(
        self.name + "_auxiliary",
        {"target": target_child,
         "pre_cost": pre_cost_node},
        cost_function=T.nnet.categorical_crossentropy)
    return [aux_cost]
def test_affine_spatial_transformer_node_build():
    """Smoke-test that a network containing an AffineSpatialTransformerNode builds."""
    # localization network: predicts the 6 affine parameters;
    # final layer is zero-initialized so the initial transform is identity-like
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DenseNode("loc_fc1", num_units=50),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.ZeroInit()])]),
        num_filters=32,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    # classification network with the spatial transformer at the front
    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             spatial_transformer.AffineSpatialTransformerNode(
                 "st",
                 localization_network,
                 output_shape=(20, 20)),
             tn.DenseNode("fc1"),
             tn.ReLUNode("relu1"),
             tn.DropoutNode("do1"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeNormalInit()],
    )

    # wrap with Adam updates and an int32 cross-entropy cost
    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode(
                 "cost",
                 {"pred": tn.ReferenceNode("pred_ref", reference="model"),
                  "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
    )

    network = with_updates.network()
    network.build()  # build eagerly to share weights
tn.DropoutNode("do1", dropout_probability=0.1), tn.DnnConv2DWithBiasNode("conv3", num_filters=192), tn.ReLUNode("relu3"), tn.DnnConv2DWithBiasNode("conv4", num_filters=192), tn.ReLUNode("relu4"), tn.DnnConv2DWithBiasNode("conv5", num_filters=192), tn.ReLUNode("relu5"), tn.MaxPool2DNode("mp2"), tn.DropoutNode("do2", dropout_probability=0.5), tn.DnnConv2DWithBiasNode("conv6", num_filters=192), tn.ReLUNode("relu6"), tn.DnnConv2DWithBiasNode("conv7", num_filters=192, filter_size=(1, 1)), tn.ReLUNode("relu7"), tn.DnnConv2DWithBiasNode("conv8", num_filters=10, filter_size=(1, 1)), tn.GlobalMeanPool2DNode("mean_pool"), tn.SoftmaxNode("pred"), ]), filter_size=(3, 3), conv_pad="same", pool_size=(3, 3), pool_stride=(2, 2), pool_pad=(1, 1), inits=[treeano.inits.OrthogonalInit()], ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", { "subtree": model,
def load_network(update_scale_factor):
    """Build the spatial-transformer classification network.

    Parameters
    ----------
    update_scale_factor : float
        Scale applied to the spatial transformer's parameter updates
        via an UpdateScaleNode.

    Returns
    -------
    The eagerly-built treeano network (built to share weights).
    """
    # localization network: convolutional feature extractor feeding two
    # dense layers; the last layer outputs the 6 affine parameters and is
    # initialized with a tiny std so the initial transform stays near identity
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DnnMaxPoolNode("loc_pool1"),
             tn.DnnConv2DWithBiasNode("loc_conv1"),
             tn.DnnMaxPoolNode("loc_pool2"),
             bn.NoScaleBatchNormalizationNode("loc_bn1"),
             tn.ReLUNode("loc_relu1"),
             tn.DnnConv2DWithBiasNode("loc_conv2"),
             bn.NoScaleBatchNormalizationNode("loc_bn2"),
             tn.ReLUNode("loc_relu2"),
             tn.DenseNode("loc_fc1", num_units=50),
             bn.NoScaleBatchNormalizationNode("loc_bn3"),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.NormalWeightInit(std=0.001)])]),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    st_node = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))

    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             # scaling the updates of the spatial transformer
             # seems to be very helpful, to allow the classification
             # net to learn what to look for, before prematurely
             # looking
             tn.UpdateScaleNode(
                 "st_update_scale",
                 st_node,
                 update_scale_factor=update_scale_factor),
             tn.Conv2DWithBiasNode("conv1"),
             tn.MaxPool2DNode("mp1"),
             bn.NoScaleBatchNormalizationNode("bn1"),
             tn.ReLUNode("relu1"),
             tn.Conv2DWithBiasNode("conv2"),
             tn.MaxPool2DNode("mp2"),
             bn.NoScaleBatchNormalizationNode("bn2"),
             tn.ReLUNode("relu2"),
             tn.GaussianDropoutNode("do1"),
             tn.DenseNode("fc1"),
             bn.NoScaleBatchNormalizationNode("bn3"),
             tn.ReLUNode("relu3"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )

    # wrap with Adam updates and an int32 cross-entropy cost
    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode(
                 "cost",
                 {"pred": tn.ReferenceNode("pred_ref", reference="model"),
                  "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )

    network = with_updates.network()
    network.build()  # build eagerly to share weights
    return network
"model", tn.SequentialNode( "seq", [tn.InputNode("x", shape=(None, 28 * 28)), tn.DenseNode("fc1"), tn.ReLUNode("relu1"), # tn.DropoutNode("do1"), tn.DenseNode("fc2"), tn.ReLUNode("relu2"), # tn.DropoutNode("do2"), tn.ConcatenateNode( "concat", [tn.SequentialNode( "y_vars", [tn.DenseNode("fc_y", num_units=10), tn.SoftmaxNode("y_pred"), tn.AuxiliaryCostNode( "classification_cost", {"target": tn.InputNode("y", shape=(None,), dtype="int32")}, cost_function=treeano.utils.categorical_crossentropy_i32)]), tn.SequentialNode( "z_vars", [tn.DenseNode("fc_z", num_units=LATENT_SIZE), tn.AuxiliaryCostNode( "xcov_cost", {"target": tn.ReferenceNode("y_ref", reference="y_pred")}, cost_function=cross_covariance)])], axis=1),
def test_softmax_node_serialization():
    """Check that a SoftmaxNode survives a serialization round-trip."""
    node = tn.SoftmaxNode("a")
    tn.check_serialization(node)