Example #1
    def architecture_children(self):
        gate_node = tn.SequentialNode(
            self.name + "_gate_seq",
            [
                batch_fold.AddAxisNode(self.name + "_add_axis", axis=2),
                batch_fold.FoldUnfoldAxisIntoBatchNode(
                    self.name + "_batch_fold",
                    # NOTE: using dnn conv, since pooling is normally strided
                    # and the normal conv is slow with strides
                    tn.DnnConv2DWithBiasNode(self.name + "_conv",
                                             num_filters=1),
                    axis=1),
                batch_fold.RemoveAxisNode(self.name + "_remove_axis", axis=2),
                tn.SigmoidNode(self.name + "_gate_sigmoid")
            ])

        inverse_gate_node = tn.SequentialNode(self.name + "_max_gate", [
            tn.ReferenceNode(self.name + "_gate_ref",
                             reference=gate_node.name),
            # inverse gate = 1 - gate
            tn.MultiplyConstantNode(self.name + "_mul-1", value=-1),
            tn.AddConstantNode(self.name + "_add1", value=1)
        ])

        mean_node = tn.ElementwiseProductNode(
            self.name + "_mean_product",
            [tn.MeanPool2DNode(self.name + "_mean_pool"), gate_node])

        max_node = tn.ElementwiseProductNode(
            self.name + "_max_product",
            [tn.MaxPool2DNode(self.name + "_max_pool"), inverse_gate_node])

        return [
            tn.ElementwiseSumNode(self.name + "_sum", [mean_node, max_node])
        ]
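A minimal sketch of the gating arithmetic this node computes per spatial
location (plain Python with hypothetical values, not treeano code): the
sigmoid gate g blends the two pooling results as g * mean + (1 - g) * max.

# hypothetical values for one spatial location
g = 0.25                      # sigmoid gate output in [0, 1]
mean_pool, max_pool = 2.5, 4.0
out = g * mean_pool + (1 - g) * max_pool
assert out == 0.25 * 2.5 + 0.75 * 4.0  # 3.625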
Example #2
def test_max_pool_2d_node():
    network = tn.SequentialNode("s", [
        tn.InputNode("i", shape=(1, 1, 4, 4)),
        tn.MaxPool2DNode("m", pool_size=(2, 2))
    ]).network()
    fn = network.function(["i"], ["m"])
    x = np.arange(16).astype(fX).reshape(1, 1, 4, 4)
    ans = np.array([[[[5, 7], [13, 15]]]], dtype=fX)
    np.testing.assert_equal(ans, fn(x)[0])
    nt.assert_equal(ans.shape, network["m"].get_vw("default").shape)
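The expected answer can be reproduced directly in NumPy by reducing each
non-overlapping 2x2 block with max:

import numpy as np

x = np.arange(16).reshape(1, 1, 4, 4)
# view the 4x4 map as 2x2 blocks: (n, c, out_h, 2, out_w, 2)
blocks = x.reshape(1, 1, 2, 2, 2, 2)
print(blocks.max(axis=(3, 5)))  # [[[[ 5  7] [13 15]]]]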
Example #3
def MultiPool2DNode(name, **kwargs):
    # TODO tests
    # TODO make a node that verifies hyperparameters
    return tn.HyperparameterNode(
        name,
        tn.ConcatenateNode(name + "_concat", [
            tn.SequentialNode(name + "_seq0", [
                PartitionAxisNode(name + "_part0", split_idx=0, num_splits=2),
                tn.MaxPool2DNode(name + "_max", ignore_border=True)
            ]),
            tn.SequentialNode(name + "_seq1", [
                PartitionAxisNode(name + "_part1", split_idx=1, num_splits=2),
                tn.MeanPool2DNode(name + "_mean")
            ])
        ]), **kwargs)
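A hypothetical usage sketch (input shape assumed, not from the original):
since MultiPool2DNode is a HyperparameterNode, a single pool_size passed to
it is inherited by both the max- and mean-pooling children.

import treeano.nodes as tn

network = tn.SequentialNode("s", [
    tn.InputNode("i", shape=(1, 4, 8, 8)),
    # channels are split in half: one half is max pooled, the other
    # mean pooled, and the two results are concatenated
    MultiPool2DNode("mp", pool_size=(2, 2)),
]).network()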
Example #4
def test_inverse_node():
    network = tn.SequentialNode(
        "s",
        [tn.InputNode("i", shape=(1, 1, 2, 2)),
         tn.MaxPool2DNode("m", pool_size=(2, 2)),
         tn.InputNode("i2", shape=(1, 1, 1, 1)),
         inverse.InverseNode("in", reference="m")]
    ).network()
    fn = network.function(["i", "i2"], ["in"])
    x = np.array([[[[1, 2],
                    [3, 4]]]],
                 dtype=fX)
    x2 = np.array(np.random.randn(), dtype=fX)
    ans = x2 * np.array([[[[0, 0],
                           [0, 1]]]],
                        dtype=fX)

    np.testing.assert_equal(ans, fn(x, x2.reshape(1, 1, 1, 1))[0])
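Here InverseNode behaves like max-unpooling: the scalar fed in through "i2"
is routed back to the argmax position of the pooling window (the entry 4,
hence the single 1 in the lower-right corner of the mask).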
Example #5
def pool_with_projection_2d(name,
                            projection_filters,
                            stride=(2, 2),
                            filter_size=(3, 3),
                            bn_node=bn.BatchNormalizationNode):

    pool_node = tn.MaxPool2DNode(name + "_pool",
                                 pool_size=stride,
                                 stride=stride)

    projection_node = tn.SequentialNode(name + "_projection", [
        tn.Conv2DNode(name + "_projectionconv",
                      num_filters=projection_filters,
                      filter_size=filter_size,
                      stride=stride,
                      pad="same"),
        bn_node(name + "_projectionbn")
    ])

    return tn.ConcatenateNode(name, [pool_node, projection_node])
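A hypothetical usage sketch (shapes assumed): both branches halve the spatial
dimensions, so their outputs concatenate along the channel axis, giving
in_channels + projection_filters output channels.

import treeano.nodes as tn

network = tn.SequentialNode("s", [
    tn.InputNode("i", shape=(1, 16, 32, 32)),
    # pool branch keeps 16 channels, projection branch adds 32,
    # so the output shape is (1, 48, 16, 16)
    pool_with_projection_2d("pwp", projection_filters=32),
]).network()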
Example #6
fX = theano.config.floatX
BATCH_SIZE = 256
train, valid, test = canopy.sandbox.datasets.cifar10()

# based on the architecture from "Scalable Bayesian Optimization Using
# Deep Neural Networks" (http://arxiv.org/abs/1502.05700)
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(BATCH_SIZE, 3, 32, 32)),
        tn.DnnConv2DWithBiasNode("conv1", num_filters=96),
        tn.ReLUNode("relu1"),
        tn.DnnConv2DWithBiasNode("conv2", num_filters=96),
        tn.ReLUNode("relu2"),
        tn.MaxPool2DNode("mp1"),
        tn.DropoutNode("do1", dropout_probability=0.1),
        tn.DnnConv2DWithBiasNode("conv3", num_filters=192),
        tn.ReLUNode("relu3"),
        tn.DnnConv2DWithBiasNode("conv4", num_filters=192),
        tn.ReLUNode("relu4"),
        tn.DnnConv2DWithBiasNode("conv5", num_filters=192),
        tn.ReLUNode("relu5"),
        tn.MaxPool2DNode("mp2"),
        tn.DropoutNode("do2", dropout_probability=0.5),
        tn.DnnConv2DWithBiasNode("conv6", num_filters=192),
        tn.ReLUNode("relu6"),
        tn.DnnConv2DWithBiasNode("conv7", num_filters=192, filter_size=(1, 1)),
        tn.ReLUNode("relu7"),
        tn.DnnConv2DWithBiasNode("conv8", num_filters=10, filter_size=(1, 1)),
        tn.GlobalMeanPool2DNode("mean_pool"),
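The 1x1 convolutions ("conv7", "conv8") followed by global mean pooling stand
in for fully connected layers, in the network-in-network style; "conv8" emits
one feature map per CIFAR-10 class.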
Example #7
def vgg_16_nodes(conv_only):
    """
    conv_only:
    whether to return only the conv layers (i.e. stop before the FC layers)
    """
    assert conv_only

    return tn.HyperparameterNode(
        "vgg16",
        tn.SequentialNode(
            "vgg16_seq",
            [
                tn.HyperparameterNode(
                    "conv_group_1",
                    tn.SequentialNode("conv_group_1_seq", [
                        tn.DnnConv2DWithBiasNode("conv1_1"),
                        tn.ReLUNode("relu1_1"),
                        tn.DnnConv2DWithBiasNode("conv1_2"),
                        tn.ReLUNode("relu1_2")
                    ]),
                    num_filters=64),
                tn.MaxPool2DNode("pool1"),
                tn.HyperparameterNode(
                    "conv_group_2",
                    tn.SequentialNode("conv_group_2_seq", [
                        tn.DnnConv2DWithBiasNode("conv2_1"),
                        tn.ReLUNode("relu2_1"),
                        tn.DnnConv2DWithBiasNode("conv2_2"),
                        tn.ReLUNode("relu2_2")
                    ]),
                    num_filters=128),
                tn.MaxPool2DNode("pool2"),
                tn.HyperparameterNode(
                    "conv_group_3",
                    tn.SequentialNode("conv_group_3_seq", [
                        tn.DnnConv2DWithBiasNode("conv3_1"),
                        tn.ReLUNode("relu3_1"),
                        tn.DnnConv2DWithBiasNode("conv3_2"),
                        tn.ReLUNode("relu3_2"),
                        tn.DnnConv2DWithBiasNode("conv3_3"),
                        tn.ReLUNode("relu3_3")
                    ]),
                    num_filters=256),
                tn.MaxPool2DNode("pool3"),
                tn.HyperparameterNode(
                    "conv_group_4",
                    tn.SequentialNode("conv_group_4_seq", [
                        tn.DnnConv2DWithBiasNode("conv4_1"),
                        tn.ReLUNode("relu4_1"),
                        tn.DnnConv2DWithBiasNode("conv4_2"),
                        tn.ReLUNode("relu4_2"),
                        tn.DnnConv2DWithBiasNode("conv4_3"),
                        tn.ReLUNode("relu4_3")
                    ]),
                    num_filters=512),
                tn.MaxPool2DNode("pool4"),
                tn.HyperparameterNode(
                    "conv_group_5",
                    tn.SequentialNode("conv_group_5_seq", [
                        tn.DnnConv2DWithBiasNode("conv5_1"),
                        tn.ReLUNode("relu5_1"),
                        tn.DnnConv2DWithBiasNode("conv5_2"),
                        tn.ReLUNode("relu5_2"),
                        tn.DnnConv2DWithBiasNode("conv5_3"),
                        tn.ReLUNode("relu5_3")
                    ]),
                    num_filters=512),
                tn.MaxPool2DNode("pool5"),
                # TODO add dense nodes
            ]),
        pad="same",
        filter_size=(3, 3),
        pool_size=(2, 2),
        # VGG net uses cross-correlation by default
        conv_mode="cross",
    )
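Note the hyperparameter scoping: num_filters is set per conv group, while
pad, filter_size, pool_size, and conv_mode are declared once on the outer
"vgg16" node and inherited by every descendant. A hypothetical wrapper
(input shape assumed):

import treeano.nodes as tn

network = tn.SequentialNode("net", [
    tn.InputNode("x", shape=(None, 3, 224, 224)),
    vgg_16_nodes(conv_only=True),
]).network()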
Example #8
def load_network(update_scale_factor):
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DnnMaxPoolNode("loc_pool1"),
             tn.DnnConv2DWithBiasNode("loc_conv1"),
             tn.DnnMaxPoolNode("loc_pool2"),
             bn.NoScaleBatchNormalizationNode("loc_bn1"),
             tn.ReLUNode("loc_relu1"),
             tn.DnnConv2DWithBiasNode("loc_conv2"),
             bn.NoScaleBatchNormalizationNode("loc_bn2"),
             tn.ReLUNode("loc_relu2"),
             tn.DenseNode("loc_fc1", num_units=50),
             bn.NoScaleBatchNormalizationNode("loc_bn3"),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.NormalWeightInit(std=0.001)])]),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    st_node = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))

    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             # scaling down the updates of the spatial transformer
             # seems to be very helpful: it lets the classification
             # net learn what to look for before the transformer
             # prematurely focuses on a region
             tn.UpdateScaleNode(
                 "st_update_scale",
                 st_node,
                 update_scale_factor=update_scale_factor),
             tn.Conv2DWithBiasNode("conv1"),
             tn.MaxPool2DNode("mp1"),
             bn.NoScaleBatchNormalizationNode("bn1"),
             tn.ReLUNode("relu1"),
             tn.Conv2DWithBiasNode("conv2"),
             tn.MaxPool2DNode("mp2"),
             bn.NoScaleBatchNormalizationNode("bn2"),
             tn.ReLUNode("relu2"),
             tn.GaussianDropoutNode("do1"),
             tn.DenseNode("fc1"),
             bn.NoScaleBatchNormalizationNode("bn3"),
             tn.ReLUNode("relu3"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )

    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode("cost", {
                 "pred": tn.ReferenceNode("pred_ref", reference="model"),
                 "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )
    network = with_updates.network()
    network.build()  # build eagerly to share weights
    return network
Example #9
import numpy as np
import theano
import theano.tensor as T
import treeano.nodes as tn
from treeano.sandbox.nodes import fmp
fX = theano.config.floatX

# TODO change me
node = "fmp2"
compute_grad = True

if node == "mp":
    n = tn.MaxPool2DNode("mp", pool_size=(2, 2))
elif node == "fmp":
    n = fmp.DisjointPseudorandomFractionalMaxPool2DNode("fmp1",
                                                        fmp_alpha=1.414,
                                                        fmp_u=0.5)
elif node == "fmp2":
    n = fmp.OverlappingRandomFractionalMaxPool2DNode("fmp2",
                                                     pool_size=(1.414, 1.414))
else:
    assert False

network = tn.SequentialNode(
    "s", [tn.InputNode("i", shape=(1, 1, 32, 32)), n]).network()

if compute_grad:
    i = network["i"].get_vw("default").variable
    s = network["s"].get_vw("default").variable
    fn = network.function(["i"], [T.grad(s.sum(), i)])
else:
    # forward output only (no gradient)
    fn = network.function(["i"], ["s"])
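With fmp_alpha / pool_size near sqrt(2) ~= 1.414, each fractional max pooling
layer shrinks the spatial dimensions by roughly a factor of sqrt(2) rather
than 2, which allows many more pooling layers to be stacked than with
conventional 2x2 pooling.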