# Example #1 (score: 0)
    def architecture_children(self):
        """Build highway-style gated children for this node.

        A transform path is mixed with a carry (identity) path, where
        the carry gate is defined as ``1 - transform_gate``.
        """
        raw = self.raw_children()
        gate_child = raw["gate"]
        transform_child = raw["transform"]

        # transform gate: gate output -> constant shift -> sigmoid
        # the -4 shift acts as the gate's initial bias value
        # TODO parameterize
        transform_gate = tn.SequentialNode(
            self.name + "_transformgate",
            [gate_child,
             tn.AddConstantNode(self.name + "_biastranslation", value=-4),
             tn.SigmoidNode(self.name + "_transformgatesigmoid")])

        # carry gate = 1 - transform gate, computed as (-1 * g) + 1
        carry_gate = tn.SequentialNode(
            self.name + "_carrygate",
            [tn.ReferenceNode(self.name + "_transformgateref",
                              reference=transform_gate.name),
             tn.MultiplyConstantNode(self.name + "_invert", value=-1),
             tn.AddConstantNode(self.name + "_add", value=1)])

        # gate each path elementwise, then sum the gated paths
        gated_transform = tn.ElementwiseProductNode(
            self.name + "_gatedtransform", [transform_gate, transform_child])
        gated_carry = tn.ElementwiseProductNode(
            self.name + "_gatedcarry",
            [carry_gate, tn.IdentityNode(self.name + "_carry")])
        return [tn.ElementwiseSumNode(self.name + "_res",
                                      [gated_carry, gated_transform])]
 def architecture_children(self):
     """Build a recurrent-conv sequence with skip connections.

     The first conv is unconstrained in output shape; every later step
     applies a same-padded conv and sums its output with the initial
     pre-activation ("_z0") before the nonlinearity.
     """
     # TODO set LRN n = num_filters / 8 + 1

     def _tail(t):
         # shared per-step tail: pre-activation identity, ReLU, LRN,
         # and output identity
         return [
             tn.IdentityNode(self.name + "_z%d" % t),
             tn.ReLUNode(self.name + "_z%d_relu" % t),
             lrn.LocalResponseNormalizationNode(self.name + "_z%d_lrn" % t),
             tn.IdentityNode(self.name + "_x%d" % t),
         ]

     # NOTE: not explicitly giving the first conv a pad of "same",
     # since the first conv can have any output shape
     children = [tn.DnnConv2DWithBiasNode(self.name + "_conv0")] + _tail(0)
     for step in range(1, self.steps + 1):
         children.append(
             tn.DnnConv2DWithBiasNode(self.name + "_conv%d" % step,
                                      stride=(1, 1),
                                      pad="same"))
         children.append(
             tn.ElementwiseSumNode(self.name + "_sum%d" % step, [
                 tn.ReferenceNode(self.name + "_sum%d_curr" % step,
                                  reference=self.name + "_conv%d" % step),
                 tn.ReferenceNode(self.name + "_sum%d_prev" % step,
                                  reference=self.name + "_z0"),
             ]))
         children.extend(_tail(step))
     return [tn.SequentialNode(self.name + "_sequential", children)]
# Example #3 (score: 0)
    def architecture_children(self):
        """Mix mean-pooling and max-pooling with a learned gate.

        A 1-filter conv (applied through batch folding) followed by a
        sigmoid produces a gate; the output is
        gate * mean_pool + (1 - gate) * max_pool.
        """
        gate = tn.SequentialNode(
            self.name + "_gate_seq",
            [batch_fold.AddAxisNode(self.name + "_add_axis", axis=2),
             batch_fold.FoldUnfoldAxisIntoBatchNode(
                 self.name + "_batch_fold",
                 # NOTE: using dnn conv, since pooling is normally strided
                 # and the normal conv is slow with strides
                 tn.DnnConv2DWithBiasNode(self.name + "_conv",
                                          num_filters=1),
                 axis=1),
             batch_fold.RemoveAxisNode(self.name + "_remove_axis", axis=2),
             tn.SigmoidNode(self.name + "_gate_sigmoid")])

        # 1 - gate, computed as (-1 * gate) + 1
        inverse_gate = tn.SequentialNode(
            self.name + "_max_gate",
            [tn.ReferenceNode(self.name + "_gate_ref",
                              reference=gate.name),
             tn.MultiplyConstantNode(self.name + "_", value=-1),
             tn.AddConstantNode(self.name + "_add1", value=1)])

        gated_mean = tn.ElementwiseProductNode(
            self.name + "_mean_product",
            [tn.MeanPool2DNode(self.name + "_mean_pool"), gate])
        gated_max = tn.ElementwiseProductNode(
            self.name + "_max_product",
            [tn.MaxPool2DNode(self.name + "_max_pool"), inverse_gate])

        return [tn.ElementwiseSumNode(self.name + "_sum",
                                      [gated_mean, gated_max])]
# Example #4 (score: 0)
    def architecture_children(self):
        """Return a weighted mix of mean-pooled and max-pooled paths.

        Each path is scaled by a MultiplyConstantNode; its value is
        presumably supplied as a hyperparameter (not set here).
        """
        pooled_mean = tn.SequentialNode(
            self.name + "_mean_seq",
            [tn.DnnMeanPoolNode(self.name + "_mean_pool"),
             tn.MultiplyConstantNode(self.name + "_mean_const_mult")])

        pooled_max = tn.SequentialNode(
            self.name + "_max_seq",
            [tn.DnnMaxPoolNode(self.name + "_max_pool"),
             tn.MultiplyConstantNode(self.name + "_max_const_mult")])

        return [tn.ElementwiseSumNode(self.name + "_sum_mixed",
                                      [pooled_max, pooled_mean])]
# Example #5 (score: 0)
def test_elementwise_sum_node():
    """ElementwiseSumNode output matches numpy's sum of its inputs."""
    for shape in [(), (3, 4, 5)]:
        network = tn.ElementwiseSumNode(
            "es",
            [tn.InputNode("i1", shape=shape),
             tn.InputNode("i2", shape=shape),
             tn.InputNode("i3", shape=shape)],
        ).network()
        fn = network.function(["i1", "i2", "i3"], ["es"])
        inputs = [np.array(np.random.rand(*shape), dtype=fX)
                  for _ in range(3)]
        np.testing.assert_allclose(sum(inputs),
                                   fn(*inputs)[0],
                                   rtol=1e-5)
# Example #6 (score: 0)
def test_dense_node_and_dense_combine_node2():
    # testing that summing the output of 2 dense nodes is the same as
    # applying a dense combine node with 2 identities (+ bias)
    # and the same as multiplying the output of 1 dense node by 2
    def make_network(middle_nodes):
        # shared scaffolding: same input, same constant init for all
        return tn.HyperparameterNode(
            "hp",
            tn.SequentialNode("seq",
                              [tn.InputNode("in", shape=(3, 4, 5))]
                              + middle_nodes),
            inits=[treeano.inits.ConstantInit(1)]).network()

    doubled = make_network([
        tn.DenseNode("dense1", num_units=6),
        tn.MultiplyConstantNode("mul", value=2),
    ])
    summed = make_network([
        tn.ElementwiseSumNode("sum", [
            tn.DenseNode("dense1", num_units=6),
            tn.DenseNode("dense2", num_units=6),
        ]),
    ])
    combined = make_network([
        tn.DenseCombineNode("fc",
                            [tn.IdentityNode("i1"),
                             tn.IdentityNode("i2")],
                            num_units=6),
        tn.AddBiasNode("bias"),
    ])
    x = np.random.randn(3, 4, 5).astype(fX)
    fn0 = doubled.function(["in"], ["hp"])
    fn1 = summed.function(["in"], ["hp"])
    fn2 = combined.function(["in"], ["hp"])
    np.testing.assert_allclose(fn0(x), fn1(x))
    np.testing.assert_allclose(fn0(x), fn2(x))
# Example #7 (score: 0)
def generalized_residual(name, nodes, identity_ratio=0.5):
    """Sum of a partially-zeroed identity path and a sequential path.

    The identity branch zeros a (1 - identity_ratio) fraction of the
    last axis; the residual branch applies ``nodes`` sequentially.
    """
    identity_branch = _ZeroLastAxisPartitionNode(
        name + "_zero", zero_ratio=(1 - identity_ratio))
    residual_branch = tn.SequentialNode(name + "_seq", nodes)
    return tn.ElementwiseSumNode(name, [identity_branch, residual_branch])
# Example #8 (score: 0)
def preactivation_residual_block_conv_2d(name,
                                         num_filters,
                                         num_layers,
                                         increase_dim=None,
                                         initial_block=False,
                                         conv_node=tn.Conv2DNode,
                                         bn_node=bn.BatchNormalizationNode,
                                         activation_node=tn.ReLUNode,
                                         input_num_filters=None,
                                         projection_filter_size=(1, 1),
                                         increase_dim_stride=(2, 2),
                                         no_identity=False):
    """
    Pre-activation residual block (BN -> activation -> conv per layer)
    from http://arxiv.org/abs/1603.05027

    name : prefix for all child node names
    num_filters : number of filters for every conv in the block
    num_layers : number of conv layers in the residual branch
    increase_dim : one of None, "projection", or "pad"; how the identity
        shortcut handles a dimensionality increase. When not None, the
        first conv and the shortcut are strided by increase_dim_stride.
    initial_block : when True, the BN + activation before the first conv
        are skipped (for the block directly after the input)
    conv_node, bn_node, activation_node : node factories used to build
        the block (overridable, e.g. for a different conv implementation)
    input_num_filters : required for increase_dim="pad"; used to compute
        how many zero feature maps to pad onto the shortcut
    projection_filter_size : filter size of the shortcut conv when
        increase_dim="projection"
    increase_dim_stride : stride used when increasing dimensions
    no_identity : when True, return only the residual branch (disables
        the resnet shortcut entirely)
    """
    if increase_dim is not None:
        assert increase_dim in {"projection", "pad"}
        first_stride = increase_dim_stride
        if increase_dim == "projection":
            # shortcut is a strided conv with its own pre-activation
            # TODO remove pre-activation when initial block
            assert not initial_block
            identity_node = tn.SequentialNode(name + "_projection", [
                bn_node(name + "_projectionbn"),
                activation_node(name + "_projectionactivation"),
                tn.Conv2DNode(name + "_projectionconv",
                              num_filters=num_filters,
                              filter_size=projection_filter_size,
                              stride=first_stride,
                              pad="same"),
            ])
        elif increase_dim == "pad":
            # parameter-free shortcut: downsample spatially, then
            # zero-pad the filter axis up to num_filters
            assert input_num_filters is not None
            identity_node = tn.SequentialNode(name + "_pad", [
                StridedDownsampleNode(name + "_stride",
                                      strides=(1, 1) + first_stride),
                tn.PadNode(
                    name + "_addpad",
                    padding=(0, (num_filters - input_num_filters) // 2, 0, 0))
            ])
    else:
        # no dimensionality change: plain identity shortcut, no stride
        first_stride = (1, 1)
        identity_node = tn.IdentityNode(name + "_identity")

    # residual branch: num_layers repetitions of BN -> activation -> conv
    nodes = []
    # first node
    for i in range(num_layers):
        if i == 0:
            # first conv
            # ---
            # maybe remove initial activation
            if not initial_block:
                nodes += [
                    bn_node(name + "_bn%d" % i),
                    activation_node(name + "_activation%d" % i),
                ]
            # same as middle convs, but with stride
            nodes += [
                conv_node(name + "_conv%d" % i,
                          num_filters=num_filters,
                          stride=first_stride,
                          pad="same"),
            ]
        else:
            nodes += [
                bn_node(name + "_bn%d" % i),
                activation_node(name + "_activation%d" % i),
                conv_node(name + "_conv%d" % i,
                          num_filters=num_filters,
                          stride=(1, 1),
                          pad="same"),
            ]

    residual_node = tn.SequentialNode(name + "_seq", nodes)

    if no_identity:
        # ability to disable resnet connections
        return residual_node
    else:
        return tn.ElementwiseSumNode(name, [identity_node, residual_node])
# Example #9 (score: 0)
def test_elementwise_sum_node_serialization():
    """ElementwiseSumNode round-trips serialization, flat and nested."""
    empty = tn.ElementwiseSumNode("a", [])
    nested = tn.ElementwiseSumNode(
        "a",
        [tn.ElementwiseSumNode("b", []),
         tn.ElementwiseSumNode("c", [])])
    for node in (empty, nested):
        tn.check_serialization(node)