def architecture_children(self):
    children = self.raw_children()
    gate = children["gate"]
    transform = children["transform"]

    # prepare gates
    transform_gate = tn.SequentialNode(
        self.name + "_transformgate",
        [gate,
         # add initial value as bias instead
         # TODO parameterize
         tn.AddConstantNode(self.name + "_biastranslation", value=-4),
         tn.SigmoidNode(self.name + "_transformgatesigmoid")])

    # carry gate = 1 - transform gate
    carry_gate = tn.SequentialNode(
        self.name + "_carrygate",
        [tn.ReferenceNode(self.name + "_transformgateref",
                          reference=transform_gate.name),
         tn.MultiplyConstantNode(self.name + "_invert", value=-1),
         tn.AddConstantNode(self.name + "_add", value=1)])

    # combine with gates
    gated_transform = tn.ElementwiseProductNode(
        self.name + "_gatedtransform",
        [transform_gate, transform])
    gated_carry = tn.ElementwiseProductNode(
        self.name + "_gatedcarry",
        [carry_gate, tn.IdentityNode(self.name + "_carry")])
    res = tn.ElementwiseSumNode(
        self.name + "_res",
        [gated_carry, gated_transform])
    return [res]
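# The node graph above is the gating arithmetic of a highway layer
# (cf. http://arxiv.org/abs/1505.00387): a transform gate t = sigmoid(s - 4)
# and a carry gate 1 - t blend the transformed and raw inputs. A minimal
# NumPy sanity sketch, where "gate_pre", "transformed", and "x" are
# hypothetical stand-ins for the gate child's pre-sigmoid output, the
# transform child's output, and the layer input:
import numpy as np

def _highway_sketch(gate_pre, transformed, x):
    t = 1.0 / (1.0 + np.exp(-(gate_pre - 4)))  # _biastranslation + sigmoid
    return t * transformed + (1 - t) * x       # gated sum of both paths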
def architecture_children(self):
    # TODO set LRN n = num_filters / 8 + 1
    nodes = [
        # NOTE: not explicitly giving the first conv a pad of "same",
        # since the first conv can have any output shape
        tn.DnnConv2DWithBiasNode(self.name + "_conv0"),
        tn.IdentityNode(self.name + "_z0"),
        tn.ReLUNode(self.name + "_z0_relu"),
        lrn.LocalResponseNormalizationNode(self.name + "_z0_lrn"),
        tn.IdentityNode(self.name + "_x0"),
    ]
    for t in range(1, self.steps + 1):
        nodes += [
            tn.DnnConv2DWithBiasNode(self.name + "_conv%d" % t,
                                     stride=(1, 1),
                                     pad="same"),
            tn.ElementwiseSumNode(
                self.name + "_sum%d" % t,
                [tn.ReferenceNode(self.name + "_sum%d_curr" % t,
                                  reference=self.name + "_conv%d" % t),
                 tn.ReferenceNode(self.name + "_sum%d_prev" % t,
                                  reference=self.name + "_z0")]),
            tn.IdentityNode(self.name + "_z%d" % t),
            tn.ReLUNode(self.name + "_z%d_relu" % t),
            lrn.LocalResponseNormalizationNode(self.name + "_z%d_lrn" % t),
            tn.IdentityNode(self.name + "_x%d" % t),
        ]
    return [tn.SequentialNode(self.name + "_sequential", nodes)]
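# The loop above unrolls a recurrent convolutional layer (cf. Liang & Hu,
# "Recurrent Convolutional Neural Network for Object Recognition",
# CVPR 2015): each step adds a fresh convolution of the previous state to
# the initial feed-forward response z0, then applies ReLU and LRN. A hedged
# NumPy-style sketch, with "conv" and "lrn_fn" as hypothetical stand-ins
# for the conv and normalization nodes:
import numpy as np

def _recurrent_conv_sketch(x_in, conv, lrn_fn, steps):
    z0 = conv(x_in, 0)             # _conv0 / _z0
    x = lrn_fn(np.maximum(z0, 0))  # _z0_relu / _z0_lrn / _x0
    for t in range(1, steps + 1):
        z = conv(x, t) + z0        # _conv%d + _sum%d (skip back to z0)
        x = lrn_fn(np.maximum(z, 0))
    return x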
def architecture_children(self):
    gate_node = tn.SequentialNode(
        self.name + "_gate_seq",
        [batch_fold.AddAxisNode(self.name + "_add_axis", axis=2),
         batch_fold.FoldUnfoldAxisIntoBatchNode(
             self.name + "_batch_fold",
             # NOTE: using dnn conv, since pooling is normally strided
             # and the normal conv is slow with strides
             tn.DnnConv2DWithBiasNode(self.name + "_conv",
                                      num_filters=1),
             axis=1),
         batch_fold.RemoveAxisNode(self.name + "_remove_axis", axis=2),
         tn.SigmoidNode(self.name + "_gate_sigmoid")])

    inverse_gate_node = tn.SequentialNode(
        self.name + "_max_gate",
        [tn.ReferenceNode(self.name + "_gate_ref",
                          reference=gate_node.name),
         tn.MultiplyConstantNode(self.name + "_invert", value=-1),
         tn.AddConstantNode(self.name + "_add1", value=1)])

    mean_node = tn.ElementwiseProductNode(
        self.name + "_mean_product",
        [tn.MeanPool2DNode(self.name + "_mean_pool"),
         gate_node])
    max_node = tn.ElementwiseProductNode(
        self.name + "_max_product",
        [tn.MaxPool2DNode(self.name + "_max_pool"),
         inverse_gate_node])
    return [tn.ElementwiseSumNode(self.name + "_sum",
                                  [mean_node, max_node])]
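# A hedged NumPy sketch of the mixing computed above: a learned,
# per-position gate g in (0, 1) blends mean pooling and max pooling,
# out = g * mean_pool + (1 - g) * max_pool. "mean_pooled", "max_pooled",
# and "g" are hypothetical stand-ins for the two pooled maps and the
# sigmoid gate's output.
def _gated_pool_sketch(mean_pooled, max_pooled, g):
    return g * mean_pooled + (1 - g) * max_pooled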
def architecture_children(self):
    # NOTE: no "value" given to the MultiplyConstantNodes - the mixing
    # weights are expected to be provided as hyperparameters by an
    # ancestor node
    mean_seq_node = tn.SequentialNode(
        self.name + "_mean_seq",
        [tn.DnnMeanPoolNode(self.name + "_mean_pool"),
         tn.MultiplyConstantNode(self.name + "_mean_const_mult")])

    max_seq_node = tn.SequentialNode(
        self.name + "_max_seq",
        [tn.DnnMaxPoolNode(self.name + "_max_pool"),
         tn.MultiplyConstantNode(self.name + "_max_const_mult")])

    return [tn.ElementwiseSumNode(self.name + "_sum_mixed",
                                  [max_seq_node, mean_seq_node])]
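# Fixed-weight counterpart of the gated pooling above: each pooled map is
# scaled by a constant and the results are summed. Hedged sketch; "w_max"
# and "w_mean" stand in for the two MultiplyConstantNode "value"
# hyperparameters (a common choice is weights that sum to 1):
def _mixed_pool_sketch(max_pooled, mean_pooled, w_max=0.5, w_mean=0.5):
    return w_max * max_pooled + w_mean * mean_pooled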
def test_elementwise_sum_node():
    for s in [(), (3, 4, 5)]:
        network = tn.ElementwiseSumNode(
            "es",
            [tn.InputNode("i1", shape=s),
             tn.InputNode("i2", shape=s),
             tn.InputNode("i3", shape=s)],
        ).network()
        fn = network.function(["i1", "i2", "i3"], ["es"])
        i1 = np.array(np.random.rand(*s), dtype=fX)
        i2 = np.array(np.random.rand(*s), dtype=fX)
        i3 = np.array(np.random.rand(*s), dtype=fX)
        np.testing.assert_allclose(i1 + i2 + i3,
                                   fn(i1, i2, i3)[0],
                                   rtol=1e-5)
def test_dense_node_and_dense_combine_node2():
    # testing that summing the output of 2 dense nodes is the same as
    # applying a dense combine node with 2 identities (+ bias)
    # and the same as multiplying the output of 1 dense node by 2
    network0 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("in", shape=(3, 4, 5)),
             tn.DenseNode("dense1", num_units=6),
             tn.MultiplyConstantNode("mul", value=2)]),
        inits=[treeano.inits.ConstantInit(1)]).network()
    network1 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("in", shape=(3, 4, 5)),
             tn.ElementwiseSumNode(
                 "sum",
                 [tn.DenseNode("dense1", num_units=6),
                  tn.DenseNode("dense2", num_units=6)])]),
        inits=[treeano.inits.ConstantInit(1)]).network()
    network2 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("in", shape=(3, 4, 5)),
             tn.DenseCombineNode(
                 "fc",
                 [tn.IdentityNode("i1"),
                  tn.IdentityNode("i2")],
                 num_units=6),
             tn.AddBiasNode("bias")]),
        inits=[treeano.inits.ConstantInit(1)]).network()
    x = np.random.randn(3, 4, 5).astype(fX)
    fn0 = network0.function(["in"], ["hp"])
    fn1 = network1.function(["in"], ["hp"])
    fn2 = network2.function(["in"], ["hp"])
    np.testing.assert_allclose(fn0(x), fn1(x))
    np.testing.assert_allclose(fn0(x), fn2(x))
def generalized_residual(name, nodes, identity_ratio=0.5):
    return tn.ElementwiseSumNode(
        name,
        [_ZeroLastAxisPartitionNode(name + "_zero",
                                    zero_ratio=(1 - identity_ratio)),
         tn.SequentialNode(name + "_seq", nodes)])
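# Hedged NumPy sketch of the sum above, assuming _ZeroLastAxisPartitionNode
# zeroes the trailing `zero_ratio` fraction of the last axis: the leading
# `identity_ratio` fraction of features is carried through unchanged, and
# the residual branch f(x) is added on top everywhere. "f" stands in for
# the sequential `nodes` branch.
def _generalized_residual_sketch(x, f, identity_ratio=0.5):
    kept = x.copy()
    k = int(round(identity_ratio * x.shape[-1]))
    kept[..., k:] = 0  # the zeroed (non-identity) partition
    return kept + f(x)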
def preactivation_residual_block_conv_2d(name,
                                         num_filters,
                                         num_layers,
                                         increase_dim=None,
                                         initial_block=False,
                                         conv_node=tn.Conv2DNode,
                                         bn_node=bn.BatchNormalizationNode,
                                         activation_node=tn.ReLUNode,
                                         input_num_filters=None,
                                         projection_filter_size=(1, 1),
                                         increase_dim_stride=(2, 2),
                                         no_identity=False):
    """
    from http://arxiv.org/abs/1603.05027
    """
    if increase_dim is not None:
        assert increase_dim in {"projection", "pad"}
        first_stride = increase_dim_stride
        if increase_dim == "projection":
            # TODO remove pre-activation when initial block
            assert not initial_block
            identity_node = tn.SequentialNode(
                name + "_projection",
                [bn_node(name + "_projectionbn"),
                 activation_node(name + "_projectionactivation"),
                 tn.Conv2DNode(name + "_projectionconv",
                               num_filters=num_filters,
                               filter_size=projection_filter_size,
                               stride=first_stride,
                               pad="same")])
        elif increase_dim == "pad":
            assert input_num_filters is not None
            identity_node = tn.SequentialNode(
                name + "_pad",
                [StridedDownsampleNode(
                    name + "_stride",
                    strides=(1, 1) + first_stride),
                 tn.PadNode(
                     name + "_addpad",
                     padding=(0,
                              (num_filters - input_num_filters) // 2,
                              0,
                              0))])
    else:
        first_stride = (1, 1)
        identity_node = tn.IdentityNode(name + "_identity")

    nodes = []
    # first node
    for i in range(num_layers):
        if i == 0:
            # first conv
            # ---
            # maybe remove initial activation
            if not initial_block:
                nodes += [
                    bn_node(name + "_bn%d" % i),
                    activation_node(name + "_activation%d" % i),
                ]
            # same as middle convs, but with stride
            nodes += [
                conv_node(name + "_conv%d" % i,
                          num_filters=num_filters,
                          stride=first_stride,
                          pad="same"),
            ]
        else:
            nodes += [
                bn_node(name + "_bn%d" % i),
                activation_node(name + "_activation%d" % i),
                conv_node(name + "_conv%d" % i,
                          num_filters=num_filters,
                          stride=(1, 1),
                          pad="same"),
            ]
    residual_node = tn.SequentialNode(name + "_seq", nodes)

    if no_identity:
        # ability to disable resnet connections
        return residual_node
    else:
        return tn.ElementwiseSumNode(name, [identity_node, residual_node])
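# Hedged sketch of the full pre-activation residual computation built above
# (http://arxiv.org/abs/1603.05027): BN and the activation come *before*
# each convolution, and the identity path is added at the end. "bn", "act",
# and "conv" are hypothetical stand-ins for the node types passed in.
def _preactivation_residual_sketch(x, bn, act, conv, num_layers,
                                   initial_block=False):
    h = x
    for i in range(num_layers):
        # maybe skip the initial BN + activation for the very first block
        if i > 0 or not initial_block:
            h = act(bn(h))
        h = conv(h, i)
    return x + h  # identity shortcut + residual branch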
def test_elementwise_sum_node_serialization():
    tn.check_serialization(tn.ElementwiseSumNode("a", []))
    tn.check_serialization(tn.ElementwiseSumNode(
        "a",
        [tn.ElementwiseSumNode("b", []),
         tn.ElementwiseSumNode("c", [])]))