def irregular_length_attention_node(name,
                                    lengths_reference,
                                    num_units,
                                    output_units=None):
    """
    builds a subtree that multiplies an ungrouped "value" branch with an
    "attention" branch (dense -> scaled tanh -> dense -> ungroup -> softmax)
    elementwise, and then sums the product over axis 1

    when output_units is None, the second dense node has a single unit and
    its output is passed through an AddBroadcastNode with axes=(2, )

    NOTE: if output_units is not None, this should be the number of input
    units
    """
    single_weight = output_units is None

    # branch holding the values to be weighted
    values = UngroupIrregularLengthTensorsNode(
        name + "_ungroup_values",
        lengths_reference=lengths_reference)

    # branch producing the attention weights
    if single_weight:
        fc2_units = 1
    else:
        fc2_units = output_units
    weight_nodes = [
        tn.DenseNode(name + "_fc1", num_units=num_units),
        tn.ScaledTanhNode(name + "_tanh"),
        tn.DenseNode(name + "_fc2", num_units=fc2_units),
        UngroupIrregularLengthTensorsNode(
            name + "_ungroup_attention",
            lengths_reference=lengths_reference),
        _IrregularLengthAttentionSoftmaxNode(
            name + "_softmax",
            lengths_reference=lengths_reference),
    ]
    if single_weight:
        weight_nodes.append(tn.AddBroadcastNode(name + "_bcast", axes=(2, )))
    weights = tn.SequentialNode(name + "_attention", weight_nodes)

    weighted = tn.ElementwiseProductNode(name + "_prod", [values, weights])
    summed = tn.SumNode(name + "_sum", axis=1)
    return tn.SequentialNode(name, [weighted, summed])
def test_dense_node_and_dense_combine_node1():
    """
    a stack of dense nodes and the same stack made of dense combine nodes,
    each with a single identity child, should return the same thing
    """
    # (dense node name, identity child name, number of units)
    layer_specs = [("fc1", "i1", 6), ("fc2", "i2", 7), ("fc3", "i3", 8)]

    dense_children = [tn.InputNode("in", shape=(3, 4, 5))]
    dense_children += [tn.DenseNode(fc_name, num_units=units)
                       for fc_name, _, units in layer_specs]
    network1 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", dense_children),
        inits=[treeano.inits.ConstantInit(1)],
    ).network()

    combine_children = [tn.InputNode("in", shape=(3, 4, 5))]
    combine_children += [
        tn.DenseCombineNode(fc_name,
                            [tn.IdentityNode(identity_name)],
                            num_units=units)
        for fc_name, identity_name, units in layer_specs
    ]
    network2 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", combine_children),
        inits=[treeano.inits.ConstantInit(1)],
    ).network()

    x = np.random.randn(3, 4, 5).astype(fX)
    fn1 = network1.function(["in"], ["fc3"])
    fn2 = network2.function(["in"], ["fc3"])
    np.testing.assert_allclose(fn1(x), fn2(x))
def test_dense_node():
    """
    three stacked dense nodes applied to a (3, 4, 5) input should produce
    an output of shape (3, 8)
    """
    layers = [tn.InputNode("in", shape=(3, 4, 5))]
    for layer_name, units in (("fc1", 6), ("fc2", 7), ("fc3", 8)):
        layers.append(tn.DenseNode(layer_name, num_units=units))
    network = tn.SequentialNode("seq", layers).network()

    x = np.random.randn(3, 4, 5).astype(fX)
    fn = network.function(["in"], ["fc3"])
    outputs = fn(x)
    res = outputs[0]
    nt.assert_equal(res.shape, (3, 8))
def HighwayDenseNode(name, nonlinearity_node, **hyperparameters):
    """
    wraps a HighwayNode in a HyperparameterNode, where the "transform"
    child is a dense node followed by nonlinearity_node and the "gate"
    child is a dense node

    any extra keyword arguments are passed on as hyperparameters
    """
    transform = tn.SequentialNode(
        name + "_transform",
        [tn.DenseNode(name + "_transformdense"), nonlinearity_node])
    gate = tn.DenseNode(name + "_gatedense")
    highway = HighwayNode(name + "_highway",
                          {"transform": transform, "gate": gate})
    return tn.HyperparameterNode(name, highway, **hyperparameters)
def test_affine_spatial_transformer_node_build():
    """
    just testing that a network containing an affine spatial transformer
    node builds
    """
    loc_layers = [
        tn.DenseNode("loc_fc1", num_units=50),
        tn.ReLUNode("loc_relu3"),
        tn.DenseNode("loc_fc2",
                     num_units=6,
                     inits=[treeano.inits.ZeroInit()]),
    ]
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode("loc_seq", loc_layers),
        num_filters=32,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    model_layers = [
        tn.InputNode("x", shape=(None, 1, 60, 60)),
        spatial_transformer.AffineSpatialTransformerNode(
            "st",
            localization_network,
            output_shape=(20, 20)),
        tn.DenseNode("fc1"),
        tn.ReLUNode("relu1"),
        tn.DropoutNode("do1"),
        tn.DenseNode("fc2", num_units=10),
        tn.SoftmaxNode("pred"),
    ]
    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode("seq", model_layers),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeNormalInit()],
    )

    cost = tn.TotalCostNode(
        "cost",
        {"pred": tn.ReferenceNode("pred_ref", reference="model"),
         "target": tn.InputNode("y", shape=(None,), dtype="int32")})
    optimizer = tn.AdamNode("adam", {"subtree": model, "cost": cost})
    with_updates = tn.HyperparameterNode(
        "with_updates",
        optimizer,
        cost_function=treeano.utils.categorical_crossentropy_i32,
    )

    network = with_updates.network()
    # build eagerly to share weights
    network.build()
def test_fold_unfold_axis_into_batch_node():
    """
    checks the intermediate and final output shapes of two nested
    FoldUnfoldAxisIntoBatchNode's (outer on axis 3, inner on axis 1)
    """
    # the nested structure is assembled from the inside out
    inner_seq = tn.SequentialNode(
        "s3",
        [tn.IdentityNode("i2"), tn.DenseNode("d", num_units=11)])
    inner_fold = bf.FoldUnfoldAxisIntoBatchNode("fu2", inner_seq, axis=1)
    outer_seq = tn.SequentialNode(
        "s2",
        [tn.IdentityNode("i1"), inner_fold])
    outer_fold = bf.FoldUnfoldAxisIntoBatchNode("fu1", outer_seq, axis=3)
    network = tn.SequentialNode(
        "s",
        [tn.InputNode("i", shape=(2, 3, 4, 5)), outer_fold]
    ).network()

    fn = network.function(["i"], ["i1", "i2", "fu2", "fu1"])
    x = np.zeros((2, 3, 4, 5), dtype=fX)
    i1, i2, fu2, fu1 = fn(x)

    expected_and_actual = [
        ((10, 3, 4), i1),
        ((30, 4), i2),
        ((10, 3, 11), fu2),
        ((2, 3, 11, 5), fu1),
    ]
    for expected_shape, output in expected_and_actual:
        nt.assert_equal(expected_shape, output.shape)
def test_batch_normalization_node():
    """
    trains batch normalization + dense node with adam on random data and
    checks that the cost strictly decreases on each of 3 further updates
    """
    subtree = tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 10)),
        batch_normalization.BatchNormalizationNode("bn"),
        tn.DenseNode("d", num_units=1),
    ])
    cost_node = tn.TotalCostNode(
        "cost",
        {
            "target": tn.InputNode("y", shape=(None, 1)),
            "pred": tn.ReferenceNode("pred_ref", reference="d"),
        },
        cost_function=treeano.utils.squared_error)
    network = tn.AdamNode(
        "adam",
        {"subtree": subtree, "cost": cost_node},
    ).network()

    fn = network.function(["x", "y"], ["cost"], include_updates=True)

    # inputs with a large offset and scale
    x = 100 + 100 * np.random.randn(100, 10).astype(fX)
    y = np.random.randn(100, 1).astype(fX)

    last_cost = fn(x, y)[0]
    for _ in range(3):
        current_cost = fn(x, y)[0]
        assert current_cost < last_cost
        last_cost = current_cost
def test_elementwise_kl_sparsity_penalty_node2():
    """
    just testing that the elementwise KL sparsity penalty node runs, and
    that the output has the same shape as the dense node before it
    """
    layers = [
        tn.InputNode("i", shape=(10, 3)),
        tn.DenseNode("d", num_units=9),
        sp.ElementwiseKLSparsityPenaltyNode("sp", sparsity=0.1),
    ]
    network = tn.SequentialNode("s", layers).network()
    fn = network.function(["i"], ["s"])
    x = np.random.rand(10, 3).astype(fX)
    penalty = fn(x)[0]
    nt.assert_equal(penalty.shape, (10, 9))
def test_elementwise_contraction_penalty_node2():
    """
    just testing that the elementwise contraction penalty node runs, and
    that the output has shape (10,) for a (10, 3) input
    """
    layers = [
        tn.InputNode("i", shape=(10, 3)),
        tn.DenseNode("d", num_units=9),
        cp.ElementwiseContractionPenaltyNode("cp", input_reference="i"),
    ]
    network = tn.SequentialNode("s", layers).network()
    fn = network.function(["i"], ["s"])
    x = np.random.rand(10, 3).astype(fX)
    penalty = fn(x)[0]
    nt.assert_equal(penalty.shape, (10,))
def architecture_children(self):
    """
    returns a single-element list with an auxiliary cost node, whose
    "target" is this node's raw "target" child and whose "pre_cost" is a
    dense node followed by a softmax, with categorical crossentropy as
    cost function
    """
    prefix = self.name
    target = self.raw_children()["target"]
    pre_cost = tn.SequentialNode(
        prefix + "_sequential",
        [tn.DenseNode(prefix + "_dense"),
         tn.SoftmaxNode(prefix + "_softmax")])
    auxiliary = tn.AuxiliaryCostNode(
        prefix + "_auxiliary",
        {"target": target, "pre_cost": pre_cost},
        cost_function=T.nnet.categorical_crossentropy)
    return [auxiliary]
def test_dense_node_and_dense_combine_node2():
    """
    testing that the following return the same thing:
    - the output of 1 dense node multiplied by 2
    - the sum of the outputs of 2 dense nodes
    - a dense combine node with 2 identities (+ bias)
    """
    # 1 dense node, output multiplied by 2
    doubled = tn.SequentialNode("seq", [
        tn.InputNode("in", shape=(3, 4, 5)),
        tn.DenseNode("dense1", num_units=6),
        tn.MultiplyConstantNode("mul", value=2),
    ])
    network0 = tn.HyperparameterNode(
        "hp",
        doubled,
        inits=[treeano.inits.ConstantInit(1)],
    ).network()

    # sum of 2 dense nodes
    summed = tn.SequentialNode("seq", [
        tn.InputNode("in", shape=(3, 4, 5)),
        tn.ElementwiseSumNode("sum", [
            tn.DenseNode("dense1", num_units=6),
            tn.DenseNode("dense2", num_units=6),
        ]),
    ])
    network1 = tn.HyperparameterNode(
        "hp",
        summed,
        inits=[treeano.inits.ConstantInit(1)],
    ).network()

    # dense combine node with 2 identities, followed by a bias
    combined = tn.SequentialNode("seq", [
        tn.InputNode("in", shape=(3, 4, 5)),
        tn.DenseCombineNode(
            "fc",
            [tn.IdentityNode("i1"), tn.IdentityNode("i2")],
            num_units=6),
        tn.AddBiasNode("bias"),
    ])
    network2 = tn.HyperparameterNode(
        "hp",
        combined,
        inits=[treeano.inits.ConstantInit(1)],
    ).network()

    x = np.random.randn(3, 4, 5).astype(fX)
    fn0 = network0.function(["in"], ["hp"])
    fn1 = network1.function(["in"], ["hp"])
    fn2 = network2.function(["in"], ["hp"])
    for other_fn in (fn1, fn2):
        np.testing.assert_allclose(fn0(x), other_fn(x))
def test_auxiliary_contraction_penalty_node():
    """
    testing that both contraction penalty versions return the same thing
    """
    layers = [
        tn.InputNode("i", shape=(10, 3)),
        cp.AuxiliaryContractionPenaltyNode(
            "acp",
            tn.DenseNode("d", num_units=9),
            cost_reference="sum"),
        cp.ElementwiseContractionPenaltyNode("cp", input_reference="i"),
        tn.AggregatorNode("a"),
        # zero out rest of network, so that value of sum is just value from
        # auxiliary contraction penalty node
        tn.ConstantNode("foo", value=0),
        tn.InputElementwiseSumNode("sum"),
    ]
    network = tn.SequentialNode("s", layers).network()
    fn = network.function(["i"], ["sum", "a"])
    x = np.random.rand(10, 3).astype(fX)
    outputs = fn(x)
    auxiliary_value = outputs[0]
    aggregated_value = outputs[1]
    np.testing.assert_equal(auxiliary_value, aggregated_value)
def test_auxiliary_kl_sparsity_penalty_node():
    """
    testing that both sparsity penalty versions return the same thing
    """
    layers = [
        tn.InputNode("i", shape=(10, 3)),
        tn.DenseNode("d", num_units=9),
        sp.AuxiliaryKLSparsityPenaltyNode("scp", cost_reference="sum"),
        sp.ElementwiseKLSparsityPenaltyNode("sp"),
        tn.AggregatorNode("a"),
        # zero out rest of network, so that value of sum is just the value
        # from auxiliary sparsity penalty node
        tn.ConstantNode("foo", value=0),
        tn.InputElementwiseSumNode("sum"),
    ]
    network = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("s", layers),
        sparsity=0.1,
    ).network()
    fn = network.function(["i"], ["sum", "a"])
    x = np.random.rand(10, 3).astype(fX)
    outputs = fn(x)
    auxiliary_value = outputs[0]
    aggregated_value = outputs[1]
    np.testing.assert_equal(auxiliary_value, aggregated_value)
def test_dense_node_serialization():
    """
    checks serialization of a dense node, both without hyperparameters and
    with num_units given
    """
    for node_kwargs in ({}, {"num_units": 100}):
        tn.check_serialization(tn.DenseNode("a", **node_kwargs))
variable=T.mean(reward), shape=(), ) baseline_reward = 100 network.create_vw( "default", variable=reward + baseline_reward, shape=(state_vw.shape[0], ), tags={"output"}, ) BATCH_SIZE = 64 graph = tn.GraphNode("graph", [[ tn.InputNode("state", shape=(BATCH_SIZE, 10)), tn.DenseNode("mu", num_units=2), tn.ConstantNode("sigma", value=1.), REINFORCE.NormalSampleNode("sampled"), RewardNode("reward"), REINFORCE.NormalREINFORCECostNode("REINFORCE") ], [{ "from": "state", "to": "mu" }, { "from": "mu", "to": "sampled", "to_key": "mu" }, { "from": "sigma", "to": "sampled",
# ############################## prepare model ##############################

# layers of the localization network, whose output (6 units) is used by the
# affine spatial transformer node
_loc_layers = [
    tn.DnnMaxPoolNode("loc_pool1"),
    tn.DnnConv2DWithBiasNode("loc_conv1"),
    tn.DnnMaxPoolNode("loc_pool2"),
    bn.NoScaleBatchNormalizationNode("loc_bn1"),
    tn.ReLUNode("loc_relu1"),
    tn.DnnConv2DWithBiasNode("loc_conv2"),
    bn.SimpleBatchNormalizationNode("loc_bn2"),
    tn.SpatialSoftmaxNode("loc_spatial_softmax"),
    spatial_attention.SpatialFeaturePointNode("loc_feature_point"),
    tn.DenseNode("loc_fc1", num_units=50),
    bn.NoScaleBatchNormalizationNode("loc_bn3"),
    tn.ReLUNode("loc_relu3"),
    tn.DenseNode("loc_fc2",
                 num_units=6,
                 inits=[treeano.inits.NormalWeightInit(std=0.001)]),
]

localization_network = tn.HyperparameterNode(
    "loc",
    tn.SequentialNode("loc_seq", _loc_layers),
    num_filters=20,
    filter_size=(5, 5),
    pool_size=(2, 2),
)

st_node = st.AffineSpatialTransformerNode(
    "st",
    localization_network,
    output_shape=(20, 20))
nodes.append( resnet.residual_block_conv_2d("resblock_%d_%d" % (group, block), num_filters=num_filters, num_layers=num_layers, increase_dim="projection")) else: nodes.append( resnet.residual_block_conv_2d("resblock_%d_%d" % (group, block), num_filters=num_filters, num_layers=num_layers)) nodes += [ tn.GlobalMeanPool2DNode("global_pool"), tn.DenseNode("logit", num_units=10), tn.SoftmaxNode("pred"), ] model = tn.HyperparameterNode( "model", tn.SequentialNode("seq", nodes), filter_size=(3, 3), inits=[treeano.inits.OrthogonalInit()], pad="same", ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", {
inputs = np.random.randint(0, 2, length).astype(fX) outputs = np.array(lag * [0] + list(inputs), dtype=fX)[:length] return inputs, outputs # ############################## prepare model ############################## model = tn.HyperparameterNode( "model", tn.SequentialNode("seq", [ tn.InputNode("x", shape=(None, 1)), tn.recurrent.SimpleRecurrentNode("srn", tn.TanhNode("nonlin"), batch_size=None, num_units=HIDDEN_STATE_SIZE), tn.scan.ScanNode("scan", tn.DenseNode("fc", num_units=1)), tn.SigmoidNode("pred"), ]), inits=[treeano.inits.NormalWeightInit(0.01)], batch_axis=None, scan_axis=0) with_updates = tn.HyperparameterNode( "with_updates", tn.SGDNode( "adam", { "subtree": model, "cost": tn.TotalCostNode( "cost",
def load_network(update_scale_factor):
    """
    builds and returns the network: a spatial transformer (with its updates
    scaled by update_scale_factor) followed by a convolutional classifier,
    trained with adam on categorical crossentropy
    """
    # localization network for the spatial transformer
    loc_layers = [
        tn.DnnMaxPoolNode("loc_pool1"),
        tn.DnnConv2DWithBiasNode("loc_conv1"),
        tn.DnnMaxPoolNode("loc_pool2"),
        bn.NoScaleBatchNormalizationNode("loc_bn1"),
        tn.ReLUNode("loc_relu1"),
        tn.DnnConv2DWithBiasNode("loc_conv2"),
        bn.NoScaleBatchNormalizationNode("loc_bn2"),
        tn.ReLUNode("loc_relu2"),
        tn.DenseNode("loc_fc1", num_units=50),
        bn.NoScaleBatchNormalizationNode("loc_bn3"),
        tn.ReLUNode("loc_relu3"),
        tn.DenseNode("loc_fc2",
                     num_units=6,
                     inits=[treeano.inits.NormalWeightInit(std=0.001)]),
    ]
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode("loc_seq", loc_layers),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )
    st_node = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))

    # scaling the updates of the spatial transformer
    # seems to be very helpful, to allow the classification
    # net to learn what to look for, before prematurely
    # looking
    scaled_st_node = tn.UpdateScaleNode(
        "st_update_scale",
        st_node,
        update_scale_factor=update_scale_factor)

    model_layers = [
        tn.InputNode("x", shape=(None, 1, 60, 60)),
        scaled_st_node,
        tn.Conv2DWithBiasNode("conv1"),
        tn.MaxPool2DNode("mp1"),
        bn.NoScaleBatchNormalizationNode("bn1"),
        tn.ReLUNode("relu1"),
        tn.Conv2DWithBiasNode("conv2"),
        tn.MaxPool2DNode("mp2"),
        bn.NoScaleBatchNormalizationNode("bn2"),
        tn.ReLUNode("relu2"),
        tn.GaussianDropoutNode("do1"),
        tn.DenseNode("fc1"),
        bn.NoScaleBatchNormalizationNode("bn3"),
        tn.ReLUNode("relu3"),
        tn.DenseNode("fc2", num_units=10),
        tn.SoftmaxNode("pred"),
    ]
    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode("seq", model_layers),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )

    cost = tn.TotalCostNode(
        "cost",
        {"pred": tn.ReferenceNode("pred_ref", reference="model"),
         "target": tn.InputNode("y", shape=(None,), dtype="int32")})
    optimizer = tn.AdamNode("adam", {"subtree": model, "cost": cost})
    with_updates = tn.HyperparameterNode(
        "with_updates",
        optimizer,
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )

    network = with_updates.network()
    # build eagerly to share weights
    network.build()
    return network
train, valid, test = canopy.sandbox.datasets.mnist() # ############################## prepare model ############################## # architecture: # - fully connected 10 units # - softmax # - the batch size can be provided as `None` to make the network # work for multiple different batch sizes model = tn.HyperparameterNode( "model", tn.SequentialNode( "seq", [tn.InputNode("x", shape=(None, 1, 28, 28)), tn.DenseNode("fc3", num_units=10), tn.SoftmaxNode("pred"), ]), inits=[treeano.inits.XavierNormalInit()], ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", {"subtree": model, "cost": tn.TotalCostNode("cost", { "pred": tn.ReferenceNode("pred_ref", reference="model"), "target": tn.InputNode("y", shape=(None,), dtype="int32")}, )}), cost_function=treeano.utils.categorical_crossentropy_i32,
highway.HighwayDenseNode( "highway%d" % i, tn.SequentialNode( "seq%d" % i, [ tn.ReLUNode("relu%d" % i), # tn.DropoutNode("do%d" % i) ]))) model = tn.HyperparameterNode( "model", tn.SequentialNode( "seq", [ tn.InputNode("x", shape=(None, 28 * 28)), tn.DenseNode("in_dense"), tn.ReLUNode("in_relu"), # tn.DropoutNode("in_do") ] + highway_layers + [tn.DenseNode("out_dense", num_units=10), tn.SoftmaxNode("pred")]), num_units=128, dropout_probability=0.5, inits=[treeano.inits.XavierNormalInit()], ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", { "subtree":
X, y, random_state=42) in_train = {"x": X_train, "y": y_train} in_valid = {"x": X_valid, "y": y_valid} # ############################## prepare model ############################## model = tn.HyperparameterNode( "model", tn.SequentialNode( "seq", [tn.InputNode("x", shape=(None, 28 * 28)), cp.AuxiliaryContractionPenaltyNode( "cp1", tn.SequentialNode( "cp_seq1", [tn.DenseNode("fc1"), # the cost has nan's when using ReLU's # TODO look into why tn.AbsNode("abs1")]), cost_weight=1e1), # the cost has nan's when this is enabled # TODO look into why # tn.DropoutNode("do1"), cp.AuxiliaryContractionPenaltyNode( "cp2", tn.SequentialNode( "cp_seq2", [tn.DenseNode("fc2"), # the cost has nan's when using ReLU's # TODO look into why tn.AbsNode("abs2")]),
# - ReLU # - 50% dropout # - fully connected 512 units # - ReLU # - 50% dropout # - fully connected 10 units # - softmax # - the batch size can be provided as `None` to make the network # work for multiple different batch sizes model = tn.HyperparameterNode( "model", tn.SequentialNode( "seq", [tn.InputNode("x", shape=(None, 28 * 28)), tn.DenseNode("fc1"), tn.ReLUNode("relu1"), # tn.DropoutNode("do1"), tn.DenseNode("fc2"), tn.ReLUNode("relu2"), # tn.DropoutNode("do2"), tn.ConcatenateNode( "concat", [tn.SequentialNode( "y_vars", [tn.DenseNode("fc_y", num_units=10), tn.SoftmaxNode("y_pred"), tn.AuxiliaryCostNode( "classification_cost", {"target": tn.InputNode("y", shape=(None,),