def test_dense_combine_node_uses_children():
    network1 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.MultiplyConstantNode("mul", value=2),
            tn.DenseCombineNode("fc",
                                [tn.IdentityNode("i1"),
                                 tn.IdentityNode("i2")],
                                num_units=6)
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()
    network2 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.DenseCombineNode("fc", [
                tn.MultiplyConstantNode("mul1", value=2),
                tn.MultiplyConstantNode("mul2", value=2)
            ], num_units=6)
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()

    x = np.random.randn(3, 4, 5).astype(fX)
    fn1 = network1.function(["in"], ["hp"])
    fn2 = network2.function(["in"], ["hp"])
    np.testing.assert_allclose(fn1(x), fn2(x))
def architecture_children(self):
    mean_seq_node = tn.SequentialNode(self.name + "_mean_seq", [
        tn.DnnMeanPoolNode(self.name + "_mean_pool"),
        tn.MultiplyConstantNode(self.name + "_mean_const_mult")
    ])
    max_seq_node = tn.SequentialNode(self.name + "_max_seq", [
        tn.DnnMaxPoolNode(self.name + "_max_pool"),
        tn.MultiplyConstantNode(self.name + "_max_const_mult")
    ])
    return [
        tn.ElementwiseSumNode(self.name + "_sum_mixed",
                              [max_seq_node, mean_seq_node])
    ]
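# A minimal NumPy sketch of the mixed pooling above, assuming 2x2
# non-overlapping pooling and that the two MultiplyConstantNode scales
# (hyperparameters supplied elsewhere) are `a` and `b`; all names here
# are hypothetical:
import numpy as np

def mixed_pool_2x2(x, a, b):
    h, w = x.shape
    blocks = x.reshape(h // 2, 2, w // 2, 2)
    # a * mean pool + b * max pool, elementwise over the pooled maps
    return a * blocks.mean(axis=(1, 3)) + b * blocks.max(axis=(1, 3))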
def test_auxiliary_cost_node():
    network = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("x", shape=(3, 4, 5)),
            tn.AuxiliaryCostNode(
                "cost1",
                {"target": tn.InputNode("y1", shape=(3, 4, 5))}),
            tn.AddConstantNode("a1", value=2),
            tn.AuxiliaryCostNode(
                "cost2",
                {"target": tn.InputNode("y2", shape=(3, 4, 5))}),
            tn.MultiplyConstantNode("m1", value=2),
            tn.AuxiliaryCostNode(
                "cost3",
                {"target": tn.InputNode("y3", shape=(3, 4, 5))}),
            tn.ConstantNode("const", value=0),
            tn.InputElementwiseSumNode("cost")
        ]),
        cost_reference="cost",
        cost_function=treeano.utils.squared_error,
    ).network()
    fn = network.function(["x", "y1", "y2", "y3"], ["cost"])
    x = np.random.rand(3, 4, 5).astype(fX)
    ys = [np.random.rand(3, 4, 5).astype(fX) for _ in range(3)]

    def mse(x, y):
        return ((x - y) ** 2).mean()

    # each auxiliary cost sees the running value of the main chain:
    # x at cost1, x + 2 at cost2, and 2 * (x + 2) at cost3
    expected_output = (mse(x, ys[0])
                       + mse(x + 2, ys[1])
                       + mse(2 * (x + 2), ys[2]))
    np.testing.assert_allclose(fn(x, *ys)[0], expected_output, rtol=1e-5)
def test_use_scheduled_hyperparameter():
    network1 = tn.OutputHyperparameterNode(
        "a",
        hyperparameter="foo").network(default_hyperparameters=dict(foo=101))
    network2 = tn.SequentialNode("s", [
        tn.OutputHyperparameterNode("a", hyperparameter="foo"),
        tn.MultiplyConstantNode("m", value=42)
    ]).network(default_hyperparameters=dict(foo=101))

    schedule = canopy.schedules.PiecewiseLinearSchedule([(1, 1), (10, 10)])
    sh_handler = canopy.handlers.schedule_hyperparameter(schedule, "foo")
    fn2 = canopy.handled_fn(
        network2,
        [canopy.handlers.use_scheduled_hyperparameter(sh_handler)],
        {},
        {"out": "s"})

    def callback(in_dict, result_dict):
        result_dict["out2"] = fn2(in_dict)["out"]

    fn1 = canopy.handled_fn(
        network1,
        [sh_handler, canopy.handlers.call_after_every(1, callback)],
        {},
        {"out": "a"})

    res = fn1({})
    nt.assert_equal(res, {"out": 1, "out2": 42})
    res = fn1({})
    nt.assert_equal(res, {"out": 2, "out2": 84})
def architecture_children(self):
    children = self.raw_children()
    gate = children["gate"]
    transform = children["transform"]

    # prepare gates
    transform_gate = tn.SequentialNode(
        self.name + "_transformgate",
        [
            gate,
            # add initial value as bias instead
            # TODO parameterize
            tn.AddConstantNode(self.name + "_biastranslation", value=-4),
            tn.SigmoidNode(self.name + "_transformgatesigmoid")
        ])
    # carry gate = 1 - transform gate
    carry_gate = tn.SequentialNode(self.name + "_carrygate", [
        tn.ReferenceNode(self.name + "_transformgateref",
                         reference=transform_gate.name),
        tn.MultiplyConstantNode(self.name + "_invert", value=-1),
        tn.AddConstantNode(self.name + "_add", value=1)
    ])
    # combine with gates
    gated_transform = tn.ElementwiseProductNode(
        self.name + "_gatedtransform", [transform_gate, transform])
    gated_carry = tn.ElementwiseProductNode(
        self.name + "_gatedcarry",
        [carry_gate, tn.IdentityNode(self.name + "_carry")])
    res = tn.ElementwiseSumNode(self.name + "_res",
                                [gated_carry, gated_transform])
    return [res]
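# What the gating above computes, as a NumPy sketch: with transform gate
# t = sigmoid(gate(x) - 4), the output is t * transform(x) + (1 - t) * x,
# the highway-network formulation. `gate_out` and `transform_out` are
# stand-ins for the outputs of the child nodes:
import numpy as np

def highway_combine(x, gate_out, transform_out):
    t = 1.0 / (1.0 + np.exp(-(gate_out - 4)))  # transform gate, biased by -4
    return t * transform_out + (1.0 - t) * x   # carry gate = 1 - t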
def architecture_children(self):
    gate_node = tn.SequentialNode(
        self.name + "_gate_seq",
        [
            batch_fold.AddAxisNode(self.name + "_add_axis", axis=2),
            batch_fold.FoldUnfoldAxisIntoBatchNode(
                self.name + "_batch_fold",
                # NOTE: using dnn conv, since pooling is normally strided
                # and the normal conv is slow with strides
                tn.DnnConv2DWithBiasNode(self.name + "_conv",
                                         num_filters=1),
                axis=1),
            batch_fold.RemoveAxisNode(self.name + "_remove_axis", axis=2),
            tn.SigmoidNode(self.name + "_gate_sigmoid")
        ])
    # inverse gate = 1 - gate
    inverse_gate_node = tn.SequentialNode(self.name + "_max_gate", [
        tn.ReferenceNode(self.name + "_gate_ref",
                         reference=gate_node.name),
        tn.MultiplyConstantNode(self.name + "_invert", value=-1),
        tn.AddConstantNode(self.name + "_add1", value=1)
    ])
    mean_node = tn.ElementwiseProductNode(
        self.name + "_mean_product",
        [tn.MeanPool2DNode(self.name + "_mean_pool"), gate_node])
    max_node = tn.ElementwiseProductNode(
        self.name + "_max_product",
        [tn.MaxPool2DNode(self.name + "_max_pool"), inverse_gate_node])
    return [
        tn.ElementwiseSumNode(self.name + "_sum", [mean_node, max_node])
    ]
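# NumPy sketch of the gated pooling above: a learned gate g (per pooled
# location, after the sigmoid) weights mean pooling by g and max pooling
# by 1 - g. 2x2 non-overlapping pooling is assumed for brevity:
import numpy as np

def gated_pool_2x2(x, g):
    h, w = x.shape
    blocks = x.reshape(h // 2, 2, w // 2, 2)
    mean_pool = blocks.mean(axis=(1, 3))
    max_pool = blocks.max(axis=(1, 3))
    return g * mean_pool + (1.0 - g) * max_pool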
def architecture_children(self):
    nodes = [
        tn.SqrNode(self.name + "_sqr"),
        self.raw_children(),
        # convert mean pool to sum pool by multiplying by pool size
        tn.MultiplyConstantNode(self.name + "_mul"),
        tn.SqrtNode(self.name + "_sqrt"),
    ]
    return [tn.SequentialNode(self.name + "_sequential", nodes)]
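# The square / pool / scale / sqrt sequence above appears to implement an
# L2 pool, sqrt(sum_pool(x ** 2)), with sum pooling recovered from the
# wrapped mean pool by multiplying by the pool size (the job of the
# unvalued MultiplyConstantNode). A NumPy sketch assuming a 2x2 pool:
import numpy as np

def l2_pool_2x2(x):
    h, w = x.shape
    mean_pool = (x ** 2).reshape(h // 2, 2, w // 2, 2).mean(axis=(1, 3))
    return np.sqrt(mean_pool * 4)  # 4 == pool size (2 * 2)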
def test_auxiliary_node():
    network = tn.SequentialNode(
        "s",
        [tn.InputNode("i", shape=()),
         tn.AuxiliaryNode("a", tn.MultiplyConstantNode("m", value=2))]
    ).network()
    fn = network.function(["i"], ["s", "a", "m"])
    np.testing.assert_equal(np.array(fn(3.2)),
                            np.array([3.2, 3.2, 6.4], dtype=fX))
def architecture_children(self):
    return [
        tn.AuxiliaryNode(
            self.name + "_auxiliary",
            tn.SequentialNode(
                self.name + "_sequential",
                [ElementwiseKLSparsityPenaltyNode(
                    self.name + "_sparsitypenalty"),
                 tn.AggregatorNode(self.name + "_aggregator"),
                 tn.MultiplyConstantNode(self.name + "_multiplyweight"),
                 tn.SendToNode(self.name + "_sendto",
                               to_key=self.name)]))]
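# A sketch of the quantity such a sparsity penalty typically aggregates,
# assuming the usual sparse-autoencoder KL form with target sparsity rho
# (the exact form ElementwiseKLSparsityPenaltyNode uses may differ):
import numpy as np

def kl_sparsity_penalty(activations, rho=0.05, eps=1e-8):
    rho_hat = activations.mean(axis=0)  # mean activation per hidden unit
    return (rho * np.log(rho / (rho_hat + eps))
            + (1 - rho) * np.log((1 - rho) / (1 - rho_hat + eps)))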
def test_graph_node_no_input():
    network = tn.GraphNode(
        "g",
        [(tn.InputNode("i", shape=()),
          tn.MultiplyConstantNode("m1", value=2),
          tn.AddConstantNode("a1", value=2)),
         [{"from": "i", "to": "a1"},
          {"from": "a1", "to": "m1"},
          {"from": "m1"}]]
    ).network()
    fn = network.function(["i"], ["g"])
    # (3 + 2) * 2 == 10
    nt.assert_equal([10], fn(3))
def test_graph_node_no_output_key():
    network = tn.SequentialNode(
        "s",
        [tn.InputNode("i", shape=()),
         tn.GraphNode("g",
                      [(tn.MultiplyConstantNode("m1", value=2),
                        tn.AddConstantNode("a1", value=2)),
                       [{"to": "a1"},
                        {"from": "a1", "to": "m1"},
                        {"from": "m1", "to_key": "foo"}]])]
    ).network()
    fn = network.function(["i"], ["s"])
    # no output_key is given, so the graph's output is its unmodified input
    nt.assert_equal([3], fn(3))
def test_graph_node():
    network = tn.GraphNode(
        "g1",
        [[tn.InputNode("i", shape=()),
          tn.GraphNode("g2",
                       [(tn.MultiplyConstantNode("m1", value=2),
                         tn.AddConstantNode("a1", value=2)),
                        [{"to": "a1"},
                         {"from": "a1", "to": "m1"},
                         {"from": "m1", "to_key": "foo"}]],
                       output_key="foo")],
         [{"from": "i", "to": "g2"},
          {"from": "g2", "to_key": "bar"}]],
        output_key="bar"
    ).network()
    fn = network.function(["i"], ["a1", "m1", "g1", "g2"])
    # a1 = 3 + 2 = 5, m1 = 5 * 2 = 10, and both graphs output m1's value
    nt.assert_equal([5, 10, 10, 10], fn(3))
def architecture_children(self):
    inner = self.raw_children()
    input_node = tn.IdentityNode(self.name + "_input")
    return [
        tn.SequentialNode(self.name + "_sequential", [
            input_node,
            inner,
            tn.AuxiliaryNode(
                self.name + "_auxiliary",
                tn.SequentialNode(self.name + "_innerseq", [
                    ElementwiseContractionPenaltyNode(
                        self.name + "_contractionpenalty",
                        input_reference=input_node.name),
                    tn.AggregatorNode(self.name + "_aggregator"),
                    tn.MultiplyConstantNode(self.name + "_multiplyweight"),
                    tn.SendToNode(self.name + "_sendto",
                                  to_key=self.name)
                ]))
        ])
    ]
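# Sketch of the penalty being wired up above: for a layer h = f(x), the
# contraction penalty of a contractive autoencoder is the squared Frobenius
# norm of the Jacobian dh/dx. The closed form below assumes a sigmoid dense
# layer with weights W; ElementwiseContractionPenaltyNode may compute it
# differently:
import numpy as np

def contraction_penalty_sigmoid(h, W):
    # h: (batch, units) sigmoid outputs, W: (inputs, units) weight matrix
    # ||J||_F^2 = sum_j (h_j * (1 - h_j))^2 * sum_i W_ij^2, per sample
    return ((h * (1 - h)) ** 2 @ (W ** 2).sum(axis=0)).mean()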
def test_dense_node_and_dense_combine_node2():
    # testing that summing the output of 2 dense nodes is the same as
    # applying a dense combine node with 2 identities (+ bias)
    # and the same as multiplying the output of 1 dense node by 2
    network0 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.DenseNode("dense1", num_units=6),
            tn.MultiplyConstantNode("mul", value=2)
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()
    network1 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.ElementwiseSumNode("sum", [
                tn.DenseNode("dense1", num_units=6),
                tn.DenseNode("dense2", num_units=6)
            ])
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()
    network2 = tn.HyperparameterNode(
        "hp",
        tn.SequentialNode("seq", [
            tn.InputNode("in", shape=(3, 4, 5)),
            tn.DenseCombineNode("fc",
                                [tn.IdentityNode("i1"),
                                 tn.IdentityNode("i2")],
                                num_units=6),
            tn.AddBiasNode("bias")
        ]),
        inits=[treeano.inits.ConstantInit(1)]).network()

    x = np.random.randn(3, 4, 5).astype(fX)
    fn0 = network0.function(["in"], ["hp"])
    fn1 = network1.function(["in"], ["hp"])
    fn2 = network2.function(["in"], ["hp"])
    np.testing.assert_allclose(fn0(x), fn1(x))
    np.testing.assert_allclose(fn0(x), fn2(x))
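# With ConstantInit(1), all three networks reduce to the same linear
# algebra: two summed dense layers with identical all-ones weights double
# the pre-activation, i.e. x @ W + x @ W == x @ (2 * W). A NumPy check
# (the (3, 4, 5) input flattens to 20 features per sample):
import numpy as np

x = np.random.randn(3, 20)
W = np.ones((20, 6))
assert np.allclose(x @ W + x @ W, 2 * (x @ W))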
"xcov_cost", {"target": tn.ReferenceNode("y_ref", reference="y_pred")}, cost_function=cross_covariance)])], axis=1), tn.DenseNode("fc3"), tn.ReLUNode("relu3"), tn.DenseNode("fc4"), tn.ReLUNode("relu4"), tn.DenseNode("reconstruction", num_units=28 * 28), tn.TotalCostNode( "cost", {"pred": tn.IdentityNode("recon_id"), "target": tn.ReferenceNode("in_ref", reference="x")}, cost_function=treeano.utils.squared_error), tn.MultiplyConstantNode("mul_reconstruction_error", value=0.1), tn.InputElementwiseSumNode("total_cost")]), num_units=512, cost_reference="total_cost", dropout_probability=0.5, inits=[treeano.inits.XavierNormalInit()], ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", {"subtree": model, "cost": tn.ReferenceNode("cost_ref", reference="total_cost")}), ) network = with_updates.network()