def architecture_children(self):
    """Wire up a highway unit: out = T(x) * H(x) + (1 - T(x)) * x.

    ``T`` is the transform gate (sigmoid of the "gate" child, shifted
    negative so the unit initially mostly carries its input through),
    ``H`` is the "transform" child, and the carry path is the identity.
    """
    raw = self.raw_children()
    gate_child = raw["gate"]
    transform_child = raw["transform"]

    # prepare gates
    transform_gate = tn.SequentialNode(
        self.name + "_transformgate",
        [gate_child,
         # add initial value as bias instead
         # TODO parameterize
         tn.AddConstantNode(self.name + "_biastranslation", value=-4),
         tn.SigmoidNode(self.name + "_transformgatesigmoid")])

    # carry gate = 1 - transform gate
    carry_gate = tn.SequentialNode(
        self.name + "_carrygate",
        [tn.ReferenceNode(self.name + "_transformgateref",
                          reference=transform_gate.name),
         tn.MultiplyConstantNode(self.name + "_invert", value=-1),
         tn.AddConstantNode(self.name + "_add", value=1)])

    # combine with gates
    gated_transform = tn.ElementwiseProductNode(
        self.name + "_gatedtransform",
        [transform_gate, transform_child])
    gated_carry = tn.ElementwiseProductNode(
        self.name + "_gatedcarry",
        [carry_gate, tn.IdentityNode(self.name + "_carry")])

    summed = tn.ElementwiseSumNode(self.name + "_res",
                                   [gated_carry, gated_transform])
    return [summed]
def architecture_children(self):
    """Wire up a learned blend of mean- and max-pooling.

    Computes ``gate * mean_pool(x) + (1 - gate) * max_pool(x)``, where
    the gate is a per-location sigmoid of a 1-filter convolution applied
    independently to each channel (by folding the channel axis into the
    batch axis around the conv).
    """
    gate_node = tn.SequentialNode(
        self.name + "_gate_seq",
        [batch_fold.AddAxisNode(self.name + "_add_axis", axis=2),
         batch_fold.FoldUnfoldAxisIntoBatchNode(
             self.name + "_batch_fold",
             # NOTE: using dnn conv, since pooling is normally strided
             # and the normal conv is slow with strides
             tn.DnnConv2DWithBiasNode(self.name + "_conv",
                                      num_filters=1),
             axis=1),
         batch_fold.RemoveAxisNode(self.name + "_remove_axis", axis=2),
         tn.SigmoidNode(self.name + "_gate_sigmoid")])
    # inverse gate = 1 - gate
    inverse_gate_node = tn.SequentialNode(
        self.name + "_max_gate",
        [tn.ReferenceNode(self.name + "_gate_ref",
                          reference=gate_node.name),
         # FIX: node was named self.name + "_" (empty suffix) — a
         # collision-prone typo; use "_invert" to match the convention
         # of the other gated nodes in this file
         tn.MultiplyConstantNode(self.name + "_invert", value=-1),
         tn.AddConstantNode(self.name + "_add1", value=1)])
    mean_node = tn.ElementwiseProductNode(
        self.name + "_mean_product",
        [tn.MeanPool2DNode(self.name + "_mean_pool"), gate_node])
    max_node = tn.ElementwiseProductNode(
        self.name + "_max_product",
        [tn.MaxPool2DNode(self.name + "_max_pool"), inverse_gate_node])
    return [tn.ElementwiseSumNode(self.name + "_sum",
                                  [mean_node, max_node])]
def forget_gate_conv_2d_node(name, num_filters, filter_size=(3, 3),
                             initial_bias=0):
    """Multiply the input elementwise by a learned convolutional gate.

    The gate is ``sigmoid(conv2d(x) + initial_bias)`` with a "same"-padded,
    stride-1 convolution, so the output is ``x * gate`` with unchanged
    spatial shape.
    """
    passthrough = tn.IdentityNode(name + "_identity")
    conv_gate = tn.SequentialNode(
        name + "_forget",
        [tn.Conv2DWithBiasNode(name + "_conv",
                               num_filters=num_filters,
                               filter_size=filter_size,
                               stride=(1, 1),
                               pad="same"),
         tn.AddConstantNode(name + "_initial_bias", value=initial_bias),
         tn.SigmoidNode(name + "_sigmoid")])
    return tn.ElementwiseProductNode(name, [passthrough, conv_gate])
# also rescaling to [0, 1] instead of [0, 255] X = mnist['data'].astype(fX) / 255.0 y = mnist['target'].astype("int32") X_train, X_valid, y_train, y_valid = sklearn.cross_validation.train_test_split( X, y, random_state=42) in_train = {"x": X_train, "y": y_train} in_valid = {"x": X_valid, "y": y_valid} # ############################## prepare model ############################## model = tn.HyperparameterNode( "model", tn.SequentialNode("seq", [ tn.InputNode("x", shape=(None, 28 * 28)), tn.DenseNode("fc1"), tn.SigmoidNode("sigmoid1"), sp.AuxiliaryKLSparsityPenaltyNode("sp1", cost_weight=1e1), tn.DropoutNode("do1"), tn.DenseNode("fc2"), tn.SigmoidNode("sigmoid2"), sp.AuxiliaryKLSparsityPenaltyNode("sp2", cost_weight=1e1), tn.DropoutNode("do2"), tn.DenseNode("fc3", num_units=10), tn.SoftmaxNode("pred"), tn.TotalCostNode( "cost", { "pred": tn.IdentityNode("pred_id"), "target": tn.InputNode("y", shape=(None, ), dtype="int32") }, cost_function=treeano.utils.categorical_crossentropy_i32), tn.InputElementwiseSumNode("total_cost")
inputs.append(i) outputs.append(o) return np.array(inputs)[..., np.newaxis], np.array(outputs)[..., np.newaxis] # ############################## prepare model ############################## model = tn.HyperparameterNode( "model", tn.SequentialNode( "seq", [tn.InputNode("x", shape=(None, None, 1)), recurrent_hc.GRUNode("gru1"), tn.LinearMappingNode("y_linear", output_dim=1), tn.AddBiasNode("y_bias", broadcastable_axes=(0, 1)), tn.SigmoidNode("sigmoid"), ]), inits=[treeano.inits.OrthogonalInit()], num_units=HIDDEN_STATE_SIZE, learn_init=True, grad_clip=1, ) with_updates = tn.HyperparameterNode( "with_updates", tn.AdamNode( "adam", {"subtree": model, "cost": tn.TotalCostNode("cost", { "pred": tn.ReferenceNode("pred_ref", reference="model"), "target": tn.InputNode("y", shape=(None, None, 1))},
outputs = np.array(lag * [0] + list(inputs), dtype=fX)[:length] return inputs, outputs # ############################## prepare model ############################## model = tn.HyperparameterNode( "model", tn.SequentialNode("seq", [ tn.InputNode("x", shape=(None, 1)), tn.recurrent.SimpleRecurrentNode("srn", tn.TanhNode("nonlin"), batch_size=None, num_units=HIDDEN_STATE_SIZE), tn.scan.ScanNode("scan", tn.DenseNode("fc", num_units=1)), tn.SigmoidNode("pred"), ]), inits=[treeano.inits.NormalWeightInit(0.01)], batch_axis=None, scan_axis=0) with_updates = tn.HyperparameterNode( "with_updates", tn.SGDNode( "adam", { "subtree": model, "cost": tn.TotalCostNode( "cost", {