Example #1
    def architecture_children(self):
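        # highway-style gating: the output is
        #   carry_gate * input + transform_gate * transform(input)
        # with carry_gate = 1 - transform_gate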
        children = self.raw_children()
        gate = children["gate"]
        transform = children["transform"]

        # prepare gates
        transform_gate = tn.SequentialNode(
            self.name + "_transformgate",
            [
                gate,
                # add initial value as bias instead
                # TODO parameterize
                tn.AddConstantNode(self.name + "_biastranslation", value=-4),
                tn.SigmoidNode(self.name + "_transformgatesigmoid")
            ])
        # carry gate = 1 - transform gate
        carry_gate = tn.SequentialNode(self.name + "_carrygate", [
            tn.ReferenceNode(self.name + "_transformgateref",
                             reference=transform_gate.name),
            tn.MultiplyConstantNode(self.name + "_invert", value=-1),
            tn.AddConstantNode(self.name + "_add", value=1)
        ])

        # combine with gates
        gated_transform = tn.ElementwiseProductNode(
            self.name + "_gatedtransform", [transform_gate, transform])
        gated_carry = tn.ElementwiseProductNode(
            self.name + "_gatedcarry",
            [carry_gate, tn.IdentityNode(self.name + "_carry")])
        res = tn.ElementwiseSumNode(self.name + "_res",
                                    [gated_carry, gated_transform])
        return [res]
Example #2
    def architecture_children(self):
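        # gated pooling: a 1-filter conv + sigmoid produces a gate, and the
        # output mixes the two pooling results as
        #   gate * mean_pool(x) + (1 - gate) * max_pool(x)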
        gate_node = tn.SequentialNode(
            self.name + "_gate_seq",
            [
                batch_fold.AddAxisNode(self.name + "_add_axis", axis=2),
                batch_fold.FoldUnfoldAxisIntoBatchNode(
                    self.name + "_batch_fold",
                    # NOTE: using dnn conv, since pooling is normally strided
                    # and the normal conv is slow with strides
                    tn.DnnConv2DWithBiasNode(self.name + "_conv",
                                             num_filters=1),
                    axis=1),
                batch_fold.RemoveAxisNode(self.name + "_remove_axis", axis=2),
                tn.SigmoidNode(self.name + "_gate_sigmoid")
            ])

        inverse_gate_node = tn.SequentialNode(self.name + "_max_gate", [
            tn.ReferenceNode(self.name + "_gate_ref",
                             reference=gate_node.name),
            tn.MultiplyConstantNode(self.name + "_", value=-1),
            tn.AddConstantNode(self.name + "_add1", value=1)
        ])

        mean_node = tn.ElementwiseProductNode(
            self.name + "_mean_product",
            [tn.MeanPool2DNode(self.name + "_mean_pool"), gate_node])

        max_node = tn.ElementwiseProductNode(
            self.name + "_max_product",
            [tn.MaxPool2DNode(self.name + "_max_pool"), inverse_gate_node])

        return [
            tn.ElementwiseSumNode(self.name + "_sum", [mean_node, max_node])
        ]
Example #3
def forget_gate_conv_2d_node(name,
                             num_filters,
                             filter_size=(3, 3),
                             initial_bias=0):
    return tn.ElementwiseProductNode(name, [
        tn.IdentityNode(name + "_identity"),
        tn.SequentialNode(name + "_forget", [
            tn.Conv2DWithBiasNode(name + "_conv",
                                  num_filters=num_filters,
                                  filter_size=filter_size,
                                  stride=(1, 1),
                                  pad="same"),
            tn.AddConstantNode(name + "_initial_bias", value=initial_bias),
            tn.SigmoidNode(name + "_sigmoid")
        ])
    ])
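A minimal sketch of how this helper might be composed, following the node patterns in the examples above; the input shape and names here are hypothetical, and num_filters is chosen to match the input's channel count so the elementwise product lines up:

model = tn.SequentialNode("net", [
    # hypothetical 16-channel feature map input
    tn.InputNode("x", shape=(None, 16, 32, 32)),
    forget_gate_conv_2d_node("forget_gate", num_filters=16),
])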
Example #4
# also rescaling to [0, 1] instead of [0, 255]
X = mnist['data'].astype(fX) / 255.0
y = mnist['target'].astype("int32")
X_train, X_valid, y_train, y_valid = sklearn.cross_validation.train_test_split(
    X, y, random_state=42)
in_train = {"x": X_train, "y": y_train}
in_valid = {"x": X_valid, "y": y_valid}

# ############################## prepare model ##############################
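# MLP on MNIST: two sigmoid hidden layers, each followed by an auxiliary KL
# sparsity penalty and dropout, then a 10-way softmax; the classification
# cost and the auxiliary sparsity costs are summed into "total_cost"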

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 28 * 28)),
        tn.DenseNode("fc1"),
        tn.SigmoidNode("sigmoid1"),
        sp.AuxiliaryKLSparsityPenaltyNode("sp1", cost_weight=1e1),
        tn.DropoutNode("do1"),
        tn.DenseNode("fc2"),
        tn.SigmoidNode("sigmoid2"),
        sp.AuxiliaryKLSparsityPenaltyNode("sp2", cost_weight=1e1),
        tn.DropoutNode("do2"),
        tn.DenseNode("fc3", num_units=10),
        tn.SoftmaxNode("pred"),
        tn.TotalCostNode(
            "cost", {
                "pred": tn.IdentityNode("pred_id"),
                "target": tn.InputNode("y", shape=(None, ), dtype="int32")
            },
            cost_function=treeano.utils.categorical_crossentropy_i32),
        tn.InputElementwiseSumNode("total_cost")
Example #5
        inputs.append(i)
        outputs.append(o)
    return np.array(inputs)[..., np.newaxis], np.array(outputs)[..., np.newaxis]


# ############################## prepare model ##############################
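# sequence model: GRU hidden state -> linear mapping -> bias -> sigmoid,
# with orthogonal initialization, learned initial state, gradient clipping,
# and Adam updates on a cost against the target sequence "y"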

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode(
        "seq",
        [tn.InputNode("x", shape=(None, None, 1)),
         recurrent_hc.GRUNode("gru1"),
         tn.LinearMappingNode("y_linear", output_dim=1),
         tn.AddBiasNode("y_bias", broadcastable_axes=(0, 1)),
         tn.SigmoidNode("sigmoid"),
         ]),
    inits=[treeano.inits.OrthogonalInit()],
    num_units=HIDDEN_STATE_SIZE,
    learn_init=True,
    grad_clip=1,
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam",
        {"subtree": model,
         "cost": tn.TotalCostNode("cost", {
             "pred": tn.ReferenceNode("pred_ref", reference="model"),
             "target": tn.InputNode("y", shape=(None, None, 1))},
Example #6
    outputs = np.array(lag * [0] + list(inputs), dtype=fX)[:length]
    return inputs, outputs


# ############################## prepare model ##############################
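# simple recurrent model: a tanh SimpleRecurrentNode feeds a per-timestep
# DenseNode via ScanNode, followed by a sigmoid; updates come from SGDNode
# (which is named "adam" here despite being plain SGD)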

model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 1)),
        tn.recurrent.SimpleRecurrentNode("srn",
                                         tn.TanhNode("nonlin"),
                                         batch_size=None,
                                         num_units=HIDDEN_STATE_SIZE),
        tn.scan.ScanNode("scan", tn.DenseNode("fc", num_units=1)),
        tn.SigmoidNode("pred"),
    ]),
    inits=[treeano.inits.NormalWeightInit(0.01)],
    batch_axis=None,
    scan_axis=0)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.SGDNode(
        "adam", {
            "subtree":
            model,
            "cost":
            tn.TotalCostNode(
                "cost",
                {