def architecture_children(self):
    # TODO set LRN n = num_filters / 8 + 1
    nodes = [
        # NOTE: not explicitly giving the first conv a pad of "same",
        # since the first conv can have any output shape
        tn.DnnConv2DWithBiasNode(self.name + "_conv0"),
        tn.IdentityNode(self.name + "_z0"),
        tn.ReLUNode(self.name + "_z0_relu"),
        lrn.LocalResponseNormalizationNode(self.name + "_z0_lrn"),
        tn.IdentityNode(self.name + "_x0"),
    ]
    for t in range(1, self.steps + 1):
        nodes += [
            tn.DnnConv2DWithBiasNode(self.name + "_conv%d" % t,
                                     stride=(1, 1),
                                     pad="same"),
            tn.ElementwiseSumNode(self.name + "_sum%d" % t, [
                tn.ReferenceNode(self.name + "_sum%d_curr" % t,
                                 reference=self.name + "_conv%d" % t),
                tn.ReferenceNode(self.name + "_sum%d_prev" % t,
                                 reference=self.name + "_z0")
            ]),
            tn.IdentityNode(self.name + "_z%d" % t),
            tn.ReLUNode(self.name + "_z%d_relu" % t),
            lrn.LocalResponseNormalizationNode(self.name + "_z%d_lrn" % t),
            tn.IdentityNode(self.name + "_x%d" % t),
        ]
    return [tn.SequentialNode(self.name + "_sequential", nodes)]
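
A minimal NumPy sketch of the recurrence this method unrolls (the
`conv0`, `convs`, and `lrn` callables are hypothetical stand-ins for the
conv and LRN nodes; this is an illustration, not the treeano execution path):

import numpy as np

def relu(a):
    return np.maximum(a, 0)

def unrolled_forward(x, conv0, convs, lrn):
    z0 = conv0(x)                # "_conv0" -> "_z0"
    x_t = lrn(relu(z0))          # "_z0_relu" -> "_z0_lrn" -> "_x0"
    for conv_t in convs:         # one conv per step t = 1..steps
        z_t = conv_t(x_t) + z0   # ElementwiseSumNode: "_conv%d" + "_z0"
        x_t = lrn(relu(z_t))     # "_z%d_relu" -> "_z%d_lrn" -> "_x%d"
    return x_t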
Example #2
    def architecture_children(self):
        gate_node = tn.SequentialNode(
            self.name + "_gate_seq",
            [
                batch_fold.AddAxisNode(self.name + "_add_axis", axis=2),
                batch_fold.FoldUnfoldAxisIntoBatchNode(
                    self.name + "_batch_fold",
                    # NOTE: using dnn conv, since pooling is normally strided
                    # and the normal conv is slow with strides
                    tn.DnnConv2DWithBiasNode(self.name + "_conv",
                                             num_filters=1),
                    axis=1),
                batch_fold.RemoveAxisNode(self.name + "_remove_axis", axis=2),
                tn.SigmoidNode(self.name + "_gate_sigmoid")
            ])

        inverse_gate_node = tn.SequentialNode(self.name + "_max_gate", [
            tn.ReferenceNode(self.name + "_gate_ref",
                             reference=gate_node.name),
            tn.MultiplyConstantNode(self.name + "_mul_neg1", value=-1),
            tn.AddConstantNode(self.name + "_add1", value=1)
        ])

        mean_node = tn.ElementwiseProductNode(
            self.name + "_mean_product",
            [tn.MeanPool2DNode(self.name + "_mean_pool"), gate_node])

        max_node = tn.ElementwiseProductNode(
            self.name + "_max_product",
            [tn.MaxPool2DNode(self.name + "_max_pool"), inverse_gate_node])

        return [
            tn.ElementwiseSumNode(self.name + "_sum", [mean_node, max_node])
        ]
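
The wiring above computes a learned mix of mean and max pooling: a 1-filter
convolution followed by a sigmoid produces a gate g, the mean pool is scaled
by g, the max pool by 1 - g (built as g * -1 + 1), and the two are summed.
A minimal sketch, with hypothetical `gate_conv`, `mean_pool`, and `max_pool`
callables standing in for the corresponding nodes:

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def gated_pool(x, gate_conv, mean_pool, max_pool):
    g = sigmoid(gate_conv(x))                        # "_gate_seq"
    return g * mean_pool(x) + (1 - g) * max_pool(x)  # "_sum"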
Example #3
    def architecture_children(self):
        children = self.raw_children()
        gate = children["gate"]
        transform = children["transform"]

        # prepare gates
        transform_gate = tn.SequentialNode(
            self.name + "_transformgate",
            [
                gate,
                # add initial value as bias instead
                # TODO parameterize
                tn.AddConstantNode(self.name + "_biastranslation", value=-4),
                tn.SigmoidNode(self.name + "_transformgatesigmoid")
            ])
        # carry gate = 1 - transform gate
        carry_gate = tn.SequentialNode(self.name + "_carrygate", [
            tn.ReferenceNode(self.name + "_transformgateref",
                             reference=transform_gate.name),
            tn.MultiplyConstantNode(self.name + "_invert", value=-1),
            tn.AddConstantNode(self.name + "_add", value=1)
        ])

        # combine with gates
        gated_transform = tn.ElementwiseProductNode(
            self.name + "_gatedtransform", [transform_gate, transform])
        gated_carry = tn.ElementwiseProductNode(
            self.name + "_gatedcarry",
            [carry_gate, tn.IdentityNode(self.name + "_carry")])
        res = tn.ElementwiseSumNode(self.name + "_res",
                                    [gated_carry, gated_transform])
        return [res]
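
This is the highway-network gating pattern: with transform gate
t = sigmoid(gate(x) - 4), the output is t * transform(x) + (1 - t) * x.
The -4 bias translation pushes t toward zero at initialization
(sigmoid(-4) is roughly 0.018), so the layer starts out close to the
identity and gradually learns to transform. A minimal sketch with
hypothetical `gate_fn` and `transform_fn` callables:

import numpy as np

def highway(x, gate_fn, transform_fn, bias=-4):
    t = 1.0 / (1.0 + np.exp(-(gate_fn(x) + bias)))  # transform gate (sigmoid)
    return t * transform_fn(x) + (1 - t) * x        # carry gate = 1 - t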
Example #4
def test_batch_normalization_node():
    network = tn.AdamNode(
        "adam", {
            "subtree":
            tn.SequentialNode("seq", [
                tn.InputNode("x", shape=(None, 10)),
                batch_normalization.BatchNormalizationNode("bn"),
                tn.DenseNode("d", num_units=1),
            ]),
            "cost":
            tn.TotalCostNode(
                "cost", {
                    "target": tn.InputNode("y", shape=(None, 1)),
                    "pred": tn.ReferenceNode("pred_ref", reference="d"),
                },
                cost_function=treeano.utils.squared_error)
        }).network()

    fn = network.function(["x", "y"], ["cost"], include_updates=True)

    x = 100 + 100 * np.random.randn(100, 10).astype(fX)
    y = np.random.randn(100, 1).astype(fX)
    prev_cost = fn(x, y)[0]
    for _ in range(3):
        cost = fn(x, y)[0]
        assert cost < prev_cost
        prev_cost = cost
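
The inputs are deliberately far from zero mean and unit variance (100 +- 100);
batch normalization standardizes each feature before the dense layer, which is
what the test's decreasing-cost assertion relies on. A sketch of the
normalization being applied (assuming x is a NumPy array; gamma and beta are
the learned scale and shift):

def batch_norm(x, gamma=1.0, beta=0.0, eps=1e-8):
    mu = x.mean(axis=0, keepdims=True)
    sigma = x.std(axis=0, keepdims=True)
    return gamma * (x - mu) / (sigma + eps) + beta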
Example #5
def test_reference_node():
    network = tn.SequentialNode("s", [
        tn.InputNode("input1", shape=(3, 4, 5)),
        tn.InputNode("input2", shape=(5, 4, 3)),
        tn.ReferenceNode("ref", reference="input1"),
    ]).network()

    fn = network.function(["input1"], ["ref"])
    x = np.random.randn(3, 4, 5).astype(fX)
    np.testing.assert_allclose(fn(x)[0], x)
Example #6
def test_affine_spatial_transformer_node_build():
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DenseNode("loc_fc1", num_units=50),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.ZeroInit()])]),
        num_filters=32,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             spatial_transformer.AffineSpatialTransformerNode(
                 "st",
                 localization_network,
                 output_shape=(20, 20)),
             tn.DenseNode("fc1"),
             tn.ReLUNode("relu1"),
             tn.DropoutNode("do1"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeNormalInit()],
    )

    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode("cost", {
                 "pred": tn.ReferenceNode("pred_ref", reference="model"),
                 "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
    )
    network = with_updates.network()
    network.build()  # build eagerly to share weights
Example #7
def GradNetOptimizerInterpolationNode(name, children, early, late, **kwargs):
    """
    interpolates updates from 2 optimizers nodes

    NOTE: this is a hack to take in node constructors as arguments
    """
    assert set(children.keys()) == {"subtree", "cost"}
    subtree = children["subtree"]
    cost = children["cost"]

    cost_ref = tn.ReferenceNode(name + "_costref", reference=cost.name)
    late_subtree = tn.UpdateScaleNode(name + "_late_update_scale", subtree)
    late_node = late(name + "_late", {"subtree": late_subtree, "cost": cost})
    early_subtree = tn.UpdateScaleNode(name + "_early_update_scale", late_node)
    early_node = early(name + "_early", {
        "subtree": early_subtree,
        "cost": cost_ref
    })
    # NOTE: need separate node to forward hyperparameter
    return _GradNetOptimizerInterpolationNode(name, early_node, **kwargs)
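
A plausible reading of this wiring, consistent with the test in Example #10
below: the two UpdateScaleNodes scale the deltas produced by the early and
late optimizers by complementary gates, so the applied update interpolates
between the two. As a sketch:

def interpolated_delta(early_delta, late_delta, late_gate):
    # late_gate = 0 -> pure early updates; late_gate = 1 -> pure late updates
    return (1 - late_gate) * early_delta + late_gate * late_delta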
Example #8
def test_save_last_inputs_and_networks():
    class StateDiffNode(treeano.NodeImpl):
        def compute_output(self, network, in_vw):
            foo_vw = network.create_vw("foo",
                                       shape=(),
                                       is_shared=True,
                                       tags={"parameter", "weight"},
                                       inits=[])
            network.create_vw("default",
                              variable=abs(in_vw.variable - foo_vw.variable),
                              shape=())

    network = tn.AdamNode(
        "adam", {
            "subtree":
            tn.SequentialNode(
                "s", [tn.InputNode("i", shape=()),
                      StateDiffNode("ss")]),
            "cost":
            tn.ReferenceNode("r", reference="s")
        }).network()
    # eagerly create shared variables
    network.build()

    save_handler = canopy.handlers.save_last_inputs_and_networks(5)
    fn = canopy.handlers.handled_fn(network, [save_handler], {"x": "i"},
                                    {"out": "s"},
                                    include_updates=True)

    inputs = [{"x": treeano.utils.as_fX(np.random.randn())} for _ in range(10)]
    outputs = [fn(i) for i in inputs]

    nt.assert_equal(save_handler.inputs_, inputs[-5:])

    # PY3: calling list on zip to make it eager
    # otherwise, save_handler.value_dicts_ looks at the mutating
    # value dicts
    for value_dict, i, o in list(
            zip(save_handler.value_dicts_, inputs[-5:], outputs[-5:])):
        canopy.network_utils.load_value_dict(network, value_dict)
        nt.assert_equal(o, fn(i))
Example #9
def test_anrat_node():
    network = tn.AdamNode(
        "adam", {
            "subtree":
            tn.InputNode("x", shape=(None, 1)),
            "cost":
            anrat.ANRATNode(
                "cost", {
                    "target": tn.InputNode("y", shape=(None, 1)),
                    "pred": tn.ReferenceNode("pred_ref", reference="x"),
                })
        }).network()

    fn = network.function(["x", "y"], ["cost"], include_updates=True)

    for x_raw, y_raw in [(3.4, 2), (4.2, 4.2)]:
        x = np.array([[x_raw]], dtype=fX)
        y = np.array([[y_raw]], dtype=fX)
        prev_cost = fn(x, y)[0]
        for _ in range(3):
            cost = fn(x, y)[0]
            assert cost < prev_cost
            prev_cost = cost
Example #10
def test_grad_net_optimizer_interpolation_node():
    class StateNode(treeano.NodeImpl):
        input_keys = ()

        def compute_output(self, network):
            network.create_vw(
                name="default",
                shape=(),
                is_shared=True,
                tags=["parameter"],
                inits=[],
            )

    def updater(const):
        class UpdaterNode(treeano.nodes.updates.StandardUpdatesNode):
            def _new_update_deltas(self, network, vws, grads):
                return treeano.UpdateDeltas({vw.variable: const for vw in vws})

        return UpdaterNode

    network = tn.SharedHyperparameterNode(
        "n",
        gradnet.GradNetOptimizerInterpolationNode(
            "g", {
                "subtree": StateNode("s"),
                "cost": tn.ReferenceNode("r", reference="s")
            },
            early=updater(-1),
            late=updater(1)),
        hyperparameter="late_gate").network()

    fn1 = network.function([("n", "hyperparameter")], [], include_updates=True)
    fn2 = network.function([], ["n"])
    gates_and_answers = [(0, -1), (0.25, -1.5), (1, -0.5), (1, 0.5)]
    for gate, ans in gates_and_answers:
        fn1(gate)
        np.testing.assert_allclose(ans, fn2()[0], rtol=1e-1)
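
The expected answers follow from the interpolation sketched after Example #7:
with the early updater adding -1 and the late updater adding +1 per call, each
call applies delta = (1 - gate) * (-1) + gate * (+1) = 2 * gate - 1 to the
shared state, which starts at 0. A quick check of the arithmetic:

state = 0.0
for gate, expected in [(0, -1), (0.25, -1.5), (1, -0.5), (1, 0.5)]:
    state += 2 * gate - 1
    assert abs(state - expected) < 1e-9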
Example #11
    pool_stride=(2, 2),
    pool_pad=(1, 1),
    inits=[treeano.inits.OrthogonalInit()],
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam", {
            "subtree":
            model,
            "cost":
            tn.TotalCostNode(
                "cost",
                {
                    "pred": tn.ReferenceNode("pred_ref", reference="model"),
                    "target": tn.InputNode("y", shape=(None, ), dtype="int32")
                },
            )
        }),
    cost_function=treeano.utils.categorical_crossentropy_i32,
)
network = with_updates.network()
network.build()  # build eagerly to share weights

valid_fn = canopy.handled_fn(network, [
    canopy.handlers.time_call(key="valid_time"),
    canopy.handlers.override_hyperparameters(dropout_probability=0),
    canopy.handlers.batch_pad(BATCH_SIZE, keys=["x", "y"]),
    canopy.handlers.chunk_variables(batch_size=BATCH_SIZE,
                                    variables=["x", "y"])
Example #12
             {"pred": tn.IdentityNode("pred_id"),
              "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             cost_function=treeano.utils.categorical_crossentropy_i32),
         tn.InputElementwiseSumNode("total_cost")]),
    num_units=32,
    cost_reference="total_cost",
    dropout_probability=0.5,
    inits=[treeano.inits.XavierNormalInit()],
)

with_updates = tn.HyperparameterNode(
    "with_updates",
    tn.AdamNode(
        "adam",
        {"subtree": model,
         "cost": tn.ReferenceNode("cost_ref", reference="total_cost")}),
)
network = with_updates.network()
network.build()  # build eagerly to share weights

BATCH_SIZE = 500

valid_fn = canopy.handled_fn(
    network,
    [canopy.handlers.time_call(key="valid_time"),
     canopy.handlers.override_hyperparameters(dropout_probability=0),
     canopy.handlers.chunk_variables(batch_size=BATCH_SIZE,
                                     variables=["x", "y"])],
    {"x": "x", "y": "y"},
    {"total_cost": "total_cost", "pred": "pred"})
Example #13
def load_network(update_scale_factor):
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode(
            "loc_seq",
            [tn.DnnMaxPoolNode("loc_pool1"),
             tn.DnnConv2DWithBiasNode("loc_conv1"),
             tn.DnnMaxPoolNode("loc_pool2"),
             bn.NoScaleBatchNormalizationNode("loc_bn1"),
             tn.ReLUNode("loc_relu1"),
             tn.DnnConv2DWithBiasNode("loc_conv2"),
             bn.NoScaleBatchNormalizationNode("loc_bn2"),
             tn.ReLUNode("loc_relu2"),
             tn.DenseNode("loc_fc1", num_units=50),
             bn.NoScaleBatchNormalizationNode("loc_bn3"),
             tn.ReLUNode("loc_relu3"),
             tn.DenseNode("loc_fc2",
                          num_units=6,
                          inits=[treeano.inits.NormalWeightInit(std=0.001)])]),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    st_node = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))

    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode(
            "seq",
            [tn.InputNode("x", shape=(None, 1, 60, 60)),
             # scaling the updates of the spatial transformer
             # seems to be very helpful, allowing the classification
             # net to learn what to look for before the transformer
             # prematurely starts looking
             tn.UpdateScaleNode(
                 "st_update_scale",
                 st_node,
                 update_scale_factor=update_scale_factor),
             tn.Conv2DWithBiasNode("conv1"),
             tn.MaxPool2DNode("mp1"),
             bn.NoScaleBatchNormalizationNode("bn1"),
             tn.ReLUNode("relu1"),
             tn.Conv2DWithBiasNode("conv2"),
             tn.MaxPool2DNode("mp2"),
             bn.NoScaleBatchNormalizationNode("bn2"),
             tn.ReLUNode("relu2"),
             tn.GaussianDropoutNode("do1"),
             tn.DenseNode("fc1"),
             bn.NoScaleBatchNormalizationNode("bn3"),
             tn.ReLUNode("relu3"),
             tn.DenseNode("fc2", num_units=10),
             tn.SoftmaxNode("pred"),
             ]),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )

    with_updates = tn.HyperparameterNode(
        "with_updates",
        tn.AdamNode(
            "adam",
            {"subtree": model,
             "cost": tn.TotalCostNode("cost", {
                 "pred": tn.ReferenceNode("pred_ref", reference="model"),
                 "target": tn.InputNode("y", shape=(None,), dtype="int32")},
             )}),
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )
    network = with_updates.network()
    network.build()  # build eagerly to share weights
    return network
Example #14
          [tn.SequentialNode(
              "y_vars",
              [tn.DenseNode("fc_y", num_units=10),
               tn.SoftmaxNode("y_pred"),
               tn.AuxiliaryCostNode(
                   "classification_cost",
                   {"target": tn.InputNode("y",
                                           shape=(None,),
                                           dtype="int32")},
                   cost_function=treeano.utils.categorical_crossentropy_i32)]),
           tn.SequentialNode(
               "z_vars",
               [tn.DenseNode("fc_z", num_units=LATENT_SIZE),
                tn.AuxiliaryCostNode(
                    "xcov_cost",
                    {"target": tn.ReferenceNode("y_ref",
                                                reference="y_pred")},
                    cost_function=cross_covariance)])],
          axis=1),
      tn.DenseNode("fc3"),
      tn.ReLUNode("relu3"),
      tn.DenseNode("fc4"),
      tn.ReLUNode("relu4"),
      tn.DenseNode("reconstruction", num_units=28 * 28),
      tn.TotalCostNode(
          "cost",
          {"pred": tn.IdentityNode("recon_id"),
           "target": tn.ReferenceNode("in_ref", reference="x")},
          cost_function=treeano.utils.squared_error),
      tn.MultiplyConstantNode("mul_reconstruction_error", value=0.1),
      tn.InputElementwiseSumNode("total_cost")]),
 num_units=512,
Example #15
def test_hyperparameter_node_serialization():
    tn.check_serialization(tn.HyperparameterNode("a", tn.ReferenceNode("b")))
Example #16
                               }, {
                                   "from": "sigma",
                                   "to": "REINFORCE",
                                   "to_key": "sigma"
                               }, {
                                   "from": "reward",
                                   "to": "REINFORCE",
                                   "to_key": "reward"
                               }, {
                                   "from": "sampled",
                                   "to": "REINFORCE",
                                   "to_key": "sampled"
                               }, {
                                   "from": "REINFORCE"
                               }]])

network = tn.AdamNode("adam", {
    "subtree": graph,
    "cost": tn.ReferenceNode("cost", reference="REINFORCE")
},
                      learning_rate=0.1).network()
fn = network.function([], ["graph", "mu"], include_updates=True)

mus = []
for i in range(1000):
    _, mu = fn()
    print("Iter:", i, "Predicted constant:", mu)
    mus.append(mu)

print("MSE from optimal constant:", np.mean((np.array(mus) - 3.5)**2))
Example #17
def test_reference_node_serialization():
    tn.check_serialization(tn.ReferenceNode("a"))
    tn.check_serialization(tn.ReferenceNode("a", reference="bar"))