Ejemplo n.º 1
0
def test_graph_input_order():
    in1 = tx.Input(n_units=1, name="in1", dtype=tf.float32, constant=False)
    in2 = tx.Input(n_units=1, name="in2", dtype=tf.float32, constant=False)
    in12 = tx.Add(in1, in2)
    in3 = tx.Constant(tf.ones(shape=[1], dtype=tf.float32))
    in123 = tx.Add(in12, in3)
    graph = tx.Graph.build(inputs=None, outputs=in123)

    # print("\n")
    # for layer,p in graph.dependency_iter().items():
    #     print(layer.name)
    #     print(p)
    print(list(map(lambda x: x.name, graph.in_nodes)))
Ejemplo n.º 2
0
    def _build_graph(self, layer, previous_state):
        with layer_scope(self):

            if previous_state is None:
                input_batch = tf.shape(layer.tensor)[0]
                zero_state = tf.zeros([input_batch, self.n_units])
                self.previous_state = tx.TensorLayer(zero_state, self.n_units)

            if self.share_state_with is None:
                # determines the weight of the previous state
                # we could add the bias at the end but this way we just define a single bias for the r unit
                self.r_current_w = tx.Linear(layer,
                                             self.n_units,
                                             bias=True,
                                             weight_init=self.init,
                                             name="r_current_w")
                self.r_recurrent_w = tx.Linear(self.previous_state,
                                               self.n_units,
                                               bias=False,
                                               weight_init=self.recurrent_init,
                                               name="r_current_w")

                self.u_current_w = tx.Linear(layer,
                                             self.n_units,
                                             bias=True,
                                             weight_init=self.init,
                                             name="u_current_w")
                self.u_recurrent_w = tx.Linear(self.previous_state,
                                               self.n_units,
                                               bias=False,
                                               weight_init=self.recurrent_init,
                                               name="u_current_w")

                self.current_w = tx.Linear(layer,
                                           self.n_units,
                                           bias=True,
                                           weight_init=self.init,
                                           name="current_w")
                self.recurrent_w = tx.Linear(self.previous_state,
                                             self.n_units,
                                             bias=False,
                                             weight_init=self.recurrent_init,
                                             name="recurrent_w")

                # kernel_gate = tx.Activation()

                kernel_act = tx.Activation(kernel_linear, self.activation)
                self.kernel = tx.Compose(kernel_linear, kernel_act)

            else:
                self.kernel = self.share_state_with.kernel.reuse_with(layer)
                self.recurrent_kernel = self.share_state_with.recurrent_kernel.reuse_with(
                    self.previous_state)

            r_state = tx.Add(r_current_w, r_recurrent_w)
            r_state = tx.Bias(r_state)
            r_gate = tx.Activation(r_state, fn=tx.sigmoid, name="r_gate")

            # """Gated recurrent unit (GRU) with nunits cells."""
            return self.kernel.tensor + self.recurrent_kernel.tensor
Ejemplo n.º 3
0
def test_merge_add_shape():
    x1 = tx.Input([[2.]], n_units=1, name="x1")
    x2 = tx.Input([[2.]], n_units=1, name="x2")

    add = tx.Add(x1, x2)
    assert len(add.shape) == 2
    assert add.shape[-1] == 1
    assert add.shape[0] is None
Ejemplo n.º 4
0
def test_module_reuse_order():
    x1 = tx.Input([[2.]], n_units=1, name="x1")
    x2 = tx.Input([[2.]], n_units=1, name="x2")
    x3 = tx.Input([[1.]], n_units=1, name="x3")

    h = tx.Add(x2, x3)
    y = tx.Add(x1, h)

    module = tx.Module(inputs=[x1, x2, x3], output=y)

    x1_ = tx.Constant([[2.]], name="x1b")
    x2_ = tx.Constant([[2.]], name="x2b")

    m2 = module.reuse_with(x1_, x2_)

    m1 = module()
    m2 = m2()

    assert tx.tensor_equal(m1, m2)
Ejemplo n.º 5
0
def test_build_graph():
    x1 = tx.Input(n_units=1000, constant=False, dtype=tf.float32)
    x2 = tx.Input(init_value=tf.ones([1, 3]), dtype=tf.float32, constant=True)

    y10 = tx.Linear(x1, n_units=3)
    y11 = tx.Activation(y10)
    y1 = tx.Module(x1, y11)
    y2 = tx.Add(y1, x2)
    output = y2

    graph = Graph.build(inputs=None, outputs=[y1, y2])
    # module condenses 2 nodes so it's 4 and not 6
    assert len(graph.nodes) == 4

    @tf.function
    def simple_graph(in0):
        x1.value = in0
        return y2()

    simple_graph_2 = Graph.build(inputs=[x1, x2], outputs=y2)
    simple_graph_2 = tf.function(simple_graph_2)
    g = Graph.build(inputs=[x1, x2], outputs=y2)
    y2fn = y2.as_function()
    data = tf.ones([256, 1000])
    x1.value = data

    compiled_fn = g.as_function(ord_inputs=x1, ord_outputs=output)

    assert tx.tensor_equal(compiled_fn(data), y2fn())
    assert tx.tensor_equal(compiled_fn(data), simple_graph_2()[0])

    from timeit import timeit

    def update_run():
        x1.value = tf.random.uniform([256, 1000])
        return y2fn()

    n = 1000
    t_update_run = timeit(update_run, number=n)
    t_generated = timeit(lambda: compiled_fn(tf.random.uniform([256, 1000])),
                         number=n)
    t_compile_value_set = timeit(
        lambda: simple_graph(tf.random.uniform([256, 1000])), number=n)
    t_graph_call_tf = timeit(
        lambda: simple_graph_2(tf.random.uniform([256, 1000])), number=n)

    assert t_generated < t_update_run
    assert t_generated < t_compile_value_set
    assert t_generated < t_graph_call_tf
    assert t_update_run > t_compile_value_set

    o1 = compiled_fn(tf.random.uniform([256, 1000]))
    o2 = compiled_fn(tf.random.uniform([256, 1000]))
    assert not tx.tensor_equal(o1, o2)
Ejemplo n.º 6
0
def test_dependency_iter():
    """ Dependency iterator after adding leaves to the graph
    """

    x1 = tx.Input(n_units=2, name="x1", constant=False)
    x2 = tx.Input(n_units=2, name="x2", constant=False)

    y1 = tx.Linear(x2, 2, name="y1")
    y2 = tx.Linear(y1, 2, name="y2")
    y3 = tx.Linear(x1, 2, name="y3")

    graph = Graph.build(inputs=[x1, x2], outputs=[y2, y3])
    dep = graph.dependency_iter()
    dep_iter = list(dep)

    assert sorted(dep.values())

    assert dep_iter[0] is x1
    assert dep_iter[1] is x2
    assert y1 in dep_iter[-2:]
    assert y2 in dep_iter[-2:]

    # ANOTHER GRAPH
    x1 = tx.Input(n_units=1, name="x1")
    x2 = tx.Input(n_units=1, name="x2")
    x3 = tx.Input(n_units=1, name="x3")

    h = tx.Add(x1, x2, name="h")
    y = tx.Add(x3, h, name="y")

    g = Graph.build(inputs=None, outputs=y)

    priorities = g.dependency_iter()

    assert priorities[y] == (2, 0)
    assert priorities[x1] == (0, 1)
    assert priorities[y] > priorities[h]
Ejemplo n.º 7
0
def test_multi_output_graph():
    data1 = [[1., 1.]]
    data2 = [[2., 1.]]

    in1 = tx.Input(data1, 2, name="in1", constant=False)
    in2 = tx.Input(data2, 2, name="in2")

    linear1 = tx.Linear(in1, 1)
    linear2 = tx.Linear(tx.Add(in1, in2), 1)

    graph = tx.Graph.build(inputs=None, outputs=[linear1, linear2])

    result1 = graph()
    assert len(result1) == 2

    graph2 = tx.Graph.build(inputs=None, outputs=[linear2])
    result2 = graph2()
    assert len(result2) == 1
    assert tx.tensor_equal(result2[0], result1[-1])
Ejemplo n.º 8
0
def test_module_gate():
    """ Module + Gate Integration
    """
    x1 = tx.Input([[1, 1, 1, 1]], n_units=4, dtype=tf.float32)
    x2 = tx.Input([[1, 1]], n_units=2, dtype=tf.float32)
    x1 = tx.Add(x1, x1)

    gate = tx.Gate(input_layer=x1, gate_input=x2, gate_fn=tf.sigmoid)
    gate_module = tx.Module([x1, x2], gate)

    x3 = tx.Input([[1, 1, 1, 1]], n_units=4, dtype=tf.float32)
    x4 = tx.Input([[1, 1]], n_units=2, dtype=tf.float32)

    m2 = gate_module.reuse_with(x3, x4)

    result1 = gate_module()
    result2 = m2()
    result3 = gate_module.compute(x3, x4)

    assert tx.tensor_equal(result1, result2 * 2)
    assert tx.tensor_equal(result2, result3)
Ejemplo n.º 9
0
def test_graph_as_function():
    data = [[1., 2.]]

    in1 = tx.Input(n_units=1, name="in1", dtype=tf.float32, constant=False)
    in2 = tx.Input(n_units=1, name="in1", dtype=tf.float32, constant=False)
    in3 = tx.Constant(tf.ones(shape=[1], dtype=tf.float32))
    in12 = tx.Add(in1, in2, in3)
    graph = tx.Graph.build(inputs=[in1, in2, in3], outputs=in12)

    fn = graph.as_function_v2(ord_inputs=[in1, in2, in3],
                              stateful_inputs=True,
                              compile=False,
                              fn_name="add")
    # fn = graph.as_function_v2(stateful_inputs=True, compile=False)

    # TODO I should make sure the function converts the inputs to tensors
    # to make sure I don't pass lists around
    assert fn(np.array([[1.]], dtype=np.float),
              np.array([[1.]], dtype=np.float)) == [[3]]
    assert fn() == [[3]]
    assert fn([[1.]], [[2.]]) == [[4]]
    assert fn() == [[4]]
    assert fn([[2.]]) == [[5]]
Ejemplo n.º 10
0
    def __init__(self,
                 ctx_size,
                 vocab_size,
                 embed_dim,
                 embed_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 x_to_f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 logit_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 embed_share=True,
                 use_gate=True,
                 use_hidden=False,
                 h_dim=100,
                 h_activation=tx.elu,
                 h_init=tx.he_normal_init(),
                 h_to_f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 use_dropout=True,
                 embed_dropout=False,
                 keep_prob=0.95,
                 l2_loss=False,
                 l2_loss_coef=1e-5,
                 use_nce=False,
                 nce_samples=100):

        # GRAPH INPUTS
        run_inputs = tx.Input(ctx_size, dtype=tf.int32, name="input")
        loss_inputs = tx.Input(n_units=1, dtype=tf.int32, name="target")
        eval_inputs = loss_inputs

        # RUN GRAPH
        # if I create a scope here the Tensorboard graph will be a mess to read
        # because it groups everything by nested scope names
        # instead if I choose to create different scopes for train and eval only
        # the graph stays readable because it allows us to use the same names
        # under different scopes while still sharing variables
        var_reg = []
        with tf.name_scope("run"):
            feature_lookup = tx.Lookup(run_inputs,
                                       ctx_size, [vocab_size, embed_dim],
                                       embed_init,
                                       name="lookup")
            var_reg.append(feature_lookup.weights)
            feature_lookup = feature_lookup.as_concat()

            if use_gate or use_hidden:
                hl = tx.Linear(feature_lookup,
                               h_dim,
                               h_init,
                               bias=True,
                               name="h_linear")
                ha = tx.Activation(hl, h_activation, name="h_activation")
                h = tx.Compose(hl, ha, name="hidden")
                var_reg.append(hl.weights)

            features = feature_lookup
            if use_gate:
                gate_w = tx.Linear(h, ctx_size, bias=True)
                gate = tx.Gate(features, gate_input=gate_w)

                # gate = tx.Module([h, features], gate)

                features = gate
                var_reg.append(gate_w.weights)

            x_to_f = tx.Linear(features,
                               embed_dim,
                               x_to_f_init,
                               bias=True,
                               name="x_to_f")
            var_reg.append(x_to_f.weights)
            f_prediction = x_to_f

            if use_hidden:
                h_to_f = tx.Linear(h,
                                   embed_dim,
                                   h_to_f_init,
                                   bias=True,
                                   name="h_to_f")
                var_reg.append(h_to_f.weights)
                f_prediction = tx.Add(x_to_f, h_to_f, name="f_predicted")

            # RI DECODING ===============================================
            shared_weights = tf.transpose(
                feature_lookup.weights) if embed_share else None
            logit_init = logit_init if not embed_share else None
            run_logits = tx.Linear(f_prediction,
                                   vocab_size,
                                   logit_init,
                                   shared_weights,
                                   bias=True,
                                   name="logits")
            if not embed_share:
                var_reg.append(run_logits.weights)
            y_prob = tx.Activation(run_logits, tx.softmax)

        # TRAIN GRAPH ===============================================
        with tf.name_scope("train"):
            if use_dropout and embed_dropout:
                feature_lookup = feature_lookup.reuse_with(run_inputs)
                features = tx.Dropout(feature_lookup, probability=keep_prob)
            else:
                features = feature_lookup

            if use_gate or use_hidden:
                if use_dropout:
                    h = h.reuse_with(features)
                    h = tx.Dropout(h, probability=keep_prob)

                if use_gate:
                    gate_w = gate_w.reuse_with(h)
                    features = gate.reuse_with(layer=features,
                                               gate_input=gate_w)

                f_prediction = x_to_f.reuse_with(features)

                if use_hidden:
                    h_to_f = h_to_f.reuse_with(h)
                    if use_dropout:
                        h_to_f = tx.Dropout(h_to_f, probability=keep_prob)
                    f_prediction = tx.Add(f_prediction, h_to_f)
            else:
                f_prediction = f_prediction.reuse_with(features)

            train_logits = run_logits.reuse_with(f_prediction)

            if use_nce:
                # uniform gets good enough results if enough samples are used
                # but we can load the empirical unigram distribution
                # or learn the unigram distribution during training
                sampled_values = uniform_sampler(loss_inputs.tensor, 1,
                                                 nce_samples, True, vocab_size)
                train_loss = tf.nn.nce_loss(weights=tf.transpose(
                    train_logits.weights),
                                            biases=train_logits.bias,
                                            inputs=f_prediction.tensor,
                                            labels=loss_inputs.tensor,
                                            num_sampled=nce_samples,
                                            num_classes=vocab_size,
                                            num_true=1,
                                            sampled_values=sampled_values)
            else:
                one_hot = tx.dense_one_hot(column_indices=loss_inputs.tensor,
                                           num_cols=vocab_size)
                train_loss = tx.categorical_cross_entropy(
                    one_hot, train_logits.tensor)

            train_loss = tf.reduce_mean(train_loss)

            if l2_loss:
                losses = [tf.nn.l2_loss(var) for var in var_reg]
                train_loss = train_loss + l2_loss_coef * tf.add_n(losses)

        # EVAL GRAPH ===============================================
        with tf.name_scope("eval"):
            one_hot = tx.dense_one_hot(column_indices=eval_inputs.tensor,
                                       num_cols=vocab_size)
            eval_loss = tx.categorical_cross_entropy(one_hot,
                                                     run_logits.tensor)
            eval_loss = tf.reduce_mean(eval_loss)

        # SETUP MODEL CONTAINER ====================================
        super().__init__(run_inputs=run_inputs,
                         run_outputs=y_prob,
                         train_inputs=run_inputs,
                         train_outputs=y_prob,
                         eval_inputs=run_inputs,
                         eval_outputs=y_prob,
                         train_out_loss=train_loss,
                         train_in_loss=loss_inputs,
                         eval_out_score=eval_loss,
                         eval_in_score=eval_inputs)
Ejemplo n.º 11
0
    def __init__(self,
                 ctx_size,
                 vocab_size,
                 k_dim,
                 ri_tensor: RandomIndexTensor,
                 embed_dim,
                 embed_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 x_to_f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 logit_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 embed_share=True,
                 logit_bias=False,
                 use_gate=True,
                 use_hidden=False,
                 h_dim=100,
                 h_activation=tx.elu,
                 h_init=tx.he_normal_init(),
                 h_to_f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 use_dropout=True,
                 embed_dropout=False,
                 keep_prob=0.95,
                 l2_loss=False,
                 l2_loss_coef=1e-5):

        # GRAPH INPUTS
        run_inputs = tx.Input(ctx_size, dtype=tf.int32, name="input")
        loss_inputs = tx.Input(n_units=1, dtype=tf.int32, name="target")
        eval_inputs = loss_inputs

        # RUN GRAPH =====================================================
        var_reg = []
        with tf.name_scope("run"):
            # RI ENCODING ===============================================
            # convert ids to ris gather a set of random indexes based on the ids in a sequence

            # ri_layer = tx.TensorLayer(ri_tensor, n_units=k_dim)
            # ri_inputs = tx.gather_sparse(ri_layer.tensor, run_inputs.tensor)
            with tf.name_scope("ri_encode"):
                # used to compute logits
                if isinstance(ri_tensor, RandomIndexTensor):
                    ri_layer = tx.TensorLayer(ri_tensor.to_sparse_tensor(),
                                              k_dim)

                    ri_inputs = ri_tensor.gather(run_inputs.tensor)
                    ri_inputs = ri_inputs.to_sparse_tensor()
                    ri_inputs = tx.TensorLayer(ri_inputs, k_dim)
                else:
                    ri_layer = tx.TensorLayer(ri_tensor, k_dim)
                    ri_inputs = tx.gather_sparse(ri_layer.tensor,
                                                 run_inputs.tensor)
                    ri_inputs = tx.TensorLayer(ri_inputs, k_dim)

            # use those sparse indexes to lookup a set of features based on the ri values
            feature_lookup = tx.Lookup(ri_inputs,
                                       ctx_size, [k_dim, embed_dim],
                                       embed_init,
                                       name="lookup")
            var_reg.append(feature_lookup.weights)
            feature_lookup = feature_lookup.as_concat()
            # ===========================================================

            if use_gate or use_hidden:
                hl = tx.Linear(feature_lookup,
                               h_dim,
                               h_init,
                               bias=True,
                               name="h_linear")
                ha = tx.Activation(hl, h_activation, name="h_activation")
                h = tx.Compose(hl, ha, name="hidden")
                var_reg.append(hl.weights)

            features = feature_lookup
            if use_gate:
                features = tx.Gate(features, ctx_size, gate_input=h)
                gate = features
                var_reg.append(features.gate_weights)

            x_to_f = tx.Linear(features,
                               embed_dim,
                               x_to_f_init,
                               bias=True,
                               name="x_to_f")
            var_reg.append(x_to_f.weights)
            f_prediction = x_to_f

            if use_hidden:
                h_to_f = tx.Linear(h,
                                   embed_dim,
                                   h_to_f_init,
                                   bias=True,
                                   name="h_to_f")
                var_reg.append(h_to_f.weights)
                f_prediction = tx.Add(x_to_f, h_to_f, name="f_predicted")

            # RI DECODING ===============================================
            shared_weights = feature_lookup.weights if embed_share else None
            logit_init = logit_init if not embed_share else None
            # embedding feature vectors for all words: shape [vocab_size, embed_dim]
            # later, for NCE we don't need to get all the features

            all_embeddings = tx.Linear(ri_layer,
                                       embed_dim,
                                       logit_init,
                                       shared_weights,
                                       name="logits",
                                       bias=False)

            # dot product of f_predicted . all_embeddings with bias for each target word

            run_logits = tx.Linear(f_prediction,
                                   n_units=vocab_size,
                                   shared_weights=all_embeddings.tensor,
                                   transpose_weights=True,
                                   bias=logit_bias)

            if not embed_share:
                var_reg.append(all_embeddings.weights)

            # ===========================================================
            run_embed_prob = tx.Activation(run_logits, tx.softmax)

        # TRAIN GRAPH ===================================================
        with tf.name_scope("train"):
            if use_dropout and embed_dropout:
                feature_lookup = feature_lookup.reuse_with(ri_inputs)
                features = tx.Dropout(feature_lookup, probability=keep_prob)
            else:
                features = feature_lookup

            if use_gate or use_hidden:
                if use_dropout:
                    h = h.reuse_with(features)
                    h = tx.Dropout(h, probability=keep_prob)

                if use_gate:
                    features = gate.reuse_with(features, gate_input=h)

                f_prediction = x_to_f.reuse_with(features)

                if use_hidden:
                    h_to_f = h_to_f.reuse_with(h)
                    if use_dropout:
                        h_to_f = tx.Dropout(h_to_f, probability=keep_prob)
                    f_prediction = tx.Add(f_prediction, h_to_f)
            else:
                f_prediction = f_prediction.reuse_with(features)

            # we already define all_embeddings from which these logits are computed before so this should be ok
            train_logits = run_logits.reuse_with(f_prediction)

            train_embed_prob = tx.Activation(train_logits,
                                             tx.softmax,
                                             name="train_output")

            one_hot = tx.dense_one_hot(column_indices=loss_inputs.tensor,
                                       num_cols=vocab_size)
            train_loss = tx.categorical_cross_entropy(one_hot,
                                                      train_logits.tensor)

            train_loss = tf.reduce_mean(train_loss)

            if l2_loss:
                losses = [tf.nn.l2_loss(var) for var in var_reg]
                train_loss = train_loss + l2_loss_coef * tf.add_n(losses)

        # EVAL GRAPH ===============================================
        with tf.name_scope("eval"):
            one_hot = tx.dense_one_hot(column_indices=eval_inputs.tensor,
                                       num_cols=vocab_size)
            eval_loss = tx.categorical_cross_entropy(one_hot,
                                                     run_logits.tensor)
            eval_loss = tf.reduce_mean(eval_loss)

        # SETUP MODEL CONTAINER ====================================
        super().__init__(run_inputs=run_inputs,
                         run_outputs=run_embed_prob,
                         train_inputs=run_inputs,
                         train_outputs=train_embed_prob,
                         eval_inputs=run_inputs,
                         eval_outputs=run_embed_prob,
                         train_out_loss=train_loss,
                         train_in_loss=loss_inputs,
                         eval_out_score=eval_loss,
                         eval_in_score=eval_inputs)