Example #1
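Note: these examples are excerpts from test modules, so the usual imports are
assumed rather than shown. A minimal set that covers most of the snippets
below would be:

import numpy as np
import tensorflow as tf
import tensorx as tx

Names that still look undefined (for instance Attention in the keras
comparison test, which would be tf.keras.layers.Attention, or Generator and
RandomIndexTensor in the random-indexing snippets) come from the modules the
excerpts were taken from.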
def test_dynamic_concat():
    seq1 = [[1, 2], [3, 4]]
    seq2 = [[1, 2, 3], [4, 5, 6]]

    n = 10
    m = 4

    inputs = tx.Input(seq2, shape=[None, None], dtype=tf.int32, constant=False)
    inputs2 = tx.Input(seq2, dtype=tf.int32, constant=True)

    lookup = tx.Lookup(inputs, seq_size=None, embedding_shape=[n, m])
    lookup2 = tx.Lookup(inputs2, seq_size=3, embedding_shape=[n, m])
    concat1 = lookup.as_concat()
    concat2 = lookup2.as_concat()

    assert concat1.n_units is None
    assert concat2.n_units is not None

    concat3 = tx.SeqConcat(lookup, time_major=False)
    concat4 = tx.SeqConcat(lookup, seq_size=3, time_major=False)
    assert tx.shape_equal(concat4.shape, (None, 3 * 4))

    c1, c2 = concat1(), concat3()
    assert tx.tensor_equal(c1, c2)
    assert concat3.n_units is None
    assert concat4.n_units == 3 * lookup.n_units

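    # feeding batches with different sequence lengths: only the embedding
    # dimension (m) is fixed, so both lookups end with m units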
    inputs.value = seq1
    l1 = lookup()
    inputs.value = seq2
    l2 = lookup()

    assert np.shape(l1)[-1] == m
    assert np.shape(l2)[-1] == m
Example #2
def test_coupled_gate():
    vocab_size = 4
    n_features = 3
    seq_size = 2

    inputs = tx.Input(init_value=np.array([[2, 0], [1, 2]]),
                      n_units=seq_size,
                      dtype=tf.int32,
                      constant=True)

    features1 = tx.Lookup(inputs,
                          seq_size,
                          embedding_shape=[vocab_size,
                                           n_features]).as_concat()
    features2 = tx.Lookup(inputs,
                          seq_size,
                          embedding_shape=[vocab_size,
                                           n_features]).as_concat()
    gate_w = tx.Linear(features1, seq_size, add_bias=True)
    coupled_gate = tx.CoupledGate(features1, features2, gate_w)

    sp_features1 = tx.ToSparse(features1)
    assert tx.tensor_equal(tf.sparse.to_dense(sp_features1()), features1())

    sp_gate = tx.CoupledGate(sp_features1, features2, gate_w)
    print(sp_gate())
    print(sp_gate.shape)
    # coupled_gate2 = coupled_gate.reuse_with(sp_features1, features2)

    r1 = coupled_gate()
Example #3
def test_lookup_sequence_transform():
    vocab_size = 4
    embed_dim = 2
    seq_size = 2

    inputs = tx.Input(n_units=seq_size, dtype=tf.int32)
    input_data = np.array([[2, 0], [1, 2], [0, 2]])
    lookup = tx.Lookup(inputs,
                       seq_size=seq_size,
                       embedding_shape=[vocab_size, embed_dim],
                       add_bias=True)
    concat_lookup = lookup.as_concat()
    seq_lookup = lookup.permute_batch_time()

    assert hasattr(lookup, "seq_size")

    inputs.value = input_data

    v1 = lookup()
    v2 = concat_lookup()
    v3 = seq_lookup()

    assert np.shape(v1) == (np.shape(input_data)[0], seq_size, embed_dim)
    assert np.shape(v2) == (np.shape(input_data)[0], seq_size * embed_dim)

    assert np.shape(v3) == (seq_size, np.shape(input_data)[0], embed_dim)
    assert tx.tensor_equal(v1[:, 0], v3[0])
Example #4
def test_conv1d():
    n_features = 3
    embed_size = 128
    seq_size = 3
    batch_size = 2

    inputs = tx.Constant(np.random.random([batch_size, seq_size]),
                         n_units=seq_size,
                         dtype=tf.int32)
    emb = tx.Lookup(inputs,
                    seq_size=seq_size,
                    embedding_shape=[n_features, embed_size])
    seq = emb()

    n_units = 100
    filter_size = 4
    cnn = tf.keras.layers.Conv1D(filters=n_units,
                                 kernel_size=filter_size,
                                 padding='same')

    res = cnn(seq)

    cnn2 = tx.Conv1D(emb, n_units=100, filter_size=filter_size)
    res2 = cnn2(seq)

    assert len(cnn.variables) == len(cnn2.variables)

    cnn.kernel = cnn2.filters
    cnn.bias = cnn2.bias
    res3 = cnn(seq)

    assert not tx.tensor_equal(res, res2)
    assert tx.tensor_equal(res2, res3)
Example #5
def test_lookup_dynamic_sequence():
    seq1 = [[1, 2], [3, 4]]
    seq2 = [[1, 2, 3], [4, 5, 6]]

    n = 10
    h = 4

    inputs = tx.Input(dtype=tf.int32, constant=False)

    lookup = tx.Lookup(inputs, seq_size=None, embedding_shape=[n, h])
    assert tx.shape_equal(lookup.shape, (None, None, h))
    concat = lookup.as_concat()
    inputs.value = seq1

    inputs.value = seq1
    inputs()

    inputs.value = seq2
    inputs()

    inputs.value = seq1
    l1 = lookup()
    inputs.value = seq2
    l2 = lookup()

    inputs.value = seq1
    c1 = concat()
    inputs.value = seq2
    c2 = concat()

    assert np.shape(l1)[-1] == h
    assert np.shape(l2)[-1] == h

    assert np.shape(c1)[-1] == h * 2
    assert np.shape(c2)[-1] == h * 3
Example #6
def test_as_concat_wrap():
    n = 10
    h = 4

    inputs = tx.Input(dtype=tf.int32, constant=False)
    lookup = tx.Lookup(inputs, seq_size=None, embedding_shape=[n, h])
    assert tx.shape_equal(lookup.shape, (None, None, h))
    concat = lookup.as_concat()
    assert tx.shape_equal(concat.shape, (None, None))

    lookup = tx.Lookup(inputs, seq_size=2, embedding_shape=[n, h])
    concat = lookup.as_concat()
    assert tx.shape_equal(concat.shape, (None, 2 * 4))

    seq1 = [[1, 2], [3, 4]]
    inputs.value = seq1
    concat_tensor = concat()
    assert concat_tensor.shape[-1] == concat.shape[-1]
Example #7
def test_lookup_sequence_mismatch():
    inputs = tx.Input(np.array([[2, 0], [1, 2]]), 2, dtype=tf.int64)
    lookup = tx.Lookup(inputs,
                       None,
                       embedding_shape=[2, 10],
                       batch_size=None,
                       batch_padding=True)

    assert lookup.shape.is_compatible_with(lookup().shape)

    lookup = tx.Lookup(inputs,
                       1,
                       embedding_shape=[2, 10],
                       batch_size=None,
                       batch_padding=True)

    # not validating seq_len differing from input seq_len
    assert lookup.batch_size is None
    assert lookup.shape.is_compatible_with(lookup().shape)
Example #8
def test_lookup_config():
    inputs = tx.Input(np.array([[2, 0], [1, 2]]), 2, dtype=tf.int64)
    lookup = tx.Lookup(inputs,
                       None,
                       embedding_shape=[2, 10],
                       batch_size=None,
                       batch_padding=True)

    assert lookup.config['embedding_shape'] == [2, 10]
    assert lookup.config['batch_size'] is None
    assert lookup.config['batch_padding'] is True
    assert lookup.config['seq_size'] is None
Example #9
def test_lookup_sequence_sparse():
    input_dim = 10
    embed_dim = 3
    seq_size = 2
    batch_size = 3

    sparse_input = tf.SparseTensor([[0, 2], [1, 0], [2, 1]], [1, 1, 1],
                                   [3, input_dim])
    sparse_input_1d = tf.SparseTensor([[2], [0], [1]], [1, 1, 1], [input_dim])
    tensor_input = tx.Constant(sparse_input, input_dim)
    tensor_input_1d = tx.Constant(sparse_input_1d, input_dim)

    lookup = tx.Lookup(tensor_input,
                       seq_size,
                       embedding_shape=[input_dim, embed_dim],
                       batch_size=batch_size,
                       batch_padding=False)

    lookup_padding = tx.Lookup(tensor_input,
                               seq_size,
                               embedding_shape=[input_dim, embed_dim],
                               batch_size=batch_size,
                               batch_padding=True)

    lookup_1d = tx.Lookup(tensor_input_1d,
                          seq_size,
                          embedding_shape=[input_dim, embed_dim],
                          batch_size=batch_size,
                          batch_padding=True)

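    # with batch_padding=False the batch dimension is whatever the input yields
    # (2 here); with batch_padding=True it is padded up to batch_size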
    result = lookup()
    result_padding = lookup_padding()
    result_1d = lookup_1d()

    assert np.shape(result) == (2, seq_size, embed_dim)
    assert np.shape(result_padding) == (batch_size, seq_size, embed_dim)
    assert np.shape(result_1d) == (batch_size, seq_size, embed_dim)
Example #10
def test_multihead_attention():
    """
    TODO check causality

    """
    n_features = 3
    embed_size = 128
    seq_size = 3
    batch_size = 2
    n_heads = 8

    inputs = tx.Constant(np.random.random([batch_size, seq_size]),
                         n_units=seq_size,
                         dtype=tf.int32)
    emb = tx.Lookup(inputs,
                    seq_size=seq_size,
                    embedding_shape=[n_features, embed_size])

    attention = tx.MHAttention(query=emb,
                               key=emb,
                               value=emb,
                               n_units=embed_size,
                               n_heads=n_heads,
                               causality=False,
                               attention_dropout=0.1,
                               regularized=False)

    assert len(attention.inputs) == 3

    # 3 projection "kernels" (wq, wk, wv), no bias
    assert len(attention.variables) == 3

    attention_reg = attention.reuse_with(emb, emb, emb, regularized=True)
    attention_2 = attention.reuse_with(emb, emb, emb, regularized=False)
    attention_causal = attention.reuse_with(emb, emb, emb, causality=True)

    attention_causal()

    result = attention()
    result_reg = attention_reg()
    result2 = attention_2()

    assert tx.same_shape(result, result_reg)
    assert tx.tensor_equal(result, result2)

    vars1 = map(lambda v: v.ref(), attention.variables)
    vars2 = map(lambda v: v.ref(), attention_2.variables)

    assert set(vars1) == set(vars2)
Example #11
def test_lookup_sequence_bias():
    vocab_size = 4
    n_features = 3
    seq_size = 2

    inputs = tx.Input(n_units=seq_size, dtype=tf.int32)
    input_data = np.array([[2, 0], [1, 2], [0, 2]])
    lookup = tx.Lookup(input_layer=inputs,
                       seq_size=seq_size,
                       embedding_shape=[vocab_size, n_features],
                       add_bias=True)

    inputs.value = input_data
    v1 = lookup()
    assert np.shape(v1) == (np.shape(input_data)[0], seq_size, n_features)
Example #12
def test_drop_lookup():
    """ Embedding Dropout
    TODO finish test
    """
    seq_size = 4
    vocab_size = 10
    embed_dim = 3
    input_data = tf.constant([[2, 0, 2, 0], [1, 2, 2, 3], [0, 3, 0, 2]])
    inputs = tx.Input(init_value=input_data, n_units=seq_size, dtype=tf.int32)
    lookup = tx.Lookup(inputs,
                       seq_size=seq_size,
                       embedding_shape=[vocab_size, embed_dim],
                       add_bias=True)

    tx.DropLookup(lookup, probability=0.5)
Example #13
def test_biRNN():
    # bidirectional RNN
    n_features = 5
    embed_size = 4
    hidden_dim = 3
    seq_size = 6
    batch_size = 2

    inputs = tx.Input(np.random.random([batch_size, seq_size]),
                      n_units=seq_size,
                      dtype=tf.int32)
    lookup = tx.Lookup(inputs,
                       seq_size=seq_size,
                       embedding_shape=[n_features, embed_size])
    seq = lookup.permute_batch_time()

    rnn_proto = tx.RNNCell.config(n_units=hidden_dim)
    rnn0 = tx.RNN(seq,
                  cell_config=rnn_proto,
                  stateful=False,
                  return_state=True)

    # because a stateful rnn0 has a variable layer as input as well
    rnn_m0 = tx.Module(inputs=rnn0.inputs, output=rnn0)

    rnn1 = rnn0.reuse_with(seq,
                           reverse=True,
                           stateful=False,
                           return_state=True)
    # this handles the rnn returning multiple tensors

    r01 = rnn_m0.compute(seq(), rnn0.previous_state[0]())
    rnn0.reset()
    r02 = rnn0()

    assert tx.tensor_equal(r01[0], r02[0])

    rnn0_0 = rnn0[0]
    rnn1_0 = rnn1[0]
    rnn0 = tx.Wrap(rnn0, wrap_fn=lambda y: y[0], n_units=rnn0.n_units)
    rnn1 = tx.Wrap(rnn1, wrap_fn=lambda y: y[0], n_units=rnn1.n_units)

    rnn0_tensor = rnn0()
    rnn1_tensor = rnn1()
    rnn0_0_tensor = rnn0_0()

    print(rnn0_tensor.shape)
    print(rnn0_0_tensor.shape)
Example #14
def test_attention():
    n_features = 3
    embed_size = 8
    seq_size = 3
    batch_size = 2

    inputs = tx.Constant(np.random.random([batch_size, seq_size]),
                         n_units=seq_size,
                         dtype=tf.int32)
    emb = tx.Lookup(inputs,
                    seq_size=seq_size,
                    embedding_shape=[n_features, embed_size])
    seq = emb()

    # keras attention doesn't have multiple heads
    attention = Attention(use_scale=False)

    res = attention([seq, seq, seq])

    attention2 = tx.MHAttention(emb, emb, emb, n_units=embed_size, n_heads=1)
    assert len(attention2.variables) == 3

    attention2.wq = tx.Linear(emb,
                              n_units=None,
                              weights=tf.linalg.eye(embed_size, embed_size),
                              add_bias=False)
    attention2.wk = tx.Linear(emb,
                              n_units=None,
                              weights=tf.linalg.eye(embed_size, embed_size),
                              add_bias=False)
    attention2.wv = tx.Linear(emb,
                              n_units=None,
                              weights=tf.linalg.eye(embed_size, embed_size),
                              add_bias=False)

    assert tx.tensor_equal(attention2.wq(seq), seq)

    res2 = attention2()

    g = tx.Graph.build(inputs=emb, outputs=attention2)
    g = g.as_function(ord_inputs=emb, ord_outputs=attention2)

    res3 = g(seq)

    assert tx.tensor_equal(res, res2)
    assert tx.tensor_equal(res, res3)
Example #15
def test_stateful_rnn_layer():
    n_features = 5
    embed_size = 4
    hidden_dim = 3
    seq_size = 3
    batch_size = 2

    inputs = tx.Input(np.random.random([batch_size, seq_size]),
                      n_units=seq_size,
                      dtype=tf.int32)
    lookup = tx.Lookup(inputs,
                       seq_size=seq_size,
                       embedding_shape=[n_features, embed_size])
    seq = lookup.permute_batch_time()

    rnn_proto = tx.RNNCell.config(n_units=hidden_dim)

    rnn1 = tx.RNN(seq, cell_config=rnn_proto, stateful=True, return_state=True)
    lstm1 = tx.RNN(seq,
                   cell_config=tx.LSTMCell.config(n_units=hidden_dim),
                   stateful=True,
                   return_state=True)

    zero_state0 = [layer() for layer in rnn1.previous_state]

    assert len(zero_state0) == 1
    expected_state = tf.zeros([1, hidden_dim], dtype=tf.float32)
    assert tx.tensor_equal(zero_state0[0], expected_state)

    # import logging
    # logging.getLogger("tensorx").setLevel(logging.DEBUG)

    out1, state1 = rnn1()

    tx.Graph.build(inputs=None, outputs=lstm1)
    # out2, state2 = lstm1()
    lstm1()

    # state after single run
    # zero_state1 = [layer() for layer in ]
    zero_state1 = rnn1.previous_state[0]()
    assert tx.tensor_equal(zero_state1, state1)

    rnn1.reset()
    reset_state = rnn1.previous_state[0]()
    assert tx.tensor_equal(reset_state, zero_state0[0])
Example #16
def test_model_var_inputs():
    # tests the case where the train graph has inputs that do not need to be fed (e.g. variable state)
    n_features = 5
    embed_size = 4
    hidden_dim = 3
    seq_size = 3
    out_size = 2
    batch_size = 2

    x = tx.Input(np.random.random([batch_size, seq_size]),
                 n_units=seq_size,
                 dtype=tf.int32)
    y = tx.Input(np.random.random([batch_size, out_size]),
                 n_units=out_size,
                 dtype=tf.float32)
    lookup = tx.Lookup(x,
                       seq_size=seq_size,
                       embedding_shape=[n_features, embed_size])
    # seq = lookup.permute_batch_time()
    seq = tx.Transpose(lookup, [1, 0, 2])

    rnn1 = tx.RNN(seq, cell_config=tx.RNNCell.config(n_units=hidden_dim))
    y_ = tx.Linear(rnn1[seq_size - 1], n_units=out_size)

    # y_ = tx.Linear(tx.SeqConcat(lookup, seq_size=seq_size), n_units=out_size)

    # @tx.layer(n_units=2, dtype=tf.float32, name="loss")
    # def loss(pred, labels):
    #    return tx.mse(pred, labels)

    model = tx.Model(run_inputs=x,
                     run_outputs=y_,
                     train_inputs=[x, y],
                     train_outputs=y_,
                     train_loss=tx.MSE(y_, y))

    # model.draw("test.pdf")

    model.set_optimizer(tf.optimizers.SGD, lr=0.5)

    data1 = [[0, 1, 2], [2, 1, 0]]
    data2 = [[0., 1.], [1., 0.]]

    model.train_step(input_feed={x: data1, y: data2})
Example #17
def test_map_seq():
    n_features = 5
    embed_size = 4
    seq_size = 3
    batch_size = 2

    inputs = tx.Input(np.random.random([batch_size, seq_size]),
                      n_units=seq_size,
                      dtype=tf.int32)
    lookup = tx.Lookup(inputs,
                       seq_size=seq_size,
                       embedding_shape=[n_features, embed_size])
    seq = lookup.permute_batch_time()

    n_units = 2
    linear_fn = tx.Linear.config(n_units=n_units)
    assert tx.tensor_equal(tf.shape(seq()), [seq_size, batch_size, embed_size])

    seq_map = tx.SeqMap(seq, n_units=2, layer_config=linear_fn)
    assert tx.tensor_equal(tf.shape(seq_map), [seq_size, batch_size, n_units])
Example #18
def test_lookup_sequence_dense():
    input_dim = 4
    embed_dim = 3
    seq_size = 2
    batch_size = 3

    inputs = tx.Input(np.array([[2, 0], [1, 2]]), 2, dtype=tf.int64)
    tensor_input = tx.Input(tf.constant([2]), 1, dtype=tf.int64)

    lookup = tx.Lookup(inputs,
                       seq_size,
                       embedding_shape=[input_dim, embed_dim],
                       batch_size=batch_size,
                       batch_padding=True)

    lookup_from_tensor = lookup.reuse_with(tensor_input)

    v1 = lookup()
    v2 = lookup_from_tensor()

    assert np.shape(v1) == (batch_size, seq_size, embed_dim)
    assert np.shape(v2) == (batch_size, seq_size, embed_dim)
Example #19
def test_lookup_dynamic_sparse_sequence():
    """ Testing Sparse Inputs to Lookup with dynamic
    seq_len passed through an Input layer that acts as
    a scalar parameter (i.e. n_units == 0)
    """
    k = 8
    m = 3
    seq1 = tf.SparseTensor(indices=[[0, 1], [1, 2], [2, 3], [3, 4]],
                           values=[1, 2, 3, 4],
                           dense_shape=[4, k])
    seq2 = tf.SparseTensor(indices=[[0, 1], [1, 2], [2, 3], [3, 3], [4, 4],
                                    [5, 5]],
                           values=[1, 2, 3, 3, 4, 5],
                           dense_shape=[6, k])

    inputs = tx.Input(n_units=k, sparse=True, dtype=tf.int32, constant=False)
    seq_len = tx.Input(init_value=2, shape=[], constant=False)
    assert seq_len.n_units == 0

    lookup = tx.Lookup(inputs, seq_size=seq_len, embedding_shape=[k, m])
    # concat = lookup.as_concat()

    inputs.value = seq1
    inputs()
    # set seq_len to 4
    seq_len.value = 4
    lookup_4 = lookup()
    # (batch, seq_len, embed_dim)
    assert lookup_4.numpy().shape == (1, 4, m)

    # set seq len to 3
    inputs.value = seq2
    seq_len.value = 3
    lookup_4 = lookup()
    # (batch, seq_len, embed_dim)
    assert lookup_4.numpy().shape == (2, 3, 3)
Example #20
def test_lookup_sparse_padding():
    """ Sparse Lookup Padding
    Lookup adds padding if seq_size is greater than the max row index
    in the input SparseTensor

    """
    input_dim = 6
    embed_dim = 4
    seq_size = 3

    sparse_input = tf.SparseTensor(indices=[[0, 1], [0, 3], [1, 0]],
                                   values=[1, 1, 1],
                                   dense_shape=[2, input_dim])
    sparse_input = tx.Constant(sparse_input, input_dim)

    lookup = tx.Lookup(sparse_input,
                       seq_size=seq_size,
                       embedding_shape=[input_dim, embed_dim],
                       batch_size=None,
                       batch_padding=False)

    result = lookup()
    assert tf.sparse.to_dense(sparse_input()).shape == (2, input_dim)
    assert tx.tensor_equal(result[0][-1], tf.zeros([embed_dim]))
Example #21
    def __init__(
        self,
        run_inputs,
        label_inputs,
        eval_label_input,
        ctx_size,
        k_dim,
        ri_tensor_input,
        embed_dim,
        h_dim,
        embed_init=tx.random_uniform(minval=-0.01, maxval=0.01),
        num_h=1,
        h_activation=tx.relu,
        h_init=tx.he_normal_init,
        use_dropout=False,
        embed_dropout=False,
        keep_prob=0.95,
        l2_loss=False,
        l2_loss_coef=1e-5,
        f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
        use_nce=False,
        nce_samples=2,
        nce_noise_amount=0.1,
        noise_input=None,
    ):

        self.embed_dim = embed_dim

        var_reg = []

        # ===============================================
        # RUN GRAPH
        # ===============================================

        with tf.name_scope("run"):

            feature_lookup = tx.Lookup(run_inputs,
                                       seq_size=ctx_size,
                                       lookup_shape=[k_dim, embed_dim],
                                       weight_init=embed_init,
                                       name="lookup")

            self.embeddings = feature_lookup
            var_reg.append(feature_lookup.weights)
            feature_lookup = feature_lookup.as_concat()
            # ===========================================================
            with tf.name_scope("cache_embeddings"):
                # ris = [sign_index.get_ri(sign_index.get_sign(i)) for i in range(len(sign_index))]
                # self.all_ris = ris_to_sp_tensor_value(ri_seq=ris,
                #                                      dim=sign_index.generator.dim,
                #                                      all_positive=not sign_index.generator.symmetric)

                all_embeddings = tx.Linear(
                    ri_tensor_input,
                    n_units=self.embed_dim,
                    shared_weights=self.embeddings.weights,
                    bias=False,
                    name='all_features')

                # caches all embedding computation for run/eval
                self.all_embeddings = tx.VariableLayer(all_embeddings,
                                                       trainable=False)
            # ===========================================================
            last_layer = feature_lookup
            h_layers = []
            for i in range(num_h):
                hi = tx.FC(last_layer,
                           n_units=h_dim,
                           activation=h_activation,
                           weight_init=h_init,
                           name="h_{i}".format(i=i))
                h_layers.append(hi)
                last_layer = hi
                var_reg.append(hi.linear.weights)

            self.h_layers = h_layers

            # feature prediction for Energy-Based Model

            f_prediction = tx.Linear(last_layer,
                                     embed_dim,
                                     f_init,
                                     bias=True,
                                     name="f_predict")
            var_reg.append(f_prediction.weights)

            # RI DECODING ===============================================
            # shape is (?,?) because batch size is unknown and vocab size is unknown
            # when we build the graph
            run_logits = tx.Linear(f_prediction,
                                   n_units=None,
                                   shared_weights=self.all_embeddings.variable,
                                   transpose_weights=True,
                                   bias=False,
                                   name="logits")

            # ===========================================================
            embed_prob = tx.Activation(run_logits,
                                       tx.softmax,
                                       name="run_output")

        # ===============================================
        # TRAIN GRAPH
        # ===============================================
        with tf.name_scope("train"):
            if use_dropout and embed_dropout:
                feature_lookup = feature_lookup.reuse_with(run_inputs)
                last_layer = tx.Dropout(feature_lookup, probability=keep_prob)
            else:
                last_layer = feature_lookup

            # add dropout between each layer
            for layer in h_layers:
                h = layer.reuse_with(last_layer)
                if use_dropout:
                    h = tx.Dropout(h, probability=keep_prob)
                last_layer = h

            f_prediction = f_prediction.reuse_with(last_layer)

            train_logits = run_logits.reuse_with(f_prediction,
                                                 name="train_logits")
            train_embed_prob = tx.Activation(train_logits,
                                             tx.softmax,
                                             name="train_output")

            #  convert labels to random indices
            model_prediction = f_prediction.tensor

            if use_nce:
                train_loss = tx.sparse_cnce_loss(
                    label_features=label_inputs.tensor,
                    noise_features=noise_input.tensor,
                    model_prediction=model_prediction,
                    weights=feature_lookup.weights,
                    num_samples=nce_samples,
                    noise_ratio=nce_noise_amount)
            else:
                one_hot_dense = tx.dense_one_hot(
                    column_indices=label_inputs[0].tensor,
                    num_cols=label_inputs[1].tensor)
                train_loss = tx.categorical_cross_entropy(
                    one_hot_dense, train_logits.tensor)

                train_loss = tf.reduce_mean(train_loss)

            if l2_loss:
                losses = [tf.nn.l2_loss(var) for var in var_reg]
                train_loss = train_loss + l2_loss_coef * tf.add_n(losses)

        # ===============================================
        # EVAL GRAPH
        # ===============================================
        with tf.name_scope("eval"):
            one_hot_dense = tx.dense_one_hot(
                column_indices=eval_label_input[0].tensor,
                num_cols=label_inputs[1].tensor)
            train_loss = tx.categorical_cross_entropy(one_hot_dense,
                                                      train_logits.tensor)
            eval_loss = tx.categorical_cross_entropy(one_hot_dense,
                                                     run_logits.tensor)
            eval_loss = tf.reduce_mean(eval_loss)

        if use_nce:
            train_loss_in = [label_inputs, noise_input]
        else:
            train_loss_in = label_inputs

        # BUILD MODEL
        super().__init__(run_inputs=run_inputs,
                         run_outputs=embed_prob,
                         train_inputs=run_inputs,
                         train_outputs=train_embed_prob,
                         eval_inputs=run_inputs,
                         eval_outputs=embed_prob,
                         train_out_loss=train_loss,
                         train_in_loss=train_loss_in,
                         eval_out_score=eval_loss,
                         eval_in_score=eval_label_input,
                         update_inputs=ri_tensor_input)
Example #22
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import tensorflow as tf
import tensorx as tx

n_features = 3
embed_size = 4
cell_units = 2
seq_size = 3
batch_size = 2

inputs = tx.TensorLayer(np.random.random([batch_size, seq_size]),
                        n_units=seq_size,
                        dtype=tf.int32)
lookup = tx.Lookup(inputs,
                   seq_size=seq_size,
                   lookup_shape=[n_features, embed_size])
seq = lookup.permute_batch_time()

# first step of a sequence
t1 = seq[0]

ks_cell = tf.keras.layers.LSTMCell(units=cell_units)
tf_cell = tf.nn.rnn_cell.LSTMCell(num_units=cell_units, state_is_tuple=True)
tx_cell = tx.LSTMCell(t1, n_units=cell_units)

kernel_w = [
    tx_cell.w_i.weights, tx_cell.w_c.weights, tx_cell.w_f.weights,
    tx_cell.w_o.weights
]
kernel_u = [
Example #23
generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]
ri_tensor = RandomIndexTensor.from_ri_list(ris, k, s)

sp_values = ri_tensor.gather(flat_labels).to_sparse_tensor()
sp_indices = tx.sparse_indices(sp_values)

print(sp_values.get_shape())
print(tensor_util.constant_value_as_shape(sp_values.dense_shape))
print(tensor_util.constant_value(sp_values.dense_shape))
print(sp_values.dense_shape[-1].eval())
print(tf.shape(sp_values).eval())

lookup = tx.Lookup(tx.TensorLayer(sp_values),
                   seq_size=1,
                   lookup_shape=[k, embed_size])

linear = tx.Linear(tx.TensorLayer(sp_values),
                   n_units=k,
                   shared_weights=lookup.weights)

w = embedding_lookup_sparse(params=lookup.weights,
                            sp_ids=sp_indices,
                            sp_weights=sp_values,
                            combiner="sum",
                            partition_strategy="mod")

tf.global_variables_initializer().run()

np.testing.assert_array_equal(w.eval(), tx.Flatten(lookup).eval())
Example #24
import tensorflow as tf
import tensorx as tx
from deepsign.models.nrp import RandomIndexTensor
from deepsign.rp.ri import Generator, RandomIndex
import numpy as np

sess = tf.InteractiveSession()

vocab_size = 8
k = 6
s = 2
embed_dim = 3

generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]
ri_tensor = RandomIndexTensor.from_ri_list(ris, k, s)
ri_input = ri_tensor.gather([[0, 1, 0], [1, 2, 0]])

sp = ri_input.to_sparse_tensor()
sp = tx.TensorLayer(sp, k)
print(sp.tensor.eval())

embed = tx.Lookup(sp, seq_size=3, lookup_shape=[k, embed_dim])

tf.global_variables_initializer().run()

print(np.shape(embed.tensor.eval()))
Example #25
    def __init__(self,
                 inputs,
                 labels,
                 vocab_size,
                 embed_dim,
                 h_dim,
                 embed_init=tx.zeros_init(),
                 logit_init=tx.glorot_uniform(),
                 num_h=1,
                 h_activation=tx.tanh,
                 h_init=tx.glorot_uniform(),
                 w_dropconnect=None,
                 u_dropconnect=None,
                 r_dropout=0.4,
                 y_dropout=0.4,
                 embed_dropout=0.3,
                 other_dropout=0.3,
                 l2_loss=False,
                 l2_weight=1e-5,
                 use_f_predict=False,
                 f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 embed_share=False,
                 logit_bias=False,
                 use_nce=False,
                 nce_samples=10,
                 skip_connections=False):
        if not isinstance(inputs, tx.Input):
            raise TypeError("inputs must be an Input layer")
        if not isinstance(labels, tx.Input):
            raise TypeError("labels must be an Input layer")
        self.inputs = inputs
        self.labels = labels

        if inputs.dtype != tf.int32 and inputs.dtype != tf.int64:
            raise TypeError(
                "Invalid dtype for input: expected int32 or int64, got {}".
                format(inputs.dtype))

        if num_h < 0:
            raise ValueError("num hidden should be >= 0")

        # ===============================================
        # RUN GRAPH
        # ===============================================
        var_reg = []

        with tf.name_scope("run"):
            # feature lookup

            embeddings = tx.Lookup(inputs,
                                   seq_size=None,
                                   lookup_shape=[vocab_size, embed_dim],
                                   weight_init=embed_init)
            var_reg.append(embeddings.weights)
            feature_lookup = embeddings.permute_batch_time()

            last_layer = feature_lookup

            cell_proto = tx.LSTMCell.proto(
                n_units=h_dim,
                activation=h_activation,
                gate_activation=tx.hard_sigmoid,
                w_init=h_init,
                u_init=h_init,
                w_dropconnect=w_dropconnect,
                u_dropconnect=u_dropconnect,
                r_dropout=r_dropout,
                x_dropout=None,
                y_dropout=y_dropout,
                regularized=False,
                name="cell",
            )

            lstm_layers = []
            for i in range(num_h):
                lstm_layer = tx.RNN(last_layer,
                                    cell_proto=cell_proto,
                                    regularized=False,
                                    stateful=True,
                                    name="LSTM_{}".format(i + 1))

                lstm_layers.append(lstm_layer)

                var_reg += [wi.weights for wi in lstm_layer.cell.w]
                var_reg += [ui.weights for ui in lstm_layer.cell.u]

                last_layer = lstm_layer

            # last time step is the state used to make the prediction
            # last_layer = tx.Reshape(last_layer, [-1, h_dim])

            # TODO this is not consistent with locked dropout for the last layer
            # where the same mask should be applied across time steps
            # to do this I need either y_dropout to be available or some sort of map
            # operation I can use with layers outputting 3D tensors
            # something equivalent to https://keras.io/layers/wrappers/ which applies
            # a layer to every temporal slice of an input. They implement this the same way
            # they implement an RNN

            # feature prediction for Energy-Based Model
            if use_f_predict:
                last_layer = tx.Linear(last_layer,
                                       embed_dim,
                                       f_init,
                                       add_bias=True,
                                       name="f_predict")
                # proto = tx.GRUCell.proto(n_units=embed_dim,
                #                          activation=h_activation,
                #                          gate_activation=tx.hard_sigmoid,
                #                          w_init=h_init,
                #                          u_init=h_init,
                #                          w_dropconnect=w_dropconnect,
                #                          u_dropconnect=u_dropconnect,
                #                          r_dropout=r_dropout,
                #                          x_dropout=None,
                #                          y_dropout=y_dropout,
                #                          regularized=False)
                # last_layer1 = tx.RNN(last_layer, cell_proto=proto, regularized=False, stateful=False)
                # last_layer2 = last_layer1.reuse_with(last_layer, reverse=True)
                # last_layer = tx.Add(last_layer1, last_layer2)
                # last_layer = tx.Module(last_layer, last_layer)
                var_reg += last_layer.variables
                # var_reg.append(last_layer.weights)
                f_predict = last_layer

            shared_weights = feature_lookup.weights if embed_share else None
            transpose_weights = embed_share
            logit_init = logit_init if not embed_share else None
            run_logits = tx.Linear(last_layer,
                                   n_units=vocab_size,
                                   weight_init=logit_init,
                                   shared_weights=shared_weights,
                                   transpose_weights=transpose_weights,
                                   add_bias=logit_bias,
                                   name="logits")

            if not embed_share:
                var_reg.append(run_logits.weights)

            run_output = tx.Activation(run_logits,
                                       tx.softmax,
                                       name="run_output")

            # ===============================================
            # TRAIN GRAPH
            # ===============================================
            with tf.name_scope("train"):
                embeddings = embeddings.reuse_with(inputs)
                feature_lookup = embeddings.permute_batch_time()

                if embed_dropout:
                    feature_lookup = tx.Dropout(feature_lookup,
                                                probability=embed_dropout,
                                                name="drop_features")

                last_layer = feature_lookup

                for i in range(num_h):
                    lstm_layer = lstm_layers[i].reuse_with(last_layer,
                                                           regularized=True)
                    last_layer = lstm_layer

                # last_layer = tx.Reshape(last_layer, [-1, h_dim])

                # feature prediction for Energy-Based Model
                if use_f_predict:
                    # last_layer = f_predict.reuse_with(last_layer)
                    last_layer = f_predict.reuse_with(last_layer,
                                                      regularized=True)

                last_layer = tx.Dropout(last_layer,
                                        probability=other_dropout,
                                        locked=False)

                train_logits = run_logits.reuse_with(last_layer,
                                                     name="train_logits")

                train_output = tx.Activation(train_logits,
                                             tx.softmax,
                                             name="train_output")

            def categorical_loss(labels, logits):
                # labels come as a batch of classes [[1,2],[3,4]] -> [1,3,2,4]; time steps are ordered to match the logits
                labels = tx.Transpose(labels)
                labels = tx.Reshape(labels, [-1])
                labels = tx.dense_one_hot(labels, num_cols=vocab_size)
                loss = tx.categorical_cross_entropy(labels=labels,
                                                    logits=logits)

                return tf.reduce_mean(loss)

            def nce_loss(labels, weights, bias, predict):
                noise = uniform_sampler(labels, 1, nce_samples, True,
                                        vocab_size)
                loss = tf.nn.nce_loss(weights=weights,
                                      biases=bias,
                                      inputs=predict,
                                      labels=labels,
                                      num_sampled=nce_samples,
                                      num_classes=vocab_size,
                                      num_true=1,
                                      sampled_values=noise)
                return tf.reduce_mean(loss)

            if use_nce:
                bias = tx.VariableLayer(var_shape=[vocab_size],
                                        name="nce_bias")

                # wraps a layer to expose the weights as a layer but with the layer as its input
                nce_weights = tx.WrapLayer(embeddings,
                                           n_units=embeddings.n_units,
                                           wrap_fn=lambda x: x.weights,
                                           layer_fn=True)
                train_loss = tx.LambdaLayer(labels,
                                            nce_weights,
                                            bias,
                                            last_layer,
                                            apply_fn=nce_loss,
                                            name="nce_loss")
            else:
                train_loss = tx.LambdaLayer(labels,
                                            train_logits,
                                            apply_fn=categorical_loss,
                                            name="train_loss")

            if l2_loss:
                l2_losses = [tf.nn.l2_loss(var) for var in var_reg]
                train_loss = tx.LambdaLayer(
                    train_loss,
                    apply_fn=lambda x: x + l2_weight * tf.add_n(l2_losses),
                    name="train_loss_l2")

        # ===============================================
        # EVAL GRAPH
        # ===============================================
        with tf.name_scope("eval"):
            eval_loss = tx.LambdaLayer(labels,
                                       run_logits,
                                       apply_fn=categorical_loss,
                                       name="eval_loss")

        self.stateful_layers = lstm_layers
        # BUILD MODEL
        super().__init__(run_outputs=run_output,
                         run_inputs=inputs,
                         train_inputs=[inputs, labels],
                         train_outputs=train_output,
                         train_loss=train_loss,
                         eval_inputs=[inputs, labels],
                         eval_outputs=run_output,
                         eval_score=eval_loss)
print([vocab[w] for w in vocab.keys()])
ri_dict = {vocab[word]: generator.generate() for word in vocab.keys()}

tokens = [vocab[w] for w in tokens]
data_it = window_it(tokens, seq_size)
data_it = batch_it(data_it, batch_size)

vocab_tensor = [ri_dict[i] for i in range(len(vocab))]
sp_ri = deepsign.data.transform.ris_to_sp_tensor_value(vocab_tensor, dim=k)

inputs = tx.Input(n_units=2)
ri_inputs = tx.gather_sparse(sp_ri, inputs.tensor)
ri_inputs = tx.TensorLayer(ri_inputs, k)

embed = tx.Lookup(ri_inputs, seq_size, [k, embed_dim])

# logits: take the embeddings and get the features for all random indexes

ri_layer = tx.TensorLayer(sp_ri, n_units=k)
logits = tx.Linear(input_layer=ri_layer,
                   n_units=embed_dim,
                   shared_weights=embed.weights,
                   bias=True)

single_input = tx.Input(1)
ri_input = tx.TensorLayer(tx.gather_sparse(sp_ri, single_input.tensor), k)

logit = logits.reuse_with(ri_input)

session = tf.InteractiveSession()
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

input_size = 10000
var_size = 500
batch_size = 20
seq_size = 30

inputs = tf.constant(np.random.randint(0, 10, size=[batch_size, seq_size]), name="inputs")
targets = tf.constant(np.random.randint(0, 10, size=[batch_size * seq_size]), name="targets")
targets = tf.one_hot(targets, input_size)

inputs = tx.TensorLayer(inputs)

with jit_scope():
    with tf.name_scope("scope1"):
        lookup = tx.Lookup(inputs, seq_size=seq_size, lookup_shape=[input_size, var_size], name="lookup")
        seq = lookup.permute_batch_time()
        seq = tx.Reshape(seq, [-1, var_size], name="flatten")
        mul1 = tx.Linear(seq, input_size, name="test_logits")
        mul2 = tx.Linear(seq,
                         n_units=input_size,
                         shared_weights=lookup.weights,
                         transpose_weights=True,
                         name="shared_embeddings")

    with tf.name_scope("scope2"):
        mul1 = mul1.reuse_with(seq)
        mul2 = mul2.reuse_with(seq)

rnd_loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=targets, logits=mul1))
rnd_loss2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=targets, logits=mul2))
Example #28
                       name=name)


"""
Test staged implementation
"""
n_hidden = 20
embed_dim = 10
seq_size = 2
vocab_size = 10000
feature_shape = [vocab_size, embed_dim]

loss_inputs = tx.Input(1, dtype=tf.int32)
in_layer = tx.Input(seq_size, dtype=tf.int32)

lookup = tx.Lookup(in_layer, seq_size=seq_size, lookup_shape=feature_shape)
# [batch x seq_size * feature_shape[1]]

# reshape to [batch x seq_size x feature_shape[1]]
# lookup_to_seq =
# I was thinking that this reshape could be done automatically based on the input shape of
# the tensor fed to the RNN cell layer
out = tx.WrapLayer(
    lookup,
    embed_dim,
    shape=[None, seq_size, embed_dim],
    wrap_fn=lambda tensor: tf.reshape(tensor, [-1, seq_size, embed_dim]))

out = tx.WrapLayer(out, embed_dim, wrap_fn=lambda tensor: tensor[0])
# apply rnn cell to single input batch
Example #29
def test_rnn_layer():
    n_features = 5
    embed_size = 4
    hidden_dim = 3
    seq_size = 3
    batch_size = 2

    inputs = tx.Input(np.random.random([batch_size, seq_size]),
                      n_units=seq_size,
                      dtype=tf.int32)
    lookup = tx.Lookup(inputs,
                       seq_size=seq_size,
                       embedding_shape=[n_features, embed_size])
    seq = lookup.permute_batch_time()

    ones_state = tf.ones([batch_size, hidden_dim])
    zero_state = (tf.zeros([batch_size, hidden_dim]))

    rnn_proto = tx.RNNCell.config(n_units=hidden_dim)

    rnn1 = tx.RNN(seq,
                  cell_config=rnn_proto,
                  previous_state=ones_state,
                  return_state=True)
    rnn2 = rnn1.reuse_with(seq)

    #  the problem with the RNN layer is that it uses modules that require
    #  all the params to output the right answer;
    #  we need to supply default values for the rest (or all) of the inputs
    out1, last1 = rnn1()
    out2, last2 = rnn2()

    assert tx.tensor_equal(out1, out2)
    assert tx.tensor_equal(last1, last2)

    rnn3 = rnn1.reuse_with(seq, zero_state)
    rnn4 = rnn3.reuse_with(seq)
    rnn5 = rnn4.reuse_with(seq, ones_state)

    assert tx.tensor_equal(rnn2.previous_state, rnn1.previous_state)
    assert tx.tensor_equal(rnn3.previous_state, rnn4.previous_state)

    out3, last3 = rnn3()
    out4, last4 = rnn4()

    assert tx.tensor_equal(out3, out4)
    assert tx.tensor_equal(last3, last4)

    cell_state1 = rnn1.cell.previous_state[0]()
    cell_state2 = rnn2.cell.previous_state[0]()
    cell_state3 = rnn3.cell.previous_state[0]()
    cell_state4 = rnn4.cell.previous_state[0]()

    assert len(rnn1.cell.previous_state) == 1

    assert tx.tensor_equal(cell_state1, cell_state2)
    assert tx.tensor_equal(cell_state3, cell_state4)

    assert not tx.tensor_equal(out1, out3)

    out5, last5 = rnn5()

    assert tx.tensor_equal(out1, out5)
    assert tx.tensor_equal(last1, last5)

"""
Test staged implementation
"""
n_hidden = 20
embed_dim = 3
seq_size = 2
vocab_size = 10000
feature_shape = [vocab_size, embed_dim]

loss_inputs = tx.Input(1, dtype=tf.int32)
in_layer = tx.Input(seq_size, dtype=tf.int32)

lookup = tx.Lookup(in_layer,
                   seq_size=seq_size,
                   lookup_shape=feature_shape,
                   as_sequence=True)

lookup_flat = lookup.reuse_with(in_layer, as_sequence=False)

with tf.name_scope("rnn"):
    rnn1 = RNNCell(lookup[0], 4, name="rnn1")
    rnn2 = rnn1.reuse_with(lookup[1], state=rnn1, name="rnn2")

# setup optimizer
optimizer = tx.AMSGrad(learning_rate=0.01)

model = tx.Model(run_inputs=in_layer, run_outputs=[rnn1, rnn2])
runner = tx.ModelRunner(model)

runner.set_session(runtime_stats=True)