Example 1
    def _build_graph(self, layer, previous_state):
        with layer_scope(self):

            if previous_state is None:
                input_batch = tf.shape(layer.tensor)[0]
                zero_state = tf.zeros([input_batch, self.n_units])
                self.previous_state = tx.TensorLayer(zero_state, self.n_units)

            if self.share_state_with is None:
                # linear maps for the r gate: one for the current input, one for the previous state
                # we could add the bias at the end, but this way we define a single bias for the r unit
                self.r_current_w = tx.Linear(layer,
                                             self.n_units,
                                             bias=True,
                                             weight_init=self.init,
                                             name="r_current_w")
                self.r_recurrent_w = tx.Linear(self.previous_state,
                                               self.n_units,
                                               bias=False,
                                               weight_init=self.recurrent_init,
                                               name="r_current_w")

                self.u_current_w = tx.Linear(layer,
                                             self.n_units,
                                             bias=True,
                                             weight_init=self.init,
                                             name="u_current_w")
                self.u_recurrent_w = tx.Linear(self.previous_state,
                                               self.n_units,
                                               bias=False,
                                               weight_init=self.recurrent_init,
                                               name="u_current_w")

                self.current_w = tx.Linear(layer,
                                           self.n_units,
                                           bias=True,
                                           weight_init=self.init,
                                           name="current_w")
                self.recurrent_w = tx.Linear(self.previous_state,
                                             self.n_units,
                                             bias=False,
                                             weight_init=self.recurrent_init,
                                             name="recurrent_w")

                # kernel_gate = tx.Activation()

                # candidate state: activation over the current input transformation,
                # with the recurrent transformation kept as the recurrent kernel
                kernel_act = tx.Activation(self.current_w, self.activation)
                self.kernel = tx.Compose(self.current_w, kernel_act)
                self.recurrent_kernel = self.recurrent_w

            else:
                self.kernel = self.share_state_with.kernel.reuse_with(layer)
                self.recurrent_kernel = self.share_state_with.recurrent_kernel.reuse_with(
                    self.previous_state)

            r_state = tx.Add(self.r_current_w, self.r_recurrent_w)
            r_state = tx.Bias(r_state)
            r_gate = tx.Activation(r_state, fn=tx.sigmoid, name="r_gate")

            # """Gated recurrent unit (GRU) with nunits cells."""
            return self.kernel.tensor + self.recurrent_kernel.tensor
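
# A minimal standalone sketch (not from the original code) of the GRU update these
# linear maps are building toward, using the standard GRU equations with plain
# TensorFlow ops; the arguments stand in for the .tensor outputs of the tx.Linear
# layers defined above (rx = r_current_w, rh = r_recurrent_w, and so on):
def gru_update(rx, rh, ux, uh, cx, ch, h_prev, act=tf.tanh):
    r = tf.sigmoid(rx + rh)             # reset gate
    u = tf.sigmoid(ux + uh)             # update gate
    c = act(cx + r * ch)                # candidate state: r scales the recurrent part
    return u * h_prev + (1.0 - u) * c   # interpolate previous state and candidate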
Example 2
    def _build_graph(self, layer, previous_state):
        with layer_scope(self):

            if previous_state is None:
                input_batch = tf.shape(layer.tensor)[0]
                zero_state = tf.zeros([input_batch, self.n_units])
                self.previous_state = tx.TensorLayer(zero_state, self.n_units)

            if self.share_state_with is None:
                kernel_linear = tx.Linear(layer,
                                          self.n_units,
                                          bias=True,
                                          weight_init=self.init,
                                          name="linear_kernel")
                kernel_act = tx.Activation(kernel_linear, self.activation)
                self.kernel = tx.Compose([kernel_linear, kernel_act])

                self.recurrent_kernel = tx.Linear(
                    self.previous_state,
                    self.n_units,
                    bias=False,
                    weight_init=self.recurrent_init,
                    name="recurrent_kernel")
            else:
                self.kernel = self.share_state_with.kernel.reuse_with(layer)
                self.recurrent_kernel = self.share_state_with.recurrent_kernel.reuse_with(
                    self.previous_state)

            # TODO this might be wrong, I might need to couple the activation: act(kernel + recurrent + bias)
            # TODO it is wrong https://github.com/tensorflow/tensorflow/blob/r1.8/tensorflow/python/ops/rnn_cell_impl.py
            # """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
            return self.kernel.tensor + self.recurrent_kernel.tensor
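
# The TODOs above are right: the basic TensorFlow RNN cell couples the activation over
# the whole sum, output = new_state = act(W * input + U * state + B). A minimal sketch
# of that coupling with plain TensorFlow ops (the argument names are illustrative, they
# are not attributes of the class above):
def basic_rnn_update(w_input, u_state, bias, act=tf.tanh):
    # w_input = W * input and u_state = U * state, both shaped [batch, n_units]
    return act(w_input + u_state + bias)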
Example 3
import tensorflow as tf
import tensorx as tx
from deepsign.models.nrp import RandomIndexTensor
from deepsign.rp.ri import Generator, RandomIndex
import deepsign.data.transform
import numpy as np

sess = tf.InteractiveSession()

vocab_size = 8
k = 6
s = 2
embed_dim = 3

generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]
ri_tensor = RandomIndexTensor.from_ri_list(ris, k, s)
ri_input = ri_tensor.gather([[0, 1, 0], [1, 2, 0]])

sp = ri_input.to_sparse_tensor()
sp = tx.TensorLayer(sp, k)
print(sp.tensor.eval())

embed = tx.Lookup(sp, seq_size=3, lookup_shape=[k, 3])

tf.global_variables_initializer().run()

print(np.shape(embed.tensor.eval()))
batch_size = 2
generator = Generator(k, s)

print([vocab[w] for w in vocab.keys()])
ri_dict = {vocab[word]: generator.generate() for word in vocab.keys()}

tokens = [vocab[w] for w in tokens]
data_it = window_it(tokens, seq_size)
data_it = batch_it(data_it, batch_size)

vocab_tensor = [ri_dict[i] for i in range(len(vocab))]
sp_ri = deepsign.data.transform.ris_to_sp_tensor_value(vocab_tensor, dim=k)

inputs = tx.Input(n_units=2)
ri_inputs = tx.gather_sparse(sp_ri, inputs.tensor)
ri_inputs = tx.TensorLayer(ri_inputs, k)

embed = tx.Lookup(ri_inputs, seq_size, [k, embed_dim])

# logits: take the embeddings and get the features for all random indexes

ri_layer = tx.TensorLayer(sp_ri, n_units=k)
logits = tx.Linear(input_layer=ri_layer,
                   n_units=embed_dim,
                   shared_weights=embed.weights,
                   bias=True)

single_input = tx.Input(1)
ri_input = tx.TensorLayer(tx.gather_sparse(sp_ri, single_input.tensor), k)

logit = logits.reuse_with(ri_input)
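
# The "logits" comment above describes weight tying: each logit is the dot product
# between a predicted feature vector and one row of the embedding table, so the Lookup
# weights double as the output projection. A minimal sketch with illustrative shapes
# (vocab_size=8, embed_dim=3, batch=2), not taken from the script above:
embeddings_sketch = tf.random_normal([8, 3])   # [vocab_size, embed_dim]
predicted_sketch = tf.random_normal([2, 3])    # [batch, embed_dim]
tied_logits = tf.matmul(predicted_sketch, embeddings_sketch, transpose_b=True)  # [batch, vocab_size]
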
import os

from tensorflow.contrib.compiler import xla

jit_scope = tf.contrib.compiler.jit.experimental_jit_scope

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

input_size = 10000
var_size = 500
batch_size = 20
seq_size = 30

inputs = tf.constant(np.random.randint(0, 10, size=[batch_size, seq_size]), name="inputs")
targets = tf.constant(np.random.randint(0, 10, size=[batch_size * seq_size]), name="targets")
targets = tf.one_hot(targets, input_size)

inputs = tx.TensorLayer(inputs)

with jit_scope():
    with tf.name_scope("scope1"):
        lookup = tx.Lookup(inputs, seq_size=seq_size, lookup_shape=[input_size, var_size], name="lookup")
        seq = lookup.permute_batch_time()
        seq = tx.Reshape(seq, [-1, var_size], name="flatten")
        mul1 = tx.Linear(seq, input_size, name="test_logits")
        mul2 = tx.Linear(seq,
                         n_units=input_size,
                         shared_weights=lookup.weights,
                         transpose_weights=True,
                         name="shared_embeddings")

    with tf.name_scope("scope2"):
        mul1 = mul1.reuse_with(seq)
Example 6
# *************************************
generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]

ri_tensor = ris_to_sp_tensor_value(ris, k)
ri_tensor = tf.convert_to_tensor_or_sparse_tensor(ri_tensor)

# *************************************
#   DUMMY INPUT DATA
# *************************************
# batch of word sequence indices
ctx_size = 3
input_data = np.array([[0, 1, 2], [0, 2, 2], [1, 3, 5], [3, 0, 2]])

input_labels = tf.constant(np.array([[3], [1], [10], [25]], dtype=np.int64))
input_labels = tx.TensorLayer(input_labels, n_units=1)

input_layer = tx.TensorLayer(input_data, n_units=3, dtype=tf.int64)

ri_layer = tx.TensorLayer(ri_tensor, k)
ri_inputs = tx.gather_sparse(ri_layer.tensor, input_layer.tensor)
ri_inputs = tx.TensorLayer(ri_inputs, k)
lookup = tx.Lookup(ri_inputs,
                   ctx_size, [k, embed_size],
                   weight_init=tx.random_normal(0, 0.1),
                   name="lookup")
feature_predict = tx.Linear(lookup, embed_size, bias=True)

all_embeddings = tx.Linear(ri_layer,
                           embed_size,
                           shared_weights=lookup.weights,
Example 7
                   lookup_shape=feature_shape)
# [batch x seq_size * feature_shape[1]]

# reshape to [batch x seq_size x feature_shape[1]]
lookup_to_seq = tf.reshape(lookup.tensor, [-1, seq_size, embed_dim])

# type of rnn cell
cell = tf.nn.rnn_cell.LSTMCell(num_units=n_hidden, state_is_tuple=True)
val, state = tf.nn.dynamic_rnn(cell, lookup_to_seq, dtype=tf.float32)

val = tf.transpose(val, [1, 0, 2])

# last = tf.gather(val, int(val.get_shape()[0]) - 1)
last = val[-1]

lstm_out = tx.TensorLayer(last, n_hidden)
logits = tx.Linear(lstm_out, vocab_size, bias=True)
out = tx.Activation(logits, tx.softmax)

labels = tx.dense_one_hot(loss_inputs.tensor, vocab_size)
loss = tf.reduce_mean(tx.categorical_cross_entropy(labels=labels, logits=logits.tensor))

# setup optimizer
optimizer = tx.AMSGrad(learning_rate=0.01)

model = tx.Model(run_inputs=in_layer, run_outputs=out,
                 train_inputs=in_layer, train_outputs=out,
                 train_in_loss=loss_inputs, train_out_loss=loss,
                 eval_out_score=loss, eval_in_score=loss_inputs)

print(model.feedable_train())
Example 8
    def __init__(self,
                 ctx_size,
                 vocab_size,
                 k_dim,
                 s_active,
                 ri_tensor,
                 embed_dim,
                 h_dim,
                 embed_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 logit_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 num_h=1,
                 h_activation=tx.relu,
                 h_init=tx.he_normal_init,
                 use_dropout=False,
                 embed_dropout=False,
                 keep_prob=0.95,
                 l2_loss=False,
                 l2_loss_coef=1e-5,
                 f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 embed_share=True,
                 logit_bias=False,
                 use_nce=False,
                 nce_samples=100,
                 noise_level=0.1):

        run_inputs = tx.Input(ctx_size, dtype=tf.int32)
        loss_inputs = tx.Input(n_units=1, dtype=tf.int64)
        eval_inputs = loss_inputs

        if run_inputs.dtype != tf.int32 and run_inputs.dtype != tf.int64:
            raise TypeError(
                "Invalid dtype for input: expected int32 or int64, got {}".
                format(run_inputs.dtype))

        if num_h < 0:
            raise ValueError("num hidden should be >= 0")

        # ===============================================
        # RUN GRAPH
        # ===============================================
        var_reg = []

        with tf.name_scope("run"):
            # RI ENCODING ===============================================
            # convert ids to RIs: gather a set of random indexes based on the ids in a sequence
            # ri_layer = tx.TensorLayer(ri_tensor, n_units=k_dim)
            # ri_inputs = tx.gather_sparse(ri_layer.tensor, run_inputs.tensor)
            # ri_inputs = tx.TensorLayer(ri_inputs, n_units=k_dim)
            with tf.name_scope("ri_encode"):
                if isinstance(ri_tensor, RandomIndexTensor):
                    ri_layer = tx.TensorLayer(ri_tensor.to_sparse_tensor(),
                                              k_dim,
                                              shape=[vocab_size, k_dim])

                    ri_inputs = ri_tensor.gather(run_inputs.tensor)
                    ri_inputs = ri_inputs.to_sparse_tensor()
                    ri_inputs = tx.TensorLayer(
                        ri_inputs,
                        k_dim,
                        shape=[ri_inputs.get_shape()[0], k_dim])
                # ri_tensor is a sparse tensor
                else:
                    raise TypeError(
                        "please supply RandomIndexTensor instead of sparse Tensor"
                    )
                    # ri_layer = tx.TensorLayer(ri_tensor, k_dim)
                    # ri_inputs = tx.gather_sparse(ri_layer.tensor, run_inputs.tensor)
                    # ri_inputs = tx.TensorLayer(ri_inputs, k_dim)

            feature_lookup = tx.Lookup(ri_inputs,
                                       ctx_size, [k_dim, embed_dim],
                                       embed_init,
                                       name="lookup")
            self.embeddings = feature_lookup
            var_reg.append(feature_lookup.weights)
            feature_lookup = feature_lookup.as_concat()
            # ===========================================================

            last_layer = feature_lookup
            h_layers = []
            for i in range(num_h):
                h_i = tx.Linear(last_layer,
                                h_dim,
                                h_init,
                                bias=True,
                                name="h_{i}_linear".format(i=i))
                h_a = tx.Activation(h_i, h_activation)
                h = tx.Compose(h_i, h_a, name="h_{i}".format(i=i))
                h_layers.append(h)
                last_layer = h
                var_reg.append(h_i.weights)

            self.h_layers = h_layers

            # feature prediction for Energy-Based Model

            f_prediction = tx.Linear(last_layer,
                                     embed_dim,
                                     f_init,
                                     bias=True,
                                     name="f_predict")
            var_reg.append(f_prediction.weights)

            # RI DECODING ===============================================

            # Shared Embeddings
            if embed_share:
                shared_weights = feature_lookup.weights
                logit_init = None

                # ri_dense = tx.ToDense(ri_layer)
                all_embeddings = tx.Linear(ri_layer,
                                           embed_dim,
                                           logit_init,
                                           shared_weights,
                                           name="all_features",
                                           bias=False)

                # dot product of f_predicted . all_embeddings with bias for each target word
                run_logits = tx.Linear(f_prediction,
                                       vocab_size,
                                       shared_weights=all_embeddings.tensor,
                                       transpose_weights=True,
                                       bias=logit_bias,
                                       name="logits")
            else:
                run_logits = tx.Linear(f_prediction,
                                       vocab_size,
                                       bias=logit_bias,
                                       name="logits")

            if not embed_share:
                var_reg.append(run_logits.weights)
            # ===========================================================

            embed_prob = tx.Activation(run_logits,
                                       tx.softmax,
                                       name="run_output")

        # ===============================================
        # TRAIN GRAPH
        # ===============================================
        with tf.name_scope("train"):
            if use_dropout and embed_dropout:
                feature_lookup = feature_lookup.reuse_with(ri_inputs)
                last_layer = tx.Dropout(feature_lookup, probability=keep_prob)
            else:
                last_layer = feature_lookup

            # add dropout between each layer
            for layer in h_layers:
                h = layer.reuse_with(last_layer)
                if use_dropout:
                    h = tx.Dropout(h, probability=keep_prob)
                last_layer = h

            f_prediction = f_prediction.reuse_with(last_layer)

            train_logits = run_logits.reuse_with(f_prediction,
                                                 name="train_logits")
            train_embed_prob = tx.Activation(train_logits,
                                             tx.softmax,
                                             name="train_output")

            if use_nce:
                # labels
                labels = loss_inputs.tensor

                #  convert labels to random indices
                def labels_to_ri(x):
                    random_index_tensor = ri_tensor.gather(x)
                    sp_features = random_index_tensor.to_sparse_tensor()
                    return sp_features

                model_prediction = f_prediction.tensor

                train_loss = tx.sparse_cnce_loss(
                    label_features=labels,
                    model_prediction=model_prediction,
                    weights=feature_lookup.weights,
                    noise_ratio=noise_level,
                    num_samples=nce_samples,
                    labels_to_sparse_features=labels_to_ri)

            else:
                one_hot = tx.dense_one_hot(column_indices=loss_inputs.tensor,
                                           num_cols=vocab_size)
                train_loss = tx.categorical_cross_entropy(
                    one_hot, train_logits.tensor)

                train_loss = tf.reduce_mean(train_loss)

            if l2_loss:
                losses = [tf.nn.l2_loss(var) for var in var_reg]
                train_loss = train_loss + l2_loss_coef * tf.add_n(losses)

        # ===============================================
        # EVAL GRAPH
        # ===============================================
        with tf.name_scope("eval"):
            one_hot = tx.dense_one_hot(column_indices=eval_inputs.tensor,
                                       num_cols=vocab_size)
            eval_loss = tx.categorical_cross_entropy(one_hot,
                                                     run_logits.tensor)
            eval_loss = tf.reduce_mean(eval_loss)

        # BUILD MODEL
        super().__init__(run_inputs=run_inputs,
                         run_outputs=embed_prob,
                         train_inputs=run_inputs,
                         train_outputs=train_embed_prob,
                         eval_inputs=run_inputs,
                         eval_outputs=embed_prob,
                         train_out_loss=train_loss,
                         train_in_loss=loss_inputs,
                         eval_out_score=eval_loss,
                         eval_in_score=eval_inputs)
out2 = out2.stack()
""" ********************************************************************************************
"""

ta_output = tf.TensorArray(dtype=tf.float32,
                           size=seq_size,
                           tensor_array_name="output_tensors")

# I can't accumulate objects inside a while loop, so I can't use the following in graph mode:
# cells = []
# cells.append(tx.RNNCell(x0, n_units=H, previous_cell=None))
# use cells[0]
# also the states would be wrong, so I must use a TensorArray to pass the states

x0 = ta_input.read(0)
x0 = tx.TensorLayer(x0)
cell = tx.RNNCell(x0, M)
ta_output = ta_output.write(0, cell.tensor)

init_vars = (1, ta_output, cell.state)
cond_rnn = lambda i, *_: tf.less(i, seq_size)

print("creating rnn body")


def rnn_unroll(i, y, state):
    xt = ta_input.read(i)
    xt = tx.TensorLayer(xt)
    c = cell.reuse_with(xt, previous_state=state)
    y = y.write(i, c.tensor)
    return i + 1, y, c.state
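
# The comments above explain the constraint: Python lists can't accumulate layer objects
# across iterations in graph mode, so the unrolling goes through tf.while_loop and a
# TensorArray. A sketch of the loop call that would consume rnn_unroll; only init_vars
# and cond_rnn come from the snippet, the while_loop wiring itself is an assumption:
_, ta_output, last_state = tf.while_loop(cond=cond_rnn,
                                         body=rnn_unroll,
                                         loop_vars=init_vars,
                                         name="rnn_unroll")
# stack the per-step outputs into a single [seq_size, batch, n_units] tensor
outputs = ta_output.stack()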
Example 11
    unique=True,
    range_max=vocab_size,
    seed=None)

sampled, true_expected_count, sampled_expected_count = (
    tf.stop_gradient(s) for s in sampled_values)
sampled = tf.cast(sampled, tf.int64)

all_ids = tf.concat([labels_flat, sampled], 0)

all_ris = tx.gather_sparse(ri_tensor, all_ids)

# Retrieve the true weights and the logits of the sampled weights.

# weights shape is [num_classes, dim]
ri_layer = tx.TensorLayer(ri_tensor, k)
l = tx.Linear(ri_layer, embed_size, weight_init=tx.random_normal(0, 1), bias=True)
weights = l.weights

sp_values = all_ris
sp_indices = tx.sparse_indices(sp_values)

all_w = tf.nn.embedding_lookup_sparse(
    weights, sp_indices, sp_values, combiner="sum")

tf.global_variables_initializer().run()
print("labels flat: ", labels_flat.eval())
print("all labels: ", all_ids.eval())

print("ri_tensor \n", all_ris.eval())
Example 12
W: width of the image
C: number of channels of the image (ex: 3 for RGB, 1 for grayscale...)

since we're processing vector representations

N == batch_size == 2
H == 1 (we're working with vectors)
W == input_dim == 2
C == channels == 1

NWC format: channels come last, and we only use one
"""

x = tf.reshape(x_concat, [batch_size, seq_size, input_dim])

x_layer = tx.TensorLayer(x, input_dim)

print(x.eval())
print(x_layer.tensor)

filters = tf.get_variable("filters",
                          shape=filter_shape,
                          dtype=tf.float32,
                          initializer=tf.initializers.random_uniform(-1, 1))

filters = tf.ones(filter_shape)

c_layer = tx.Conv1D(x_layer,
                    num_filters,
                    kernel_size,
                    shared_filters=filters,
Example 13
    def __init__(self,
                 ctx_size,
                 vocab_size,
                 k_dim,
                 ri_tensor: RandomIndexTensor,
                 embed_dim,
                 embed_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 x_to_f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 logit_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 embed_share=True,
                 logit_bias=False,
                 use_gate=True,
                 use_hidden=False,
                 h_dim=100,
                 h_activation=tx.elu,
                 h_init=tx.he_normal_init(),
                 h_to_f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
                 use_dropout=True,
                 embed_dropout=False,
                 keep_prob=0.95,
                 l2_loss=False,
                 l2_loss_coef=1e-5):

        # GRAPH INPUTS
        run_inputs = tx.Input(ctx_size, dtype=tf.int32, name="input")
        loss_inputs = tx.Input(n_units=1, dtype=tf.int32, name="target")
        eval_inputs = loss_inputs

        # RUN GRAPH =====================================================
        var_reg = []
        with tf.name_scope("run"):
            # RI ENCODING ===============================================
            # convert ids to RIs: gather a set of random indexes based on the ids in a sequence

            # ri_layer = tx.TensorLayer(ri_tensor, n_units=k_dim)
            # ri_inputs = tx.gather_sparse(ri_layer.tensor, run_inputs.tensor)
            with tf.name_scope("ri_encode"):
                # used to compute logits
                if isinstance(ri_tensor, RandomIndexTensor):
                    ri_layer = tx.TensorLayer(ri_tensor.to_sparse_tensor(),
                                              k_dim)

                    ri_inputs = ri_tensor.gather(run_inputs.tensor)
                    ri_inputs = ri_inputs.to_sparse_tensor()
                    ri_inputs = tx.TensorLayer(ri_inputs, k_dim)
                else:
                    ri_layer = tx.TensorLayer(ri_tensor, k_dim)
                    ri_inputs = tx.gather_sparse(ri_layer.tensor,
                                                 run_inputs.tensor)
                    ri_inputs = tx.TensorLayer(ri_inputs, k_dim)

            # use those sparse indexes to look up a set of features based on the RI values
            feature_lookup = tx.Lookup(ri_inputs,
                                       ctx_size, [k_dim, embed_dim],
                                       embed_init,
                                       name="lookup")
            var_reg.append(feature_lookup.weights)
            feature_lookup = feature_lookup.as_concat()
            # ===========================================================

            if use_gate or use_hidden:
                hl = tx.Linear(feature_lookup,
                               h_dim,
                               h_init,
                               bias=True,
                               name="h_linear")
                ha = tx.Activation(hl, h_activation, name="h_activation")
                h = tx.Compose(hl, ha, name="hidden")
                var_reg.append(hl.weights)

            features = feature_lookup
            if use_gate:
                features = tx.Gate(features, ctx_size, gate_input=h)
                gate = features
                var_reg.append(features.gate_weights)

            x_to_f = tx.Linear(features,
                               embed_dim,
                               x_to_f_init,
                               bias=True,
                               name="x_to_f")
            var_reg.append(x_to_f.weights)
            f_prediction = x_to_f

            if use_hidden:
                h_to_f = tx.Linear(h,
                                   embed_dim,
                                   h_to_f_init,
                                   bias=True,
                                   name="h_to_f")
                var_reg.append(h_to_f.weights)
                f_prediction = tx.Add(x_to_f, h_to_f, name="f_predicted")

            # RI DECODING ===============================================
            shared_weights = feature_lookup.weights if embed_share else None
            logit_init = logit_init if not embed_share else None
            # embedding feature vectors for all words: shape [vocab_size, embed_dim]
            # later, for NCE we don't need to get all the features

            all_embeddings = tx.Linear(ri_layer,
                                       embed_dim,
                                       logit_init,
                                       shared_weights,
                                       name="logits",
                                       bias=False)

            # dot product of f_predicted . all_embeddings with bias for each target word

            run_logits = tx.Linear(f_prediction,
                                   n_units=vocab_size,
                                   shared_weights=all_embeddings.tensor,
                                   transpose_weights=True,
                                   bias=logit_bias)

            if not embed_share:
                var_reg.append(all_embeddings.weights)

            # ===========================================================
            run_embed_prob = tx.Activation(run_logits, tx.softmax)

        # TRAIN GRAPH ===================================================
        with tf.name_scope("train"):
            if use_dropout and embed_dropout:
                feature_lookup = feature_lookup.reuse_with(ri_inputs)
                features = tx.Dropout(feature_lookup, probability=keep_prob)
            else:
                features = feature_lookup

            if use_gate or use_hidden:
                if use_dropout:
                    h = h.reuse_with(features)
                    h = tx.Dropout(h, probability=keep_prob)

                if use_gate:
                    features = gate.reuse_with(features, gate_input=h)

                f_prediction = x_to_f.reuse_with(features)

                if use_hidden:
                    h_to_f = h_to_f.reuse_with(h)
                    if use_dropout:
                        h_to_f = tx.Dropout(h_to_f, probability=keep_prob)
                    f_prediction = tx.Add(f_prediction, h_to_f)
            else:
                f_prediction = f_prediction.reuse_with(features)

            # all_embeddings, from which these logits are computed, is already defined above, so this should be ok
            train_logits = run_logits.reuse_with(f_prediction)

            train_embed_prob = tx.Activation(train_logits,
                                             tx.softmax,
                                             name="train_output")

            one_hot = tx.dense_one_hot(column_indices=loss_inputs.tensor,
                                       num_cols=vocab_size)
            train_loss = tx.categorical_cross_entropy(one_hot,
                                                      train_logits.tensor)

            train_loss = tf.reduce_mean(train_loss)

            if l2_loss:
                losses = [tf.nn.l2_loss(var) for var in var_reg]
                train_loss = train_loss + l2_loss_coef * tf.add_n(losses)

        # EVAL GRAPH ===============================================
        with tf.name_scope("eval"):
            one_hot = tx.dense_one_hot(column_indices=eval_inputs.tensor,
                                       num_cols=vocab_size)
            eval_loss = tx.categorical_cross_entropy(one_hot,
                                                     run_logits.tensor)
            eval_loss = tf.reduce_mean(eval_loss)

        # SETUP MODEL CONTAINER ====================================
        super().__init__(run_inputs=run_inputs,
                         run_outputs=run_embed_prob,
                         train_inputs=run_inputs,
                         train_outputs=train_embed_prob,
                         eval_inputs=run_inputs,
                         eval_outputs=run_embed_prob,
                         train_out_loss=train_loss,
                         train_in_loss=loss_inputs,
                         eval_out_score=eval_loss,
                         eval_in_score=eval_inputs)
Example 14
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops.embedding_ops import embedding_lookup_sparse

embed_size = 4

generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]
ri_tensor = RandomIndexTensor.from_ri_list(ris, k, s)

sp_values = ri_tensor.gather(flat_labels).to_sparse_tensor()
sp_indices = tx.sparse_indices(sp_values)

print(sp_values.get_shape())
print(tensor_util.constant_value_as_shape(sp_values.dense_shape))
print(tensor_util.constant_value(sp_values.dense_shape))
print(sp_values.dense_shape[-1].eval())
print(tf.shape(sp_values).eval())

lookup = tx.Lookup(tx.TensorLayer(sp_values),
                   seq_size=1,
                   lookup_shape=[k, embed_size])

linear = tx.Linear(tx.TensorLayer(sp_values),
                   n_units=k,
                   shared_weights=lookup.weights)

w = embedding_lookup_sparse(params=lookup.weights,
                            sp_ids=sp_indices,
                            sp_weights=sp_values,
                            combiner="sum",
                            partition_strategy="mod")

tf.global_variables_initializer().run()
Example 15
from tensorflow.python.framework import dtypes, ops
from tensorflow.python.ops import array_ops, math_ops, variables
from tensorflow.python.ops.embedding_ops import embedding_lookup_sparse
from tensorflow.python.ops.nn_impl import _sum_rows

import tensorx as tx


def _sampled_logits_from_parametric_noise(ri_tensors,
                                          k_dim,
                                          weights,
                                          labels,
                                          inputs,
                                          input_dim,
                                          num_true=1,
                                          partition_strategy="mod",
                                          name=None):
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "compute_sampled_logits",
                        weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # true_ris
        true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=labels_flat)

        true_w = embedding_lookup_sparse(params=weights,
                                         sp_ids=tx.sparse_indices(true_ris),
                                         sp_weights=true_ris,
                                         combiner="sum",
                                         partition_strategy=partition_strategy)

        label_layer = tx.TensorLayer(true_w, input_dim)
        noise_fn = tx.FC(label_layer, 512, activation=tx.relu)
        noise_fn_sp = tx.ToSparse(noise_fn)
        noise_ris = tx.Linear(noise_fn_sp, k_dim, weight_init=tx.glorot_uniform(), bias=True)
        noise_ris_sp = tx.ToSparse(noise_ris)

        noise_w = embedding_lookup_sparse(params=weights,
                                          sp_ids=tx.sparse_indices(noise_ris_sp.tensor),
                                          sp_weights=noise_ris_sp.tensor,
                                          combiner="sum",
                                          partition_strategy=partition_strategy)

        noise_logits = math_ops.matmul(inputs, noise_w, transpose_b=True)

        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        true_w_e = array_ops.reshape(true_w, new_true_w_shape)

        row_wise_dots = math_ops.multiply(array_ops.expand_dims(inputs, 1),
                                          true_w_e)
        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat([true_logits, noise_logits], 1)

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat([
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(noise_logits)
        ], 1)

        # out_logits = out_logits * math_ops.exp(partition_const)
        # out_logits = out_logits / (partition_const + 1)
        return out_logits, out_labels
Example 16
import tensorflow as tf
import tensorx as tx
import numpy as np
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

n_features = 3
embed_size = 4
cell_units = 2
seq_size = 3
batch_size = 2

inputs = tx.TensorLayer(np.random.random([batch_size, seq_size]),
                        n_units=seq_size,
                        dtype=tf.int32)
lookup = tx.Lookup(inputs,
                   seq_size=seq_size,
                   lookup_shape=[n_features, embed_size])
seq = lookup.permute_batch_time()

# first step of a sequence
t1 = seq[0]

ks_cell = tf.keras.layers.LSTMCell(units=cell_units)
tf_cell = tf.nn.rnn_cell.LSTMCell(num_units=cell_units, state_is_tuple=True)
tx_cell = tx.LSTMCell(t1, n_units=cell_units)

kernel_w = [
    tx_cell.w_i.weights, tx_cell.w_c.weights, tx_cell.w_f.weights,
    tx_cell.w_o.weights
Example 17
gate = tx.Linear(h, 2, bias=True)
gate = tx.Activation(gate, tx.sigmoid)

# lookup might output a sequence format with [batch,seq_size,m_dim]
# lookup_out = lookup.tensor
lookup_out = tf.reshape(lookup.tensor, [-1, seq_size, m_dim])

# reshape works anyway
gated_out = tf.reshape(lookup_out, [-1, seq_size, m_dim]) * tf.expand_dims(
    gate.tensor, -1)

# gated_out = tf.reshape(gated_out, [-1, seq_size * m_dim])
# gated_out = tf.reshape(gated_out, [-1, lookup.n_units])
gated_out = tf.reshape(gated_out, tf.shape(lookup.tensor))
gated_out = tx.TensorLayer(gated_out, lookup.n_units)
# END GATING MECHANISM

y = tx.Linear(gated_out, m_dim, bias=True)

ss.run(tf.global_variables_initializer())

lookup_out = lookup.tensor.eval({inputs.placeholder: w})

assert (np.shape(lookup_out) == (3, 2 * m_dim))
print(np.shape(lookup_out))

gated_out = gated_out.tensor.eval({inputs.placeholder: w})
print(np.shape(gated_out))

gate_values = gate.tensor.eval({inputs.placeholder: w})
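
# The gating above relies on broadcasting: a [batch, seq_size] gate is expanded to
# [batch, seq_size, 1] and multiplied with [batch, seq_size, m_dim] features, so every
# feature of a step is scaled by that step's gate value. A minimal numpy sketch of the
# same broadcast (shapes and values are illustrative, not from the original script):
import numpy as np

features_sketch = np.ones([3, 2, 4])                          # [batch, seq_size, m_dim]
gate_sketch = np.array([[0.0, 1.0], [0.5, 0.5], [1.0, 0.0]])  # [batch, seq_size]
gated_sketch = features_sketch * gate_sketch[:, :, np.newaxis]
print(gated_sketch.shape)  # (3, 2, 4)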