def __load_model(self, num_layers):
        # Initial memory value for recurrence.
        self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

        # Choose the recurrent cell (GRU here; a plain RNN or LSTM cell would also work).
        with tf.variable_scope("forward"):
            fw_single_cell = rnn_cell.GRUCell(self.memory_dim)
            # Stack num_layers GRU cells to form the forward multi-layer cell.
            self.forward_cell = rnn_cell.MultiRNNCell([fw_single_cell] *
                                                      num_layers)

        with tf.variable_scope("backward"):
            bw_single_cell = rnn_cell.GRUCell(self.memory_dim)
            # Stack num_layers GRU cells to form the backward multi-layer cell.
            self.backward_cell = rnn_cell.MultiRNNCell([bw_single_cell] *
                                                       num_layers)

        # embedding model
        if not self.attention:
            with tf.variable_scope("forward"):
                self.dec_outputs_fwd, _ = seq2seq.embedding_rnn_seq2seq(
                    self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                    self.vocab_size, self.vocab_size, self.seq_length)
            with tf.variable_scope("forward", reuse=True):
                self.dec_outputs_fwd_tst, _ = seq2seq.embedding_rnn_seq2seq(
                    self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                    self.vocab_size, self.vocab_size, self.seq_length,
                    feed_previous=True)

            with tf.variable_scope("backward"):
                self.dec_outputs_bwd, _ = seq2seq.embedding_rnn_seq2seq(
                    self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                    self.vocab_size, self.vocab_size, self.seq_length)

            with tf.variable_scope("backward", reuse=True):
                self.dec_outputs_bwd_tst, _ = seq2seq.embedding_rnn_seq2seq(
                    self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                    self.vocab_size, self.vocab_size, self.seq_length,
                    feed_previous=True)

        else:
            with tf.variable_scope("forward"):
                self.dec_outputs_fwd, _ = seq2seq.embedding_attention_seq2seq(
                    self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                    self.vocab_size, self.vocab_size, self.seq_length)
            with tf.variable_scope("forward", reuse=True):
                self.dec_outputs_fwd_tst, _ = seq2seq.embedding_attention_seq2seq(
                    self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                    self.vocab_size, self.vocab_size, self.seq_length,
                    feed_previous=True)

            with tf.variable_scope("backward"):
                self.dec_outputs_bwd, _ = seq2seq.embedding_attention_seq2seq(
                    self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                    self.vocab_size, self.vocab_size, self.seq_length)

            with tf.variable_scope("backward", reuse=True):
                self.dec_outputs_bwd_tst, _ = seq2seq.embedding_attention_seq2seq(
                    self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                    self.vocab_size, self.vocab_size, self.seq_length,
                    feed_previous=True)
Example No. 2
def char_rnn_model(X, y):
    byte_list = skflow.ops.one_hot_matrix(X, 256)
    byte_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, byte_list)
    cell = rnn_cell.GRUCell(HIDDEN_SIZE)
    #cell = rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
    _, encoding = rnn.rnn(cell, byte_list, dtype=tf.float32)
    return skflow.models.logistic_regression(encoding, y)
def RNN(X, num_words_in_X, hidden_size, input_vector_size, max_input_size):
    """
  Passes the input data through an RNN and outputs the final states.

  X: Input is a MAX_INPUT_LENGTH X BATCH_SIZE X WORD_VECTOR_LENGTH matrix
  num_words_in_X: Number of words in X, which is needed because X is zero padded
  hidden_size: The dimensionality of the hidden layer of the RNN
  input_vector_size: This is the dimensionality of each input vector, in this case it is WORD_VECTOR_LENGTH
  max_input_size: This is the max number of input vectors that can be passed in to the RNN.

  """

    # Split X into a list of tensors of length max_input_size where each tensor is a BATCH_SIZE x input_vector_size vector
    X = tf.split(0, max_input_size, X)

    squeezed = []
    for i in range(len(X)):
        squeezed.append(tf.squeeze(X[i]))

    gru_cell = rnn_cell.GRUCell(num_units=hidden_size,
                                input_size=input_vector_size)
    output, state = rnn.rnn(gru_cell,
                            squeezed,
                            sequence_length=num_words_in_X,
                            dtype=tf.float32)
    return output, state
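A minimal usage sketch for the RNN helper above, following the old TF 0.x style used throughout these examples; the shape constants and placeholder names are illustrative assumptions, and the rnn/rnn_cell imports from the snippet are assumed to be in scope.

import tensorflow as tf

MAX_INPUT_LENGTH = 30      # assumed value for max_input_size
WORD_VECTOR_LENGTH = 300   # assumed value for input_vector_size
BATCH_SIZE = 16
HIDDEN_SIZE = 128

# Zero-padded word vectors, one time step per slice of the first dimension.
X = tf.placeholder(tf.float32,
                   [MAX_INPUT_LENGTH, BATCH_SIZE, WORD_VECTOR_LENGTH])
# Actual (unpadded) number of words for each example in the batch.
num_words_in_X = tf.placeholder(tf.int32, [BATCH_SIZE])

outputs, final_state = RNN(X, num_words_in_X, HIDDEN_SIZE,
                           WORD_VECTOR_LENGTH, MAX_INPUT_LENGTH)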
Example No. 4
    def testEmbeddingAttentionDecoder(self):
        with self.test_session() as sess:
            with tf.variable_scope("root",
                                   initializer=tf.constant_initializer(0.5)):
                inp = [tf.constant(0.5, shape=[2, 2]) for _ in xrange(2)]
                cell = rnn_cell.GRUCell(2)
                enc_outputs, enc_states = rnn.rnn(cell, inp, dtype=tf.float32)
                attn_states = tf.concat(1, [
                    tf.reshape(e, [-1, 1, cell.output_size])
                    for e in enc_outputs
                ])
                dec_inp = [
                    tf.constant(i, tf.int32, shape=[2]) for i in xrange(3)
                ]
                dec, mem = seq2seq.embedding_attention_decoder(dec_inp,
                                                               enc_states[-1],
                                                               attn_states,
                                                               cell,
                                                               4,
                                                               output_size=3)
                sess.run([tf.initialize_all_variables()])
                res = sess.run(dec)
                self.assertEqual(len(res), 3)
                self.assertEqual(res[0].shape, (2, 3))

                res = sess.run(mem)
                self.assertEqual(len(res), 4)
                self.assertEqual(res[0].shape, (2, 2))
Example No. 5
  def testRNNDecoder(self):
    with self.test_session() as sess:
      with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
        inp = [tf.constant(0.5, shape=[2, 2]) for _ in xrange(2)]
        _, enc_states = rnn.rnn(rnn_cell.GRUCell(2), inp, dtype=tf.float32)
        dec_inp = [tf.constant(0.4, shape=[2, 2]) for _ in xrange(3)]
        cell = rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(2), 4)
        dec, mem = seq2seq.rnn_decoder(dec_inp, enc_states[-1], cell)
        sess.run([tf.initialize_all_variables()])
        res = sess.run(dec)
        self.assertEqual(len(res), 3)
        self.assertEqual(res[0].shape, (2, 4))

        res = sess.run(mem)
        self.assertEqual(len(res), 4)
        self.assertEqual(res[0].shape, (2, 2))
Example No. 6
def rnn_model(X, y):
    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
        embedding_size=EMBEDDING_SIZE, name='words')
    word_list = [tf.squeeze(w, [1]) for w in tf.split(1, MAX_DOCUMENT_LENGTH, word_vectors)]
    cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
    _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32)
    return skflow.models.logistic_regression(encoding[-1], y)
def RNN(X, num_words_in_X, hidden_size, max_input_size):
    # Reshape `X` as a vector. -1 means "set this dimension automatically".
    X_as_vector = tf.reshape(X, [-1])

    # Create another vector containing zeroes to pad `X` to (MAX_INPUT_LENGTH * WORD_VECTOR_LENGTH) elements.
    zero_padding = tf.zeros([max_input_size * WORD_VECTOR_LENGTH] -
                            tf.shape(X_as_vector),
                            dtype=X.dtype)

    # Concatenate `X_as_vector` with the padding.
    X_padded_as_vector = tf.concat(0, [X_as_vector, zero_padding])

    # Reshape the padded vector to the desired shape.
    X_padded = tf.reshape(X_padded_as_vector,
                          [max_input_size, WORD_VECTOR_LENGTH])

    # Split X into a list of tensors of length MAX_INPUT_LENGTH where each tensor is a 1xWORD_VECTOR_LENGTH vector
    # of the word vectors
    # TODO change input to be a list of tensors of length MAX_INPUT_LENGTH where each tensor is a BATCH_SIZExWORD_VECTOR_LENGTH vector
    X = tf.split(0, max_input_size, X_padded)

    print "Length X: {}".format(len(X))

    gru_cell = rnn_cell.GRUCell(num_units=hidden_size,
                                input_size=WORD_VECTOR_LENGTH)

    output, state = rnn.rnn(gru_cell,
                            X,
                            sequence_length=(num_words_in_X),
                            dtype=tf.float32)

    print "State: {}".format(state)

    return output, state, X_padded
Example No. 8
 def final_state_of_rnn_over_embedded_sequence(idx, embedded_seq):
     with tf.variable_scope("rnn_%s" % idx):
         gru = rnn_cell.GRUCell(opts.hidden_dim)
         initial_state = gru.zero_state(opts.batch_size, tf.float32)
         outputs, _states = rnn.rnn(gru,
                                    embedded_seq,
                                    initial_state=initial_state)
         return outputs[-1]
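A hypothetical call site for the helper above; the vocabulary/sequence constants, the embedding variable, and the token placeholder are assumptions for illustration, with tf.split using the old (split_dim, num_split, value) argument order seen elsewhere on this page.

VOCAB_SIZE = 10000   # assumed
SEQ_LEN = 20         # assumed
EMBED_DIM = 100      # assumed

embeddings = tf.get_variable("embeddings", [VOCAB_SIZE, EMBED_DIM])
token_ids = tf.placeholder(tf.int32, [opts.batch_size, SEQ_LEN])
embedded = tf.nn.embedding_lookup(embeddings, token_ids)  # batch x seq x dim
# Turn the embedded batch into a list of per-timestep tensors for rnn.rnn.
embedded_seq = [tf.squeeze(t, [1]) for t in tf.split(1, SEQ_LEN, embedded)]
final_output = final_state_of_rnn_over_embedded_sequence(0, embedded_seq)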
Example No. 9
 def GRUSeq2Seq(enc_inp, dec_inp):
     cell = rnn_cell.MultiRNNCell([rnn_cell.GRUCell(24)] * 2)
     return seq2seq.embedding_attention_seq2seq(
         enc_inp,
         dec_inp,
         cell,
         classes,
         classes,
         output_projection=(w, b))
Example No. 10
    def __init__(self, params, emb_mat):
        self.params = params
        V, d, L, e = params.vocab_size, params.hidden_size, params.rnn_num_layers, params.word_size
        prev_size = e
        hidden_sizes = [d for _ in range(params.emb_num_layers)]
        for layer_idx in range(params.emb_num_layers):
            with tf.variable_scope("emb_%d" % layer_idx):
                cur_hidden_size = hidden_sizes[layer_idx]
                emb_mat = tf.tanh(
                    my.nn.linear([V, prev_size], cur_hidden_size, emb_mat))
                prev_size = cur_hidden_size
        self.emb_mat = emb_mat

        self.emb_hidden_sizes = [d for _ in range(params.emb_num_layers)]
        self.input_size = self.emb_hidden_sizes[
            -1] if self.emb_hidden_sizes else e

        if params.lstm == 'basic':
            self.first_cell = my.rnn_cell.BasicLSTMCell(
                d, input_size=self.input_size, forget_bias=params.forget_bias)
            self.second_cell = my.rnn_cell.BasicLSTMCell(
                d, forget_bias=params.forget_bias)
        elif params.lstm == 'regular':
            self.first_cell = rnn_cell.LSTMCell(d,
                                                self.input_size,
                                                cell_clip=params.cell_clip)
            self.second_cell = rnn_cell.LSTMCell(d,
                                                 d,
                                                 cell_clip=params.cell_clip)
        elif params.lstm == 'gru':
            self.first_cell = rnn_cell.GRUCell(d, input_size=self.input_size)
            self.second_cell = rnn_cell.GRUCell(d)
        else:
            raise Exception()

        if params.train and params.keep_prob < 1.0:
            self.first_cell = tf.nn.rnn_cell.DropoutWrapper(
                self.first_cell,
                input_keep_prob=params.keep_prob,
                output_keep_prob=params.keep_prob)
        self.cell = rnn_cell.MultiRNNCell([self.first_cell] +
                                          [self.second_cell] * (L - 1))
        self.scope = tf.get_variable_scope()
        self.used = False
 def prediction(self):
     # Recurrent network.
     output, _ = rnn.dynamic_rnn(
         rnn_cell.GRUCell(self._num_hidden),
          self.data,
         dtype=tf.float32,
         sequence_length=self.length,
     )
     last = self._last_relevant(output, self.length)
     # Softmax layer.
     weight, bias = self._weight_and_bias(self._num_hidden,
                                          int(self.target.get_shape()[1]))
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
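The prediction method above (and the similar ones further down this page) relies on a _last_relevant helper that none of these snippets include. A common sketch of it, consistent with the batch x max_length x hidden output shape used here, gathers the output at each sequence's last valid time step:

@staticmethod
def _last_relevant(output, length):
    # output: batch x max_length x hidden; length: batch of actual sequence lengths.
    batch_size = tf.shape(output)[0]
    max_length = int(output.get_shape()[1])
    output_size = int(output.get_shape()[2])
    # Index of each sequence's last valid step in the flattened output.
    index = tf.range(0, batch_size) * max_length + (length - 1)
    flat = tf.reshape(output, [-1, output_size])
    return tf.gather(flat, index)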
Example No. 12
 def testGRUCell(self):
     with self.test_session() as sess:
         with tf.variable_scope("root",
                                initializer=tf.constant_initializer(0.5)):
             x = tf.zeros([1, 2])
             m = tf.zeros([1, 2])
             g, _ = rnn_cell.GRUCell(2)(x, m)
             sess.run([tf.variables.initialize_all_variables()])
             res = sess.run([g], {
                 x.name: np.array([[1., 1.]]),
                 m.name: np.array([[0.1, 0.1]])
             })
             # Smoke test
             self.assertAllClose(res[0], [[0.175991, 0.175991]])
Example No. 13
  def testTiedRNNSeq2Seq(self):
    with self.test_session() as sess:
      with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
        inp = [tf.constant(0.5, shape=[2, 2]) for _ in xrange(2)]
        dec_inp = [tf.constant(0.4, shape=[2, 2]) for _ in xrange(3)]
        cell = rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(2), 4)
        dec, mem = seq2seq.tied_rnn_seq2seq(inp, dec_inp, cell)
        sess.run([tf.initialize_all_variables()])
        res = sess.run(dec)
        self.assertEqual(len(res), 3)
        self.assertEqual(res[0].shape, (2, 4))

        res = sess.run(mem)
        self.assertEqual(len(res), 4)
        self.assertEqual(res[0].shape, (2, 2))
Example No. 14
 def testEmbeddingWrapper(self):
     with self.test_session() as sess:
         with tf.variable_scope("root",
                                initializer=tf.constant_initializer(0.5)):
             x = tf.zeros([1, 1], dtype=tf.int32)
             m = tf.zeros([1, 2])
             g, new_m = rnn_cell.EmbeddingWrapper(rnn_cell.GRUCell(2), 3)(x,
                                                                          m)
             sess.run([tf.variables.initialize_all_variables()])
             res = sess.run([g, new_m], {
                 x.name: np.array([[1]]),
                 m.name: np.array([[0.1, 0.1]])
             })
             self.assertEqual(res[1].shape, (1, 2))
             # The numbers in results were not calculated, this is just a smoke test.
             self.assertAllClose(res[0], [[0.17139, 0.17139]])
 def prediction(self):
      # The RNN output feeds the cost (cross-entropy) or other error/loss computations.
     # Recurrent network.
     output, _ = rnn.dynamic_rnn(
         rnn_cell.GRUCell(self._num_hidden),
          self.data,
         dtype=tf.float32,
         sequence_length=self.length,
     )
      # After training, when a sequence is fed in for prediction, the dynamic_rnn output must go through _last_relevant.
     last = self._last_relevant(output, self.length)
     # Softmax layer.
     weight, bias = self._weight_and_bias(self._num_hidden,
                                          int(self.target.get_shape()[1]))
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
Example No. 16
 def testMultiRNNCell(self):
     with self.test_session() as sess:
         with tf.variable_scope("root",
                                initializer=tf.constant_initializer(0.5)):
             x = tf.zeros([1, 2])
             m = tf.zeros([1, 4])
             _, ml = rnn_cell.MultiRNNCell([rnn_cell.GRUCell(2)] * 2)(x, m)
             sess.run([tf.variables.initialize_all_variables()])
             res = sess.run(
                 ml, {
                     x.name: np.array([[1., 1.]]),
                     m.name: np.array([[0.1, 0.1, 0.1, 0.1]])
                 })
             # The numbers in results were not calculated, this is just a smoke test.
             self.assertAllClose(res,
                                 [[0.175991, 0.175991, 0.13248, 0.13248]])
Example No. 17
 def prediction(self):
     # Recurrent network.
     network = rnn_cell.GRUCell(self._num_hidden)
     network = rnn_cell.DropoutWrapper(
         network, output_keep_prob=self.dropout)
     network = rnn_cell.MultiRNNCell([network] * self._num_layers)
     output, _ = rnn.dynamic_rnn(network, data, dtype=tf.float32)
     # Softmax layer.
     max_length = int(self.target.get_shape()[1])
     num_classes = int(self.target.get_shape()[2])
     weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
     # Flatten to apply same weights to all time steps.
     output = tf.reshape(output, [-1, self._num_hidden])
     prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
     prediction = tf.reshape(prediction, [-1, max_length, num_classes])
     return prediction
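These prediction methods also call a _weight_and_bias helper that is not shown in the snippets. A typical sketch, assuming truncated-normal weight initialization and a small constant bias (both assumptions, not taken from the original code):

@staticmethod
def _weight_and_bias(in_size, out_size):
    # Softmax layer parameters: in_size x out_size weights plus one bias per class.
    weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
    bias = tf.constant(0.1, shape=[out_size])
    return tf.Variable(weight), tf.Variable(bias)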
Example No. 18
 def prediction(self):
     # Recurrent network.
     output, _ = rnn.dynamic_rnn(
         rnn_cell.GRUCell(self._num_hidden),
         self.data,
         dtype=tf.float32,
         sequence_length=self.length,
     )
     # Softmax layer.
     max_length = int(self.target.get_shape()[1])
     num_classes = int(self.target.get_shape()[2])
     weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
     # Flatten to apply same weights to all time steps.
     output = tf.reshape(output, [-1, self._num_hidden])
     prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
     prediction = tf.reshape(prediction, [-1, max_length, num_classes])
     return prediction
Example No. 19
 def testInputProjectionWrapper(self):
     with self.test_session() as sess:
         with tf.variable_scope("root",
                                initializer=tf.constant_initializer(0.5)):
             x = tf.zeros([1, 2])
             m = tf.zeros([1, 3])
             cell = rnn_cell.InputProjectionWrapper(rnn_cell.GRUCell(3), 2)
             g, new_m = cell(x, m)
             sess.run([tf.variables.initialize_all_variables()])
             res = sess.run(
                 [g, new_m], {
                     x.name: np.array([[1., 1.]]),
                     m.name: np.array([[0.1, 0.1, 0.1]])
                 })
             self.assertEqual(res[1].shape, (1, 3))
             # The numbers in results were not calculated, this is just a smoke test.
             self.assertAllClose(res[0], [[0.154605, 0.154605, 0.154605]])
Example No. 20
    def RNN(self, scope):
        # input shape: (batch_size, step_size, input_dim)
        # Permute step_size and batch_size so the time dimension comes first.
        data = tf.transpose(self.input_data, [1, 0, 2])
        # Reshape to prepare the input for the hidden activation:
        # flatten batch and step into (step_size * batch_size, input_dim).
        data = tf.reshape(data, [-1, self.config.input_dim + 1])

        with tf.variable_scope(str(scope)):
            data = tf.nn.dropout(
                tf.matmul(data, self.weights['hidden']) +
                self.biases['hidden'], self.config.dropout)

            # Define a lstm cell with tensorflow
            if self.config.cell_type == 'GRU':
                lstm_cell = rnn_cell.GRUCell(self.config.hidden_dim)
            else:
                lstm_cell = rnn_cell.LSTMCell(
                    self.config.hidden_dim,
                    forget_bias=self.config.forget_bias)

            # Split data because rnn cell needs a list of inputs for the RNN inner loop
            data = tf.split(0, self.config.step_size,
                            data)  # step_size * (batch_size, hidden_dim)

            # Get lstm cell output
            outputs, states = rnn.rnn(lstm_cell,
                                      data,
                                      initial_state=self.init_state)

            # We are really only interested in the last step's output.
            return [
                tf.matmul(outputs[-1], self.weights['out1']) +
                self.biases['out1'],
                tf.matmul(outputs[-1], self.weights['out2']) +
                self.biases['out2'],
                tf.matmul(outputs[-1], self.weights['out3']) +
                self.biases['out3'],
                tf.matmul(outputs[-1], self.weights['out4']) +
                self.biases['out4'],
                tf.matmul(outputs[-1], self.weights['out5']) +
                self.biases['out5']
            ]
Example No. 21
    def __init__(self, config):

        self.config = config

        self.vocab_size = vocab_size = config.vocab_size
        self.y_size = y_size = config.y_size

        self.batch_size = batch_size = config.batch_size
        self.steps = config.steps

        self.layers = layers = config.layers

        self.dim_ictx = dim_ictx = config.dim_ictx
        self.dim_iemb = dim_iemb = config.dim_iemb
        self.dim_wemb = dim_wemb = config.dim_wemb
        self.dim_hidden = dim_hidden = config.dim_hidden

        self.lr = tf.Variable(config.lr, trainable=False)

        rnn_type = config.rnn_type
        if rnn_type == 'gru':
            rnn_ = rnn_cell.GRUCell(dim_hidden)
        elif rnn_type == 'lstm':
            rnn_ = rnn_cell.BasicLSTMCell(dim_hidden)

        if layers is not None:
            self.my_rnn = my_rnn = rnn_cell.MultiRNNCell([rnn_] * layers)
            self.init_state = my_rnn.zero_state(batch_size, tf.float32)
        else:
            self.my_rnn = my_rnn = rnn_
            self.init_state = tf.zeros([batch_size, my_rnn.state_size])

        self.W_iemb = tf.get_variable("W_iemb", [dim_ictx, dim_iemb])
        self.b_iemb = tf.get_variable("b_iemb", [dim_iemb])
        with tf.device("/cpu:0"):
            self.W_wemb = tf.get_variable("W_wemb", [vocab_size, dim_wemb])

        if config.is_birnn:  # concat seems to work better than add..
            self.W_pred = tf.get_variable("W_pred", [dim_hidden * 2, y_size])
        else:
            self.W_pred = tf.get_variable("W_pred", [dim_hidden, y_size])

        self.b_pred = tf.get_variable("b_pred", [y_size])
Example No. 22
def rnn_model(X, y):
    """Recurrent neural network model to predict from sequence of words
    to a class."""
    # Convert indexes of words into embeddings.
    # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and then
    # maps word indexes of the sequence into [batch_size, sequence_length,
    # EMBEDDING_SIZE].
    word_vectors = skflow.ops.categorical_variable(
        X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
    # Split into list of embedding per word, while removing doc length dim.
    # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE].
    word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)
    # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
    cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
    # Create an unrolled Recurrent Neural Networks to length of
    # MAX_DOCUMENT_LENGTH and passes word_list as inputs for each unit.
    _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32)
    # Given encoding of RNN, take encoding of last step (e.g hidden size of the
    # neural network of last step) and pass it as features for logistic
    # regression over output classes.
    return skflow.models.logistic_regression(encoding[-1], y)
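For context, a model function like rnn_model above is typically handed to skflow's estimator. A minimal sketch, assuming the old standalone skflow package and its TensorFlowEstimator interface; X_train/y_train/X_test/y_test, the class count, and the step count are placeholders, and n_words/EMBEDDING_SIZE/MAX_DOCUMENT_LENGTH are assumed to be defined as in the snippet.

import skflow
from sklearn import metrics

classifier = skflow.TensorFlowEstimator(model_fn=rnn_model, n_classes=15,
                                        steps=1000)
classifier.fit(X_train, y_train)
score = metrics.accuracy_score(y_test, classifier.predict(X_test))
print('Accuracy: {0:f}'.format(score))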
Example No. 23
    def build(self):

        self.input_0 = tf.placeholder(
            tf.float32,
            [self.config.max_length_0_input, 1, self.config.embedding_size])
        self.input_0_length = tf.placeholder(tf.int32)

        self.input_1 = tf.placeholder(
            tf.float32,
            [self.config.max_length_0_input, 1, self.config.embedding_size])
        self.input_1_length = tf.placeholder(tf.int32)

        input_0 = array_ops.unpack(self.input_0)
        input_1 = array_ops.unpack(self.input_1)

        # bidirectional rnn
        cell = rnn_cell.GRUCell(self.config.embedding_size)

        initial_state_fw = array_ops.zeros(array_ops.pack([1,
                                                           cell.state_size]),
                                           dtype=tf.float32)
        initial_state_fw.set_shape([1, cell.state_size])
        initial_state_bw = array_ops.zeros(array_ops.pack([1,
                                                           cell.state_size]),
                                           dtype=tf.float32)
        initial_state_bw.set_shape([1, cell.state_size])

        states = bidirectional_rnn(
            cell,
            cell,
            input_0,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            dtype=tf.float32,
            # sequence_length=3
        )

        self.test = array_ops.pack(states)
Example No. 24
    def _init_neural_network(self):
        """Initializing the NN (building a TensorFlow graph and initializing session)."""

        # set TensorFlow random seed
        tf.set_random_seed(rnd.randint(-sys.maxint, sys.maxint))

        # create placeholders for input & output (always batch-size * 1, list of up to num. steps)
        self.enc_inputs = []
        self.enc_inputs_drop = []
        for i in xrange(self.max_da_len):
            enc_input = tf.placeholder(tf.int32, [None],
                                       name=('enc_inp-%d' % i))
            self.enc_inputs.append(enc_input)
            if self.dropout_keep_prob < 1:
                enc_input_drop = tf.nn.dropout(enc_input,
                                               self.dropout_keep_prob,
                                               name=('enc_inp-drop-%d' % i))
                self.enc_inputs_drop.append(enc_input_drop)

        self.dec_inputs = []
        for i in xrange(self.max_tree_len):
            self.dec_inputs.append(
                tf.placeholder(tf.int32, [None], name=('dec_inp-%d' % i)))

        # targets are just decoder inputs shifted by one (+pad with one empty spot)
        self.targets = [
            self.dec_inputs[i + 1] for i in xrange(len(self.dec_inputs) - 1)
        ]
        self.targets.append(
            tf.placeholder(tf.int32, [None], name=('target-pad')))

        # prepare cells
        self.initial_state = tf.placeholder(tf.float32, [None, self.emb_size])
        if self.cell_type.startswith('gru'):
            self.cell = rnn_cell.GRUCell(self.emb_size)
        else:
            self.cell = rnn_cell.BasicLSTMCell(self.emb_size)

        if self.cell_type.endswith('/2'):
            self.cell = rnn_cell.MultiRNNCell([self.cell] * 2)

        # build the actual LSTM Seq2Seq network (for training and decoding)
        with tf.variable_scope(self.scope_name) as scope:

            rnn_func = embedding_rnn_seq2seq
            if self.nn_type == 'emb_attention_seq2seq':
                rnn_func = embedding_attention_seq2seq
            elif self.nn_type == 'emb_attention2_seq2seq':
                rnn_func = partial(embedding_attention_seq2seq, num_heads=2)
            elif self.nn_type == 'emb_attention_seq2seq_context':
                rnn_func = embedding_attention_seq2seq_context
            elif self.nn_type == 'emb_attention2_seq2seq_context':
                rnn_func = partial(embedding_attention_seq2seq_context,
                                   num_heads=2)

            # for training: feed_previous == False, using dropout if available
            # outputs = batch_size * num_decoder_symbols ~ i.e. output logits at each steps
            # states = cell states at each steps
            self.outputs, self.states = rnn_func(
                self.enc_inputs_drop
                if self.enc_inputs_drop else self.enc_inputs,
                self.dec_inputs,
                self.cell,
                self.da_dict_size,
                self.tree_dict_size,
                scope=scope)

            scope.reuse_variables()

            # for decoding: feed_previous == True
            self.dec_outputs, self.dec_states = rnn_func(self.enc_inputs,
                                                         self.dec_inputs,
                                                         self.cell,
                                                         self.da_dict_size,
                                                         self.tree_dict_size,
                                                         feed_previous=True,
                                                         scope=scope)

        # TODO use output projection ???

        # target weights
        # TODO change to actual weights, zero after the end of tree ???
        self.cost_weights = [
            tf.ones_like(trg, tf.float32, name='cost_weights')
            for trg in self.targets
        ]

        # cost
        self.tf_cost = sequence_loss(self.outputs, self.targets,
                                     self.cost_weights, self.tree_dict_size)
        self.dec_cost = sequence_loss(self.dec_outputs, self.targets,
                                      self.cost_weights, self.tree_dict_size)
        if self.use_dec_cost:
            self.cost = 0.5 * (self.tf_cost + self.dec_cost)
        else:
            self.cost = self.tf_cost

        self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

        # optimizer (default to Adam)
        if self.optimizer_type == 'sgd':
            self.optimizer = tf.train.GradientDescentOptimizer(
                self.learning_rate)
        elif self.optimizer_type == 'adagrad':  # elif, so 'sgd' is not overridden by the Adam default below
            self.optimizer = tf.train.AdagradOptimizer(self.learning_rate)
        else:
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_func = self.optimizer.minimize(self.cost)

        # initialize session
        session_config = None
        if self.max_cores:
            session_config = tf.ConfigProto(
                inter_op_parallelism_threads=self.max_cores,
                intra_op_parallelism_threads=self.max_cores)
        self.session = tf.Session(config=session_config)

        # this helps us load/save the model
        self.saver = tf.train.Saver(tf.all_variables())
Example No. 25
    def __init__(self,
                 source_vocab_size,
                 target_vocab_size,
                 buckets,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 use_lstm=False,
                 num_samples=512,
                 forward_only=False):
        """Create the model.

    Args:
      source_vocab_size: size of the source vocabulary.
      target_vocab_size: size of the target vocabulary.
      buckets: a list of pairs (I, O), where I specifies maximum input length
        that will be processed in that bucket, and O specifies maximum output
        length. Training instances that have inputs longer than I or outputs
        longer than O will be pushed to the next bucket and padded accordingly.
        We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
      size: number of units in each layer of the model.
      num_layers: number of layers in the model.
      max_gradient_norm: gradients will be clipped to maximally this norm.
      batch_size: the size of the batches used during training;
        the model construction is independent of batch_size, so it can be
        changed after initialization if this is convenient, e.g., for decoding.
      learning_rate: learning rate to start with.
      learning_rate_decay_factor: decay learning rate by this much when needed.
      use_lstm: if true, we use LSTM cells instead of GRU cells.
      num_samples: number of samples for sampled softmax.
      forward_only: if set, we do not construct the backward pass in the model.
    """
        self.source_vocab_size = source_vocab_size
        self.target_vocab_size = target_vocab_size
        self.buckets = buckets
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < self.target_vocab_size:
            with tf.device("/cpu:0"):
                w = tf.get_variable("proj_w", [size, self.target_vocab_size])
                w_t = tf.transpose(w)
                b = tf.get_variable("proj_b", [self.target_vocab_size])
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                with tf.device("/cpu:0"):
                    labels = tf.reshape(labels, [-1, 1])
                    return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                      num_samples,
                                                      self.target_vocab_size)

            softmax_loss_function = sampled_loss

        # Create the internal multi-layer cell for our RNN.
        single_cell = rnn_cell.GRUCell(size)
        if use_lstm:
            single_cell = rnn_cell.BasicLSTMCell(size)
        cell = single_cell
        if num_layers > 1:
            cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                source_vocab_size,
                target_vocab_size,
                output_projection=output_projection,
                feed_previous=do_decode)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))
        for i in xrange(buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name="weight{0}".format(i)))

        # Our targets are decoder inputs shifted by one.
        targets = [
            self.decoder_inputs[i + 1]
            for i in xrange(len(self.decoder_inputs) - 1)
        ]

        # Training outputs and losses.
        if forward_only:
            self.outputs, self.losses = seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                lambda x, y: seq2seq_f(x, y, True),
                softmax_loss_function=softmax_loss_function)
            # If we use output projection, we need to project outputs for decoding.
            if output_projection is not None:
                for b in xrange(len(buckets)):
                    self.outputs[b] = [
                        tf.matmul(output, output_projection[0]) +
                        output_projection[1] for output in self.outputs[b]
                    ]
        else:
            self.outputs, self.losses = seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                lambda x, y: seq2seq_f(x, y, False),
                softmax_loss_function=softmax_loss_function)

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            for b in xrange(len(buckets)):
                gradients = tf.gradients(self.losses[b], params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, params),
                                        global_step=self.global_step))

        self.saver = tf.train.Saver(tf.all_variables())
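A hypothetical way to instantiate the constructor above. The class name Seq2SeqModel and every numeric value here are assumptions for illustration (the constructor mirrors the standard TensorFlow translate tutorial model):

_buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]  # assumed bucket sizes
model = Seq2SeqModel(source_vocab_size=40000,
                     target_vocab_size=40000,
                     buckets=_buckets,
                     size=256,
                     num_layers=2,
                     max_gradient_norm=5.0,
                     batch_size=64,
                     learning_rate=0.5,
                     learning_rate_decay_factor=0.99,
                     use_lstm=False,
                     forward_only=False)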
Example No. 26
    def __init__(self, max_len, input_size, size, num_layers,
                 max_gradient_norm, batch_size, learning_rate,
                 learning_rate_decay_factor):
        """Create the network. A simplified network that handles only sorting.

        Args:
            max_len: maximum length of the model.
            input_size: size of the inputs data.
            size: number of units in each layer of the model.
            num_layers: number of layers in the model.
            max_gradient_norm: gradients will be clipped to maximally this norm.
            batch_size: the size of the batches used during training;
                the model construction is independent of batch_size, so it can be
                changed after initialization if this is convenient, e.g., for decoding.
            learning_rate: learning rate to start with.
            learning_rate_decay_factor: decay learning rate by this much when needed.
        """
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        cell = rnn_cell.GRUCell(size)
        if num_layers > 1:
            cell = rnn_cell.MultiRNNCell([cell] * num_layers)  # stack the GRU cell; `single_cell` was undefined here

        self.encoder_inputs = []
        self.decoder_inputs = []
        self.decoder_targets = []
        self.target_weights = []
        for i in range(max_len):
            self.encoder_inputs.append(
                tf.placeholder(tf.float32, [batch_size, input_size],
                               name="EncoderInput%d" % i))

        for i in range(max_len + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.float32, [batch_size, input_size],
                               name="DecoderInput%d" % i))
            self.decoder_targets.append(
                tf.placeholder(tf.float32, [batch_size, max_len + 1],
                               name="DecoderTarget%d" % i))  # one hot
            self.target_weights.append(
                tf.placeholder(tf.float32, [batch_size, 1],
                               name="TargetWeight%d" % i))

        # Encoder

        # Need for attention
        encoder_outputs, final_state = rnn.rnn(cell,
                                               self.encoder_inputs,
                                               dtype=tf.float32)

        # Need a dummy output to point at; it marks the end of decoding.
        encoder_outputs = [tf.zeros([FLAGS.batch_size, FLAGS.rnn_size])
                           ] + encoder_outputs

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            tf.reshape(e, [-1, 1, cell.output_size]) for e in encoder_outputs
        ]
        attention_states = tf.concat(1, top_states)

        with tf.variable_scope("decoder"):
            outputs, states, _ = pointer_decoder(self.decoder_inputs,
                                                 final_state, attention_states,
                                                 cell)

        with tf.variable_scope("decoder", reuse=True):
            predictions, _, inps = pointer_decoder(self.decoder_inputs,
                                                   final_state,
                                                   attention_states,
                                                   cell,
                                                   feed_prev=True)

        self.predictions = predictions

        self.outputs = outputs
        self.inps = inps
Example No. 27
    def __init__(self,
                 vocab_size,
                 buckets_or_sentence_length,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 model_type,
                 use_lstm=True,
                 num_samples=512,
                 forward_only=False):
        """Create the model.  This constructor can be used to created an embedded or embedded-attention, bucketed or non-bucketed model made of single or multi-layer RNN cells. 
    Args:
      vocab_size: Size of the vocabulary.
      buckets_or_sentence_length: 
        If using buckets:
          A list of pairs (I, O), where I specifies maximum input length
          that will be processed in that bucket, and O specifies maximum output
          length. Training instances that have inputs longer than I or outputs
          longer than O will be pushed to the next bucket and padded accordingly.
          We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
        Else:
          The maximum number of words per sentence.
      size: Number of units in each layer of the model.
      num_layers: Number of layers in the model.
      max_gradient_norm: Gradients will be clipped to maximally this norm.
      batch_size: The size of the batches used during training;
        the model construction is independent of batch_size, so it can be
        changed after initialization if this is convenient, e.g., for decoding.
      learning_rate: Learning rate to start with.
      learning_rate_decay_factor: Decay learning rate by this much when needed.
      num_samples: Number of samples for sampled softmax.
      forward_only: If set, we do not construct the backward pass in the model.
    """
        # Need to determine if we're using buckets or not:
        if type(buckets_or_sentence_length) == list:
            self.buckets = buckets_or_sentence_length
        else:
            self.max_sentence_length = buckets_or_sentence_length

        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < self.vocab_size:
            with tf.device("/cpu:0"):
                w = tf.get_variable("proj_w", [size, self.vocab_size])
                w_t = tf.transpose(w)
                b = tf.get_variable("proj_b", [self.vocab_size])
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                with tf.device("/cpu:0"):
                    labels = tf.reshape(labels, [-1, 1])
                    return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                      num_samples,
                                                      self.vocab_size)

            softmax_loss_function = sampled_loss

        # Create the internal multi-layer cell for our RNN.
        single_cell = rnn_cell.GRUCell(size)
        if use_lstm:
            single_cell = rnn_cell.BasicLSTMCell(size)
        cell = single_cell  #i, j, f, o = array_ops.split(1, 4, concat)
        if num_layers > 1:
            cell = rnn_cell.MultiRNNCell(
                [single_cell] *
                num_layers)  #cur_inp, array_ops.concat(1, new_states)

        # The seq2seq function: we use embedding for the input and attention (if applicable).
        if model_type == 'embedding_attention':  # '==' rather than 'is' for string comparison

            def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
                return seq2seq.embedding_attention_seq2seq(
                    encoder_inputs,
                    decoder_inputs,
                    cell,
                    vocab_size,
                    vocab_size,
                    output_projection=output_projection,
                    feed_previous=do_decode)
        else:  # just build embedding model, I should probably change this to throw an error

            def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
                return seq2seq.embedding_rnn_seq2seq(
                    encoder_inputs,
                    decoder_inputs,
                    cell,
                    vocab_size,
                    vocab_size,
                    output_projection=output_projection,
                    feed_previous=do_decode)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []

        # NOTE: If the model is not bucketed, these try blocks will throw an AttributeError and execute code to build a non-bucketed model.
        try:
            encoder_range = self.buckets[-1][0]
            decoder_range = self.buckets[-1][1]
        except AttributeError:
            encoder_range, decoder_range = self.max_sentence_length, self.max_sentence_length

        for i in xrange(encoder_range):  # Last bucket is the biggest one.
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))
        for i in xrange(decoder_range + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name="weight{0}".format(i)))

        # Our targets are decoder inputs shifted by one.
        targets = [
            self.decoder_inputs[i + 1]
            for i in xrange(len(self.decoder_inputs) - 1)
        ]

        # Training outputs and losses.
        try:
            if forward_only:
                self.outputs, self.losses = seq2seq.model_with_buckets(
                    self.encoder_inputs,
                    self.decoder_inputs,
                    targets,
                    self.target_weights,
                    self.buckets,
                    self.vocab_size,
                    lambda x, y: seq2seq_f(x, y, True),
                    softmax_loss_function=softmax_loss_function)
                # If we use output projection, we need to project outputs for decoding.
                if output_projection is not None:
                    for b in xrange(len(self.buckets)):
                        self.outputs[b] = [
                            tf.nn.xw_plus_b(output, output_projection[0],
                                            output_projection[1])
                            for output in self.outputs[b]
                        ]
            else:
                self.outputs, self.losses = seq2seq.model_with_buckets(
                    self.encoder_inputs,
                    self.decoder_inputs,
                    targets,
                    self.target_weights,
                    self.buckets,
                    self.vocab_size,
                    lambda x, y: seq2seq_f(x, y, False),
                    softmax_loss_function=softmax_loss_function)

        except AttributeError:
            if forward_only:
                self.outputs, self.states = seq2seq_f(self.encoder_inputs,
                                                      self.decoder_inputs[:-1],
                                                      True)
                self.losses = seq2seq.sequence_loss(
                    self.outputs,
                    targets,
                    self.target_weights[:-1],
                    self.vocab_size,
                    softmax_loss_function=softmax_loss_function)
                # Project outputs for decoding
                if output_projection is not None:
                    self.outputs = [
                        tf.nn.xw_plus_b(output, output_projection[0],
                                        output_projection[1])
                        for output in self.outputs
                    ]
            else:
                self.outputs, self.states = seq2seq_f(self.encoder_inputs,
                                                      self.decoder_inputs[:-1],
                                                      False)
                self.losses = (seq2seq.sequence_loss(
                    self.outputs,
                    targets,
                    self.target_weights[:-1],
                    self.vocab_size,
                    softmax_loss_function=softmax_loss_function))

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        self.params = params  # Hold onto this for Woz
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)

            try:
                for b in xrange(len(self.buckets)):
                    gradients = tf.gradients(self.losses[b], params)
                    clipped_gradients, norm = tf.clip_by_global_norm(
                        gradients, max_gradient_norm)
                    self.gradient_norms.append(norm)
                    self.updates.append(
                        opt.apply_gradients(zip(clipped_gradients, params),
                                            global_step=self.global_step))
            except AttributeError:
                gradients = tf.gradients(self.losses, params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.gradient_norms = norm
                self.updates = opt.apply_gradients(
                    zip(clipped_gradients, params),
                    global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
Example No. 28
    def __init__(self,
                 embedding_mat,
                 non_static,
                 lstm_type,
                 hidden_unit,
                 sequence_length,
                 max_pool_size,
                 num_classes,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.batch_size = tf.placeholder(tf.int32)
        self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1],
                                  name="pad")
        self.real_len = tf.placeholder(tf.int32, [None], name="real_len")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Extend input to a 4D Tensor, because tf.nn.conv2d requires so.
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            if not non_static:
                W = tf.constant(embedding_mat, name="W")
            else:
                W = tf.Variable(embedding_mat, name="W")
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            emb = tf.expand_dims(self.embedded_chars, -1)

        # CNN
        pooled_concat = []
        reduced = np.int32(np.ceil((sequence_length) * 1.0 / max_pool_size))
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):

                # Zero paddings so that the convolution output have dimension batch x sequence_length x emb_size x channel
                num_prio = (filter_size - 1) // 2
                num_post = (filter_size - 1) - num_prio
                pad_prio = tf.concat(1, [self.pad] * num_prio)
                pad_post = tf.concat(1, [self.pad] * num_post)
                emb_pad = tf.concat(1, [pad_prio, emb, pad_post])

                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                name="b")
                conv = tf.nn.conv2d(emb_pad,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")

                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(h,
                                        ksize=[1, max_pool_size, 1, 1],
                                        strides=[1, max_pool_size, 1, 1],
                                        padding='SAME',
                                        name="pool")
                pooled = tf.reshape(pooled, [-1, reduced, num_filters])
                pooled_concat.append(pooled)

        pooled_concat = tf.concat(2, pooled_concat)
        pooled_concat = tf.nn.dropout(pooled_concat, self.dropout_keep_prob)

        # LSTM
        if lstm_type == "gru":
            lstm_cell = rnn_cell.GRUCell(num_units=hidden_unit,
                                         input_size=embedding_size)
        else:
            if lstm_type == "basic":
                lstm_cell = rnn_cell.BasicLSTMCell(num_units=hidden_unit,
                                                   input_size=embedding_size)
            else:
                lstm_cell = rnn_cell.LSTMCell(num_units=hidden_unit,
                                              input_size=embedding_size,
                                              use_peepholes=True)
        lstm_cell = rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=self.dropout_keep_prob)

        self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, reduced, pooled_concat)
        ]
        outputs, state = rnn.rnn(lstm_cell,
                                 inputs,
                                 initial_state=self._initial_state,
                                 sequence_length=self.real_len)

        # Collect the appropriate last outputs into `output` (dimension = batch x hidden_unit)
        output = outputs[0]
        with tf.variable_scope("Output"):
            tf.get_variable_scope().reuse_variables()
            one = tf.ones([1, hidden_unit], tf.float32)
            for i in range(1, len(outputs)):
                ind = self.real_len < (i + 1)
                ind = tf.to_float(ind)
                ind = tf.expand_dims(ind, -1)
                mat = tf.matmul(ind, one)
                output = tf.add(tf.mul(output, mat),
                                tf.mul(outputs[i], 1.0 - mat))

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            self.W = tf.Variable(tf.truncated_normal(
                [hidden_unit, num_classes], stddev=0.1),
                                 name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(self.W)  # regularize the output-layer weights (was `W`, the last conv filter)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(output, self.W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # CalculateMean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                self.scores, self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
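
The masking loop in the "Output" scope above keeps, for every sequence in the batch, the RNN output produced at that sequence's true last step (given by real_len). A minimal NumPy sketch of the same arithmetic, with made-up shapes and lengths, shows the effect:

import numpy as np

# Illustration only; shapes, lengths and values below are assumptions.
batch_size, hidden_unit, num_steps = 3, 2, 4
real_len = np.array([2, 4, 1])                           # true length of each sequence
outputs = [np.full((batch_size, hidden_unit), t + 1.0)   # fake per-step outputs 1..4
           for t in range(num_steps)]

output = outputs[0]
one = np.ones((1, hidden_unit))
for i in range(1, num_steps):
    ind = (real_len < (i + 1)).astype(np.float32)    # 1.0 where the sequence already ended
    mat = ind[:, None].dot(one)                      # broadcast the mask to (batch, hidden)
    output = output * mat + outputs[i] * (1.0 - mat) # keep the old value once a sequence ends

print output   # rows are 2.0, 4.0 and 1.0 -> the output at step real_len for each example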
Example 29
    def __init__(self,
                 source_vocab_size,
                 target_vocab_size,
                 buckets,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 use_lstm=False,
                 num_samples=512,
                 forward_only=False):

        self.source_vocab_size = source_vocab_size
        self.target_vocab_size = target_vocab_size
        self.buckets = buckets
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        output_projection = None
        softmax_loss_function = None

        # Use a sampled softmax only when it is cheaper than a full softmax.
        if 0 < num_samples < self.target_vocab_size:
            with tf.device("/cpu:0"):
                w = tf.get_variable("proj_w", [size, self.target_vocab_size])
                w_t = tf.transpose(w)
                b = tf.get_variable("proj_b", [self.target_vocab_size])
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                with tf.device("/cpu:0"):
                    labels = tf.reshape(labels, [-1, 1])
                    return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                      num_samples,
                                                      self.target_vocab_size)

            softmax_loss_function = sampled_loss

        single_cell = rnn_cell.GRUCell(size)
        if use_lstm:
            single_cell = rnn_cell.BasicLSTMCell(size)
        cell = single_cell
        if num_layers > 1:
            cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)

        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                source_vocab_size,
                target_vocab_size,
                output_projection=output_projection,
                feed_previous=do_decode)

        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(buckets[-1][0]):
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))
        for i in xrange(buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name="weight{0}".format(i)))

        # Targets are the decoder inputs shifted one step to the left.
        targets = [
            self.decoder_inputs[i + 1]
            for i in xrange(len(self.decoder_inputs) - 1)
        ]

        if forward_only:
            self.outputs, self.losses = seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                self.target_vocab_size,
                lambda x, y: seq2seq_f(x, y, True),
                softmax_loss_function=softmax_loss_function)

            # With sampled softmax the decoder emits hidden-size vectors;
            # project them back to full-vocabulary logits for decoding.
            if output_projection is not None:
                for b in xrange(len(buckets)):
                    self.outputs[b] = [
                        tf.matmul(output, output_projection[0]) +
                        output_projection[1] for output in self.outputs[b]
                    ]
        else:
            self.outputs, self.losses = seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                self.target_vocab_size,
                lambda x, y: seq2seq_f(x, y, False),
                softmax_loss_function=softmax_loss_function)

        params = tf.trainable_variables()
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            for b in xrange(len(buckets)):
                gradients = tf.gradients(self.losses[b], params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, params),
                                        global_step=self.global_step))

        self.saver = tf.train.Saver(tf.all_variables())
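
The constructor above does not show the class it belongs to. Assuming it is part of a class named Seq2SeqModel (a guess for illustration, not confirmed by the snippet), a minimal instantiation with illustrative hyperparameters could look like this:

# Hypothetical usage sketch; the class name and every value here are assumptions.
buckets = [(5, 10), (10, 15)]          # (encoder_length, decoder_length) pairs
model = Seq2SeqModel(source_vocab_size=40000,
                     target_vocab_size=40000,
                     buckets=buckets,
                     size=256,                  # GRU/LSTM hidden size
                     num_layers=2,
                     max_gradient_norm=5.0,
                     batch_size=64,
                     learning_rate=0.5,
                     learning_rate_decay_factor=0.99,
                     use_lstm=False,            # keep the default GRU cells
                     num_samples=512,           # sampled-softmax candidates
                     forward_only=False)        # also build the training ops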
Example 30
    def BiRNN(self, scope):
        # Input shape: (batch_size, step_size, input_dim).
        # Swap step_size and batch_size so the data becomes time-major.
        data = tf.transpose(self.input_data, [1, 0, 2])

        # Flatten the step and batch dimensions so a single matmul can apply
        # the input-to-hidden projection to every time step at once;
        # data is now (step_size * batch_size, input_dim + 1).
        data = tf.reshape(data, [-1, self.config.input_dim + 1])

        # Define lstm cells with tensorflow
        with tf.variable_scope(str(scope)):
            # Linear activation
            data = tf.matmul(data,
                             self.weights['hidden']) + self.biases['hidden']
            data = tf.nn.dropout(data, self.config.dropout)
            # Define a cell
            if self.config.cell_type == 'GRU':
                lstm_fw_cell = rnn_cell.GRUCell(self.config.hidden_dim)
                lstm_bw_cell = rnn_cell.GRUCell(self.config.hidden_dim)
            else:
                lstm_fw_cell = rnn_cell.LSTMCell(
                    self.config.hidden_dim,
                    forget_bias=self.config.forget_bias,
                    use_peepholes=self.config.use_peepholes,
                    cell_clip=self.config.cell_clip)
                lstm_bw_cell = rnn_cell.LSTMCell(
                    self.config.hidden_dim,
                    forget_bias=self.config.forget_bias,
                    use_peepholes=self.config.use_peepholes,
                    cell_clip=self.config.cell_clip)
            # Split into a list of per-step tensors, since the RNN inner loop
            # expects a list of inputs: step_size * (batch_size, hidden_dim)
            data = tf.split(0, self.config.step_size, data)
            # Get the outputs of the bidirectional RNN
            print 'running single-stack bidirectional RNN...'
            outputs = rnn.bidirectional_rnn(
                lstm_fw_cell,
                lstm_bw_cell,
                data,
                initial_state_fw=self.init_state_fw,
                initial_state_bw=self.init_state_bw,
                scope="RNN1")
            # For a plain RNN prediction only the last step's output would matter;
            # here two of the returned outputs are averaged instead.
            total_outputs = tf.div(tf.add_n([outputs[2], outputs[1]]), 2.0)
            return [
                tf.nn.dropout(
                    tf.matmul(total_outputs, self.weights['out1']) +
                    self.biases['out1'], self.config.dropout),
                tf.nn.dropout(
                    tf.matmul(total_outputs, self.weights['out2']) +
                    self.biases['out2'], self.config.dropout),
                tf.nn.dropout(
                    tf.matmul(total_outputs, self.weights['out3']) +
                    self.biases['out3'], self.config.dropout),
                tf.nn.dropout(
                    tf.matmul(total_outputs, self.weights['out4']) +
                    self.biases['out4'], self.config.dropout),
                tf.nn.dropout(
                    tf.matmul(total_outputs, self.weights['out5']) +
                    self.biases['out5'], self.config.dropout),
            ]
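
The transpose and reshape at the top of BiRNN turn a (batch_size, step_size, input_dim) tensor into a time-major matrix so that a single matmul can apply the input-to-hidden projection to every time step at once. A NumPy sketch with toy shapes (all values assumed for illustration) reproduces the same reordering:

import numpy as np

# Illustration of the transpose + reshape pattern used in BiRNN; toy shapes only.
batch_size, step_size, input_dim = 2, 3, 4
x = np.arange(batch_size * step_size * input_dim).reshape(
    batch_size, step_size, input_dim)

x_t = np.transpose(x, (1, 0, 2))       # (step_size, batch_size, input_dim)
flat = x_t.reshape(-1, input_dim)      # (step_size * batch_size, input_dim)

# Rows are now grouped by time step: all examples for step 0 come first, then
# step 1, and so on -- the layout tf.split(0, step_size, ...) expects when
# building the per-step input list for the RNN.
print flat.shape                       # (6, 4)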