Code Example #1
File: bilstm.py Project: koth/kcws
    def inference(self, X, length, reuse=False):
        length_64 = tf.cast(length, tf.int64)
        with tf.variable_scope("bilstm", reuse=reuse):
            forward_output, _ = tf.nn.dynamic_rnn(
                tf.contrib.rnn.LSTMCell(self.num_hidden,
                                        reuse=reuse),
                X,
                dtype=tf.float32,
                sequence_length=length,
                scope="RNN_forward")
            backward_output_, _ = tf.nn.dynamic_rnn(
                tf.contrib.rnn.LSTMCell(self.num_hidden,
                                        reuse=reuse),
                inputs=tf.reverse_sequence(X,
                                           length_64,
                                           seq_dim=1),
                dtype=tf.float32,
                sequence_length=length,
                scope="RNN_backword")

        backward_output = tf.reverse_sequence(backward_output_,
                                              length_64,
                                              seq_dim=1)

        output = tf.concat([forward_output, backward_output], 2)
        output = tf.reshape(output, [-1, self.num_hidden * 2])
        if reuse is None or not reuse:
            output = tf.nn.dropout(output, 0.5)

        matricized_unary_scores = tf.matmul(output, self.W) + self.b
        unary_scores = tf.reshape(
            matricized_unary_scores,
            [-1, self.max_seq_len, self.num_tags],
            name="Reshape_7" if reuse else None)
        return unary_scores
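
The reverse, forward-RNN, reverse-back pattern above works because tf.reverse_sequence touches only the first `lengths[i]` steps of each row and leaves the padding in place. A minimal standalone sketch of that behavior (toy values; TF 1.x graph mode assumed):

import tensorflow as tf

x = tf.constant([[1, 2, 3, 0],
                 [4, 5, 0, 0]])                 # [batch, time], zeros are padding
lengths = tf.constant([3, 2], dtype=tf.int64)

# Only the valid prefix of each row is reversed; padding stays put.
rev = tf.reverse_sequence(x, lengths, seq_dim=1, batch_dim=0)

with tf.Session() as sess:
    print(sess.run(rev))  # [[3 2 1 0]
                          #  [5 4 0 0]]
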
Code Example #2
File: qa_network.py Project: MorLong/qa_network
def _composition_function(self, inputs, length, init_state=None):
    if self._composition == "GRU":
        cell = GRUCell(self._size)
        return dynamic_rnn(cell, inputs, sequence_length=length, time_major=True,
                           initial_state=init_state, dtype=tf.float32)[0]
    elif self._composition == "LSTM":
        cell = BasicLSTMCell(self._size)
        # Guard with `is not None`: a tf.Tensor must not be used as a Python bool.
        init_state = (tf.concat(1, [tf.zeros_like(init_state, tf.float32), init_state])
                      if init_state is not None else None)
        outs = dynamic_rnn(cell, inputs, sequence_length=length, time_major=True,
                           initial_state=init_state, dtype=tf.float32)[0]
        return outs
    elif self._composition == "BiGRU":
        cell = GRUCell(self._size // 2, self._size)
        init_state_fw, init_state_bw = (tf.split(1, 2, init_state)
                                        if init_state is not None else (None, None))
        with tf.variable_scope("forward"):
            fw_outs = dynamic_rnn(cell, inputs, sequence_length=length, time_major=True,
                                  initial_state=init_state_fw, dtype=tf.float32)[0]
        with tf.variable_scope("backward"):
            rev_inputs = tf.reverse_sequence(tf.pack(inputs), length, 0, 1)
            rev_inputs = [tf.reshape(x, [-1, self._size]) for x in tf.split(0, len(inputs), rev_inputs)]
            bw_outs = dynamic_rnn(cell, rev_inputs, sequence_length=length, time_major=True,
                                  initial_state=init_state_bw, dtype=tf.float32)[0]
            bw_outs = tf.reverse_sequence(tf.pack(bw_outs), length, 0, 1)
            bw_outs = [tf.reshape(x, [-1, self._size]) for x in tf.split(0, len(inputs), bw_outs)]
        return [tf.concat(1, [fw_out, bw_out]) for fw_out, bw_out in zip(fw_outs, bw_outs)]
    else:
        raise NotImplementedError("Other compositions not implemented yet.")
Code Example #3
File: utils.py Project: RileyShe/DeepPavlov
    def __call__(self, inputs, seq_len, keep_prob=1.0, is_train=None, concat_layers=True):
        outputs = [tf.transpose(inputs, [1, 0, 2])]
        for layer in range(self.num_layers):
            gru_fw, gru_bw = self.grus[layer]
            init_fw, init_bw = self.inits[layer]
            mask_fw, mask_bw = self.dropout_mask[layer]
            with tf.variable_scope('fw_{}'.format(layer), reuse=tf.AUTO_REUSE):
                with tf.variable_scope('cudnn_gru', reuse=tf.AUTO_REUSE):
                    out_fw, _ = tf.nn.dynamic_rnn(cell=gru_fw, inputs=outputs[-1] * mask_fw, time_major=True,
                                                  initial_state=tuple(tf.unstack(init_fw, axis=0)))

            with tf.variable_scope('bw_{}'.format(layer), reuse=tf.AUTO_REUSE):
                with tf.variable_scope('cudnn_gru', reuse=tf.AUTO_REUSE):
                    inputs_bw = tf.reverse_sequence(
                        outputs[-1] * mask_bw, seq_lengths=seq_len, seq_dim=0, batch_dim=1)
                    out_bw, _ = tf.nn.dynamic_rnn(cell=gru_bw, inputs=inputs_bw, time_major=True,
                                                  initial_state=tuple(tf.unstack(init_bw, axis=0)))
                    out_bw = tf.reverse_sequence(
                        out_bw, seq_lengths=seq_len, seq_dim=0, batch_dim=1)

            outputs.append(tf.concat([out_fw, out_bw], axis=2))
        if concat_layers:
            res = tf.concat(outputs[1:], axis=2)
        else:
            res = outputs[-1]
        res = tf.transpose(res, [1, 0, 2])
        return res
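
This layer runs in time-major form, so the reversal uses seq_dim=0 with batch_dim=1. A toy sketch of how the axis roles swap relative to the batch-major examples (TF 1.x assumed):

import tensorflow as tf

x = tf.constant([[[1.], [4.]],
                 [[2.], [5.]],
                 [[3.], [6.]]])   # [time=3, batch=2, features=1]
lens = tf.constant([3, 2])

rev = tf.reverse_sequence(x, lens, seq_dim=0, batch_dim=1)
# batch element 0 becomes [3, 2, 1]; element 1 becomes [5, 4, 6],
# since its third step is padding and is left untouched.
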
Code Example #4
File: vrnn.py Project: 812864539/models
  def set_observations(self, observations, seq_lengths):
    """Stores the model's observations.

    Stores the observations (inputs and targets) in TensorArrays and precomputes
    things for later like the reverse RNN output and encoded targets.

    Args:
      observations: The observations of the model, a tuple containing two
        Tensors of shape [max_seq_len, batch_size, data_size]. The Tensors
        should be the inputs and targets, respectively.
      seq_lengths: An int Tensor of shape [batch_size] containing the length
        of each sequence in observations.
    """
    inputs, targets = observations
    self.seq_lengths = seq_lengths
    self.max_seq_len = tf.reduce_max(seq_lengths)
    self.inputs_ta = base.ta_for_tensor(inputs, clear_after_read=False)
    self.targets_ta = base.ta_for_tensor(targets, clear_after_read=False)
    targets_encoded = base.encode_all(targets, self.data_encoder)
    self.targets_encoded_ta = base.ta_for_tensor(targets_encoded,
                                                 clear_after_read=False)
    if self.rev_rnn_cell:
      reverse_targets_encoded = tf.reverse_sequence(
          targets_encoded, seq_lengths, seq_axis=0, batch_axis=1)
      # Compute the reverse rnn over the targets.
      reverse_rnn_out, _ = tf.nn.dynamic_rnn(self.rev_rnn_cell,
                                             reverse_targets_encoded,
                                             time_major=True,
                                             dtype=tf.float32)
      reverse_rnn_out = tf.reverse_sequence(reverse_rnn_out, seq_lengths,
                                            seq_axis=0, batch_axis=1)
      self.reverse_rnn_ta = base.ta_for_tensor(reverse_rnn_out,
                                               clear_after_read=False)
Code Example #5
File: default.py Project: Styrke/master-code
    def build(self):
        print('Building model')
        self.x_embeddings = tf.Variable(
            tf.random_normal([self.alphabet_src_size, self.embedd_dims],
            stddev=0.1), name='x_embeddings')
        self.t_embeddings = tf.Variable(
            tf.random_normal([self.alphabet_tar_size, self.embedd_dims],
            stddev=0.1), name='t_embeddings')

        X_embedded = tf.gather(self.x_embeddings, self.Xs, name='embed_X')
        t_embedded = tf.gather(self.t_embeddings, self.ts_go, name='embed_t')

        with tf.variable_scope('dense_out'):
            W_out = tf.get_variable('W_out', [self.word_encoder_units*2, self.alphabet_tar_size])
            b_out = tf.get_variable('b_out', [self.alphabet_tar_size])

        # forward encoding
        char_enc_state, char_enc_out = encoder(X_embedded, self.X_len, 'char_encoder', self.char_encoder_units)
        char2word = _grid_gather(char_enc_out, self.X_spaces)
        char2word.set_shape([None, None, self.char_encoder_units])
        word_enc_state, word_enc_out = encoder(char2word, self.X_spaces_len, 'word_encoder', self.word_encoder_units)

        # backward encoding words
        char2word = tf.reverse_sequence(char2word, tf.to_int64(self.X_spaces_len), 1)
        char2word.set_shape([None, None, self.char_encoder_units])
        word_enc_state_bck, word_enc_out_bck = encoder(char2word, self.X_spaces_len, 'word_encoder_backwards', self.word_encoder_units)
        word_enc_out_bck = tf.reverse_sequence(word_enc_out_bck, tf.to_int64(self.X_spaces_len), 1)

        word_enc_state = tf.concat(1, [word_enc_state, word_enc_state_bck])
        word_enc_out = tf.concat(2, [word_enc_out, word_enc_out_bck])

        # decoding
        dec_state, dec_out, valid_dec_out, valid_attention_tracker = (
            attention_decoder(word_enc_out, self.X_spaces_len, word_enc_state,
                              t_embedded, self.t_len, self.attn_units,
                              self.t_embeddings, W_out, b_out))

        out_tensor = tf.reshape(dec_out, [-1, self.word_encoder_units*2])
        out_tensor = tf.matmul(out_tensor, W_out) + b_out
        out_shape = tf.concat(0, [tf.expand_dims(tf.shape(self.X_len)[0], 0),
                                  tf.expand_dims(tf.shape(t_embedded)[1], 0),
                                  tf.expand_dims(tf.constant(self.alphabet_tar_size), 0)])
        self.valid_attention_tracker = valid_attention_tracker.pack()
        self.out_tensor = tf.reshape(out_tensor, out_shape)
        self.out_tensor.set_shape([None, None, self.alphabet_tar_size])

        valid_out_tensor = tf.reshape(valid_dec_out, [-1, self.word_encoder_units*2])
        valid_out_tensor = tf.matmul(valid_out_tensor, W_out) + b_out
        self.valid_out_tensor = tf.reshape(valid_out_tensor, out_shape)

        self.out = None

        # add TensorBoard summaries for all variables
        tf.contrib.layers.summarize_variables()
Code Example #6
File: train_pos.py Project: koth/kcws
    def inference(self, wX, cX, reuse=None, trainMode=True):
        word_vectors = tf.nn.embedding_lookup(self.words, wX)
        char_vectors = tf.nn.embedding_lookup(self.chars, cX)
        char_vectors = tf.reshape(char_vectors, [-1, FLAGS.max_sentence_len,
                                                 FLAGS.max_chars_per_word,
                                                 FLAGS.embedding_char_size])
        char_vectors = tf.transpose(char_vectors, perm=[1, 0, 2, 3])
        char_vectors = tf.expand_dims(char_vectors, -1)
        length = self.length(wX)
        length_64 = tf.cast(length, tf.int64)

        # do conv
        def do_char_conv(x): return self.char_convolution(x)
        char_vectors_x = tf.map_fn(do_char_conv, char_vectors)
        char_vectors_x = tf.transpose(char_vectors_x, perm=[1, 0, 2])
        word_vectors = tf.concat([word_vectors, char_vectors_x], axis=2)
        # if trainMode:
        #  word_vectors = tf.nn.dropout(word_vectors, 0.5)
        reuse = None if trainMode else True
        with tf.variable_scope("rnn_fwbw", reuse=reuse) as scope:
            forward_output, _ = tf.nn.dynamic_rnn(
                tf.contrib.rnn.LSTMCell(self.numHidden,
                                        reuse=reuse),
                word_vectors,
                dtype=tf.float32,
                sequence_length=length,
                scope="RNN_forward")
            backward_output_, _ = tf.nn.dynamic_rnn(
                tf.contrib.rnn.LSTMCell(self.numHidden,
                                        reuse=reuse),
                inputs=tf.reverse_sequence(word_vectors,
                                           length_64,
                                           seq_dim=1),
                dtype=tf.float32,
                sequence_length=length,
                scope="RNN_backword")

        backward_output = tf.reverse_sequence(backward_output_,
                                              length_64,
                                              seq_dim=1)

        output = tf.concat([forward_output, backward_output], 2)
        output = tf.reshape(output, [-1, self.numHidden * 2])
        if trainMode:
            output = tf.nn.dropout(output, 0.5)

        matricized_unary_scores = tf.matmul(output, self.W) + self.b
        # matricized_unary_scores = tf.nn.log_softmax(matricized_unary_scores)
        unary_scores = tf.reshape(
            matricized_unary_scores,
            [-1, FLAGS.max_sentence_len, self.distinctTagNum])

        return unary_scores, length
Code Example #7
File: tf_layers.py Project: RileyShe/DeepPavlov
def cudnn_bi_gru(units,
                 n_hidden,
                 seq_lengths=None,
                 n_layers=1,
                 trainable_initial_states=False,
                 name='cudnn_bi_gru',
                 reuse=False):
    """ Fast CuDNN Bi-GRU implementation

    Args:
        units: tf.Tensor with dimensions [B x T x F], where
            B - batch size
            T - number of tokens
            F - features
        n_hidden: dimensionality of hidden state
        seq_lengths: number of tokens in each sample in the batch
        n_layers: number of layers
        trainable_initial_states: whether to create a special trainable variable
                to initialize the hidden states of the network or use just zeros
        name: name of the variable scope to use
        reuse: whether to reuse an already initialized variable


    Returns:
        (h_fw, h_bw) - forward and backward hidden states along the T
            dimension, each a tf.Tensor of shape [B x T x H]
        (h_last_fw, h_last_bw) - last forward and backward hidden states,
            each a tf.Tensor of shape [B x H],
            where H is the number of hidden units
    """

    with tf.variable_scope(name, reuse=reuse):
        if seq_lengths is None:
            seq_lengths = tf.ones([tf.shape(units)[0]], dtype=tf.int32) * tf.shape(units)[1]
        with tf.variable_scope('Forward'):
            h_fw, h_last_fw = cudnn_gru_wrapper(units,
                                                n_hidden,
                                                n_layers=n_layers,
                                                trainable_initial_states=trainable_initial_states,
                                                seq_lengths=seq_lengths,
                                                reuse=reuse)

        with tf.variable_scope('Backward'):
            reversed_units = tf.reverse_sequence(units, seq_lengths=seq_lengths, seq_dim=1, batch_dim=0)
            h_bw, h_last_bw = cudnn_gru_wrapper(reversed_units,
                                                n_hidden,
                                                n_layers=n_layers,
                                                trainable_initial_states=trainable_initial_states,
                                                seq_lengths=seq_lengths,
                                                reuse=reuse)
            h_bw = tf.reverse_sequence(h_bw, seq_lengths=seq_lengths, seq_dim=1, batch_dim=0)

    return (h_fw, h_bw), (h_last_fw, h_last_bw)
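
A hypothetical usage sketch for cudnn_bi_gru as defined above (placeholder shapes assumed; cudnn_gru_wrapper must be available in scope):

units = tf.placeholder(tf.float32, [None, None, 100])   # [B, T, F]
lengths = tf.placeholder(tf.int32, [None])
(h_fw, h_bw), (h_last_fw, h_last_bw) = cudnn_bi_gru(units, 128, seq_lengths=lengths)
h = tf.concat([h_fw, h_bw], axis=-1)                    # [B, T, 2 * n_hidden]
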
Code Example #8
  def testShapeFunctionEdgeCases(self):
    # Batch size mismatched between input and seq_lengths.
    with self.assertRaises(ValueError):
      tf.reverse_sequence(
          tf.placeholder(tf.float32, shape=(32, 2, 3)),
          seq_lengths=tf.placeholder(tf.int64, shape=(33,)),
          seq_dim=3)

    # seq_dim out of bounds.
    with self.assertRaisesRegexp(ValueError, "seq_dim must be < input.dims()"):
      tf.reverse_sequence(
          tf.placeholder(tf.float32, shape=(32, 2, 3)),
          seq_lengths=tf.placeholder(tf.int64, shape=(32,)),
          seq_dim=3)
Code Example #9
File: lstm.py Project: qixiuai/tensor2tensor
def lstm_seq2seq_internal(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model, main step used for training."""
  with tf.variable_scope("lstm_seq2seq"):
    if inputs is not None:
      inputs_length = common_layers.length_from_embedding(inputs)
      # Flatten inputs.
      inputs = common_layers.flatten4d3d(inputs)

      # LSTM encoder.
      inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
      _, final_encoder_state = lstm(inputs, inputs_length, hparams, train,
                                    "encoder")
    else:
      final_encoder_state = None

    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets),
        targets_length,
        hparams,
        train,
        "decoder",
        initial_state=final_encoder_state)
    return tf.expand_dims(decoder_outputs, axis=2)
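
Note that the encoder output is never reversed back here: only final_encoder_state is consumed, and the final state of an RNN run over reversed inputs already summarizes each sequence right to left. A sketch of the same idea in isolation (toy shapes; TF 1.x assumed):

import tensorflow as tf

inputs = tf.random_normal([2, 5, 8])     # [batch, time, depth]
lengths = tf.constant([5, 3])

rev = tf.reverse_sequence(inputs, lengths, seq_axis=1)
cell = tf.nn.rnn_cell.BasicLSTMCell(16)
_, final_state = tf.nn.dynamic_rnn(cell, rev, sequence_length=lengths,
                                   dtype=tf.float32)
# final_state.h encodes each sequence read right-to-left; no second
# reverse_sequence is needed because the per-step outputs are discarded.
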
Code Example #10
  def testFloatReverseSequenceGrad(self):
    x = np.asarray([
        [[1, 2, 3, 4], [5, 6, 7, 8]],
        [[9, 10, 11, 12], [13, 14, 15, 16]],
        [[17, 18, 19, 20], [21, 22, 23, 24]]], dtype=np.float)
    x = x.reshape(3, 2, 4, 1, 1)
    x = x.transpose([2, 1, 0, 3, 4])  # transpose axes 0 <=> 2

    # reverse dim 0 up to (0:3, none, 0:4) along dim=2
    seq_dim = 0
    batch_dim = 2
    seq_lengths = np.asarray([3, 0, 4], dtype=np.int64)

    with self.test_session():
      input_t = tf.constant(x, shape=x.shape)
      seq_lengths_t = tf.constant(seq_lengths, shape=seq_lengths.shape)
      reverse_sequence_out = tf.reverse_sequence(input_t,
                                                 batch_dim=batch_dim,
                                                 seq_dim=seq_dim,
                                                 seq_lengths=seq_lengths_t)
      err = tf.test.compute_gradient_error(input_t,
                                           x.shape,
                                           reverse_sequence_out,
                                           x.shape,
                                           x_init_value=x)
    print("ReverseSequence gradient error = %g" % err)
    self.assertLess(err, 1e-8)
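
The tight gradient bound is expected: the gradient of reverse_sequence is another reverse_sequence with the same lengths, so the op is linear and exactly invertible on the valid prefix. A quick sketch of that involution property (toy values assumed):

import numpy as np
import tensorflow as tf

x = tf.constant(np.arange(24.).reshape(2, 3, 4))
lens = tf.constant([3, 2], dtype=tf.int64)
twice = tf.reverse_sequence(tf.reverse_sequence(x, lens, seq_dim=1),
                            lens, seq_dim=1)
# `twice` equals `x` elementwise: reversing the same prefix twice is the identity.
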
Code Example #11
  def step(self, time_, inputs, state, name=None):
    cell_output, cell_state = self.cell(inputs, state)
    cell_output_new, logits, attention_scores, attention_context = \
      self.compute_output(cell_output)

    if self.reverse_scores_lengths is not None:
      attention_scores = tf.reverse_sequence(
          input=attention_scores,
          seq_lengths=self.reverse_scores_lengths,
          seq_dim=1,
          batch_dim=0)

    sample_ids = self.helper.sample(
        time=time_, outputs=logits, state=cell_state)

    outputs = AttentionDecoderOutput(
        logits=logits,
        predicted_ids=sample_ids,
        cell_output=cell_output_new,
        attention_scores=attention_scores,
        attention_context=attention_context)

    finished, next_inputs, next_state = self.helper.next_inputs(
        time=time_, outputs=outputs, state=cell_state, sample_ids=sample_ids)

    return (outputs, next_state, next_inputs, finished)
Code Example #12
File: biLSTM.py Project: easonnie/landOfflol
    def __init__(self, embedding=None, hidden_state_d=100, max_length=80, learning_rate=0.001, dropout_rate=0.5, vocab_size=400001, embedding_d=300, num_classes=2):
        self.data = tf.placeholder(dtype=tf.int32, shape=[None, max_length])
        self.len = tf.placeholder(dtype=tf.int32, shape=[None])
        self.label = tf.placeholder(dtype=tf.float32, shape=[None])

        self.neg_label = 1 - self.label

        self.co_label = tf.transpose(tf.reshape(tf.concat(0, [self.label, self.neg_label]), [2, -1]))

        self.init_embedding(embedding, vocab_size, embedding_d)

        # filter len to maxlength
        self.maxlen = tf.cast(tf.fill([tf.shape(self.len)[0]], max_length), tf.int64)
        self.filter = tf.less_equal(tf.cast(self.len, tf.int64), self.maxlen)
        self.clean_len = tf.select(self.filter, tf.cast(self.len, tf.int64), self.maxlen)

        self.vec_data = tf.nn.embedding_lookup(self.embedding, self.data)
        self.reversed_vec_data = tf.reverse_sequence(self.vec_data, seq_dim=1, seq_lengths=self.clean_len)

        with tf.variable_scope('left2right'):
            left2right_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_state_d, state_is_tuple=True)
            self.output, self.state = tf.nn.dynamic_rnn(
                left2right_lstm_cell,
                self.vec_data,
                dtype=tf.float32,
                sequence_length=self.len,
            )

        with tf.variable_scope('right2left'):
            right2left_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_state_d, state_is_tuple=True)
            self.reversed_output, self.reversed_state = tf.nn.dynamic_rnn(
                right2left_lstm_cell,
                self.reversed_vec_data,
                dtype=tf.float32,
                sequence_length=self.len,
            )

        self.last = BiLSTM.last_relevant(self.output, self.len)
        self.reversed_last = BiLSTM.last_relevant(self.reversed_output, self.len)

        self.final_output = tf.concat(1, [self.last, self.reversed_last])

        self.dropout_last = tf.nn.dropout(self.final_output, keep_prob=dropout_rate)

        self.weight = tf.Variable(tf.truncated_normal([hidden_state_d * 2, num_classes], stddev=0.1))
        self.bias = tf.Variable(tf.constant(0.1, shape=[num_classes]))
        self.prediction = tf.nn.softmax(tf.matmul(self.final_output, self.weight) + self.bias)

        self.cost = tf.nn.softmax_cross_entropy_with_logits(tf.matmul(self.dropout_last, self.weight) + self.bias, self.co_label)
        self.train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)
        self.init_op = tf.initialize_all_variables()

        self.prediction_a = tf.argmax(self.prediction, dimension=1)
        self.prediction_b = tf.argmax(self.co_label, dimension=1)

        self.score = tf.reduce_sum(tf.cast(tf.equal(self.prediction_a, self.prediction_b), dtype=tf.int32)) / tf.size(self.label)

        self.sess = tf.Session()
        self.sess.run(self.init_op)
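
The fill/less_equal/select combination above clips each length to max_length before the reversal. An equivalent sketch of the same clipping, not the author's code:

lengths = tf.constant([3, 97], dtype=tf.int64)
clean = tf.minimum(lengths, 80)   # [3, 80]
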
Code Example #13
def _bidirectional_rnn(self, data, length):
    length_64 = tf.cast(length, tf.int64)
    forward, _ = tf.nn.dynamic_rnn(
        cell=self.params.rnn_cell(self.params.rnn_hidden),
        inputs=data,
        dtype=tf.float32,
        sequence_length=length,
        scope='rnn-forward')
    backward, _ = tf.nn.dynamic_rnn(
        cell=self.params.rnn_cell(self.params.rnn_hidden),
        inputs=tf.reverse_sequence(data, length_64, seq_dim=1),
        dtype=tf.float32,
        sequence_length=length,  # use the same lengths as the forward pass
        scope='rnn-backward')
    backward = tf.reverse_sequence(backward, length_64, seq_dim=1)
    output = tf.concat(2, [forward, backward])
    return output
Code Example #14
File: base.py Project: BinbinBian/expr_snli
    def __init__(self, input_, length_, hidden_state_d, name, cell=None, input_keep_rate=1.0, output_keep_rate=1.0,
                 init_state=None):
        """
        lstm_step, input_d, hidden_state_d
        :param name:
        :return:
        self.input  (shape=[None, lstm_step, input_d], dtype=tf.float32, name='input')
        self.length (shape=[None], dtype=tf.int32, name='length')
        """
        with tf.variable_scope(name):
            self.input = input_
            self.length = length_

            self.reverse_input = tf.reverse_sequence(self.input, seq_dim=1, seq_lengths=tf.cast(self.length, tf.int64))

            if cell is None:
                cell_f = tf.nn.rnn_cell.BasicLSTMCell(hidden_state_d, state_is_tuple=True)
                cell_r = tf.nn.rnn_cell.BasicLSTMCell(hidden_state_d, state_is_tuple=True)
            elif len(cell) > 1:
                cell_f, cell_r = cell
            else:
                cell_f = cell[0]
                cell_r = cell[0]

            if not init_state:
                init_state_f = None
                init_state_b = None
            elif len(init_state) > 1:
                init_state_f = init_state[0]
                init_state_b = init_state[1]
            else:
                init_state_f = init_state[0]
                init_state_b = init_state[0]

            # print('blala', init_state_f)
            # print('blala', init_state_b)

            with tf.variable_scope('forward'):
                self.output, self.last_state = tf.nn.dynamic_rnn(
                    cell_f,
                    tf.nn.dropout(self.input, input_keep_rate),
                    dtype=tf.float32,
                    sequence_length=self.length,
                    initial_state=init_state_f
                )
                self.last = tf.nn.dropout(BasicSeqModel.last_relevant(self.output, self.length),
                                          output_keep_rate)

            with tf.variable_scope('backward'):
                self.reverse_output, self.reverse_last_state = tf.nn.dynamic_rnn(
                    cell_r,
                    tf.nn.dropout(self.reverse_input, input_keep_rate),
                    dtype=tf.float32,
                    sequence_length=self.length,
                    initial_state=init_state_b
                )
                self.reverse_last = tf.nn.dropout(BasicSeqModel.last_relevant(self.reverse_output, self.length),
                                                  output_keep_rate)
Code Example #15
  def testShapeFunctionEdgeCases(self):
    # Batch size mismatched between input and seq_lengths.
    with self.assertRaises(ValueError):
      tf.reverse_sequence(
          tf.placeholder(tf.float32, shape=(32, 2, 3)),
          seq_lengths=tf.placeholder(tf.int64, shape=(33,)),
          seq_dim=3)

    # seq_dim out of bounds.
    with self.assertRaisesRegexp(ValueError, "seq_dim must be < input.dims()"):
      tf.reverse_sequence(
          tf.placeholder(tf.float32, shape=(32, 2, 3)),
          seq_lengths=tf.placeholder(tf.int64, shape=(32,)),
          seq_dim=3)

    # batch_dim out of bounds.
    with self.assertRaisesRegexp(
        ValueError, "batch_dim must be < input.dims()"):
      tf.reverse_sequence(
          tf.placeholder(tf.float32, shape=(32, 2, 3)),
          seq_lengths=tf.placeholder(tf.int64, shape=(32,)),
          seq_dim=0,
          batch_dim=3)

    with self.test_session():
      inputs = tf.placeholder(tf.float32, shape=(32, 2, 3))
      seq_lengths = tf.placeholder(tf.int64, shape=(32,))
      output = tf.reverse_sequence(
          inputs,
          seq_lengths=seq_lengths,
          seq_dim=0)  # batch_dim default is 0
      with self.assertRaisesOpError("batch_dim == seq_dim"):
        output.eval(feed_dict={inputs: np.random.rand(32, 2, 3),
                               seq_lengths: xrange(32)})
Code Example #16
File: nlc_model.py Project: sdlg/nlc
  def bidirectional_rnn(self, cell, inputs, lengths, scope=None):
    name = scope.name or "BiRNN"
    # Forward direction
    with vs.variable_scope(name + "_FW") as fw_scope:
      output_fw, output_state_fw = rnn.dynamic_rnn(cell, inputs, time_major=True, dtype=dtypes.float32,
                                                   sequence_length=lengths, scope=fw_scope)
    # Backward direction
    inputs_bw = tf.reverse_sequence(inputs, tf.to_int64(lengths), seq_dim=0, batch_dim=1)
    with vs.variable_scope(name + "_BW") as bw_scope:
      output_bw, output_state_bw = rnn.dynamic_rnn(cell, inputs_bw, time_major=True, dtype=dtypes.float32,
                                                   sequence_length=lengths, scope=bw_scope)

    output_bw = tf.reverse_sequence(output_bw, tf.to_int64(lengths), seq_dim=0, batch_dim=1)

    outputs = output_fw + output_bw
    output_state = output_state_fw + output_state_bw

    return (outputs, output_state)
Code Example #17
File: conv_seq2seq.py Project: clren/conv_seq2seq
def encode(self, features, labels):
  features["source_ids"] = tf.reverse_sequence(features["source_ids"], features["source_len"], batch_dim=0, seq_dim=1)  # [[1,2,3,4,PAD,PAD,PAD],[2,3,PAD,PAD,PAD,PAD,PAD]]   [4,2]
  features["source_ids"] = tf.reverse(features["source_ids"], [1])  # --> [[4,3,2,1,PAD,PAD,PAD],[3,2,PAD,PAD,PAD,PAD,PAD]] --> [[PAD,PAD,PAD,1,2,3,4],[PAD,PAD,PAD,PAD,PAD,2,3]]

  source_embedded = tf.nn.embedding_lookup(self.source_embedding_fairseq(),
                                           features["source_ids"])
  encoder_fn = self.encoder_class(self.params["encoder.params"], self.mode, self.source_pos_embedding_fairseq())
  return encoder_fn(source_embedded, features["source_len"])
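
The two reversals above move the padding from the right end to the left end while restoring token order. A toy trace of the trick (hypothetical ids; TF 1.x assumed):

import tensorflow as tf

ids = tf.constant([[1, 2, 3, 4, 0, 0, 0],
                   [2, 3, 0, 0, 0, 0, 0]])
lens = tf.constant([4, 2])

step1 = tf.reverse_sequence(ids, lens, seq_dim=1, batch_dim=0)
# [[4, 3, 2, 1, 0, 0, 0], [3, 2, 0, 0, 0, 0, 0]]
step2 = tf.reverse(step1, [1])
# [[0, 0, 0, 1, 2, 3, 4], [0, 0, 0, 0, 0, 2, 3]]
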
Code Example #18
File: arrays.py Project: KellyChan/python-examples
def reverse(self):
    x = tf.Variable(self.image, name='x')
    model = tf.initialize_all_variables()

    with tf.Session() as session:
        x = tf.reverse_sequence(x, [self.width] * self.height, 1, batch_dim=0)
        session.run(model)
        result = session.run(x)
    return result
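
Giving every row the full width as its "length" makes reverse_sequence mirror the whole image horizontally. An equivalent toy sketch (assumed 2-D image; TF 1.x):

import tensorflow as tf

img = tf.constant([[1, 2, 3],
                   [4, 5, 6]])
mirrored = tf.reverse_sequence(img, [3, 3], seq_dim=1, batch_dim=0)
# [[3, 2, 1], [6, 5, 4]]
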
Code Example #19
File: rnn.py Project: codealphago/convai-bot-1337
def bw_dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
                   dtype=None, parallel_iterations=None, swap_memory=False,
                   time_major=False, scope=None):
    assert not time_major  # TODO : to be implemented later!

    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')

    flat_inputs = tf.reverse(flat_inputs, 1) if sequence_length is None \
        else tf.reverse_sequence(flat_inputs, sequence_length, 1)
    flat_outputs, final_state = _dynamic_rnn(cell, flat_inputs, sequence_length=flat_len,
                                             initial_state=initial_state, dtype=dtype,
                                             parallel_iterations=parallel_iterations, swap_memory=swap_memory,
                                             time_major=time_major, scope=scope)
    flat_outputs = tf.reverse(flat_outputs, 1) if sequence_length is None \
        else tf.reverse_sequence(flat_outputs, sequence_length, 1)

    outputs = reconstruct(flat_outputs, inputs, 2)
    return outputs, final_state
Code Example #20
def create_graph(g):
    initer = tf.random_uniform_initializer(0.0,INIT_SCALE)

    with tf.variable_scope("graph", reuse=None, initializer=initer):
        g['x'] = list()
        g['y'] = list()
        g['s'] = list()
        g['seq_lengths'] = tf.placeholder(tf.int64,shape=[BATCH_SIZE])
    
        for _ in range(UNROLLS):
            g['x'].append( tf.placeholder(tf.float32,shape=[BATCH_SIZE,INPUT_SIZE]) )
            g['y'].append( tf.placeholder(tf.float32,shape=[BATCH_SIZE,INPUT_SIZE]) )
            g['s'].append( tf.placeholder(tf.float32,shape=[BATCH_SIZE]) )

        num_inputs  = INPUT_SIZE * UNROLLS
        # num_outputs = OUTPUT_SIZE * UNROLLS
            
        g['w'] = tf.get_variable("softmax_w", [num_inputs,OUTPUT_SIZE])
        g['b'] = tf.get_variable("softmax_b", [OUTPUT_SIZE])

        g['cat_x'] = tf.concat(1, g['x'] )

        g['logits'] = tf.nn.xw_plus_b(g['cat_x'], g['w'], g['b'] )
        
        g['cat_y'] = tf.unpack(tf.reverse_sequence(tf.reshape( tf.concat(1, g['y'] ),
            [BATCH_SIZE,UNROLLS,OUTPUT_SIZE] ),g['seq_lengths'],1,0),axis=1)[0]

        g['loss'] = tf.nn.softmax_cross_entropy_with_logits(g['logits'], g['cat_y'])

        g['r_s'] = tf.unpack(tf.reverse_sequence(tf.transpose(
            tf.reshape( tf.concat(0, g['s'] ), [UNROLLS, BATCH_SIZE] ) ),
            g['seq_lengths'],1,0),axis=1)[0]

        g['train_loss'] = tf.mul( g['loss'], g['r_s'] )
        
        g['preds'] = tf.nn.softmax(g['logits'])
        
        g['class_preds'] =  tf.floor( g['preds'] + 0.5 )

        g['accy'] = tf.mul( g['class_preds'],  g['cat_y'] )

        g['w_accy'] = tf.mul(g['accy'], tf.reshape(g['r_s'],shape=[BATCH_SIZE,1]))
Code Example #21
File: rnn.py Project: eunchung/qrn
def dynamic_bidirectional_rnn(cell, pre_inputs, sequence_length=None, initial_state=None,
                              dtype=None, parallel_iterations=None, swap_memory=False,
                              time_major=False, scope=None, feed_prev_out=False,
                              num_layers=1, reuse_layers=True):
    assert isinstance(cell, BiRNNCell)
    with vs.variable_scope(scope or "Bi-RNN") as root_scope:
        inputs_list = []
        outputs_list = []
        outputs_fw_list = []
        outputs_bw_list = []
        state_fw_list = []
        state_bw_list = []
        for layer_idx in range(num_layers):
            scope_name = "layer_{}".format(layer_idx)
            with name_scope(scope_name) if reuse_layers else vs.variable_scope(scope_name):
                inputs = cell.pre(pre_inputs)
                outputs_fw, state_fw = dynamic_rnn(cell, inputs, sequence_length=sequence_length, initial_state=initial_state,
                    dtype=dtype, parallel_iterations=parallel_iterations, swap_memory=swap_memory,
                    time_major=time_major, feed_prev_out=feed_prev_out, scope='FW')
                inputs_rev = reverse_sequence(inputs, sequence_length, 1)
                outputs_bw_rev, state_bw = dynamic_rnn(cell, inputs_rev, sequence_length=sequence_length, initial_state=initial_state,
                    dtype=dtype, parallel_iterations=parallel_iterations, swap_memory=swap_memory,
                    time_major=time_major, feed_prev_out=feed_prev_out, scope='BW')
                outputs_bw = reverse_sequence(outputs_bw_rev, sequence_length, 1)
                outputs = cell.post(outputs_fw, outputs_bw)
                pre_inputs = outputs
                inputs_list.append(inputs)
                outputs_list.append(outputs)
                outputs_fw_list.append(outputs_fw)
                outputs_bw_list.append(outputs_bw)
                state_fw_list.append(state_fw)
                state_bw_list.append(state_bw)
                if reuse_layers:
                    root_scope.reuse_variables()
        tensors = dict()
        tensors['in'] = transpose(pack(inputs_list), [1, 0, 2, 3])
        tensors['out'] = transpose(pack(outputs_list), [1, 0, 2, 3])
        tensors['fw_out'] = transpose(pack(outputs_fw_list), [1, 0, 2, 3])  # [N, L, M, d]
        tensors['bw_out'] = transpose(pack(outputs_bw_list), [1, 0, 2, 3])  # [N, L, M, d]
        tensors['fw_state'] = transpose(pack(state_fw_list), [1, 0, 2])  # [N, L, d]
        tensors['bw_state'] = transpose(pack(state_bw_list), [1, 0, 2])  # [N, L, d]
    return outputs_list[-1], state_fw_list[-1], state_bw_list[-1], tensors
Code Example #22
File: coref_model.py Project: qq547276542/e2e-coref
  def encode_sentences(self, text_emb, text_len, text_len_mask):
    num_sentences = tf.shape(text_emb)[0]
    max_sentence_length = tf.shape(text_emb)[1]

    # Transpose before and after for efficiency.
    inputs = tf.transpose(text_emb, [1, 0, 2]) # [max_sentence_length, num_sentences, emb]

    with tf.variable_scope("fw_cell"):
      cell_fw = util.CustomLSTMCell(self.config["lstm_size"], num_sentences, self.dropout)
      preprocessed_inputs_fw = cell_fw.preprocess_input(inputs)
    with tf.variable_scope("bw_cell"):
      cell_bw = util.CustomLSTMCell(self.config["lstm_size"], num_sentences, self.dropout)
      preprocessed_inputs_bw = cell_bw.preprocess_input(inputs)
      preprocessed_inputs_bw = tf.reverse_sequence(preprocessed_inputs_bw,
                                                   seq_lengths=text_len,
                                                   seq_dim=0,
                                                   batch_dim=1)
    state_fw = tf.contrib.rnn.LSTMStateTuple(tf.tile(cell_fw.initial_state.c, [num_sentences, 1]), tf.tile(cell_fw.initial_state.h, [num_sentences, 1]))
    state_bw = tf.contrib.rnn.LSTMStateTuple(tf.tile(cell_bw.initial_state.c, [num_sentences, 1]), tf.tile(cell_bw.initial_state.h, [num_sentences, 1]))
    with tf.variable_scope("lstm"):
      with tf.variable_scope("fw_lstm"):
        fw_outputs, fw_states = tf.nn.dynamic_rnn(cell=cell_fw,
                                                  inputs=preprocessed_inputs_fw,
                                                  sequence_length=text_len,
                                                  initial_state=state_fw,
                                                  time_major=True)
      with tf.variable_scope("bw_lstm"):
        bw_outputs, bw_states = tf.nn.dynamic_rnn(cell=cell_bw,
                                                  inputs=preprocessed_inputs_bw,
                                                  sequence_length=text_len,
                                                  initial_state=state_bw,
                                                  time_major=True)

    bw_outputs = tf.reverse_sequence(bw_outputs,
                                     seq_lengths=text_len,
                                     seq_dim=0,
                                     batch_dim=1)

    text_outputs = tf.concat([fw_outputs, bw_outputs], 2)
    text_outputs = tf.transpose(text_outputs, [1, 0, 2]) # [num_sentences, max_sentence_length, emb]
    return self.flatten_emb_by_sentence(text_outputs, text_len_mask)
Code Example #23
File: nn.py Project: ys2899/mean-teacher
def flip_randomly(inputs, horizontally, vertically, is_training, name=None):
    """Flip images randomly. Make separate flipping decision for each image.

    Args:
        inputs (4-D tensor): Input images (batch size, height, width, channels).
        horizontally (bool): If True, flip horizontally with 50% probability. Otherwise, don't.
        vertically (bool): If True, flip vertically with 50% probability. Otherwise, don't.
        is_training (bool): If False, no flip is performed.
        name: An optional name for the operation.
    """
    with tf.name_scope(name, "flip_randomly") as scope:
        batch_size, height, width, _ = tf.unstack(tf.shape(inputs))
        vertical_choices = (tf.random_uniform([batch_size], 0, 2, tf.int32) *
                            tf.to_int32(vertically) *
                            tf.to_int32(is_training))
        horizontal_choices = (tf.random_uniform([batch_size], 0, 2, tf.int32) *
                              tf.to_int32(horizontally) *
                              tf.to_int32(is_training))
        vertically_flipped = tf.reverse_sequence(inputs, vertical_choices * height, 1)
        both_flipped = tf.reverse_sequence(vertically_flipped, horizontal_choices * width, 2)
        return tf.identity(both_flipped, name=scope)
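
The per-image flip works because a sequence length of 0 leaves a row untouched while a length equal to the full axis size reverses it completely. A toy sketch of that trick (hypothetical decisions; TF 1.x assumed):

import tensorflow as tf

images = tf.random_uniform([4, 8, 8, 3])   # [batch, height, width, channels]
flip = tf.constant([1, 0, 1, 0])           # hypothetical per-image choices
# length 0 => no flip; length 8 (= height) => full vertical flip
flipped = tf.reverse_sequence(images, flip * 8, seq_axis=1)
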
Code Example #24
File: recurrent.py Project: ufal/neuralmonkey
def rnn_layer(rnn_input: tf.Tensor,
              lengths: tf.Tensor,
              rnn_spec: RNNSpec) -> Tuple[tf.Tensor, tf.Tensor]:
    """Construct a RNN layer given its inputs and specs.

    Arguments:
        rnn_inputs: The input sequence to the RNN.
        lengths: Lengths of input sequences.
        rnn_spec: A valid RNNSpec tuple specifying the network architecture.
        add_residual: Add residual connections to the layer output.
    """
    if rnn_spec.direction == "bidirectional":
        fw_cell = _make_rnn_cell(rnn_spec)
        bw_cell = _make_rnn_cell(rnn_spec)

        outputs_tup, states_tup = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, rnn_input, sequence_length=lengths,
            dtype=tf.float32)

        outputs = tf.concat(outputs_tup, 2)

        if rnn_spec.cell_type == "LSTM":
            states_tup = (state.h for state in states_tup)

        final_state = tf.concat(list(states_tup), 1)
    else:
        if rnn_spec.direction == "backward":
            rnn_input = tf.reverse_sequence(rnn_input, lengths, seq_axis=1)

        cell = _make_rnn_cell(rnn_spec)
        outputs, final_state = tf.nn.dynamic_rnn(
            cell, rnn_input, sequence_length=lengths, dtype=tf.float32)

        if rnn_spec.direction == "backward":
            outputs = tf.reverse_sequence(outputs, lengths, seq_axis=1)

        if rnn_spec.cell_type == "LSTM":
            final_state = final_state.h

    return outputs, final_state
Code Example #25
File: lstm.py Project: qixiuai/tensor2tensor
def body(self, features):
  if self._hparams.initializer == "orthogonal":
    raise ValueError("LSTM models fail with orthogonal initializer.")
  train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
  inputs = features.get("inputs")
  inputs_length = common_layers.length_from_embedding(inputs)
  # Flatten inputs.
  inputs = common_layers.flatten4d3d(inputs)
  # LSTM encoder.
  inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
  encoder_output, _ = lstm(inputs, inputs_length, self._hparams, train,
                           "encoder")
  return tf.expand_dims(encoder_output, axis=2)
Code Example #26
 def _testReverseSequence(self, x, seq_dim, seq_lengths,
                          truth, use_gpu=False, expected_err_re=None):
   with self.test_session(use_gpu=use_gpu):
     ans = tf.reverse_sequence(x,
                               seq_dim=seq_dim,
                               seq_lengths=seq_lengths)
     if expected_err_re is None:
       tf_ans = ans.eval()
       self.assertAllClose(tf_ans, truth, atol=1e-10)
       self.assertShapeEqual(truth, ans)
     else:
       with self.assertRaisesOpError(expected_err_re):
         ans.eval()
Code Example #27
File: qa_network.py Project: MorLong/qa_network
    def _comp_f(self):
        """
        Encodes all queries (including supporting queries)
        :return: encoded queries
        """
        with tf.device("/cpu:0"):
            max_length = tf.cast(tf.reduce_max(self._length), tf.int32)
            context_t = tf.transpose(self._context)
            context_t = tf.slice(context_t, [0, 0], tf.pack([max_length, -1]))
            embedded = tf.nn.embedding_lookup(self.input_embedding, context_t)
            embedded = tf.nn.dropout(embedded, self.keep_prob)
            batch_size = tf.shape(self._context)[0]
            batch_size_32 = tf.reshape(batch_size, [1])
            batch_size_64 = tf.cast(batch_size, tf.int64)

        with tf.device(self._device1):
            #use other device for backward rnn
            with tf.variable_scope("backward"):
                min_end = tf.segment_min(self._ends, self._span_context)
                init_state = tf.get_variable("init_state", [self._size], initializer=self._init)
                init_state = tf.reshape(tf.tile(init_state, batch_size_32), [-1, self._size])
                rev_embedded = tf.reverse_sequence(embedded, self._length, 0, 1)
                # TIME-MAJOR: [T, B, S]
                outs_bw = self._composition_function(rev_embedded, self._length - min_end, init_state)
                # reshape to all possible queries for all sequences. Dim[0]=batch_size*(max_length+1).
                # "+1" because we include the initial state
                outs_bw = tf.reshape(tf.concat(0, [tf.expand_dims(init_state, 0), outs_bw]), [-1, self._size])
                # gather respective queries via their lengths-start (because reversed sequence)
                lengths_aligned = tf.gather(self._length, self._span_context)
                out_bw = tf.gather(outs_bw, (lengths_aligned - self._ends) * batch_size_64 + self._span_context)

        with tf.device(self._device2):
            with tf.variable_scope("forward"):
                #e_inputs = [tf.reshape(e, [-1, self._size]) for e in tf.split(1, self._max_length, embedded)]
                max_start = tf.segment_max(self._starts, self._span_context)
                init_state = tf.get_variable("init_state", [self._size], initializer=self._init)
                init_state = tf.reshape(tf.tile(init_state, batch_size_32), [-1, self._size])
                # TIME-MAJOR: [T, B, S]
                outs_fw = self._composition_function(embedded, max_start, init_state)
                # reshape to all possible queries for all sequences. Dim[0]=batch_size*(max_length+1).
                # "+1" because we include the initial state
                outs_fw = tf.reshape(tf.concat(0, [tf.expand_dims(init_state, 0), outs_fw]), [-1, self._size])
                # gather respective queries via their positions (with offset of batch_size*ends)
                out_fw = tf.gather(outs_fw, self._starts * batch_size_64 + self._span_context)
            # form query from forward and backward compositions
            query = tf.contrib.layers.fully_connected(tf.concat(1, [out_fw, out_bw]), self._size,
                                                      activation_fn=None, weights_initializer=None, biases_initializer=None)
            query = tf.add_n([query, out_bw, out_fw])

        return query
Code Example #28
  def create_model(self, model_input, vocab_size, num_frames, is_training=True, **unused_params):
    """Creates a model which uses a stack of LSTMs to represent the video.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                   input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
           frames for each video (before padding).

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
    lstm_size = FLAGS.lstm_cells
    number_of_layers = FLAGS.lstm_layers
    random_frames = FLAGS.lstm_random_sequence
    iterations = FLAGS.iterations
    backward = FLAGS.lstm_backward

    if random_frames:
      num_frames_2 = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
      model_input = utils.SampleRandomFrames(model_input, num_frames_2,
                                             iterations)
    if backward:
      model_input = tf.reverse_sequence(model_input, num_frames, seq_axis=1) 
 
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(
            [
                tf.contrib.rnn.BasicLSTMCell(
                    lstm_size, forget_bias=1.0, state_is_tuple=False)
                for _ in range(number_of_layers)
                ], state_is_tuple=False)

    loss = 0.0
    with tf.variable_scope("RNN"):
      outputs, state = tf.nn.dynamic_rnn(stacked_lstm, model_input,
                                         sequence_length=num_frames,
                                         dtype=tf.float32)

    aggregated_model = getattr(video_level_models,
                               FLAGS.video_level_classifier_model)

    return aggregated_model().create_model(
        model_input=state,
        vocab_size=vocab_size,
        is_training=is_training,
        **unused_params)
Code Example #29
  def _create_position_embedding(self, lengths, maxlen):

    # Slice to size of current sequence
    pe_slice = self.pos_embed[2:maxlen+2, :]
    # Replicate encodings for each element in the batch
    batch_size = tf.shape(lengths)[0]
    pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])

    # Mask out positions that are padded
    positions_mask = tf.sequence_mask(
        lengths=lengths, maxlen=maxlen, dtype=tf.float32)
    positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)
    
    positions_embed = tf.reverse_sequence(positions_embed, lengths, batch_dim=0, seq_dim=1)  # [[1,2,3,4,PAD,PAD,PAD],[2,3,PAD,PAD,PAD,PAD,PAD]]   [4,2]
    positions_embed = tf.reverse(positions_embed,[1])  # --> [[4,3,2,1,PAD,PAD,PAD],[3,2,PAD,PAD,PAD,PAD,PAD]] --> [[PAD,PAD,PAD,1,2,3,4],[PAD,PAD,PAD,PAD,PAD,2,3]]

    return positions_embed
Code Example #30
File: sequence_ops_test.py Project: wmiao1769/trfl
  def testScanSumEquivalenceWithSeqLen(self):
    with self.test_session() as sess:
      sequence_lengths = [0, 2]
      bootstrap = tf.constant([0.5, 1.5], dtype=tf.float32)

      sequence = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]
      decays = [[.1, .2, .3, .4, .5], [.6, .7, .8, .9, .10]]

      eq_sequence = [[0, 0, 0, 0, 0], [6, 7, 0, 0, 0]]
      eq_decays = [[0, 0, 0, 0, 0], [.6, .7, 0, 0, 0]]

      eq_reverse_sequence = [[0, 0, 0, 0, 0], [7, 6, 0, 0, 0]]
      eq_reverse_decays = [[0, 0, 0, 0, 0], [.7, .6, 0, 0, 0]]

      # We use transpose because it is easier to define the input data in
      # BxT (batch x time) form, while scan_discounted_sum assumes TxB form.
      sequence_in = tf.transpose(tf.constant(sequence, dtype=tf.float32))
      decays_in = tf.transpose(tf.constant(decays, dtype=tf.float32))
      eq_sequence_in = tf.transpose(tf.constant(eq_sequence, dtype=tf.float32))
      eq_decays_in = tf.transpose(tf.constant(eq_decays, dtype=tf.float32))
      eq_reverse_sequence_in = tf.transpose(
          tf.constant(eq_reverse_sequence, dtype=tf.float32))
      eq_reverse_decays_in = tf.transpose(
          tf.constant(eq_reverse_decays, dtype=tf.float32))

      eq_result = sequence_ops.scan_discounted_sum(
          sequence_in, decays_in, bootstrap, reverse=False,
          sequence_lengths=sequence_lengths)
      exp_eq_result = sequence_ops.scan_discounted_sum(
          eq_sequence_in, eq_decays_in, bootstrap)

      eq_reverse_result = sequence_ops.scan_discounted_sum(
          sequence_in, decays_in, bootstrap, reverse=True,
          sequence_lengths=sequence_lengths)
      exp_eq_reverse_result = sequence_ops.scan_discounted_sum(
          eq_reverse_sequence_in, eq_reverse_decays_in, bootstrap)
      exp_eq_reverse_result = tf.reverse_sequence(
          exp_eq_reverse_result, sequence_lengths, seq_axis=0, batch_axis=1)

      self.assertAllClose(sess.run(eq_result),
                          sess.run(exp_eq_result))
      self.assertAllClose(sess.run(eq_reverse_result),
                          sess.run(exp_eq_reverse_result))
Code Example #31
    def construct(self, args, source_chars, target_chars, bow, eow):
        with self.session.graph.as_default():
            if args.recodex:
                tf.get_variable_scope().set_initializer(tf.glorot_uniform_initializer(seed=42))

            # Inputs
            self.sentence_lens = tf.placeholder(tf.int32, [None], name="sentence_lens")
            self.source_ids = tf.placeholder(tf.int32, [None, None], name="source_ids")
            self.source_seqs = tf.placeholder(tf.int32, [None, None], name="source_seqs")
            self.source_seq_lens = tf.placeholder(tf.int32, [None], name="source_seq_lens")
            self.target_ids = tf.placeholder(tf.int32, [None, None], name="target_ids")
            self.target_seqs = tf.placeholder(tf.int32, [None, None], name="target_seqs")
            self.target_seq_lens = tf.placeholder(tf.int32, [None], name="target_seq_lens")

            # Append EOW after target_seqs
            target_seqs = tf.reverse_sequence(self.target_seqs, self.target_seq_lens, 1)
            target_seqs = tf.pad(target_seqs, [[0, 0], [1, 0]], constant_values=eow)
            target_seq_lens = self.target_seq_lens + 1
            target_seqs = tf.reverse_sequence(target_seqs, target_seq_lens, 1)
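            # Toy trace of the append-EOW trick above (hypothetical values, eow=9):
            #   [[5, 6, 0], [7, 0, 0]] with lens [2, 1]
            #   reverse            -> [[6, 5, 0], [7, 0, 0]]
            #   pad front with eow -> [[9, 6, 5, 0], [9, 7, 0, 0]]
            #   reverse, lens + 1  -> [[5, 6, 9, 0], [7, 9, 0, 0]]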

            # Encoder
            # TODO: Generate source embeddings for source chars, of shape [source_chars, args.char_dim].

            # TODO: Embed the self.source_seqs using the source embeddings.

            # TODO: Using a GRU with dimension args.rnn_dim, process the embedded self.source_seqs
            # using forward RNN and store the resulting states into `source_states`.

            # Index the unique words using self.source_ids and self.target_ids.
            sentence_mask = tf.sequence_mask(self.sentence_lens)
            source_states = tf.boolean_mask(tf.nn.embedding_lookup(source_states, self.source_ids), sentence_mask)
            source_lens = tf.boolean_mask(tf.nn.embedding_lookup(self.source_seq_lens, self.source_ids), sentence_mask)

            target_seqs = tf.boolean_mask(tf.nn.embedding_lookup(target_seqs, self.target_ids), sentence_mask)
            target_lens = tf.boolean_mask(tf.nn.embedding_lookup(target_seq_lens, self.target_ids), sentence_mask)

            # Decoder
            # TODO: Generate target embeddings for target chars, of shape [target_chars, args.char_dim].

            # TODO: Embed the target_seqs using the target embeddings.

            # TODO: Generate a decoder GRU with dimension args.rnn_dim.

            # TODO: Create a `decoder_layer` -- a fully connected layer with
            # target_chars neurons used in the decoder to classify into target characters.

            # The DecoderTraining will be used during training. It will output logits for each
            # target character.
            class DecoderTraining(tf.contrib.seq2seq.Decoder):
                @property
                def batch_size(self): return # TODO: Return size of the batch, using for example source_states size
                @property
                def output_dtype(self): return tf.float32 # Type for logits of target characters
                @property
                def output_size(self): return target_chars # Length of logits for every output

                def initialize(self, name=None):
                    finished = # TODO: False if target_lens > 0, True otherwise
                    states = # TODO: Initial decoder state to use
                    inputs = # TODO: embedded BOW characters of shape [self.batch_size] using target embeddings.
                             # You can use tf.fill to generate BOWs of appropriate size.
                    return finished, inputs, states

                def step(self, time, inputs, states, name=None):
                    outputs, states = # TODO: Run the decoder GRU cell using inputs and states.
                    outputs = # TODO: Apply the decoder_layer on outputs.
                    next_input = # TODO: Next input are character embeddings with index `time` in target_embedded.
                    finished = # TODO: False if target_lens > time + 1, True otherwise.
                    return outputs, states, next_input, finished
            output_layer, _, _ = tf.contrib.seq2seq.dynamic_decode(DecoderTraining())
            self.predictions_training = tf.argmax(output_layer, axis=2, output_type=tf.int32)

            # The DecoderPrediction will be used during prediction. It will
            # directly output the predicted target characters.
            class DecoderPrediction(tf.contrib.seq2seq.Decoder):
                @property
                def batch_size(self): return # TODO: Return size of the batch, using for example source_states size
                @property
                def output_dtype(self): return tf.int32 # Type for predicted target characters
                @property
                def output_size(self): return 1 # Will return just one output

                def initialize(self, name=None):
                    finished = # TODO: False of shape [self.batch_size].
                    states = # TODO: Initial decoder state to use.
                    inputs = # TODO: embedded BOW characters of shape [self.batch_size] using target embeddings.
                             # You can use tf.fill to generate BOWs of appropriate size.
                    return finished, inputs, states

                def step(self, time, inputs, states, name=None):
                    outputs, states = # TODO: Run the decoder GRU cell using inputs and states.
                    outputs = # TODO: Apply the decoder_layer on outputs.
                    outputs = # TODO: Use tf.argmax to choose most probable class (supply parameter `output_type=tf.int32`).
                    next_input = # TODO: Embed `outputs` using target_embeddings
                    finished = # TODO: True where outputs==eow, False otherwise
                               # Use tf.equal for the comparison, Python's '==' is not overloaded
                    return outputs, states, next_input, finished
            self.predictions, _, self.prediction_lens = tf.contrib.seq2seq.dynamic_decode(
                DecoderPrediction(), maximum_iterations=tf.reduce_max(source_lens) + 10)

            # Training
            weights = tf.sequence_mask(target_lens, dtype=tf.float32)
            loss = tf.losses.sparse_softmax_cross_entropy(target_seqs, output_layer, weights=weights)
            global_step = tf.train.create_global_step()
            self.training = tf.train.AdamOptimizer().minimize(loss, global_step=global_step, name="training")

            # Summaries
            accuracy_training = tf.reduce_all(tf.logical_or(
                tf.equal(self.predictions_training, target_seqs),
                tf.logical_not(tf.sequence_mask(target_lens))), axis=1)
            self.current_accuracy_training, self.update_accuracy_training = tf.metrics.mean(accuracy_training)

            minimum_length = tf.minimum(tf.shape(self.predictions)[1], tf.shape(target_seqs)[1])
            accuracy = tf.logical_and(
                tf.equal(self.prediction_lens, target_lens),
                tf.reduce_all(tf.logical_or(
                    tf.equal(self.predictions[:, :minimum_length], target_seqs[:, :minimum_length]),
                    tf.logical_not(tf.sequence_mask(target_lens, maxlen=minimum_length))), axis=1))
            self.current_accuracy, self.update_accuracy = tf.metrics.mean(accuracy)

            self.current_loss, self.update_loss = tf.metrics.mean(loss, weights=tf.reduce_sum(weights))
            self.reset_metrics = tf.variables_initializer(tf.get_collection(tf.GraphKeys.METRIC_VARIABLES))

            summary_writer = tf.contrib.summary.create_file_writer(args.logdir, flush_millis=10 * 1000)
            self.summaries = {}
            with summary_writer.as_default(), tf.contrib.summary.record_summaries_every_n_global_steps(10):
                self.summaries["train"] = [tf.contrib.summary.scalar("train/loss", self.update_loss),
                                           tf.contrib.summary.scalar("train/accuracy", self.update_accuracy_training)]
            with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
                for dataset in ["dev", "test"]:
                    self.summaries[dataset] = [tf.contrib.summary.scalar(dataset + "/loss", self.current_loss),
                                               tf.contrib.summary.scalar(dataset + "/accuracy", self.current_accuracy)]

            # Initialize variables
            self.session.run(tf.global_variables_initializer())
            with summary_writer.as_default():
                tf.contrib.summary.initialize(session=self.session, graph=self.session.graph)
Code example #32
	def build_model(self):
		with tf.variable_scope('RNNTEST'):
			self.sense = tf.placeholder(tf.int32,[None])
			self.arg1 = tf.placeholder(tf.int32,[None,None])
			self.arg2 = tf.placeholder(tf.int32,[None,None])
			self.arg1_len = tf.placeholder(tf.int32,[None])
			self.arg2_len = tf.placeholder(tf.int32,[None])
			self.keep_prob = tf.placeholder(tf.float32)
			
			with tf.device('/cpu:0'):
				if self.use_pre_trained_embedding:
					word_W = tf.get_variable('word_embed',initializer = tf.convert_to_tensor(self.data_loader.pre_trained_word_embeddings,dtype=tf.float32)) 
				else:
					word_W = tf.get_variable('word_embed',shape = [self.data_loader.word_vocab_size,self.word_embed_size])
			
			arg1 = tf.nn.dropout(tf.nn.embedding_lookup(word_W,self.arg1),self.keep_prob)
			arg2 = tf.nn.dropout(tf.nn.embedding_lookup(word_W,self.arg2),self.keep_prob)
			
			encoder_lstm_unit = rnn_cell.BasicLSTMCell(self.encoder_size)
			decoder_lstm_unit = rnn_cell.BasicLSTMCell(self.decoder_size)

			with tf.variable_scope('forward_encoder'):
				forward_encoder_outputs,forward_encoder_state = rnn.dynamic_rnn(encoder_lstm_unit,arg1,self.arg1_len,dtype=tf.float32)
			with tf.variable_scope('backward_encoder'):
				backward_encoder_outputs,backward_encoder_state= rnn.dynamic_rnn(encoder_lstm_unit,tf.reverse_sequence(arg1,tf.cast(self.arg1_len,tf.int64),1),dtype=tf.float32)
			encoder_outputs = tf.concat(2,[forward_encoder_outputs,tf.reverse_sequence(backward_encoder_outputs,tf.cast(self.arg1_len,tf.int64),1)])
			encoder_state = tf.concat(1,[forward_encoder_state,backward_encoder_state])

			source = tf.expand_dims(encoder_outputs,2) #batch_size x source_len x 1 x source_depth(2*encoder_size)
			attention_W = tf.get_variable('attention_W',[1,1,2*self.encoder_size,self.attention_judge_size])
			attention_V = tf.get_variable('attention_V',[self.attention_judge_size])
			WxH = tf.nn.conv2d(source, attention_W,[1,1,1,1],'SAME') #batch_size x source_len x 1 x attention
			self.mask = tf.placeholder(tf.float32,[None,None])

			def attention(input_t,output_t_minus_1,time):
				with tf.variable_scope('attention'):
					VxS = tf.reshape(rnn_cell.linear(output_t_minus_1,self.attention_judge_size,True),[-1,1,1,self.attention_judge_size]) #batch_size x 1 x 1 x attention
				_exp = tf.exp(tf.reduce_sum( attention_V * tf.tanh(WxH+VxS), [3]))#batch_size x source_len x 1
				_exp = _exp*tf.expand_dims(self.mask,-1)
				attention_weight = _exp/tf.reduce_sum(_exp,[1], keep_dims=True)
				attention_t = tf.reduce_sum(encoder_outputs*attention_weight,[1])
				feed_in_t = tf.tanh(rnn_cell.linear([attention_t,input_t],self.embedding_size,True))
				return feed_in_t

			with tf.variable_scope('decoder'):
				decoder_outputs,decoder_state = dynamic_rnn_decoder(arg2,decoder_lstm_unit,sequence_length=self.arg2_len,loop_function=attention)
			judge = tf.concat(1,[tf.reduce_sum(decoder_outputs,[1])/tf.expand_dims(tf.cast(self.arg2_len,tf.float32),-1),tf.reduce_sum(encoder_outputs,[1])/tf.expand_dims(tf.cast(self.arg1_len,tf.float32),-1)])
			unscaled_log_distribution = rnn_cell.linear(judge,self.data_loader.sense_vocab_size,True)
			self.output = tf.cast(tf.argmax(unscaled_log_distribution,1),tf.int32)
			self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.output,self.sense), tf.float32))
			
			#max-margin method
			#self._MM = tf.placeholder(tf.int32,[None])
			#margin = tf.sub(tf.reduce_max(unscaled_log_distribution,[1]),tf.gather(tf.reshape(unscaled_log_distribution,[-1]),self._MM))
			#self.loss = tf.reduce_mean(margin)

			#maximum likelihood method
			self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(unscaled_log_distribution, self.sense))
			
			self.optimizer = tf.train.AdagradOptimizer(self.lr)
			self.train_op = self.optimizer.minimize(self.loss)
Code example #33
    def _preprocess(self, features, labels):
        """Model-specific preprocessing for features and labels:

    - Creates vocabulary lookup tables for source and target vocab
    - Converts tokens into vocabulary ids
    """

        # Create vocabulary lookup for source
        source_vocab_to_id, source_id_to_vocab, source_word_to_count, _ = \
          vocab.create_vocabulary_lookup_table(self.source_vocab_info.path)

        source_candidate_vocab_to_id, source_candidate_id_to_vocab, source_candidate_word_to_count, _ = \
            vocab.create_vocabulary_lookup_table(self.source_candidate_vocab_info.path)

        # Create vocabulary lookup for target
        target_vocab_to_id, target_id_to_vocab, target_word_to_count, _ = \
          vocab.create_vocabulary_lookup_table(self.target_vocab_info.path)

        # Add vocab tables to graph collection so that we can access them in
        # other places.
        graph_utils.add_dict_to_collection(
            {
                "source_vocab_to_id": source_vocab_to_id,
                "source_id_to_vocab": source_id_to_vocab,
                "source_word_to_count": source_word_to_count,
                "source_candidate_vocab_to_id": source_candidate_vocab_to_id,
                "source_candidate_id_to_vocab": source_candidate_id_to_vocab,
                "source_candidate_word_to_count":
                source_candidate_word_to_count,
                "target_vocab_to_id": target_vocab_to_id,
                "target_id_to_vocab": target_id_to_vocab,
                "target_word_to_count": target_word_to_count
            }, "vocab_tables")

        # Slice source to max_len
        if self.params["source.max_seq_len"] is not None:
            features["source_tokens"] = features[
                "source_tokens"][:, :self.params["source.max_seq_len"]]
            features["source_len"] = tf.minimum(
                features["source_len"], self.params["source.max_seq_len"])
        # Slice source_candidate to max_len
        if self.params["source_candidate.max_seq_len"] is not None:
            features["source_candidate_tokens"] = features[
                "source_candidate_tokens"][:, :self.params[
                    "source_candidate.max_seq_len"]]
            features["source_candidate_len"] = tf.minimum(
                features["source_candidate_len"],
                self.params["source_candidate.max_seq_len"])

        # Look up the source ids in the vocabulary
        features["source_ids"] = source_vocab_to_id.lookup(
            features["source_tokens"])
        features["source_candidate_ids"] = source_candidate_vocab_to_id.lookup(
            features["source_candidate_tokens"])
        # Maybe reverse the source
        if self.params["source.reverse"] is True:
            features["source_ids"] = tf.reverse_sequence(
                input=features["source_ids"],
                seq_lengths=features["source_len"],
                seq_dim=1,
                batch_dim=0,
                name=None)
            features["source_candidate_ids"] = tf.reverse_sequence(
                input=features["source_candidate_ids"],
                seq_lengths=features["source_candidate_len"],
                seq_dim=1,
                batch_dim=0,
                name=None)

        features["source_len"] = tf.to_int32(features["source_len"])
        tf.summary.histogram("source_len", tf.to_float(features["source_len"]))
        features["source_candidate_len"] = tf.to_int32(
            features["source_candidate_len"])
        tf.summary.histogram("source_candidate_len",
                             tf.to_float(features["source_candidate_len"]))

        if labels is None:
            return features, None

        labels = labels.copy()

        # Slices targets to max length
        if self.params["target.max_seq_len"] is not None:
            labels["target_tokens"] = labels[
                "target_tokens"][:, :self.params["target.max_seq_len"]]
            labels["target_len"] = tf.minimum(
                labels["target_len"], self.params["target.max_seq_len"])

        # Look up the target ids in the vocabulary
        labels["target_ids"] = target_vocab_to_id.lookup(
            labels["target_tokens"])

        labels["target_len"] = tf.to_int32(labels["target_len"])
        tf.summary.histogram("target_len", tf.to_float(labels["target_len"]))

        # Keep track of the number of processed tokens
        num_tokens = tf.reduce_sum(labels["target_len"])
        num_tokens += tf.reduce_sum(features["source_len"])
        num_tokens += tf.reduce_sum(features["source_candidate_len"])
        token_counter_var = tf.Variable(0, name="tokens_counter")
        total_tokens = tf.assign_add(token_counter_var, num_tokens)
        tf.summary.scalar("num_tokens", total_tokens)

        with tf.control_dependencies([total_tokens]):
            features["source_tokens"] = tf.identity(features["source_tokens"])
            features["source_candidate_tokens"] = tf.identity(
                features["source_candidate_tokens"])

        # Add to graph collection for later use
        graph_utils.add_dict_to_collection(features, "features")
        if labels:
            graph_utils.add_dict_to_collection(labels, "labels")

        print("attention_biseqseq features:{} labels:{}".format(
            features, labels))
        return features, labels
Code example #34
    def build(self):
        print('Building model')
        self.x_embeddings = tf.Variable(tf.random_normal(
            [self.alphabet_src_size, self.embedd_dims], stddev=0.1),
                                        name='x_embeddings')
        self.t_embeddings = tf.Variable(tf.random_normal(
            [self.alphabet_tar_size, self.embedd_dims], stddev=0.1),
                                        name='t_embeddings')

        X_embedded = tf.gather(self.x_embeddings, self.Xs, name='embed_X')
        t_embedded = tf.gather(self.t_embeddings, self.ts_go, name='embed_t')

        with tf.variable_scope('dense_out'):
            W_out = tf.get_variable(
                'W_out', [self.word_encoder_units * 2, self.alphabet_tar_size])
            b_out = tf.get_variable('b_out', [self.alphabet_tar_size])

        # forward encoding
        char_enc_state, char_enc_out = encoder(X_embedded, self.X_len,
                                               'char_encoder',
                                               self.char_encoder_units)
        char2word = _grid_gather(char_enc_out, self.X_spaces)
        char2word.set_shape([None, None, self.char_encoder_units])
        word_enc_state, word_enc_out = encoder(char2word, self.X_spaces_len,
                                               'word_encoder',
                                               self.word_encoder_units)

        # backward encoding words
        char2word = tf.reverse_sequence(char2word,
                                        tf.to_int64(self.X_spaces_len), 1)
        char2word.set_shape([None, None, self.char_encoder_units])
        word_enc_state_bck, word_enc_out_bck = encoder(
            char2word, self.X_spaces_len, 'word_encoder_backwards',
            self.word_encoder_units)
        word_enc_out_bck = tf.reverse_sequence(word_enc_out_bck,
                                               tf.to_int64(self.X_spaces_len),
                                               1)

        word_enc_state = tf.concat(1, [word_enc_state, word_enc_state_bck])
        word_enc_out = tf.concat(2, [word_enc_out, word_enc_out_bck])

        # decoding
        dec_state, dec_out, valid_dec_out, valid_attention_tracker = (
            attention_decoder(word_enc_out, self.X_spaces_len, word_enc_state,
                              t_embedded, self.t_len, self.attn_units,
                              self.t_embeddings, W_out, b_out))

        out_tensor = tf.reshape(dec_out, [-1, self.word_encoder_units * 2])
        out_tensor = tf.matmul(out_tensor, W_out) + b_out
        out_shape = tf.concat(0, [
            tf.expand_dims(tf.shape(self.X_len)[0], 0),
            tf.expand_dims(tf.shape(t_embedded)[1], 0),
            tf.expand_dims(tf.constant(self.alphabet_tar_size), 0)
        ])
        self.valid_attention_tracker = valid_attention_tracker.pack()
        self.out_tensor = tf.reshape(out_tensor, out_shape)
        self.out_tensor.set_shape([None, None, self.alphabet_tar_size])

        valid_out_tensor = tf.reshape(valid_dec_out,
                                      [-1, self.word_encoder_units * 2])
        valid_out_tensor = tf.matmul(valid_out_tensor, W_out) + b_out
        self.valid_out_tensor = tf.reshape(valid_out_tensor, out_shape)

        self.out = None

        # add TensorBoard summaries for all variables
        tf.contrib.layers.summarize_variables()
Code example #35
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import tensorflow as tf

filename = "MarshOrchid.jpg"

image = mpimg.imread(filename)
height, width, depth = image.shape

x = tf.Variable(image, name='x')

model = tf.global_variables_initializer()

with tf.Session() as session:
    # Two reverse-and-transpose passes rotate the image 180 degrees (upside down)
    for i in range(2):
        xshape = tf.shape(x)
        result = session.run(xshape)
        x = tf.reverse_sequence(x, [result[1]] * result[0], 1, batch_dim=0)
        x = tf.transpose(x, perm=[1, 0, 2])
        session.run(model)
        result = (session.run(x))

#print(image.shape)

plt.imshow(result)
plt.show()
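For reference, when every row's length equals the full axis size, as in the script above (the whole width is passed for each row), tf.reverse_sequence degenerates into a plain reversal of that axis. A minimal sketch with hypothetical values (TF 1.x):

import tensorflow as tf

x = tf.constant([[1, 2, 3],
                 [4, 5, 6]])
# Reversing with full-width lengths...
full = tf.reverse_sequence(x, seq_lengths=[3, 3], seq_axis=1, batch_axis=0)
# ...is equivalent to reversing the whole axis.
plain = tf.reverse(x, axis=[1])

with tf.Session() as session:
    a, b = session.run([full, plain])
    print(a)  # [[3 2 1] [6 5 4]]
    assert (a == b).all()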
Code example #36
    def construct(self, args, source_chars, target_chars, bow, eow):
        with self.session.graph.as_default():
            # Inputs
            self.sentence_lens = tf.placeholder(tf.int32, [None],
                                                name="sentence_lens")
            self.source_ids = tf.placeholder(tf.int32, [None, None],
                                             name="source_ids")
            self.source_seqs = tf.placeholder(tf.int32, [None, None],
                                              name="source_seqs")
            self.source_seq_lens = tf.placeholder(tf.int32, [None],
                                                  name="source_seq_lens")
            self.target_ids = tf.placeholder(tf.int32, [None, None],
                                             name="target_ids")
            self.target_seqs = tf.placeholder(tf.int32, [None, None],
                                              name="target_seqs")
            self.target_seq_lens = tf.placeholder(tf.int32, [None],
                                                  name="target_seq_lens")

            # Training. The rest of the code assumes that
            # - when training the decoder, the output layer with logits for each generated
            #   character is in `output_layer` and the corresponding predictions are in
            #   `self.predictions_training`.
            # - the `target_ids` contains the gold generated characters
            # - the `target_lens` contains the number of valid characters for each lemma
            # - when running decoder inference, the predictions are in `self.predictions`
            #   and their lengths in `self.prediction_lens`.

            # Append EOW after target_seqs: reverse each sequence, pad one EOW
            # at the front, and reverse back, so the EOW lands right after the
            # last valid character.
            target_ids = tf.reverse_sequence(self.target_seqs,
                                             self.target_seq_lens, 1)
            target_ids = tf.pad(target_ids, [[0, 0], [1, 0]],
                                constant_values=eow)
            target_seq_lens = self.target_seq_lens + 1
            target_ids = tf.reverse_sequence(target_ids, target_seq_lens, 1)

            # Encoder
            # Generate source embeddings for source chars, of shape [source_chars, args.char_dim].
            source_embeddings = tf.get_variable("source_embeddings",
                                                [source_chars, args.char_dim])

            # Embed the self.source_seqs using the source embeddings.
            embedded_source_seqs = tf.nn.embedding_lookup(
                source_embeddings, self.source_seqs)

            # Using a GRU with dimension args.rnn_dim, process the embedded self.source_seqs
            # using bidirectional RNN. Store the summed fwd and bwd outputs in `source_encoded`
            # and the summed fwd and bwd states into `source_states`.
            rnn_cell_fwd = tf.nn.rnn_cell.GRUCell(num_units=args.rnn_dim)
            rnn_cell_bwd = tf.nn.rnn_cell.GRUCell(num_units=args.rnn_dim)
            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                rnn_cell_fwd,
                rnn_cell_bwd,
                embedded_source_seqs,
                sequence_length=self.source_seq_lens,
                dtype=tf.float32)
            source_encoded = tf.add(outputs[0], outputs[1])
            source_states = tf.add(states[0], states[1])

            # Index the unique words using self.source_ids and self.target_ids.
            sentence_mask = tf.sequence_mask(self.sentence_lens)
            source_encoded = tf.boolean_mask(
                tf.nn.embedding_lookup(source_encoded, self.source_ids),
                sentence_mask)
            source_states = tf.boolean_mask(
                tf.nn.embedding_lookup(source_states, self.source_ids),
                sentence_mask)
            source_lens = tf.boolean_mask(
                tf.nn.embedding_lookup(self.source_seq_lens, self.source_ids),
                sentence_mask)

            target_ids = tf.boolean_mask(
                tf.nn.embedding_lookup(target_ids, self.target_ids),
                sentence_mask)
            target_lens = tf.boolean_mask(
                tf.nn.embedding_lookup(target_seq_lens, self.target_ids),
                sentence_mask)

            # Decoder
            # Generate target embeddings for target chars, of shape [target_chars, args.char_dim].
            target_embeddings = tf.get_variable("target_embeddings",
                                                [target_chars, args.char_dim])

            # Embed the target_seqs using the target embeddings.
            embedded_target_seqs = tf.nn.embedding_lookup(
                target_embeddings, target_ids)

            # Generate a decoder GRU with dimension args.rnn_dim.
            rnn_decoder = tf.nn.rnn_cell.GRUCell(num_units=args.rnn_dim)

            # Create a `decoder_layer` -- a fully connected layer with
            # target_chars neurons used in the decoder to classify into target characters.
            decoder_layer = tf.layers.Dense(units=target_chars,
                                            activation=None)

            # Attention
            # Generate three fully connected layers without activations:
            # - `source_layer` with args.rnn_dim units
            # - `state_layer` with args.rnn_dim units
            # - `weight_layer` with 1 unit
            source_layer = tf.layers.Dense(args.rnn_dim, activation=None)
            state_layer = tf.layers.Dense(args.rnn_dim, activation=None)
            weight_layer = tf.layers.Dense(1, activation=None)

            def with_attention(inputs, states):
                # Generate the attention

                # Project source_encoded using source_layer.
                source_projection = source_layer(source_encoded)

                # Change shape of states from [a, b] to [a, 1, b] and project it using state_layer.
                states = tf.expand_dims(states, 1)
                state_projection = state_layer(states)

                # Sum the two above projections, apply tf.tanh and project the result using weight_layer.
                # The result has shape [x, y, 1].
                weight_projection = weight_layer(
                    tf.tanh(tf.add(source_projection, state_projection)))

                # Apply tf.nn.softmax to the latest result, using axis corresponding to source characters.
                softmax_layer = tf.nn.softmax(weight_projection, axis=1)

                # Multiply the source_encoded by the latest result, and sum the results with respect
                # to the axis corresponding to source characters. This is the final attention.
                tmp = tf.multiply(source_encoded, softmax_layer)
                attention = tf.reduce_sum(tmp, axis=1)

                # Return concatenation of inputs and the computed attention.
                return tf.concat([inputs, attention], 1)
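            # In formula form, the helper above is additive (Bahdanau-style)
            # attention:
            #   e_i = v^T tanh(W_s h_i + W_d s),  a = softmax(e),
            #   c = sum_i a_i h_i,  return [inputs; c]
            # where h_i are the encoded source characters (source_encoded) and
            # s is the current decoder state.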

            # The DecoderTraining will be used during training. It will output logits for each
            # target character.
            class DecoderTraining(tf.contrib.seq2seq.Decoder):
                @property
                def batch_size(self):
                    return tf.shape(
                        source_states
                    )[0]  # Return size of the batch, using for example source_states size

                @property
                def output_dtype(self):
                    return tf.float32  # Type for logits of target characters

                @property
                def output_size(self):
                    return target_chars  # Length of logits for every output

                def initialize(self, name=None):
                    finished = target_lens <= 0  # False if target_lens > 0, True otherwise
                    states = source_states  # Initial decoder state to use
                    inputs = with_attention(
                        tf.nn.embedding_lookup(target_embeddings,
                                               tf.fill([self.batch_size],
                                                       bow)), states)
                    # Call with_attention on the embedded BOW characters of shape [self.batch_size].
                    # You can use tf.fill to generate BOWs of appropriate size.
                    return finished, inputs, states

                def step(self, time, inputs, states, name=None):
                    outputs, states = rnn_decoder(
                        inputs, states
                    )  # Run the decoder GRU cell using inputs and states.
                    outputs = decoder_layer(
                        outputs)  # Apply the decoder_layer on outputs.
                    next_input = with_attention(
                        embedded_target_seqs[:, time], states
                    )  # Next input is with_attention called on words with index `time` in target_embedded.
                    finished = tf.less_equal(
                        target_lens, time +
                        1)  # False if target_lens > time + 1, True otherwise.
                    return outputs, states, next_input, finished

            output_layer, _, _ = tf.contrib.seq2seq.dynamic_decode(
                DecoderTraining())
            self.predictions_training = tf.argmax(output_layer,
                                                  axis=2,
                                                  output_type=tf.int32)

            # The DecoderPrediction will be used during prediction. It will
            # directly output the predicted target characters.
            class DecoderPrediction(tf.contrib.seq2seq.Decoder):
                @property
                def batch_size(self):
                    return tf.shape(
                        source_states
                    )[0]  # Return size of the batch, using for example source_states size

                @property
                def output_dtype(self):
                    return tf.int32  # Type for predicted target characters

                @property
                def output_size(self):
                    return 1  # Will return just one output

                def initialize(self, name=None):
                    finished = tf.fill(
                        [self.batch_size],
                        False)  # False of shape [self.batch_size].
                    states = source_states  # Initial decoder state to use.
                    inputs = with_attention(
                        tf.nn.embedding_lookup(target_embeddings,
                                               tf.fill([self.batch_size],
                                                       bow)), states)
                    # Call with_attention on the embedded BOW characters of shape [self.batch_size].
                    # You can use tf.fill to generate BOWs of appropriate size.
                    return finished, inputs, states

                def step(self, time, inputs, states, name=None):
                    outputs, states = rnn_decoder(
                        inputs, states
                    )  # Run the decoder GRU cell using inputs and states.
                    outputs = decoder_layer(
                        outputs)  # Apply the decoder_layer on outputs.
                    # Use tf.argmax to choose most probable class (supply parameter `output_type=tf.int32`).
                    outputs = tf.argmax(outputs, axis=1, output_type=tf.int32)
                    next_input = with_attention(
                        tf.nn.embedding_lookup(target_embeddings,
                                               outputs), states
                    )  # Embed `outputs` using target_embeddings and pass it to with_attention.
                    finished = tf.equal(
                        outputs,
                        eow)  # True where outputs==eow, False otherwise
                    # Use tf.equal for the comparison, Python's '==' is not overloaded
                    return outputs, states, next_input, finished

            self.predictions, _, self.prediction_lens = tf.contrib.seq2seq.dynamic_decode(
                DecoderPrediction(),
                maximum_iterations=tf.reduce_max(source_lens) + 10)

            # - the `target_ids` contains the gold generated characters
            # - the `target_lens` contains the number of valid characters for each lemma

            # Training
            weights = tf.sequence_mask(target_lens, dtype=tf.float32)
            loss = tf.losses.sparse_softmax_cross_entropy(target_ids,
                                                          output_layer,
                                                          weights=weights)
            global_step = tf.train.create_global_step()
            self.training = tf.train.AdamOptimizer().minimize(
                loss, global_step=global_step, name="training")

            # Summaries
            accuracy_training = tf.reduce_all(tf.logical_or(
                tf.equal(self.predictions_training, target_ids),
                tf.logical_not(tf.sequence_mask(target_lens))),
                                              axis=1)
            self.current_accuracy_training, self.update_accuracy_training = tf.metrics.mean(
                accuracy_training)

            minimum_length = tf.minimum(
                tf.shape(self.predictions)[1],
                tf.shape(target_ids)[1])
            accuracy = tf.logical_and(
                tf.equal(self.prediction_lens, target_lens),
                tf.reduce_all(tf.logical_or(
                    tf.equal(self.predictions[:, :minimum_length],
                             target_ids[:, :minimum_length]),
                    tf.logical_not(
                        tf.sequence_mask(target_lens, maxlen=minimum_length))),
                              axis=1))
            self.current_accuracy, self.update_accuracy = tf.metrics.mean(
                accuracy)

            self.current_loss, self.update_loss = tf.metrics.mean(
                loss, weights=tf.reduce_sum(weights))
            self.reset_metrics = tf.variables_initializer(
                tf.get_collection(tf.GraphKeys.METRIC_VARIABLES))

            summary_writer = tf.contrib.summary.create_file_writer(
                args.logdir, flush_millis=10 * 1000)
            self.summaries = {}
            with summary_writer.as_default(
            ), tf.contrib.summary.record_summaries_every_n_global_steps(10):
                self.summaries["train"] = [
                    tf.contrib.summary.scalar("train/loss", self.update_loss),
                    tf.contrib.summary.scalar("train/accuracy",
                                              self.update_accuracy_training)
                ]
            with summary_writer.as_default(
            ), tf.contrib.summary.always_record_summaries():
                for dataset in ["dev", "test"]:
                    self.summaries[dataset] = [
                        tf.contrib.summary.scalar(dataset + "/loss",
                                                  self.current_loss),
                        tf.contrib.summary.scalar(dataset + "/accuracy",
                                                  self.current_accuracy)
                    ]

            # Initialize variables
            self.session.run(tf.global_variables_initializer())
            with summary_writer.as_default():
                tf.contrib.summary.initialize(session=self.session,
                                              graph=self.session.graph)
Code example #37
    def build_graph(self, input_network_outputs={}, reuse=True):
        """"""

        outputs = {}
        with tf.variable_scope('Embeddings'):
            input_tensors = [
                input_vocab.get_input_tensor(reuse=reuse)
                for input_vocab in self.input_vocabs
            ]
            for input_network, output in input_network_outputs:
                with tf.variable_scope(input_network.classname):
                    input_tensors.append(
                        input_network.get_input_tensor(output, reuse=reuse))
            layer = tf.concat(input_tensors, 2)
        n_nonzero = tf.to_float(
            tf.count_nonzero(layer, axis=-1, keep_dims=True))
        batch_size, bucket_size, input_size = nn.get_sizes(layer)
        layer *= input_size / (n_nonzero + tf.constant(1e-12))

        token_weights = nn.greater(self.id_vocab.placeholder,
                                   0,
                                   dtype=tf.int32)
        tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
        n_tokens = tf.reduce_sum(tokens_per_sequence)
        n_sequences = tf.count_nonzero(tokens_per_sequence)
        seq_lengths = tokens_per_sequence + 1
        tokens = {
            'n_tokens': n_tokens,
            'tokens_per_sequence': tokens_per_sequence,
            'token_weights': token_weights,
            'n_sequences': n_sequences
        }

        conv_keep_prob = 1. if reuse else self.conv_keep_prob
        recur_keep_prob = 1. if reuse else self.recur_keep_prob
        recur_include_prob = 1. if reuse else self.recur_include_prob

        rev_layer = tf.reverse_sequence(layer, seq_lengths, seq_axis=1)
        for i in six.moves.range(self.n_layers):
            conv_width = self.first_layer_conv_width if not i else self.conv_width
            with tf.variable_scope('RNN_FW-{}'.format(i)):
                layer, _ = recurrent.directed_RNN(
                    layer,
                    self.recur_size,
                    seq_lengths,
                    bidirectional=False,
                    recur_cell=self.recur_cell,
                    conv_width=conv_width,
                    recur_func=self.recur_func,
                    conv_keep_prob=conv_keep_prob,
                    recur_include_prob=recur_include_prob,
                    recur_keep_prob=recur_keep_prob,
                    cifg=self.cifg,
                    highway=self.highway,
                    highway_func=self.highway_func)
            if self.bidirectional:
                with tf.variable_scope('RNN_BW-{}'.format(i)):
                    rev_layer, _ = recurrent.directed_RNN(
                        rev_layer,
                        self.recur_size,
                        seq_lengths,
                        bidirectional=False,
                        recur_cell=self.recur_cell,
                        conv_width=conv_width,
                        recur_func=self.recur_func,
                        conv_keep_prob=conv_keep_prob,
                        recur_keep_prob=recur_keep_prob,
                        recur_include_prob=recur_include_prob,
                        cifg=self.cifg,
                        highway=self.highway,
                        highway_func=self.highway_func)
        ones = tf.ones([batch_size, 1, 1])
        with tf.variable_scope('RNN_FW-{}/RNN/Loop'.format(i), reuse=True):
            fw_initial_state = tf.get_variable('Initial_state')
            n_splits = fw_initial_state.get_shape().as_list(
            )[-1] / self.recur_size
            fw_initial_state = tf.split(fw_initial_state, int(n_splits), -1)[0]
            start_token = ones * fw_initial_state
            layer = tf.reverse_sequence(layer, seq_lengths, seq_axis=1)
            layer = layer[:, 1:]
            layer = tf.reverse_sequence(layer, seq_lengths - 1, seq_axis=1)
            layer = tf.concat([start_token, layer], axis=1)
        if self.bidirectional:
            with tf.variable_scope('RNN_BW-{}/RNN/Loop'.format(i), reuse=True):
                bw_initial_state = tf.get_variable('Initial_state')
                n_splits = bw_initial_state.get_shape().as_list(
                )[-1] / self.recur_size
                bw_initial_state = tf.split(bw_initial_state, int(n_splits),
                                            -1)[0]
                stop_token = ones * bw_initial_state
                rev_layer = tf.concat([stop_token, layer], axis=1)
                rev_layer = tf.reverse_sequence(rev_layer,
                                                seq_lengths + 1,
                                                seq_axis=1)[:, 1:]
            if self.bilin:
                layer = tf.concat([layer * rev_layer, layer, rev_layer],
                                  axis=2)
            else:
                layer = tf.concat([layer, rev_layer], axis=2)

        output_vocabs = {vocab.field: vocab for vocab in self.output_vocabs}
        outputs = {}
        with tf.variable_scope('Classifiers'):
            if 'form' in output_vocabs:
                vocab = output_vocabs['form']
                outputs[vocab.field] = vocab.get_sampled_linear_classifier(
                    layer,
                    self.n_samples,
                    token_weights=token_weights,
                    reuse=reuse)
                self._evals.add('form')
            if 'upos' in output_vocabs:
                vocab = output_vocabs['upos']
                outputs[vocab.field] = vocab.get_linear_classifier(
                    layer, token_weights=token_weights, reuse=reuse)
                self._evals.add('upos')
            if 'xpos' in output_vocabs:
                vocab = output_vocabs['xpos']
                outputs[vocab.field] = vocab.get_linear_classifier(
                    layer, token_weights=token_weights, reuse=reuse)
                self._evals.add('xpos')
        return outputs, tokens
Code example #38
File: layers.py  Project: sunmask/bert4keras
    def reverse_sequence(self, inputs, mask=None):
        if mask is None:
            return [x[:, ::-1] for x in inputs]
        else:
            length = K.cast(K.sum(mask, 1), 'int32')
            return [tf.reverse_sequence(x, length, seq_axis=1) for x in inputs]
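A small illustration of the masked branch above, using hypothetical toy tensors (TF 1.x): only the valid prefix of each row is reversed, and the zero padding stays in place.

import tensorflow as tf

x = tf.constant([[1., 2., 3., 0.],
                 [4., 5., 0., 0.]])
mask = tf.constant([[1., 1., 1., 0.],
                    [1., 1., 0., 0.]])
length = tf.cast(tf.reduce_sum(mask, 1), tf.int32)  # [3, 2]
rev = tf.reverse_sequence(x, length, seq_axis=1)    # valid prefixes reversed

with tf.Session() as session:
    print(session.run(rev))  # [[3. 2. 1. 0.] [5. 4. 0. 0.]]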
Code example #39
def rnn(inputs,
        input_lengths,
        cell_type,
        num_layers,
        num_units,
        keep_prob,
        is_training,
        bidirectional=False,
        debug=False,
        regular_output=False):
    # inputs: batch x time x depth

    assert num_layers >= 1

    need_tuple_state = cell_type in (tf.nn.rnn_cell.BasicLSTMCell,
                                     tf.nn.rnn_cell.LSTMCell)

    if need_tuple_state:
        cell = cell_type(num_units, state_is_tuple=True)
    else:
        cell = cell_type(num_units)

    if is_training and keep_prob < 1:
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
    if bidirectional:
        input_lengths_64 = tf.cast(input_lengths, tf.int64)
        prev_layer_fwd = inputs
        prev_layer_rev = tf.reverse_sequence(inputs, input_lengths_64, 1)
        for i in xrange(num_layers):
            with tf.variable_scope("Layer%d" % i):
                with tf.variable_scope("Fwd"):
                    outputs_fwd, final_state_fwd = tf.nn.dynamic_rnn(
                        cell, prev_layer_fwd, input_lengths, dtype=tf.float32)
                with tf.variable_scope("Rev"):
                    outputs_rev, final_state_rev = tf.nn.dynamic_rnn(
                        cell, prev_layer_rev, input_lengths, dtype=tf.float32)

                outputs_rev = tf.reverse_sequence(outputs_rev,
                                                  input_lengths_64, 1)
                prev_layer_fwd = tf.concat([outputs_fwd, outputs_rev], 2)
                prev_layer_rev = tf.reverse_sequence(prev_layer_fwd,
                                                     input_lengths_64, 1)
        if regular_output:
            return prev_layer_fwd, final_state_fwd + final_state_rev

        if need_tuple_state:
            final_state_fwd = final_state_fwd[1]
            final_state_fwd.set_shape(
                [inputs.get_shape()[0], cell.state_size[1]])
            final_state_rev = final_state_rev[1]
            final_state_rev.set_shape(
                [inputs.get_shape()[0], cell.state_size[1]])
        else:
            final_state_fwd.set_shape([inputs.get_shape()[0], cell.state_size])
            final_state_rev.set_shape([inputs.get_shape()[0], cell.state_size])

        final_output = tf.concat([final_state_fwd, final_state_rev], 1)
        return prev_layer_fwd, final_output

    # Not bidirectional
    prev_layer = inputs
    for i in xrange(num_layers):
        with tf.variable_scope("Layer%d" % i):
            outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                     prev_layer,
                                                     input_lengths,
                                                     dtype=tf.float32)
            prev_layer = outputs

    #if num_layers > 1:
    #  cell = tf.nn.rnn_cell.MultiRNNCell(
    #      [cell] * (num_layers),
    #      state_is_tuple=need_tuple_state)
    #if debug:
    #  inputs = utils.tf_print_shape(inputs,
    #                                message='{} RNN input shape: '.format(
    #                                    tf.get_default_graph()._name_stack))

    #if need_tuple_state and num_layers > 1:
    #  # Work around bug with MultiRNNCell and tuple states
    #  initial_state = tuple(tuple(tf.zeros(
    #      tf.pack([tf.shape(inputs)[0], s]),
    #      dtype=tf.float32) for s in sizes) for sizes in cell.state_size)
    #else:
    #  initial_state = None

    #outputs, final_state = tf.nn.dynamic_rnn(cell,
    #                                         inputs,
    #                                         input_lengths,
    #                                         initial_state=initial_state,
    #                                         dtype=tf.float32)
    if regular_output:
        return outputs, final_state

    if need_tuple_state:
        final_state[1].set_shape([inputs.get_shape()[0], cell.state_size[1]])
        return final_state[1]
    else:
        final_state.set_shape([inputs.get_shape()[0], cell.state_size])
        return final_state
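A hypothetical call of the helper above: with the arguments shown, the bidirectional branch returns the per-step outputs together with the concatenated final states (the batch dimension must be static for the set_shape calls).

inputs = tf.placeholder(tf.float32, [32, None, 128])  # batch x time x depth
input_lengths = tf.placeholder(tf.int32, [32])
outputs, final_state = rnn(inputs,
                           input_lengths,
                           cell_type=tf.nn.rnn_cell.BasicLSTMCell,
                           num_layers=2,
                           num_units=256,
                           keep_prob=0.8,
                           is_training=True,
                           bidirectional=True)
# outputs:     batch x time x (2 * num_units)
# final_state: batch x (2 * num_units)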
Code example #40
File: models.py  Project: whatyouknow123/ABSA_Keras
    def cabasc(self):
        def sequence_mask(sequence):
            return K.sign(K.max(K.abs(sequence), 2))

        def sequence_length(sequence):
            return K.cast(K.sum(sequence_mask(sequence), 1), tf.int32)

        input_text = Input(shape=(self.max_len, ))
        input_text_l = Input(shape=(self.max_len, ))
        input_text_r = Input(shape=(self.max_len, ))
        input_aspect = Input(shape=(1, ))
        input_mask = Input(shape=(self.max_len, ))

        if self.use_elmo:
            text_elmo_embedding = ELMoEmbedding(
                output_mode=self.config.elmo_output_mode,
                idx2word=self.config.idx2token,
                mask_zero=True,
                hub_url=self.config.elmo_hub_url,
                elmo_trainable=self.config.elmo_trainable)
            l_elmo_embedding = ELMoEmbedding(
                output_mode=self.config.elmo_output_mode,
                idx2word=self.config.idx2token,
                mask_zero=True,
                hub_url=self.config.elmo_hub_url,
                elmo_trainable=self.config.elmo_trainable)
            r_elmo_embedding = ELMoEmbedding(
                output_mode=self.config.elmo_output_mode,
                idx2word=self.config.idx2token,
                mask_zero=True,
                hub_url=self.config.elmo_hub_url,
                elmo_trainable=self.config.elmo_trainable)
            if self.config.use_elmo_alone:
                text_embed = SpatialDropout1D(0.2)(
                    text_elmo_embedding(input_text))
                text_l_embed = SpatialDropout1D(0.2)(
                    l_elmo_embedding(input_text_l))
                text_r_embed = SpatialDropout1D(0.2)(
                    r_elmo_embedding(input_text_r))
            else:
                word_embedding = Embedding(
                    input_dim=self.text_embeddings.shape[0],
                    output_dim=self.config.word_embed_dim,
                    weights=[self.text_embeddings],
                    trainable=self.config.word_embed_trainable,
                    mask_zero=True)
                text_embed = SpatialDropout1D(0.2)(concatenate([
                    word_embedding(input_text),
                    text_elmo_embedding(input_text)
                ]))
                text_l_embed = SpatialDropout1D(0.2)(concatenate([
                    word_embedding(input_text_l),
                    l_elmo_embedding(input_text_l)
                ]))
                text_r_embed = SpatialDropout1D(0.2)(concatenate([
                    word_embedding(input_text_r),
                    r_elmo_embedding(input_text_r)
                ]))
        else:
            word_embedding = Embedding(
                input_dim=self.text_embeddings.shape[0],
                output_dim=self.config.word_embed_dim,
                weights=[self.text_embeddings],
                trainable=self.config.word_embed_trainable,
                mask_zero=True)
            text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))
            text_l_embed = SpatialDropout1D(0.2)(word_embedding(input_text_l))
            text_r_embed = SpatialDropout1D(0.2)(word_embedding(input_text_r))

        if self.config.aspect_embed_type == 'random':
            asp_embedding = Embedding(input_dim=self.n_aspect,
                                      output_dim=self.config.aspect_embed_dim)
        else:
            asp_embedding = Embedding(
                input_dim=self.aspect_embeddings.shape[0],
                output_dim=self.config.aspect_embed_dim,
                trainable=self.config.aspect_embed_trainable)
        aspect_embed = asp_embedding(input_aspect)
        aspect_embed = Flatten()(aspect_embed)  # reshape to 2d

        # regarding aspect string as the first unit
        hidden_l = GRU(self.config.lstm_units,
                       go_backwards=True,
                       return_sequences=True)(text_l_embed)
        hidden_r = GRU(self.config.lstm_units,
                       return_sequences=True)(text_r_embed)

        # left context attention
        context_attend_l = TimeDistributed(Dense(
            1, activation='sigmoid'))(hidden_l)
        # Note: I couldn't find `reverse_sequence` in keras
        context_attend_l = Lambda(lambda x: tf.reverse_sequence(
            x, sequence_length(x), 1, 0))(context_attend_l)
        context_attend_l = Lambda(lambda x: K.squeeze(x, -1))(context_attend_l)

        # right context attention
        context_attend_r = TimeDistributed(Dense(
            1, activation='sigmoid'))(hidden_r)
        context_attend_r = Lambda(lambda x: K.squeeze(x, -1))(context_attend_r)

        # combine context attention
        # aspect_text_embed = subtract([add([text_l_embed, text_r_embed]), text_embed])
        # aspect_text_mask = Lambda(lambda x: sequence_mask(x))(aspect_text_embed)
        # text_mask = Lambda(lambda x: sequence_mask(x))(text_embed)
        # context_mask = subtract([text_mask, aspect_text_mask])
        # aspect_text_mask_half = Lambda(lambda x: x*0.5)(aspect_text_mask)
        # combine_mask = add([context_mask, aspect_text_mask_half])  # 1 for context, 0.5 for aspect
        context_attend = multiply(
            [add([context_attend_l, context_attend_r]), input_mask])

        # apply context attention
        context_attend_expand = Lambda(lambda x: K.expand_dims(x))(
            context_attend)
        memory = multiply([text_embed, context_attend_expand])

        # sentence-level content attention
        sentence = Lambda(lambda x: K.mean(x, axis=1))(memory)
        final_output = ContentAttention()([memory, aspect_embed, sentence])

        return Model(
            [input_text, input_text_l, input_text_r, input_aspect, input_mask],
            final_output)
Code example #41
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 embedding_size,
                 hidden_size,
                 user_size,
                 user_embedding_size,
                 num_utters,
                 l2_reg_lambda=0.0):

        self.input_text = tf.placeholder(tf.int32,
                                         shape=[None, sequence_length],
                                         name='input_text')
        self.input_user = tf.placeholder(tf.int32,
                                         shape=[None, num_utters],
                                         name='input_user')
        self.input_y = tf.placeholder(tf.float32,
                                      shape=[None, num_classes],
                                      name='input_y')
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name='dropout_keep_prob')
        self.num_utters = num_utters
        self.utter_length = int(sequence_length / num_utters)
        self.hidden_size = hidden_size
        self.batch_size = tf.shape(self.input_text)[0]
        self.initializer = tf.random_normal_initializer(stddev=0.1)
        self.embedding_size = embedding_size
        self.user_embedding_size = user_embedding_size

        self._instantiate_weights()
        l2_loss = tf.constant(0.0)

        input_text = tf.split(self.input_text, self.num_utters, axis=1)
        input_text = tf.stack(input_text, axis=1)
        input_user = self.input_user

        with tf.name_scope("text-embedding"):
            self.W_text = tf.Variable(tf.random_uniform(
                [vocab_size, embedding_size], -1.0, 1.0),
                                      name="W_text")
            self.embedded_words = tf.nn.embedding_lookup(
                self.W_text, input_text)
            self.embedded_words_reshaped = tf.reshape(
                self.embedded_words,
                shape=[-1, self.utter_length, embedding_size])
        with tf.name_scope("user-embedding"):
            self.W_user = tf.Variable(tf.random_uniform(
                [user_size, user_embedding_size], -1.0, 1.0),
                                      name="W_user")
            self.embedded_users = tf.nn.embedding_lookup(
                self.W_user, input_user)

        with tf.name_scope("rnn"):
            input_text_reshaped = tf.reshape(input_text,
                                             shape=[-1, self.utter_length])
            relevant_input_text = tf.sign(tf.abs(input_text_reshaped))
            length_input_text = tf.cast(
                tf.reduce_sum(relevant_input_text, axis=1), tf.int32)
            reversed_embedded_words = tf.reverse_sequence(
                self.embedded_words_reshaped,
                length_input_text,
                batch_dim=0,
                seq_dim=1)

            hidden_state_forward_list = self.gru_forward_word_level(
                self.embedded_words_reshaped, relevant_input_text)
            hidden_state_backward_list = self.gru_backward_word_level(
                reversed_embedded_words, relevant_input_text,
                length_input_text)

            self.hidden_state = [
                tf.concat([h_forward, h_backward], axis=1)
                for h_forward, h_backward in zip(hidden_state_forward_list,
                                                 hidden_state_backward_list)
            ]

            utter_representation = self.hidden_state
            utter_representation, p_attention_word = self.attention_word_level(
                utter_representation)
            self.p_attention_word = tf.reshape(
                p_attention_word,
                shape=[-1, self.num_utters, self.utter_length])

            utter_representation = tf.reshape(
                utter_representation,
                shape=[-1, self.num_utters, self.hidden_size * 2])

            hidden_state_forward_utters = self.gru_forward_utter_level(
                utter_representation, self.embedded_users)
            hidden_state_backward_utters = self.gru_backward_utter_level(
                utter_representation, self.embedded_users)
            self.hidden_state_utter = [
                tf.concat([h_forward, h_backward], axis=1)
                for h_forward, h_backward in zip(hidden_state_forward_utters,
                                                 hidden_state_backward_utters)
            ]

            conv_representation = self.hidden_state_utter
            conv_representation, p_attention_utter = self.attention_utter_level(
                conv_representation)
            self.p_attention_utter = tf.reshape(p_attention_utter,
                                                shape=[-1, self.num_utters])

            self.h_outputs = tf.nn.dropout(conv_representation,
                                           keep_prob=self.dropout_keep_prob)

        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[hidden_size * 4, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.logits = tf.nn.xw_plus_b(self.h_outputs, W, b, name="logits")
            self.predictions = tf.argmax(self.logits,
                                         axis=1,
                                         name="predictions")

        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, axis=1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   tf.float32),
                                           name="accuracy")
Code example #42
def cudnn_bi_gru(units,
                 n_hidden,
                 seq_lengths=None,
                 n_layers=1,
                 trainable_initial_states=False,
                 name='cudnn_bi_gru',
                 reuse=False):
    """ Fast CuDNN Bi-GRU implementation

    Args:
        units: tf.Tensor with dimensions [B x T x F], where
            B - batch size
            T - number of tokens
            F - features
        n_hidden: dimensionality of hidden state
        seq_lengths: number of tokens in each sample in the batch
        n_layers: number of layers
        trainable_initial_states: whether to create a special trainable variable
                to initialize the hidden states of the network or use just zeros
        name: name of the variable scope to use
        reuse: whether to reuse already initialized variables

    Returns:
        (h_fw, h_bw) - forward and backward hidden states along the T
            dimension, each a tf.Tensor with dimensionality [B x T x H]
        (h_last_fw, h_last_bw) - last forward and backward hidden states,
            each a tf.Tensor with dimensionality [B x H],
            where H is the number of hidden units
    """

    with tf.variable_scope(name, reuse=reuse):
        if seq_lengths is None:
            seq_lengths = tf.ones([tf.shape(units)[0]],
                                  dtype=tf.int32) * tf.shape(units)[1]
        with tf.variable_scope('Forward'):
            h_fw, h_last_fw = cudnn_gru_wrapper(
                units,
                n_hidden,
                n_layers=n_layers,
                trainable_initial_states=trainable_initial_states,
                seq_lengths=seq_lengths,
                reuse=reuse)

        with tf.variable_scope('Backward'):
            reversed_units = tf.reverse_sequence(units,
                                                 seq_lengths=seq_lengths,
                                                 seq_dim=1,
                                                 batch_dim=0)
            h_bw, h_last_bw = cudnn_gru_wrapper(
                reversed_units,
                n_hidden,
                n_layers=n_layers,
                trainable_initial_states=trainable_initial_states,
                seq_lengths=seq_lengths,
                reuse=reuse)
            h_bw = tf.reverse_sequence(h_bw,
                                       seq_lengths=seq_lengths,
                                       seq_dim=1,
                                       batch_dim=0)

    return (h_fw, h_bw), (h_last_fw, h_last_bw)
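A hypothetical call following the docstring (assumes cudnn_gru_wrapper and its dependencies are importable as above):

units = tf.placeholder(tf.float32, [None, None, 100])  # B x T x F
seq_lengths = tf.placeholder(tf.int32, [None])         # tokens per sample
(h_fw, h_bw), (h_last_fw, h_last_bw) = cudnn_bi_gru(units, 128,
                                                    seq_lengths=seq_lengths)
h = tf.concat([h_fw, h_bw], axis=-1)                   # B x T x 2H
h_last = tf.concat([h_last_fw, h_last_bw], axis=-1)    # B x 2H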
Code example #43
    def _build_model(self):
        # initializing the cell type
        if self.cell_type == 'RNN':
            cell_element = tf.nn.rnn_cell.BasicRNNCell
        elif self.cell_type == 'LSTM':
            cell_element = tf.nn.rnn_cell.BasicLSTMCell
        elif self.cell_type == 'GRU':
            cell_element = tf.nn.rnn_cell.GRUCell
        else:
            raise ValueError('cell_type must be one of "LSTM", "RNN", "GRU"')

        # set the depth of cell
        if len(self.rnn_layers) == 1:
            cell = cell_element(self.rnn_layers[0])
        elif len(self.rnn_layers) > 1:
            cell_elements = []
            for rnn_layer in self.rnn_layers:
                cell_elements.append(cell_element(rnn_layer))
            cell = tf.nn.rnn_cell.MultiRNNCell(cell_elements)
        self.inputs = tf.placeholder(tf.int32,
                                     [self.batch_size, self.max_length])
        self.inputs_length = tf.placeholder(tf.int32, [self.batch_size])
        previous_tokens = self.inputs[:, :-1]
        next_tokens = self.inputs[:, 1:]
        tokens_length = self.inputs_length - 1
        previous_tokens_one_hot = tf.one_hot(previous_tokens, self.num_tokens)

        with tf.variable_scope("RNN_LM"):
            # training
            outputs, _ = tf.nn.dynamic_rnn(cell,
                                           previous_tokens_one_hot,
                                           tokens_length,
                                           dtype=tf.float32,
                                           swap_memory=True,
                                           time_major=False)

            # extracting features
            tf.get_variable_scope().reuse_variables()
            outputs_test, _ = tf.nn.dynamic_rnn(cell,
                                                tf.one_hot(
                                                    self.inputs,
                                                    self.num_tokens),
                                                self.inputs_length,
                                                dtype=tf.float32,
                                                swap_memory=True,
                                                time_major=False)

            last_state = tf.reverse_sequence(outputs_test,
                                             self.inputs_length,
                                             seq_dim=1,
                                             batch_dim=0)[:, 0, :]
            max_pooling = tf.reduce_max(outputs_test, 1)
            self.features = tf.concat([last_state, max_pooling], 1)

        outputs = tf.reshape(
            outputs,
            [self.batch_size * (self.max_length - 1), self.rnn_layers[-1]])
        W_output = tf.Variable(tf.random_uniform(
            [self.rnn_layers[-1], self.num_tokens], -0.1, 0.1),
                               name='W_output')
        b_output = tf.Variable(tf.zeros([self.num_tokens]), name='b_output')
        outputs = tf.matmul(outputs, W_output) + b_output

        next_tokens = tf.reshape(next_tokens,
                                 [self.batch_size * (self.max_length - 1)])
        self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=next_tokens, logits=outputs)
        tokens_mask = tf.sequence_mask(tokens_length,
                                       self.max_length - 1,
                                       dtype=tf.float32)
        self.loss = tf.multiply(
            self.loss,
            tf.reshape(tokens_mask, [self.batch_size * (self.max_length - 1)]))
        self.loss = tf.reduce_sum(self.loss) / self.batch_size

        opt = tf.train.AdamOptimizer(self.learning_rate)
        grads_and_vars = opt.compute_gradients(self.loss)
        grads_and_vars = [(tf.clip_by_value(gv[0], -1.0, 1.0), gv[1])
                          for gv in grads_and_vars]
        self.train_op = opt.apply_gradients(grads_and_vars)
        self.init_op = tf.global_variables_initializer()
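
The reverse_sequence(...)[:, 0, :] idiom above is a compact way to read off the last valid output of every sequence without gather arithmetic: after the reversal, the final valid step sits at index 0 regardless of padding. A hedged toy sketch (values of our own):

import numpy as np
import tensorflow as tf

outputs = tf.constant(
    np.arange(2 * 4 * 3, dtype=np.float32).reshape(2, 4, 3))  # [B, T, H]
lengths = tf.constant([4, 2])

# After reversal the last valid step of each sequence is at index 0.
last_state = tf.reverse_sequence(outputs, lengths,
                                 seq_dim=1, batch_dim=0)[:, 0, :]

with tf.Session() as sess:
    print(sess.run(last_state))   # rows taken at t=3 and t=1 respectively
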
Code example #44
def inference(X, weights, bias, reuse = None, trainMode = True):
    word_vectors = tf.nn.embedding_lookup(WORDS, X)
    # [batch_size, 80, 50]

    length = GetLength(X)
    length_64 = tf.cast(length, tf.int64)
    # note: reuse is recomputed from trainMode, overriding the parameter above
    reuse = None if trainMode else True

    with tf.variable_scope("rnn_fwbw", reuse = reuse) as scope:
        forward_output, _ = tf.nn.dynamic_rnn(
            tf.contrib.rnn.LSTMCell(FLAGS.num_hidden, reuse = reuse),
            word_vectors,
            dtype = tf.float32,
            sequence_length = length,
            scope = "RNN_forward")
        backward_output_, _ = tf.nn.dynamic_rnn(
            tf.contrib.rnn.LSTMCell(FLAGS.num_hidden, reuse = reuse),
            inputs = tf.reverse_sequence(word_vectors,
                                       length_64,
                                       seq_dim = 1),
            dtype = tf.float32,
            sequence_length = length,
            scope = "RNN_backword")

    backward_output = tf.reverse_sequence(backward_output_,
                                          length_64,
                                          seq_dim = 1)
    
    output = tf.concat([forward_output, backward_output], 2)
    # [batch_size, 80, 200]

    output = tf.expand_dims(output, -1)
    # [batch_size, 80, 200, 1]
    output = tf.transpose(output, perm = [1, 0, 3, 2])
    # [80, batch_size, 1, 200]

    char_blocks = output
    for i in range(FLAGS.mrank):
        # shift along the time axis so that position t sees its neighbours:
        # ileft[t] = output[t - (i + 1)], iright[t] = output[t + (i + 1)],
        # zero-padded where the neighbour falls outside the sentence
        ileft = output[: FLAGS.max_sentence_len - (i + 1)]
        iright = output[(i + 1):]

        ileft = tf.pad(ileft, [[(i + 1), 0], [0, 0], [0, 0], [0, 0]], "CONSTANT")
        iright = tf.pad(iright, [[0, (i + 1)], [0, 0], [0, 0], [0, 0]], "CONSTANT")

        char_blocks = tf.concat([ileft, char_blocks, iright], 3)
    # char_blocks.shape = [80, batch_size, 1, 200 * (2*mrank + 1)]

    char_blocks = tf.reshape(char_blocks, [FLAGS.max_sentence_len, -1,
                                            2 * FLAGS.mrank + 1,
                                            2 * FLAGS.num_hidden])
    char_blocks = tf.expand_dims(char_blocks, -1)
    # [80, batch_size, 2 * mrank + 1, 200, 1]
    # Namely, [80, batch_size, 3, 200, 1] for 1-rank Markov assumption

    # do conv
    do_char_conv = lambda x: char_convolution(x)
    abstract_chars = tf.map_fn(do_char_conv, char_blocks)
    # [80, batch_size, 200]

    abstract_chars = tf.transpose(abstract_chars, perm = [1, 0, 2])
    abstract_chars = tf.reshape(abstract_chars, [-1, FLAGS.num_hidden * 2])
    if trainMode:
        abstract_chars = tf.nn.dropout(abstract_chars, 0.5)

    matricized_unary_scores = tf.matmul(abstract_chars, weights) + bias

    unary_scores = tf.reshape(matricized_unary_scores,
            [-1, FLAGS.max_sentence_len, FLAGS.num_tags])

    # [batch_size, 80, 4]
    return unary_scores, length
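
The char_blocks loop builds per-position context windows purely from slicing and padding. The same idea on a 1-D toy sequence (our own example, not from the repository):

import tensorflow as tf

x = tf.constant([10., 20., 30., 40.])        # a length-4 "sequence"

left = tf.pad(x[:-1], [[1, 0]])              # [ 0 10 20 30] -> x[t-1]
right = tf.pad(x[1:], [[0, 1]])              # [20 30 40  0] -> x[t+1]
window = tf.stack([left, x, right], axis=1)  # one context window per step

with tf.Session() as sess:
    print(sess.run(window))
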
Code example #45
    def _build_model(self):
        # initializing the cell type
        if self.cell_type == 'RNN':
            cell_element = tf.contrib.rnn.BasicRNNCell
        elif self.cell_type == 'LSTM':
            cell_element = tf.contrib.rnn.BasicLSTMCell
        elif self.cell_type == 'GRU':
            cell_element = tf.contrib.rnn.GRUCell
        else:
            raise ValueError('cell_type must be one of "LSTM", "RNN", "GRU"')

        # set the depth of the cell; built via a helper so the bidirectional
        # branch below can create a second, independent cell (passing the same
        # cell object as both cell_fw and cell_bw would make the two
        # directions share weights)
        def make_cell():
            if len(self.rnn_layers) == 1:
                return cell_element(self.rnn_layers[0])
            elif len(self.rnn_layers) > 1:
                return tf.contrib.rnn.MultiRNNCell(
                    [cell_element(rnn_layer) for rnn_layer in self.rnn_layers])
            else:
                raise ValueError('rnn_layers must have at least one element')

        cell = make_cell()

        self.inputs = tf.placeholder(tf.int32,
                                     [self.batch_size, self.max_length])
        self.inputs_length = tf.placeholder(tf.int32, [self.batch_size])
        self.outputs = tf.placeholder(tf.int32, [self.batch_size])
        inputs_one_hot = tf.one_hot(self.inputs, self.num_tokens)

        # bidirectional
        if self.bidirectional:
            cell_bw = make_cell()  # independent weights for the backward pass
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell,
                                                         cell_bw,
                                                         inputs_one_hot,
                                                         self.inputs_length,
                                                         dtype=tf.float32,
                                                         swap_memory=True,
                                                         time_major=False)
            output_fw, output_bw = outputs
            per_step_outputs = tf.concat(outputs, 2)
            if self.attention_layers is None:
                output_fw = tf.reverse_sequence(output_fw,
                                                self.inputs_length,
                                                seq_dim=1,
                                                batch_dim=0)
                ff_input = tf.concat([output_fw[:, 0, :], output_bw[:, 0, :]],
                                     1)
            else:
                attention_input = tf.concat([output_fw, output_bw], 2)
        else:
            outputs, _ = tf.nn.dynamic_rnn(cell,
                                           inputs_one_hot,
                                           self.inputs_length,
                                           dtype=tf.float32,
                                           swap_memory=True,
                                           time_major=False)
            per_step_outputs = outputs
            if self.attention_layers is None:
                outputs = tf.reverse_sequence(outputs,
                                              self.inputs_length,
                                              seq_dim=1,
                                              batch_dim=0)
                ff_input = outputs[:, 0, :]
            else:
                attention_input = outputs

        # attention
        if self.attention_layers is not None:
            last_attention_hidden = self.rnn_layers[-1] * (
                2 if self.bidirectional else 1)
            attention_hidden = tf.reshape(
                attention_input,
                [self.batch_size * self.max_length, last_attention_hidden])
            for layer, attention_layer in enumerate(self.attention_layers):
                W_attention = tf.Variable(tf.random_uniform(
                    [last_attention_hidden, attention_layer], -0.1, 0.1),
                                          name='W_attention_%d' % (layer, ))
                last_attention_hidden = attention_layer
                b_attention = tf.Variable(tf.zeros([attention_layer]),
                                          name='b_attention_%d' % (layer, ))
                attention_hidden = tf.matmul(attention_hidden,
                                             W_attention) + b_attention
                if layer < len(self.attention_layers) - 1:
                    attention_hidden = tf.nn.tanh(attention_hidden)
            attention_weights = tf.exp(
                tf.reshape(attention_hidden,
                           [self.batch_size, self.max_length]))
            inputs_mask = tf.sequence_mask(self.inputs_length,
                                           self.max_length,
                                           dtype=tf.float32)
            attention_weights *= inputs_mask
            attention_weights_sum = tf.reduce_sum(attention_weights,
                                                  1,
                                                  keep_dims=True)
            attention_weights /= attention_weights_sum
            ff_input = tf.reduce_sum(
                tf.multiply(attention_input,
                            tf.expand_dims(attention_weights, 2)), 1)

        # feed forwards
        last_ff_hidden = self.rnn_layers[-1] * (2 if self.bidirectional else 1)
        ff_hidden = ff_input
        params_ff = []
        for layer, ff_layer in enumerate(self.ff_layers):
            W_ff = tf.Variable(tf.random_uniform([last_ff_hidden, ff_layer],
                                                 -0.1, 0.1),
                               name='W_ff_%d' % (layer, ))
            last_ff_hidden = ff_layer
            b_ff = tf.Variable(tf.zeros([ff_layer]),
                               name='b_ff_%d' % (layer, ))
            ff_hidden = tf.matmul(ff_hidden, W_ff) + b_ff
            params_ff.append((W_ff, b_ff))
            if layer < len(self.ff_layers) - 1:
                ff_hidden = tf.nn.tanh(ff_hidden)

        self.probability = tf.nn.softmax(ff_hidden)
        self.loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.outputs,
                                                           logits=ff_hidden))

        opt = tf.train.AdamOptimizer(self.learning_rate)
        grads_and_vars = opt.compute_gradients(self.loss)
        grads_and_vars = [(tf.clip_by_value(gv[0], -1.0, 1.0), gv[1])
                          for gv in grads_and_vars]
        self.train_op = opt.apply_gradients(grads_and_vars)
        self.init_op = tf.global_variables_initializer()

        # classify the sequence per step
        per_step_hidden = tf.reshape(per_step_outputs, [
            self.batch_size * self.max_length, self.rnn_layers[-1] *
            (2 if self.bidirectional else 1)
        ])
        for layer in range(len(params_ff)):
            W_ff, b_ff = params_ff[layer]
            per_step_hidden = tf.matmul(per_step_hidden, W_ff) + b_ff
            if layer < len(self.ff_layers) - 1:
                per_step_hidden = tf.nn.tanh(per_step_hidden)
        self.per_step_result = tf.reshape(tf.argmax(per_step_hidden, axis=1),
                                          [self.batch_size, self.max_length])
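
The attention block above computes softmax by hand as exp -> mask -> normalize; subtracting the per-row maximum first avoids overflow for large scores. A sketch of the same computation with that stability shift added (not the repository's code):

import tensorflow as tf

def masked_softmax(scores, mask):
    """scores, mask: [batch, time]; mask is 1.0 on real steps, 0.0 on padding."""
    scores = scores - tf.reduce_max(scores, axis=1, keep_dims=True)  # stability
    weights = tf.exp(scores) * mask                                  # drop padding
    return weights / (tf.reduce_sum(weights, axis=1, keep_dims=True) + 1e-8)
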
Code example #46
    def _append_eow(self, sequences):
        """Append an EOW character after the end of every given sequence."""
        sequences_rev = tf.reverse_sequence(
            sequences,
            tf.reduce_sum(tf.cast(tf.not_equal(sequences, 0), tf.int32), axis=1),
            1)
        sequences_rev_eow = tf.pad(sequences_rev, [[0, 0], [1, 0]],
                                   constant_values=MorphoDataset.Factor.EOW)
        return tf.reverse_sequence(
            sequences_rev_eow,
            tf.reduce_sum(tf.cast(tf.not_equal(sequences_rev_eow, 0), tf.int32), axis=1),
            1)
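
How the double reverse appends a token after ragged sequences, traced on toy data (EOW = 9 stands in for MorphoDataset.Factor.EOW; lengths are tracked explicitly here for clarity):

import tensorflow as tf

EOW = 9
seqs = tf.constant([[3, 4, 5, 0],
                    [6, 7, 0, 0]])
lens = tf.reduce_sum(tf.cast(tf.not_equal(seqs, 0), tf.int32), axis=1)

rev = tf.reverse_sequence(seqs, lens, seq_axis=1)             # [[5 4 3 0] [7 6 0 0]]
rev_eow = tf.pad(rev, [[0, 0], [1, 0]], constant_values=EOW)  # EOW in front
result = tf.reverse_sequence(rev_eow, lens + 1, seq_axis=1)

with tf.Session() as sess:
    print(sess.run(result))
    # [[3 4 5 9 0]
    #  [6 7 9 0 0]]
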
Code example #47
File: model.py Project: zckoh/magenta
def cudnn_lstm_layer(inputs,
                     batch_size,
                     num_units,
                     lengths=None,
                     stack_size=1,
                     rnn_dropout_drop_amt=0,
                     is_training=True,
                     bidirectional=True):
    """Create a LSTM layer that uses cudnn."""
    inputs_t = tf.transpose(inputs, [1, 0, 2])
    if lengths is not None:
        all_outputs = [inputs_t]
        for i in range(stack_size):
            with tf.variable_scope('stack_' + str(i)):
                with tf.variable_scope('forward'):
                    lstm_fw = tf.contrib.cudnn_rnn.CudnnLSTM(
                        num_layers=1,
                        num_units=num_units,
                        direction='unidirectional',
                        dropout=rnn_dropout_drop_amt,
                        kernel_initializer=tf.contrib.layers.
                        variance_scaling_initializer(),
                        bias_initializer=tf.zeros_initializer(),
                    )

                c_fw = tf.zeros([1, batch_size, num_units], tf.float32)
                h_fw = tf.zeros([1, batch_size, num_units], tf.float32)

                outputs_fw, _ = lstm_fw(all_outputs[-1], (h_fw, c_fw),
                                        training=is_training)

                combined_outputs = outputs_fw

                if bidirectional:
                    with tf.variable_scope('backward'):
                        lstm_bw = tf.contrib.cudnn_rnn.CudnnLSTM(
                            num_layers=1,
                            num_units=num_units,
                            direction='unidirectional',
                            dropout=rnn_dropout_drop_amt,
                            kernel_initializer=tf.contrib.layers.
                            variance_scaling_initializer(),
                            bias_initializer=tf.zeros_initializer(),
                        )

                    c_bw = tf.zeros([1, batch_size, num_units], tf.float32)
                    h_bw = tf.zeros([1, batch_size, num_units], tf.float32)

                    inputs_reversed = tf.reverse_sequence(all_outputs[-1],
                                                          lengths,
                                                          seq_axis=0,
                                                          batch_axis=1)
                    outputs_bw, _ = lstm_bw(inputs_reversed, (h_bw, c_bw),
                                            training=is_training)

                    outputs_bw = tf.reverse_sequence(outputs_bw,
                                                     lengths,
                                                     seq_axis=0,
                                                     batch_axis=1)

                    combined_outputs = tf.concat([outputs_fw, outputs_bw],
                                                 axis=2)

                all_outputs.append(combined_outputs)

        # for consistency with cudnn, here we just return the top of the stack,
        # although this can easily be altered to do other things, including
        # being more resnet-like
        return tf.transpose(all_outputs[-1], [1, 0, 2])
    else:
        lstm = tf.contrib.cudnn_rnn.CudnnLSTM(
            num_layers=stack_size,
            num_units=num_units,
            direction='bidirectional' if bidirectional else 'unidirectional',
            dropout=rnn_dropout_drop_amt,
            kernel_initializer=tf.contrib.layers.variance_scaling_initializer(
            ),
            bias_initializer=tf.zeros_initializer(),
        )
        stack_multiplier = 2 if bidirectional else 1
        c = tf.zeros([stack_multiplier * stack_size, batch_size, num_units],
                     tf.float32)
        h = tf.zeros([stack_multiplier * stack_size, batch_size, num_units],
                     tf.float32)
        outputs, _ = lstm(inputs_t, (h, c), training=is_training)
        outputs = tf.transpose(outputs, [1, 0, 2])

        return outputs
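
Because this contrib CudnnLSTM takes no sequence_length argument, padding is handled by reversing the inputs manually, and the layer runs time-major. Note the transposed axes relative to the batch-major calls elsewhere in this document; a minimal sketch with placeholder shapes of our own choosing:

import tensorflow as tf

# Time-major layout: [time, batch, features], hence seq_axis=0, batch_axis=1
# (batch-major code uses seq_axis=1, batch_axis=0 instead).
inputs_t = tf.placeholder(tf.float32, [None, None, 128])
lengths = tf.placeholder(tf.int32, [None])

reversed_t = tf.reverse_sequence(inputs_t, lengths, seq_axis=0, batch_axis=1)
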
Code example #48
File: embed_elmo_tf.py Project: mead-ml/hub
    def _build_lstms(self):
        # now the LSTMs
        # these will collect the initial states for the forward
        #   (and reverse LSTMs if we are doing bidirectional)

        # parse the options
        lstm_dim = self.options['lstm']['dim']
        projection_dim = self.options['lstm']['projection_dim']
        n_lstm_layers = self.options['lstm'].get('n_layers', 1)
        cell_clip = self.options['lstm'].get('cell_clip')
        proj_clip = self.options['lstm'].get('proj_clip')
        use_skip_connections = self.options['lstm']['use_skip_connections']

        # the sequence lengths from input mask
        if self.use_character_inputs:
            mask = tf.reduce_any(self.ids_placeholder > 0, axis=2)
        else:
            mask = self.ids_placeholder > 0
        sequence_lengths = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)
        batch_size = tf.shape(sequence_lengths)[0]

        # for each direction, we'll store tensors for each layer
        self.lstm_outputs = {'forward': [], 'backward': []}
        self.lstm_state_sizes = {'forward': [], 'backward': []}
        self.lstm_init_states = {'forward': [], 'backward': []}
        self.lstm_final_states = {'forward': [], 'backward': []}

        update_ops = []
        for direction in ['forward', 'backward']:
            if direction == 'forward':
                layer_input = self.embedding
            else:
                layer_input = tf.reverse_sequence(
                    self.embedding,
                    sequence_lengths,
                    seq_axis=1,
                    batch_axis=0
                )

            for i in range(n_lstm_layers):
                if projection_dim < lstm_dim:
                    # are projecting down output
                    lstm_cell = tf.nn.rnn_cell.LSTMCell(
                        lstm_dim, num_proj=projection_dim,
                        cell_clip=cell_clip, proj_clip=proj_clip)
                else:
                    lstm_cell = tf.nn.rnn_cell.LSTMCell(
                        lstm_dim,
                        cell_clip=cell_clip, proj_clip=proj_clip)

                if use_skip_connections:
                    # ResidualWrapper adds inputs to outputs
                    if i == 0:
                        # don't add skip connection from token embedding to
                        # 1st layer output
                        pass
                    else:
                        # add a skip connection
                        lstm_cell = tf.nn.rnn_cell.ResidualWrapper(lstm_cell)

                # collect the input state, run the dynamic rnn, collect
                # the output
                state_size = lstm_cell.state_size
                # the LSTMs are stateful.  To support multiple batch sizes,
                # we'll allocate size for states up to max_batch_size,
                # then use the first batch_size entries for each batch
                init_states = [
                    tf.Variable(
                        tf.zeros([self._max_batch_size, dim]),
                        trainable=False
                    )
                    for dim in lstm_cell.state_size
                ]
                batch_init_states = [
                    state[:batch_size, :] for state in init_states
                ]

                if direction == 'forward':
                    i_direction = 0
                else:
                    i_direction = 1
                variable_scope_name = 'RNN_{0}/RNN/MultiRNNCell/Cell{1}'.format(
                    i_direction, i)
                with tf.compat.v1.variable_scope(variable_scope_name):
                    layer_output, final_state = tf.nn.dynamic_rnn(
                        lstm_cell,
                        layer_input,
                        sequence_length=sequence_lengths,
                        initial_state=tf.nn.rnn_cell.LSTMStateTuple(
                            *batch_init_states),
                    )

                self.lstm_state_sizes[direction].append(lstm_cell.state_size)
                self.lstm_init_states[direction].append(init_states)
                self.lstm_final_states[direction].append(final_state)
                if direction == 'forward':
                    self.lstm_outputs[direction].append(layer_output)
                else:
                    self.lstm_outputs[direction].append(
                        tf.reverse_sequence(
                            layer_output,
                            sequence_lengths,
                            seq_axis=1,
                            batch_axis=0
                        )
                    )

                with tf.control_dependencies([layer_output]):
                    # update the stored initial states; use a separate index
                    # so the layer index `i` is not shadowed
                    for k in range(2):
                        new_state = tf.concat(
                            [final_state[k][:batch_size, :],
                             init_states[k][batch_size:, :]], axis=0)
                        state_update_op = tf.assign(init_states[k], new_state)
                        update_ops.append(state_update_op)

                layer_input = layer_output

        self.mask = mask
        self.sequence_lengths = sequence_lengths
        self.update_state_op = tf.group(*update_ops)
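
The stateful-LSTM bookkeeping above (a non-trainable state Variable sized for the largest batch, sliced per batch and written back) reduces to the following pattern; all names here are invented for illustration, and the RNN call itself is elided:

import tensorflow as tf

max_batch_size, dim = 32, 64
state_var = tf.Variable(tf.zeros([max_batch_size, dim]), trainable=False)
batch_size = tf.placeholder(tf.int32, [])

# initial state for the current (possibly smaller) batch
batch_state = state_var[:batch_size, :]

# ... run the RNN from batch_state to obtain its final state ...
final_state = batch_state  # stand-in for the RNN's real final state

# write the new state back, keeping the unused tail rows untouched
update_op = tf.assign(
    state_var,
    tf.concat([final_state, state_var[batch_size:, :]], axis=0))
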
Code example #49
File: dgcnn_utils.py Project: Beleiaya/BERT
def backward_dgcnn(x, input_mask,
			num_layers=2, 
			dilation_rates=[1,2],
			strides=[1,1],
			num_filters=[64,64],
			kernel_sizes=[3,3], 
			is_training=False,
			scope_name="textcnn", 
			reuse=False, 
			activation=tf.nn.relu,
			is_casual=False,
			padding='same'):

	# input_mask: batch_size, seq

	# initializer = tf.glorot_uniform_initializer()
	# initializer = tf.truncated_normal_initializer(stddev=0.1)
	initializer = create_initializer(initializer_range=0.02)
	input_len = tf.reduce_sum(tf.cast(input_mask, tf.int32), axis=-1)

	# inverse_mask = tf.reverse_sequence(input_mask, input_len, seq_axis=1, batch_axis=0)
	input_mask = tf.expand_dims(input_mask, axis=-1)
	input_mask = tf.cast(input_mask, dtype=tf.float32)

	inverse_x = tf.reverse_sequence(x, input_len, seq_axis=1, batch_axis=0)

	if is_casual:
		left_pad = dilation_rates[0] * (kernel_sizes[0] - 1)
		inputs = tf.pad(inverse_x, [[0, 0, ], [left_pad, 0], [0, 0]])
		padding = 'valid'
		tf.logging.info("==casual valid padding==")
	else:
		inputs = inverse_x

	with tf.variable_scope(scope_name, reuse=reuse):
		inputs = gated_conv1d_op(inputs,
						filters=num_filters[0],
						kernel_size=kernel_sizes[0],
						padding=padding,
						activation=None,
						strides=1,
						reuse=reuse, 
						dilation_rate=1,
						name="gated_conv",
						kernel_initializer=initializer, #tf.truncated_normal_initializer(stddev=0.1),
						is_training=is_training)
		if padding == 'same':
			inputs *= input_mask
		residual_inputs = inputs

	for (dilation_rate, 
		layer, 
		kernel_size, 
		stride, 
		num_filter) in zip(dilation_rates, 
							range(num_layers), 
							kernel_sizes,
							strides, 
							num_filters):
		layer_scope_name = "%s_layer_%s"%(str(scope_name), str(layer))
		output_shape = bert_utils.get_shape_list(inputs, expected_rank=3)
		with tf.variable_scope(layer_scope_name, reuse=reuse):
			if dilation_rate > 1:
				stride = 1
			if not is_casual:
				padding = padding
				tf.logging.info("==none-casual same padding==")
			else:
				left_pad = dilation_rate * (kernel_size - 1)
				inputs = tf.pad(inputs, [[0, 0, ], [left_pad, 0], [0, 0]])
				padding = 'valid'
				tf.logging.info("==casual valid padding==")

			tf.logging.info("==kernel_size:%s, num_filter:%s, stride:%s, dilation_rate:%s==", str(kernel_size), 
										str(num_filter), str(stride), str(dilation_rate))
			inputs = residual_gated_conv1d_op(inputs,
									residual_inputs,
									filters=num_filter, 
									kernel_size=kernel_size, 
									padding=padding, 
									activation=None, 
									strides=stride, 
									reuse=False, 
									dilation_rate=dilation_rate,
									name="residual_gated_conv",
									kernel_initializer=initializer, #tf.truncated_normal_initializer(stddev=0.1), 
									is_training=is_training)
			if padding == 'same':
				inputs *= input_mask
			residual_inputs = inputs
	# undo the initial reversal so the outputs line up with the input order
	inverse_x = tf.reverse_sequence(inputs, input_len, seq_axis=1, batch_axis=0)
	return inverse_x
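
The function above is the standard recipe for a right-to-left causal convolution: reverse the valid prefix, run the left-to-right causal stack, reverse back. Distilled into a helper of our own naming:

import tensorflow as tf

def right_to_left(op, x, lengths):
    """Run a causal (left-to-right) op so it effectively sees future context."""
    x_rev = tf.reverse_sequence(x, lengths, seq_axis=1, batch_axis=0)
    y_rev = op(x_rev)  # any length-preserving causal op over [B, T, F]
    return tf.reverse_sequence(y_rev, lengths, seq_axis=1, batch_axis=0)
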
Code example #50
File: embed_elmo_tf.py Project: mead-ml/hub
    def _build_ops(self, lm_graph):
        with tf.control_dependencies([lm_graph.update_state_op]):
            # get the LM embeddings
            token_embeddings = lm_graph.embedding
            layers = [
                tf.concat([token_embeddings, token_embeddings], axis=2)
            ]

            n_lm_layers = len(lm_graph.lstm_outputs['forward'])
            for i in range(n_lm_layers):
                layers.append(
                    tf.concat(
                        [lm_graph.lstm_outputs['forward'][i],
                         lm_graph.lstm_outputs['backward'][i]],
                        axis=-1
                    )
                )

            # The layers include the BOS/EOS tokens.  Remove them
            sequence_length_wo_bos_eos = lm_graph.sequence_lengths - 2
            layers_without_bos_eos = []
            for layer in layers:
                layer_wo_bos_eos = layer[:, 1:, :]
                layer_wo_bos_eos = tf.reverse_sequence(
                    layer_wo_bos_eos,
                    lm_graph.sequence_lengths - 1,
                    seq_axis=1,
                    batch_axis=0,
                    )
                layer_wo_bos_eos = layer_wo_bos_eos[:, 1:, :]
                layer_wo_bos_eos = tf.reverse_sequence(
                    layer_wo_bos_eos,
                    sequence_length_wo_bos_eos,
                    seq_axis=1,
                    batch_axis=0,
                )
                layers_without_bos_eos.append(layer_wo_bos_eos)

            # concatenate the layers
            lm_embeddings = tf.concat(
                [tf.expand_dims(t, axis=1) for t in layers_without_bos_eos],
                axis=1
            )

            # get the mask op without bos/eos.
            # tf doesn't support reversing boolean tensors, so cast
            # to int then back
            mask_wo_bos_eos = tf.cast(lm_graph.mask[:, 1:], 'int32')
            mask_wo_bos_eos = tf.reverse_sequence(
                mask_wo_bos_eos,
                lm_graph.sequence_lengths - 1,
                seq_axis=1,
                batch_axis=0,
                )
            mask_wo_bos_eos = mask_wo_bos_eos[:, 1:]
            mask_wo_bos_eos = tf.reverse_sequence(
                mask_wo_bos_eos,
                sequence_length_wo_bos_eos,
                seq_axis=1,
                batch_axis=0,
            )
            mask_wo_bos_eos = tf.cast(mask_wo_bos_eos, 'bool')

        return {
            'lm_embeddings': lm_embeddings,
            'lengths': sequence_length_wo_bos_eos,
            'token_embeddings': lm_graph.embedding,
            'mask': mask_wo_bos_eos,
        }
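
The paired reverse_sequence calls strip the first and last valid token of every row: drop the first column, reverse so EOS lands at the front, drop it, then reverse back. Traced on a single toy row (100 and 200 stand in for the BOS/EOS ids):

import tensorflow as tf

seq = tf.constant([[100, 7, 8, 200, 0]])   # [BOS, a, b, EOS, pad], length 4
length = tf.constant([4])

trimmed = seq[:, 1:]                                            # drop BOS
trimmed = tf.reverse_sequence(trimmed, length - 1, seq_axis=1)  # EOS to front
trimmed = trimmed[:, 1:]                                        # drop EOS
trimmed = tf.reverse_sequence(trimmed, length - 2, seq_axis=1)  # restore order

with tf.Session() as sess:
    print(sess.run(trimmed))   # [[7 8 0]]
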
Code example #51
def bidirectional_GRU(inputs,
                      inputs_len,
                      cell=None,
                      cell_fn=tf.contrib.rnn.GRUCell,
                      units=0,
                      layers=1,
                      scope="Bidirectional_GRU",
                      output=0,
                      is_training=True,
                      reuse=None,
                      dr_input_keep_prob=1.0,
                      dr_output_keep_prob=1.0,
                      is_bidir=False):
    '''
    Bidirectional recurrent neural network with GRU cells.

    Args:
        inputs:     rnn input of shape (batch_size, timestep, dim)
        inputs_len: rnn input length of shape (batch_size, )
        cell:       rnn cell of type RNN_Cell.

    Returns:
        outputs of shape [batch, step, dim (fw;bw)] and
        states of shape [batch, dim (fw;bw)]
    '''
    with tf.variable_scope(scope,
                           reuse=reuse,
                           initializer=tf.orthogonal_initializer()):
        if cell is not None:
            (cell_fw, cell_bw) = cell
        else:
            shapes = inputs.get_shape().as_list()
            if len(shapes) > 3:
                print('input reshaped!!!')
                inputs = tf.reshape(inputs,
                                    (shapes[0] * shapes[1], shapes[2], -1))
                inputs_len = tf.reshape(inputs_len, (shapes[0] * shapes[1], ))

            # if no cells are provided, use standard GRU cell implementation
            if layers > 1:
                cell_fw = MultiRNNCell([
                    apply_dropout(cell_fn(units),
                                  size=inputs.shape[-1] if i == 0 else units,
                                  is_training=is_training,
                                  input_keep_prob=dr_input_keep_prob,
                                  output_keep_prob=dr_output_keep_prob)
                    for i in range(layers)
                ])
                if is_bidir:
                    cell_bw = MultiRNNCell([
                        apply_dropout(
                            cell_fn(units),
                            size=inputs.shape[-1] if i == 0 else units,
                            is_training=is_training,
                            input_keep_prob=dr_input_keep_prob,
                            output_keep_prob=dr_output_keep_prob)
                        for i in range(layers)
                    ])
            else:
                cell_fw = apply_dropout(cell_fn(units),
                                        size=inputs.shape[-1],
                                        is_training=is_training)
                if is_bidir:
                    cell_bw = apply_dropout(cell_fn(units),
                                            size=inputs.shape[-1],
                                            is_training=is_training)

        if is_bidir:
            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=inputs,
                sequence_length=inputs_len,
                dtype=tf.float32,
                scope=scope,
                time_major=False)
            if Params.reverse_bw:
                fw = outputs[0]
                bw = tf.reverse_sequence(outputs[1],
                                         seq_lengths=inputs_len,
                                         seq_axis=1)
                outputs = (fw, bw)

            return tf.concat(outputs, 2), tf.concat(states, axis=1)

        else:
            outputs, states = tf.nn.dynamic_rnn(cell=cell_fw,
                                                inputs=inputs,
                                                dtype=tf.float32,
                                                sequence_length=inputs_len,
                                                scope=scope,
                                                time_major=False)
            return outputs, states
Code example #52
    def reverse_sequence(self, x, mask):
        """Here mask.shape is [batch_size, seq_len, 1].
        """
        seq_len = K.round(K.sum(mask, 1)[:, 0])
        seq_len = K.cast(seq_len, 'int32')
        return tf.reverse_sequence(x, seq_len, seq_dim=1)
Code example #53
import numpy as np
import tensorflow as tf
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# First, load the image again
filename = "MarshOrchid.jpg"
image = mpimg.imread(filename)
height, width, depth = image.shape

# Create a TensorFlow Variable
x = tf.Variable(image, name='x')

model = tf.global_variables_initializer()

with tf.Session() as session:
    # reverse all `width` pixels of each of the `height` rows,
    # i.e. mirror the image left-to-right
    x = tf.reverse_sequence(x, [width] * height, 1, batch_dim=0)
    session.run(model)
    result = session.run(x)

print(result.shape)
plt.imshow(result)
plt.show()
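
A hedged variant of the same idea on a toy array: with per-row lengths shorter than the width, only a left slice of each row is mirrored and the remaining pixels stay put.

import numpy as np
import tensorflow as tf

image = np.arange(12, dtype=np.float32).reshape(3, 4)   # toy 3x4 "image"
half_flip = tf.reverse_sequence(image, [2] * 3, seq_axis=1, batch_axis=0)

with tf.Session() as sess:
    print(sess.run(half_flip))
    # each row [a b c d] becomes [b a c d]
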
Code example #54
    def __init__(self, is_training, config):
        hidden_size = config.hidden_size
        view1_input_size = config.view1_input_size
        view2_input_size = config.view2_input_size
        margin = config.margin
        lr = config.learning_rate
        kp = config.keep_prob
        obj = config.objective

        # View 1 Layer 1 x1
        self._input_x1 = input_x1 = tf.placeholder(
            tf.float32, [None, None, view1_input_size])
        self._input_x1_lengths = input_x1_lengths = tf.placeholder(
            tf.int32, [None])
        input_x1_lengths_64 = tf.to_int64(input_x1_lengths)

        if is_training and kp < 1:
            input_x1 = tf.nn.dropout(input_x1, keep_prob=kp)

        l2r_cell_layer1_view1 = tf.nn.rnn_cell.BasicLSTMCell(
            hidden_size, forget_bias=1.0, state_is_tuple=True)
        r2l_cell_layer1_view1 = tf.nn.rnn_cell.BasicLSTMCell(
            hidden_size, forget_bias=1.0, state_is_tuple=True)

        with tf.variable_scope('l2r_layer1_view1'):
            l2r_outputs_layer1_view1, _ = tf.nn.dynamic_rnn(
                l2r_cell_layer1_view1,
                input_x1,
                dtype=tf.float32,
                sequence_length=input_x1_lengths)
        with tf.variable_scope('r2l_layer1_view1'):
            r2l_outputs_layer1_view1, _ = tf.nn.dynamic_rnn(
                r2l_cell_layer1_view1,
                tf.reverse_sequence(input_x1, input_x1_lengths_64, 1),
                dtype=tf.float32,
                sequence_length=input_x1_lengths)
        r2l_outputs_layer1_view1 = tf.reverse_sequence(
            r2l_outputs_layer1_view1, input_x1_lengths_64, 1)

        # View 1 Layer 2 x1
        input_x1_layer2 = tf.concat(
            2, [l2r_outputs_layer1_view1, r2l_outputs_layer1_view1],
            'concat_layer1_view1_x1')

        if is_training and kp < 1:
            input_x1_layer2 = tf.nn.dropout(input_x1_layer2, keep_prob=kp)

        l2r_cell_layer2_view1 = tf.nn.rnn_cell.BasicLSTMCell(
            hidden_size, forget_bias=1.0, state_is_tuple=True)
        r2l_cell_layer2_view1 = tf.nn.rnn_cell.BasicLSTMCell(
            hidden_size, forget_bias=1.0, state_is_tuple=True)

        with tf.variable_scope('l2r_layer2_view1'):
            l2r_outputs_layer2_view1, _ = tf.nn.dynamic_rnn(
                l2r_cell_layer2_view1,
                input_x1_layer2,
                dtype=tf.float32,
                sequence_length=input_x1_lengths)
        with tf.variable_scope('r2l_layer2_view1'):
            r2l_outputs_layer2_view1, _ = tf.nn.dynamic_rnn(
                r2l_cell_layer2_view1,
                tf.reverse_sequence(input_x1_layer2, input_x1_lengths_64, 1),
                dtype=tf.float32,
                sequence_length=input_x1_lengths)

        l2r_outputs_view1 = tf.gather(
            tf.reshape(tf.concat(1, l2r_outputs_layer2_view1),
                       [-1, hidden_size]),
            tf.range(tf.shape(input_x1)[0]) * tf.shape(input_x1)[1] +
            input_x1_lengths - 1)
        r2l_outputs_view1 = tf.gather(
            tf.reshape(tf.concat(1, r2l_outputs_layer2_view1),
                       [-1, hidden_size]),
            tf.range(tf.shape(input_x1)[0]) * tf.shape(input_x1)[1] +
            input_x1_lengths - 1)
        self._final_state = x1 = self.normalization(
            tf.concat(1, [l2r_outputs_view1, r2l_outputs_view1],
                      'concat_view1_x1'))

        if not is_training:
            return

        # input_x2
        if 2 in obj or 3 in obj:
            # View 1 Layer 1 x2
            self._input_x2 = input_x2 = tf.placeholder(
                tf.float32, [None, None, view1_input_size])
            self._input_x2_lengths = input_x2_lengths = tf.placeholder(
                tf.int32, [None])
            input_x2_lengths_64 = tf.to_int64(input_x2_lengths)

            if is_training and kp < 1:
                input_x2 = tf.nn.dropout(input_x2, keep_prob=kp)

            with tf.variable_scope('l2r_layer1_view1', reuse=True):
                l2r_outputs_layer1_view1, _ = tf.nn.dynamic_rnn(
                    l2r_cell_layer1_view1,
                    input_x2,
                    dtype=tf.float32,
                    sequence_length=input_x2_lengths)
            with tf.variable_scope('r2l_layer1_view1', reuse=True):
                r2l_outputs_layer1_view1, _ = tf.nn.dynamic_rnn(
                    r2l_cell_layer1_view1,
                    tf.reverse_sequence(input_x2, input_x2_lengths_64, 1),
                    dtype=tf.float32,
                    sequence_length=input_x2_lengths)
            r2l_outputs_layer1_view1 = tf.reverse_sequence(
                r2l_outputs_layer1_view1, input_x2_lengths_64, 1)

            # View 1 Layer 2 x2
            input_x2_layer2 = tf.concat(
                2, [l2r_outputs_layer1_view1, r2l_outputs_layer1_view1],
                'concat_layer1_view1_x2')

            if is_training and kp < 1:
                input_x2_layer2 = tf.nn.dropout(input_x2_layer2, keep_prob=kp)

            l2r_cell_layer2_view1 = tf.nn.rnn_cell.BasicLSTMCell(
                hidden_size, forget_bias=1.0, state_is_tuple=True)
            r2l_cell_layer2_view1 = tf.nn.rnn_cell.BasicLSTMCell(
                hidden_size, forget_bias=1.0, state_is_tuple=True)

            with tf.variable_scope('l2r_layer2_view1', reuse=True):
                l2r_outputs_layer2_view1, _ = tf.nn.dynamic_rnn(
                    l2r_cell_layer2_view1,
                    input_x2_layer2,
                    dtype=tf.float32,
                    sequence_length=input_x2_lengths)
            with tf.variable_scope('r2l_layer2_view1', reuse=True):
                r2l_outputs_layer2_view1, _ = tf.nn.dynamic_rnn(
                    r2l_cell_layer2_view1,
                    tf.reverse_sequence(input_x2_layer2, input_x2_lengths_64,
                                        1),
                    dtype=tf.float32,
                    sequence_length=input_x2_lengths)

            l2r_outputs_view1 = tf.gather(
                tf.reshape(tf.concat(1, l2r_outputs_layer2_view1),
                           [-1, hidden_size]),
                tf.range(tf.shape(input_x2)[0]) * tf.shape(input_x2)[1] +
                input_x2_lengths - 1)
            r2l_outputs_view1 = tf.gather(
                tf.reshape(tf.concat(1, r2l_outputs_layer2_view1),
                           [-1, hidden_size]),
                tf.range(tf.shape(input_x2)[0]) * tf.shape(input_x2)[1] +
                input_x2_lengths - 1)
            x2 = self.normalization(
                tf.concat(1, [l2r_outputs_view1, r2l_outputs_view1],
                          'concat_view1_x2'))

        # View 2 Layer 1 c1
        self._input_c1 = input_c1 = tf.placeholder(
            tf.float32, [None, None, view2_input_size])
        self._input_c1_lengths = input_c1_lengths = tf.placeholder(
            tf.int32, [None])
        input_c1_lengths_64 = tf.to_int64(input_c1_lengths)

        l2r_cell_layer1_view2 = tf.nn.rnn_cell.BasicLSTMCell(
            hidden_size, forget_bias=1.0, state_is_tuple=True)
        r2l_cell_layer1_view2 = tf.nn.rnn_cell.BasicLSTMCell(
            hidden_size, forget_bias=1.0, state_is_tuple=True)

        with tf.variable_scope('l2r_layer1_view2'):
            l2r_outputs_layer1_view2, _ = tf.nn.dynamic_rnn(
                l2r_cell_layer1_view2,
                input_c1,
                dtype=tf.float32,
                sequence_length=input_c1_lengths)
        with tf.variable_scope('r2l_layer1_view2'):
            r2l_outputs_layer1_view2, _ = tf.nn.dynamic_rnn(
                r2l_cell_layer1_view2,
                tf.reverse_sequence(input_c1, input_c1_lengths_64, 1),
                dtype=tf.float32,
                sequence_length=input_c1_lengths)
        r2l_outputs_layer1_view2 = tf.reverse_sequence(
            r2l_outputs_layer1_view2, input_c1_lengths_64, 1)

        # View 2 Layer 2 c1
        input_c1_layer2 = tf.concat(
            2, [l2r_outputs_layer1_view2, r2l_outputs_layer1_view2],
            'concat_layer1_view2_c1')

        if is_training and kp < 1:
            input_c1_layer2 = tf.nn.dropout(input_c1_layer2, keep_prob=kp)

        l2r_cell_layer2_view2 = tf.nn.rnn_cell.BasicLSTMCell(
            hidden_size, forget_bias=1.0, state_is_tuple=True)
        r2l_cell_layer2_view2 = tf.nn.rnn_cell.BasicLSTMCell(
            hidden_size, forget_bias=1.0, state_is_tuple=True)

        with tf.variable_scope('l2r_layer2_view2'):
            l2r_outputs_layer2_view2, _ = tf.nn.dynamic_rnn(
                l2r_cell_layer2_view2,
                input_c1_layer2,
                dtype=tf.float32,
                sequence_length=input_c1_lengths)
        with tf.variable_scope('r2l_layer2_view2'):
            r2l_outputs_layer2_view2, _ = tf.nn.dynamic_rnn(
                r2l_cell_layer2_view2,
                tf.reverse_sequence(input_c1_layer2, input_c1_lengths_64, 1),
                dtype=tf.float32,
                sequence_length=input_c1_lengths)

        l2r_outputs_view2 = tf.gather(
            tf.reshape(tf.concat(1, l2r_outputs_layer2_view2),
                       [-1, hidden_size]),
            tf.range(tf.shape(input_c1)[0]) * tf.shape(input_c1)[1] +
            input_c1_lengths - 1)
        r2l_outputs_view2 = tf.gather(
            tf.reshape(tf.concat(1, r2l_outputs_layer2_view2),
                       [-1, hidden_size]),
            tf.range(tf.shape(input_c1)[0]) * tf.shape(input_c1)[1] +
            input_c1_lengths - 1)
        self._word_state = c1 = self.normalization(
            tf.concat(1, [l2r_outputs_view2, r2l_outputs_view2],
                      'concat_view2_c1'))

        # input_c2
        if 0 in obj or 1 in obj:
            # View 2 Layer 1 c2
            self._input_c2 = input_c2 = tf.placeholder(
                tf.float32, [None, None, view2_input_size])
            self._input_c2_lengths = input_c2_lengths = tf.placeholder(
                tf.int32, [None])
            input_c2_lengths_64 = tf.to_int64(input_c2_lengths)

            with tf.variable_scope('l2r_layer1_view2', reuse=True):
                l2r_outputs_layer1_view2, _ = tf.nn.dynamic_rnn(
                    l2r_cell_layer1_view2,
                    input_c2,
                    dtype=tf.float32,
                    sequence_length=input_c2_lengths)
            with tf.variable_scope('r2l_layer1_view2', reuse=True):
                r2l_outputs_layer1_view2, _ = tf.nn.dynamic_rnn(
                    r2l_cell_layer1_view2,
                    tf.reverse_sequence(input_c2, input_c2_lengths_64, 1),
                    dtype=tf.float32,
                    sequence_length=input_c2_lengths)
            r2l_outputs_layer1_view2 = tf.reverse_sequence(
                r2l_outputs_layer1_view2, input_c2_lengths_64, 1)

            # View 2 Layer 2 c2
            input_c2_layer2 = tf.concat(
                2, [l2r_outputs_layer1_view2, r2l_outputs_layer1_view2],
                'concat_layer1_view2_c2')

            if is_training and kp < 1:
                input_c2_layer2 = tf.nn.dropout(input_c2_layer2, keep_prob=kp)

            with tf.variable_scope('l2r_layer2_view2', reuse=True):
                l2r_outputs_layer2_view2, _ = tf.nn.dynamic_rnn(
                    l2r_cell_layer2_view2,
                    input_c2_layer2,
                    dtype=tf.float32,
                    sequence_length=input_c2_lengths)
            with tf.variable_scope('r2l_layer2_view2', reuse=True):
                r2l_outputs_layer2_view2, _ = tf.nn.dynamic_rnn(
                    r2l_cell_layer2_view2,
                    tf.reverse_sequence(input_c2_layer2, input_c2_lengths_64,
                                        1),
                    dtype=tf.float32,
                    sequence_length=input_c2_lengths)

            l2r_outputs_view2 = tf.gather(
                tf.reshape(tf.concat(1, l2r_outputs_layer2_view2),
                           [-1, hidden_size]),
                tf.range(tf.shape(input_c2)[0]) * tf.shape(input_c2)[1] +
                input_c2_lengths - 1)
            r2l_outputs_view2 = tf.gather(
                tf.reshape(tf.concat(1, r2l_outputs_layer2_view2),
                           [-1, hidden_size]),
                tf.range(tf.shape(input_c2)[0]) * tf.shape(input_c2)[1] +
                input_c2_lengths - 1)
            c2 = self.normalization(
                tf.concat(1, [l2r_outputs_view2, r2l_outputs_view2],
                          'concat_view2_c2'))

        num_objectives = len(obj)
        loss = 0
        if 0 in obj:
            loss += self.contrastive_loss(margin, x1, c1, c2)
        if 1 in obj:
            loss += self.contrastive_loss(margin, c1, x1, c2)
        if 2 in obj:
            loss += self.contrastive_loss(margin, c1, x1, x2)
        if 3 in obj:
            loss += self.contrastive_loss(margin, x1, c1, x2)
        loss /= num_objectives
        self._loss = loss
        self._train_step = tf.train.AdamOptimizer(lr).minimize(loss)
Code example #55
    def _construct_network(self,
                           a_input,
                           a_seqlens,
                           n_samples,
                           p_input,
                           p_seqlens,
                           maxlen,
                           p_ids,
                           batch_size,
                           is_training=False,
                           run_prompt_encoder=False,
                           keep_prob=1.0):
        """ Construct RNNLM network
        Args:
          ?
        Returns:
          predictions, probabilities, logits, attention
        """

        L2 = self.network_architecture['L2']
        initializer = self.network_architecture['initializer']

        # Question Encoder RNN
        with tf.variable_scope('Embeddings',
                               initializer=initializer(self._seed)) as scope:
            embedding = slim.model_variable(
                'word_embedding',
                trainable=False,
                shape=[
                    self.network_architecture['n_in'],
                    self.network_architecture['n_ehid']
                ],
                initializer=tf.truncated_normal_initializer(stddev=0.1),
                regularizer=slim.l2_regularizer(L2),
                device='/GPU:0')
            a_inputs = tf.nn.dropout(tf.nn.embedding_lookup(
                embedding, a_input, name='embedded_data'),
                                     keep_prob=keep_prob,
                                     seed=self._seed + 1)
            p_inputs = tf.nn.dropout(tf.nn.embedding_lookup(
                embedding, p_input, name='embedded_data'),
                                     keep_prob=keep_prob,
                                     seed=self._seed + 2)

            p_inputs_fw = tf.transpose(p_inputs, [1, 0, 2])
            p_inputs_bw = tf.transpose(
                tf.reverse_sequence(p_inputs,
                                    seq_lengths=p_seqlens,
                                    seq_axis=1,
                                    batch_axis=0), [1, 0, 2])

            a_inputs_fw = tf.transpose(a_inputs, [1, 0, 2])
            a_inputs_bw = tf.transpose(
                tf.reverse_sequence(a_inputs,
                                    seq_lengths=a_seqlens,
                                    seq_axis=1,
                                    batch_axis=0), [1, 0, 2])

        if run_prompt_encoder:
            # Prompt Encoder RNN
            with tf.variable_scope('RNN_Q_FW',
                                   initializer=initializer(
                                       self._seed)) as scope:
                rnn_fw = tf.contrib.rnn.LSTMBlockFusedCell(
                    num_units=self.network_architecture['n_phid'])
                _, state_fw = rnn_fw(p_inputs_fw,
                                     sequence_length=p_seqlens,
                                     dtype=tf.float32)
            with tf.variable_scope('RNN_Q_BW',
                                   initializer=initializer(
                                       self._seed)) as scope:
                rnn_bw = tf.contrib.rnn.LSTMBlockFusedCell(
                    num_units=self.network_architecture['n_phid'])
                _, state_bw = rnn_bw(p_inputs_bw,
                                     sequence_length=p_seqlens,
                                     dtype=tf.float32)

            prompt_embeddings = tf.concat([state_fw[1], state_bw[1]], axis=1)
            prompt_embeddings = tf.nn.dropout(prompt_embeddings,
                                              keep_prob=keep_prob,
                                              seed=self._seed)

        else:
            prompt_embeddings = tf.nn.dropout(self.prompt_embeddings,
                                              keep_prob=keep_prob,
                                              seed=self._seed)

        with tf.variable_scope('RNN_KEY_FW',
                               initializer=initializer(self._seed)) as scope:
            rnn_fw = tf.contrib.rnn.LSTMBlockFusedCell(
                num_units=self.network_architecture['n_phid'])
            _, state_fw = rnn_fw(p_inputs_fw,
                                 sequence_length=p_seqlens,
                                 dtype=tf.float32)
        with tf.variable_scope('RNN_KEY_BW',
                               initializer=initializer(self._seed)) as scope:
            rnn_bw = tf.contrib.rnn.LSTMBlockFusedCell(
                num_units=self.network_architecture['n_phid'])
            _, state_bw = rnn_bw(p_inputs_bw,
                                 sequence_length=p_seqlens,
                                 dtype=tf.float32)

        keys = tf.nn.dropout(tf.concat([state_fw[1], state_bw[1]], axis=1),
                             keep_prob=keep_prob,
                             seed=self._seed + 10)

        with tf.variable_scope('PROMPT_ATN',
                               initializer=initializer(self._seed)) as scope:
            # Compute Attention over known questions
            mems = slim.fully_connected(
                prompt_embeddings,
                2 * self.network_architecture['n_phid'],
                activation_fn=None,
                weights_regularizer=slim.l2_regularizer(L2),
                scope="mem")
            mems = tf.expand_dims(mems, axis=0, name='expanded_mems')
            tkeys = slim.fully_connected(
                keys,
                2 * self.network_architecture['n_phid'],
                activation_fn=None,
                weights_regularizer=slim.l2_regularizer(L2),
                scope="tkeys")
            tkeys = tf.expand_dims(tkeys, axis=1, name='expanded_mems')
            v = slim.model_variable(
                'v',
                shape=[2 * self.network_architecture['n_phid'], 1],
                regularizer=slim.l2_regularizer(L2),
                device='/GPU:0')

            tmp = tf.nn.tanh(mems + tkeys)
            print tmp.get_shape()
            tmp = tf.nn.dropout(tf.reshape(
                tmp, shape=[-1, 2 * self.network_architecture['n_phid']]),
                                keep_prob=keep_prob,
                                seed=self._seed + 3)
            a = tf.exp(
                tf.reshape(tf.matmul(tmp, v),
                           [batch_size * (n_samples + 1), -1]))

            if is_training:
                mask = tf.where(
                    tf.equal(
                        tf.expand_dims(p_ids, axis=1),
                        tf.tile(
                            tf.expand_dims(
                                tf.range(0,
                                         self.network_architecture['n_topics'],
                                         dtype=tf.int32),
                                axis=0), [batch_size * (n_samples + 1), 1])),
                    tf.zeros(shape=[
                        batch_size * (n_samples + 1),
                        self.network_architecture['n_topics']
                    ],
                             dtype=tf.float32),
                    tf.ones(shape=[
                        batch_size * (n_samples + 1),
                        self.network_architecture['n_topics']
                    ],
                            dtype=tf.float32))
                a = a * mask

            attention = a / tf.reduce_sum(a, axis=1, keep_dims=True)
            attended_prompt_embedding = tf.matmul(attention, prompt_embeddings)

        # Response Encoder RNN
        with tf.variable_scope('RNN_A_FW',
                               initializer=initializer(self._seed)) as scope:
            rnn_fw = tf.contrib.rnn.LSTMBlockFusedCell(
                num_units=self.network_architecture['n_phid'])
            outputs_fw, _ = rnn_fw(a_inputs_fw,
                                   sequence_length=a_seqlens,
                                   dtype=tf.float32)

        with tf.variable_scope('RNN_A_BW',
                               initializer=initializer(self._seed)) as scope:
            rnn_bw = tf.contrib.rnn.LSTMBlockFusedCell(
                num_units=self.network_architecture['n_phid'])
            outputs_bw, _ = rnn_bw(a_inputs_bw,
                                   sequence_length=a_seqlens,
                                   dtype=tf.float32)

        outputs = tf.concat([outputs_fw, outputs_bw], axis=2)
        outputs = tf.transpose(outputs, [1, 0, 2])
        outputs = tf.nn.dropout(outputs, keep_prob=keep_prob, seed=self._seed)

        a_seqlens = tf.tile(a_seqlens, [n_samples + 1])
        outputs = tf.tile(outputs, [1 + n_samples, 1, 1])

        hidden, attention = self._bahdanau_attention(
            memory=outputs,
            seq_lens=a_seqlens,
            maxlen=maxlen,
            query=attended_prompt_embedding,
            size=2 * self.network_architecture['n_rhid'],
            batch_size=batch_size * (n_samples + 1))

        with tf.variable_scope('Grader') as scope:
            for layer in xrange(self.network_architecture['n_flayers']):
                hidden = slim.fully_connected(
                    hidden,
                    self.network_architecture['n_fhid'],
                    activation_fn=self.network_architecture['f_activation_fn'],
                    weights_regularizer=slim.l2_regularizer(L2),
                    scope="hidden_layer_" + str(layer))
                hidden = tf.nn.dropout(hidden,
                                       keep_prob=keep_prob,
                                       seed=self._seed + layer)

            logits = slim.fully_connected(hidden,
                                          self.network_architecture['n_out'],
                                          activation_fn=None,
                                          scope="output_layer")
            probabilities = self.network_architecture['output_fn'](logits)
            predictions = tf.cast(tf.round(probabilities), dtype=tf.float32)

        return predictions, probabilities, logits, attention
Code example #56
def reverse_sequence(x):
    x, mask, seq_len = x
    return tf.reverse_sequence(x, seq_len, seq_dim=1)
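
This helper takes a (tensor, mask, lengths) triple, which is the calling convention of a Keras Lambda layer fed a list of inputs. A direct call with toy tensors of our own (the Lambda wiring itself is assumed, not shown in the source):

import tensorflow as tf

x = tf.random_normal([2, 4, 3])   # [batch, time, features]
mask = tf.ones([2, 4, 1])         # unpacked but unused by the helper
seq_len = tf.constant([4, 2])

reversed_x = reverse_sequence([x, mask, seq_len])  # e.g. via Lambda(reverse_sequence)
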
Code example #57
File: lm.py Project: deepmipt/lm
    def __init__(self,
                 vocab_size,
                 tok_emb_mat,
                 emb_dim=256,
                 n_hidden=512,
                 n_layers=1,
                 n_unroll=70,
                 model_name='test_model',
                 gpu=1,
                 bidirectional=False,
                 dropout_keep_prob=0.7):
        tf.reset_default_graph()
        self.learning_rate_ph = tf.placeholder(dtype=tf.float32,
                                               shape=[],
                                               name='lr')
        self._dropout_ph = tf.placeholder_with_default(1.0,
                                                       shape=[],
                                                       name='drop')
        self.tok_ph = tf.placeholder(dtype=tf.int32,
                                     shape=[None, None],
                                     name='tok_idxs')
        self.mask_ph = tf.placeholder_with_default(tf.ones_like(
            self.tok_ph, dtype=tf.float32),
                                                   shape=[None, None])

        self.model_name = model_name
        self.vocab_size = vocab_size
        self.n_unroll = n_unroll
        self.dropout_keep_prob = dropout_keep_prob

        # Embeddings
        emb_mat = tf.Variable(tok_emb_mat,
                              name='Embeddings_Mat',
                              trainable=True)
        embs = tf.nn.embedding_lookup(emb_mat, self.tok_ph)

        # Forward LSTM
        with tf.variable_scope('Forward'):
            units = embs[:, :-1, :]
            units = self._variational_dropout(units, self._dropout_ph)
            for n in range(n_layers):
                with tf.variable_scope('LSTM_' + str(n)):
                    units, _ = cudnn_lstm(units, n_hidden)
                    if n != n_layers - 1:
                        units = self._variational_dropout(
                            units, self._dropout_ph)
            if n_hidden != emb_dim:
                units = tf.layers.dense(units,
                                        emb_dim,
                                        name='Output_Projection')
            units = self._variational_dropout(units, self._dropout_ph)
            logits_fw = tf.tensordot(units, emb_mat, (2, 1))
            targets = tf.one_hot(self.tok_ph, self.vocab_size)

            fw_loss = tf.losses.softmax_cross_entropy(
                targets[:, 1:, :],
                logits_fw,
                reduction=tf.losses.Reduction.NONE)
            fw_loss = self.mask_ph[:, 1:] * fw_loss

        self.loss = fw_loss

        if bidirectional:
            # Backward LSTM
            # Lengths assumed to be equal to n_unroll + n_hist
            lengths = tf.cast(tf.reduce_sum(self.mask_ph, 1), tf.int32)
            embs_bw = tf.reverse_sequence(embs,
                                          lengths,
                                          seq_axis=1,
                                          batch_axis=0)
            with tf.variable_scope('Backward'):
                units = embs_bw[:, :-1, :]
                for n in range(n_layers):
                    with tf.variable_scope('LSTM_' + str(n)):
                        units, _ = cudnn_lstm(units, n_hidden)
                        if n != n_layers - 1:
                            units = self._variational_dropout(
                                units, self._dropout_ph)
                if n_hidden != emb_dim:
                    units = tf.layers.dense(units,
                                            emb_dim,
                                            name='Output_Projection')
                units = self._variational_dropout(units, self._dropout_ph)
                logits_bw = tf.tensordot(units, emb_mat, (2, 1))
                targets_bw = tf.one_hot(
                    tf.reverse_sequence(self.tok_ph,
                                        lengths,
                                        seq_axis=1,
                                        batch_axis=0), self.vocab_size)
                bw_loss = tf.losses.softmax_cross_entropy(
                    targets_bw[:, 1:, :],
                    logits_bw,
                    reduction=tf.losses.Reduction.NONE)
                bw_loss = self.mask_ph[:, 1:] * bw_loss
                self.loss = (self.loss + bw_loss) / 2
        self.loss = tf.reduce_sum(self.loss) / tf.reduce_sum(self.mask_ph)

        # Summary
        tf.summary.scalar('log_loss', self.loss)
        self.summary = tf.summary.merge_all()

        # Predictions
        self.pred = tf.argmax(logits_fw, axis=-1)
        if bidirectional:
            self.pred_bw = tf.argmax(tf.reverse_sequence(logits_bw,
                                                         lengths,
                                                         seq_axis=1,
                                                         batch_axis=0),
                                     axis=-1)

        # Train ops
        self.train_op = self.get_train_op(self.loss,
                                          self.learning_rate_ph,
                                          clip_norm=5.0,
                                          optimizer_scope_name='Optimizer')

        # Create the session
        config = tf.ConfigProto()
        config.gpu_options.visible_device_list = str(gpu)
        self.sess = tf.Session(config=config)

        # Init variables
        self.sess.run(tf.global_variables_initializer())

        self.saver = tf.train.Saver()
        # self.saver.restore(self.sess, 'model/reddit_lm.ckpt')
        self.summary_writer = tf.summary.FileWriter('model/' + self.model_name,
                                                    self.sess.graph)
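
A training step for this model might look as follows; the class name LM, the random batch, and the learning rate are illustrative assumptions, not part of the original file:

import numpy as np

emb = np.random.randn(10000, 256).astype(np.float32)
lm = LM(vocab_size=10000, tok_emb_mat=emb, bidirectional=True)  # hypothetical class name
batch = np.random.randint(1, 10000, size=(32, 71))              # [batch, n_unroll + 1]
loss, _ = lm.sess.run(
    [lm.loss, lm.train_op],
    feed_dict={lm.tok_ph: batch,
               lm.learning_rate_ph: 1e-3,
               lm._dropout_ph: lm.dropout_keep_prob})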
Code Example #58
    def _build_unidi_rnn_cudnn(self, inputs, state, sequence_length, dtype,
                               hparams, num_layers, is_fwd):
        # cuDNN inputs must be time-major
        if not self.time_major:
            inputs = tf.transpose(inputs, perm=[1, 0, 2])

        if num_layers == 1 and not np.isclose(hparams.dropout, 0.):
            # Special case: with a single layer, cuDNN applies no dropout
            # (it only drops between layers), so apply it to the inputs here
            # and pass dropout=0. to the cell.
            dropout = 0.
            inputs = tf.nn.dropout(inputs, keep_prob=1 - hparams.dropout)
        else:
            dropout = hparams.dropout

        # the outputs of CudnnLSTM are time-major; sequence_length is rank-1,
        # so this transpose is a no-op
        sequence_length = tf.transpose(sequence_length)

        if not is_fwd:
            inputs = tf.reverse_sequence(inputs,
                                         sequence_length,
                                         batch_axis=1,
                                         seq_axis=0)
        cell = tf.contrib.cudnn_rnn.CudnnLSTM(
            num_layers=num_layers,
            num_units=hparams.num_units,
            direction=cudnn_rnn.CUDNN_RNN_UNIDIRECTION,
            use_fp16=(self.dtype == tf.float16),
            dropout=dropout)
        outputs, (h, c) = cell(inputs, initial_state=state)
        """
    # Mask outputs
    # [batch, time]
    mask = tf.sequence_mask(sequence_length, dtype=self.dtype)
    # [time, batch]
    mask = tf.transpose(mask)
    outputs *= mask
    """

        if not is_fwd:
            outputs = tf.reverse_sequence(outputs,
                                          sequence_length,
                                          batch_axis=1,
                                          seq_axis=0)
        # NOTICE! There's no way to get the "correct" masked cell state in cudnn
        # rnn.
        if num_layers == 1:
            h = tf.squeeze(h, axis=0)
            c = tf.squeeze(c, axis=0)
            return outputs, tf.nn.rnn_cell.LSTMStateTuple(c=c, h=h)

        # Split h and c to form a tuple of per-layer LSTMStateTuples
        h.set_shape((num_layers, None, hparams.num_units))
        c.set_shape((num_layers, None, hparams.num_units))
        hs = tf.unstack(h)
        cs = tf.unstack(c)
        # The cell passed to bidi-dynamic-rnn is a MultiRNNCell consisting of 2
        # regular LSTMs; the state of each is a plain LSTMStateTuple, so the
        # state of the MultiRNNCell is a tuple of LSTMStateTuples.
        states = tuple(
            tf.nn.rnn_cell.LSTMStateTuple(c=c, h=h) for h, c in zip(hs, cs))
        # No need to transpose back
        return outputs, states
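
The reverse, run forward, reverse back pattern above is how a backward pass is emulated with a unidirectional cuDNN cell. A minimal batch-major sketch of the same trick with a plain LSTMCell (illustrative only, not the project's code):

import tensorflow as tf

def backward_rnn(inputs, seq_len, num_units):
    # inputs: [batch, time, dim]; reverse each sequence's valid steps,
    # run a forward LSTM, then reverse the outputs back into place
    rev = tf.reverse_sequence(inputs, seq_len, seq_axis=1, batch_axis=0)
    cell = tf.nn.rnn_cell.LSTMCell(num_units)
    outputs, state = tf.nn.dynamic_rnn(cell, rev, sequence_length=seq_len,
                                       dtype=tf.float32)
    outputs = tf.reverse_sequence(outputs, seq_len, seq_axis=1, batch_axis=0)
    return outputs, state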
Code Example #59
File: model.py Project: woshiyyya/QA4IE-plus
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        print("VW:", VW, "N:", N, "M:", M, "JX:", JX, "JQ:", JQ)
        JA = config.max_answer_length
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        print("VW:", VW, "N:", N, "M:", M, "JX:", JX, "JQ:", JQ)
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            # Char-CNN Embedding
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(map(int, config.out_channel_dims.split(','))) # [100]
                    heights = list(map(int, config.filter_heights.split(','))) # [5]
                    assert sum(filter_sizes) == dco, (filter_sizes, dco) # Make sure filter channels = char_cnn_out size
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx, filter_sizes, heights, "VALID",  self.is_train, config.keep_prob, scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
                        else:
                            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            # Word Embedding
            if config.use_word_emb:
                with tf.variable_scope("emb_var") as scope, tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                    tf.get_variable_scope().reuse_variables()
                    self.word_emb_scope = scope
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat([word_emb_mat, self.new_emb_mat], 0)

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                # Concat Char-CNN Embedding and Word Embedding
                if config.use_char_emb:
                    xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
                    qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

            # exact match
            if config.use_exact_match: # TODO: What does it mean?
                emx = tf.expand_dims(tf.cast(self.emx, tf.float32), -1)
                xx = tf.concat([xx, emx], 3)  # [N, M, JX, di+1]
                emq = tf.expand_dims(tf.cast(self.emq, tf.float32), -1)
                qq = tf.concat([qq, emq], 2)  # [N, JQ, di+1]


        # 2 layer highway network on Concat Embedding
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        # Bidirectional LSTM (3rd layer in the paper)
        cell = GRUCell(d) if config.GRU else BasicLSTMCell(d, state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]
        flat_x_len = flatten(x_len, 0)  # [N * M]

        with tf.variable_scope("prepro"):
            if config.use_fused_lstm: #yes
                with tf.variable_scope("u1"):
                    fw_inputs = tf.transpose(qq, [1, 0, 2]) #[time_len, batch_size, input_size]
                    bw_inputs = tf.reverse_sequence(fw_inputs, q_len, batch_dim=1, seq_dim=0)
                    fw_inputs = tf.nn.dropout(fw_inputs, config.input_keep_prob)
                    bw_inputs = tf.nn.dropout(bw_inputs, config.input_keep_prob)
                    prep_fw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                    prep_bw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                    fw_outputs, fw_final = prep_fw_cell(fw_inputs, dtype=tf.float32, sequence_length=q_len, scope="fw")
                    bw_outputs, bw_final = prep_bw_cell(bw_inputs, dtype=tf.float32, sequence_length=q_len, scope="bw")
                    bw_outputs = tf.reverse_sequence(bw_outputs, q_len, batch_dim=1, seq_dim=0)
                    current_inputs = tf.concat((fw_outputs, bw_outputs), 2)
                    output = tf.transpose(current_inputs, [1, 0, 2])
                    u = output
                flat_xx = flatten(xx, 2)  # [N * M, JX, d]
                if config.share_lstm_weights: # Yes
                    tf.get_variable_scope().reuse_variables()
                    with tf.variable_scope("u1"):
                        fw_inputs = tf.transpose(flat_xx, [1, 0, 2]) #[time_len, batch_size, input_size]
                        bw_inputs = tf.reverse_sequence(fw_inputs, flat_x_len, batch_dim=1, seq_dim=0)
                        # fw_inputs = tf.nn.dropout(fw_inputs, config.input_keep_prob)
                        # bw_inputs = tf.nn.dropout(bw_inputs, config.input_keep_prob)
                        fw_outputs, fw_final = prep_fw_cell(fw_inputs, dtype=tf.float32, sequence_length=flat_x_len, scope="fw")
                        bw_outputs, bw_final = prep_bw_cell(bw_inputs, dtype=tf.float32, sequence_length=flat_x_len, scope="bw")
                        bw_outputs = tf.reverse_sequence(bw_outputs, flat_x_len, batch_dim=1, seq_dim=0)
                        current_inputs = tf.concat((fw_outputs, bw_outputs), 2)
                        output = tf.transpose(current_inputs, [1, 0, 2])
                else: # No
                    with tf.variable_scope("h1"):
                        fw_inputs = tf.transpose(flat_xx, [1, 0, 2]) #[time_len, batch_size, input_size]
                        bw_inputs = tf.reverse_sequence(fw_inputs, flat_x_len, batch_dim=1, seq_dim=0)
                        # fw_inputs = tf.nn.dropout(fw_inputs, config.input_keep_prob)
                        # bw_inputs = tf.nn.dropout(bw_inputs, config.input_keep_prob)
                        prep_fw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                        prep_bw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                        fw_outputs, fw_final = prep_fw_cell(fw_inputs, dtype=tf.float32, sequence_length=flat_x_len, scope="fw")
                        bw_outputs, bw_final = prep_bw_cell(bw_inputs, dtype=tf.float32, sequence_length=flat_x_len, scope="bw")
                        bw_outputs = tf.reverse_sequence(bw_outputs, flat_x_len, batch_dim=1, seq_dim=0)
                        current_inputs = tf.concat((fw_outputs, bw_outputs), 2)
                        output = tf.transpose(current_inputs, [1, 0, 2])
                h = tf.expand_dims(output, 1) # [N, M, JX, 2d]
            else:
                (fw_u, bw_u), _ = bidirectional_dynamic_rnn(d_cell, d_cell, qq, q_len, dtype='float', scope='u1')  # [N, J, d], [N, d]
                u = tf.concat([fw_u, bw_u], 2)
                if config.share_lstm_weights:
                    tf.get_variable_scope().reuse_variables()
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='u1')  # [N, M, JX, 2d]
                    h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
                else:
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='h1')  # [N, M, JX, 2d]
                    h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u # hidden state of Q = u
            self.tensor_dict['h'] = h # hidden state of C = h

        # Attention Flow Layer (4th layer in the paper)
        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]), [N * M, JQ, 2 * d])
                q_mask = tf.reshape(tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]), [N * M, JQ])
                first_cell = AttentionCell(cell, u, size=d, mask=q_mask, mapper='sim',
                                           input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
            else:
                p0 = attention_layer(config, self.is_train, h, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="p0", tensor_dict=self.tensor_dict)
                first_cell = d_cell # a GRU cell with dropout wrapper
            tp0 = p0 # Output of Attention layer

        # Modeling layer (5th layer in the paper)
        with tf.variable_scope('modeling_layer'):
            if config.use_fused_lstm:
                g1, encoder_state_final = build_fused_bidirectional_rnn(inputs=p0,
                                                                        num_units=config.hidden_size,
                                                                        num_layers=config.num_modeling_layers,
                                                                        inputs_length=flat_x_len,
                                                                        input_keep_prob=config.input_keep_prob,
                                                                        scope='modeling_layer_g')

            else:
                for layer_idx in range(config.num_modeling_layers-1):
                    (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(first_cell, first_cell, p0, x_len,
                                                                  dtype='float', scope="g_{}".format(layer_idx))  # [N, M, JX, 2d]
                    p0 = tf.concat([fw_g0, bw_g0], 3)
                (fw_g1, bw_g1), (fw_s_f, bw_s_f) = bidirectional_dynamic_rnn(first_cell, first_cell, p0, x_len,
                                                                             dtype='float', scope='g1')  # [N, M, JX, 2d]
                g1 = tf.concat([fw_g1, bw_g1], 3)  # [N, M, JX, 2d]

        # Self match layer
        if config.use_self_match:
            s0 = tf.reshape(g1, [N * M, JX, 2 * d])                     # [N * M, JX, 2d]
            x_mask = tf.reshape(self.x_mask, [N * M, JX])               # [N * M, JX]
            if config.use_static_self_match:
                with tf.variable_scope("StaticSelfMatch"):              # implemented follow r-net section 3.3
                    W_x_Vj = tf.contrib.layers.fully_connected(         # [N * M, JX, d]
                        s0, int(d / 2), scope='row_first',
                        activation_fn=None, biases_initializer=None
                    )
                    W_x_Vt = tf.contrib.layers.fully_connected(         # [N * M, JX, d]
                        s0, int(d / 2), scope='col_first',
                        activation_fn=None, biases_initializer=None
                    )
                    sum_rc = tf.add(                                    # [N * M, JX, JX, d]
                        tf.expand_dims(W_x_Vj, 1),
                        tf.expand_dims(W_x_Vt, 2)
                    )
                    v = tf.get_variable('second', shape=[1, 1, 1, int(d / 2)], dtype=tf.float32)
                    Sj = tf.reduce_sum(tf.multiply(v, tf.tanh(sum_rc)), -1)     # [N * M, JX, JX]
                    Ai = softmax(Sj, mask=tf.expand_dims(x_mask, 1))            # [N * M, JX, JX]
                    Ai = tf.expand_dims(Ai, -1)                                 # [N * M, JX, JX, 1]
                    Vi = tf.expand_dims(s0, 1)                                  # [N * M, 1, JX, 2d]
                    Ct = tf.reduce_sum(                                         # [N * M, JX, 2d]
                        tf.multiply(Ai, Vi),
                        axis=2
                    )
                    inputs_Vt_Ct = tf.concat([s0, Ct], 2)                       # [N * M, JX, 4d]
                    if config.use_fused_lstm:
                        fw_inputs = tf.transpose(inputs_Vt_Ct, [1, 0, 2])  # [time_len, batch_size, input_size]
                        bw_inputs = tf.reverse_sequence(fw_inputs, flat_x_len, batch_dim=1, seq_dim=0)
                        fw_inputs = tf.nn.dropout(fw_inputs, config.input_keep_prob)
                        bw_inputs = tf.nn.dropout(bw_inputs, config.input_keep_prob)
                        prep_fw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                        prep_bw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                        fw_outputs, fw_s_f = prep_fw_cell(fw_inputs, dtype=tf.float32, sequence_length=flat_x_len,
                                                            scope="fw")
                        bw_outputs, bw_s_f = prep_bw_cell(bw_inputs, dtype=tf.float32, sequence_length=flat_x_len,
                                                            scope="bw")
                        fw_s_f = LSTMStateTuple(c=fw_s_f[0], h=fw_s_f[1])
                        bw_s_f = LSTMStateTuple(c=bw_s_f[0], h=bw_s_f[1])
                        bw_outputs = tf.reverse_sequence(bw_outputs, flat_x_len, batch_dim=1, seq_dim=0)
                        current_inputs = tf.concat((fw_outputs, bw_outputs), 2)
                        s1 = tf.transpose(current_inputs, [1, 0, 2])
                    else:
                        (fw_s, bw_s), (fw_s_f, bw_s_f) = bidirectional_dynamic_rnn(first_cell, first_cell, inputs_Vt_Ct,
                                                                                   flat_x_len, dtype='float',
                                                                                   scope='s')  # [N, M, JX, 2d]
                        s1 = tf.concat([fw_s, bw_s], 2)  # [N * M, JX, 2d], M == 1
            else:
                with tf.variable_scope("DynamicSelfMatch"):
                    first_cell = AttentionCell(cell, s0, size=d, mask=x_mask, is_train=self.is_train)
                    (fw_s, bw_s), (fw_s_f, bw_s_f) = bidirectional_dynamic_rnn(first_cell, first_cell, s0, x_len,
                                                                               dtype='float', scope='s')  # [N, M, JX, 2d]
                    s1 = tf.concat([fw_s, bw_s], 2)  # [N * M, JX, 2d], M == 1
            g1 = tf.expand_dims(s1, 1) # [N, M, JX, 2d]

        # prepare for PtrNet
        encoder_output = g1  # [N, M, JX, 2d]
        encoder_output = tf.expand_dims(tf.cast(self.x_mask, tf.float32), -1) * encoder_output  # [N, M, JX, 2d]

        if config.use_self_match or not config.use_fused_lstm:
            if config.GRU:
                encoder_state_final = tf.concat((fw_s_f, bw_s_f), 1, name='encoder_concat')
            else:
                if isinstance(fw_s_f, LSTMStateTuple):
                    encoder_state_c = tf.concat(
                        (fw_s_f.c, bw_s_f.c), 1, name='encoder_concat_c')
                    encoder_state_h = tf.concat(
                        (fw_s_f.h, bw_s_f.h), 1, name='encoder_concat_h')
                    encoder_state_final = LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)
                elif isinstance(fw_s_f, tf.Tensor):
                    encoder_state_final = tf.concat((fw_s_f, bw_s_f), 1, name='encoder_concat')
                else:
                    encoder_state_final = None
                    tf.logging.error("encoder_state_final not set")

        print("encoder_state_final:", encoder_state_final)

        with tf.variable_scope("output"):
            # eos_symbol = config.eos_symbol
            # next_symbol = config.next_symbol

            tf.assert_equal(M, 1)  # currently dynamic M is not supported, thus we assume M==1
            answer_string = tf.placeholder(
                shape=(N, 1, JA + 1),
                dtype=tf.int32,
                name='answer_string'
            )  # [N, M, JA + 1]
            answer_string_mask = tf.placeholder(
                shape=(N, 1, JA + 1),
                dtype=tf.bool,
                name='answer_string_mask'
            )  # [N, M, JA + 1]
            answer_string_length = tf.placeholder(
                shape=(N, 1),
                dtype=tf.int32,
                name='answer_string_length',
            ) # [N, M]
            self.tensor_dict['answer_string'] = answer_string
            self.tensor_dict['answer_string_mask'] = answer_string_mask
            self.tensor_dict['answer_string_length'] = answer_string_length
            self.answer_string = answer_string
            self.answer_string_mask = answer_string_mask
            self.answer_string_length = answer_string_length

            answer_string_flattened = tf.reshape(answer_string, [N * M, JA + 1])
            self.answer_string_flattened = answer_string_flattened  # [N * M, JA+1]
            print("answer_string_flattened:", answer_string_flattened)

            answer_string_length_flattened = tf.reshape(answer_string_length, [N * M])
            self.answer_string_length_flattened = answer_string_length_flattened  # [N * M]
            print("answer_string_length_flattened:", answer_string_length_flattened)

            decoder_cell = GRUCell(2 * d) if config.GRU else BasicLSTMCell(2 * d, state_is_tuple=True)

            with tf.variable_scope("Decoder"):
                decoder_train_logits = ptr_decoder(decoder_cell,
                                                   tf.reshape(tp0, [N * M, JX, 2 * d]),  # [N * M, JX, 2d]
                                                   tf.reshape(encoder_output, [N * M, JX, 2 * d]),  # [N * M, JX, 2d]
                                                   flat_x_len,
                                                   encoder_final_state=encoder_state_final,
                                                   max_encoder_length=config.sent_size_th,
                                                   decoder_output_length=answer_string_length_flattened,  # [N * M]
                                                   batch_size=N,  # N * M (M=1)
                                                   attention_proj_dim=self.config.decoder_proj_dim,
                                                   scope='ptr_decoder')  # [batch_size, dec_len*, enc_seq_len + 1]

                self.decoder_train_logits = decoder_train_logits
                print("decoder_train_logits:", decoder_train_logits)
                self.decoder_train_softmax = tf.nn.softmax(self.decoder_train_logits)
                self.decoder_inference = tf.argmax(decoder_train_logits, axis=2,
                                                   name='decoder_inference')  # [N, JA + 1]

            self.yp = tf.ones([N, M, JX], dtype=tf.int32) * -1
            self.yp2 = tf.ones([N, M, JX], dtype=tf.int32) * -1
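
The softmax(Sj, mask=...) call above uses a masked-softmax helper from the project's utilities. A minimal sketch of such a helper, assuming the mask broadcasts against the logits:

import tensorflow as tf

def masked_softmax(logits, mask):
    # push masked-out positions to a very large negative value so that
    # they receive ~0 probability after normalization
    logits += (1.0 - tf.cast(mask, tf.float32)) * -1e30
    return tf.nn.softmax(logits)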
Code Example #60
File: lemmatag.py Project: Hyperparticle/LemmaTag
    def construct(self, args, num_words, num_chars, lem_num_chars, num_tags, num_senses, bow, eow):
        with self.session.graph.as_default():
            # Training params
            self.is_training = tf.placeholder(tf.bool, [])
            self.learning_rate = tf.placeholder(tf.float32, [], name="learning_rate")

            # Sentence lengths
            self.sentence_lens = tf.placeholder(tf.int32, [None], name="sentence_lens")
            # Number of output words
            self.words_count = tf.reduce_sum(self.sentence_lens)
            words_count = self.words_count
            # Map sentences -> word list
            self.word_indexes = tf.placeholder(tf.int32, [None, 2], name='word_indexes')

            # Tag data
            self.tags = tf.placeholder(tf.int32, [None, None, len(num_tags)], name="tags")

            # Form IDs and charseqs
            self.word_ids = tf.placeholder(tf.int32, [None, None], name="word_ids")
            self.charseqs = tf.placeholder(tf.int32, [None, None], name="charseqs")
            self.charseq_lens = tf.placeholder(tf.int32, [None], name="charseq_lens")
            self.charseq_ids = tf.placeholder(tf.int32, [None, None], name="charseq_ids")

            # Lemma charseqs
            self.target_senses = tf.placeholder(tf.int32, [None, None], name="target_senses")
            self.target_ids = tf.placeholder(tf.int32, [None, None], name="target_ids")
            self.target_seqs = tf.placeholder(tf.int32, [None, None], name="target_seqs")
            self.target_seq_lens = tf.placeholder(tf.int32, [None], name="target_seq_lens")

            # Sentence weights
            weights = tf.sequence_mask(self.sentence_lens, dtype=tf.float32)
            sum_weights = tf.reduce_sum(weights)

            # Source forms lengths (in sentences and by words/lemmas)
            sentence_form_len = tf.nn.embedding_lookup(self.charseq_lens, self.charseq_ids)
            word_form_len = tf.gather_nd(sentence_form_len, self.word_indexes)

            # Target sequences for words
            _target_seq_lens = tf.nn.embedding_lookup(self.target_seq_lens, self.target_ids) # 2D
            _target_seqs = tf.nn.embedding_lookup(self.target_seqs, self.target_ids)
            # Flattened to word-list
            target_lens = tf.gather_nd(_target_seq_lens, self.word_indexes)
            target_seqs = tf.gather_nd(_target_seqs, self.word_indexes)
            target_senses = tf.gather_nd(self.target_senses, self.word_indexes)
            # Append eow after each sequence's last valid token:
            # reverse the valid prefix, pad one eow column at the front, reverse back
            target_seqs = tf.reverse_sequence(target_seqs, target_lens, 1)
            target_seqs = tf.pad(target_seqs, [[0, 0], [1, 0]], constant_values=eow)
            target_lens = target_lens + 1
            target_seqs = tf.reverse_sequence(target_seqs, target_lens, 1)

            # RNN Cell
            if args.rnn_cell == "LSTM":
                rnn_cell = tf.nn.rnn_cell.LSTMCell
            elif args.rnn_cell == "GRU":
                rnn_cell = tf.nn.rnn_cell.GRUCell
            else:
                raise ValueError("Unknown rnn_cell {}".format(args.rnn_cell))

            # Encoder
            enc_out = encoder_network(self.word_indexes, self.word_ids, self.charseqs, self.charseq_ids,
                                      self.charseq_lens, self.sentence_lens, num_words, num_chars, args.we_dim,
                                      args.cle_dim, rnn_cell, args.rnn_cell_dim, args.rnn_layers, args.dropout,
                                      self.is_training, args.separate_embed, args.separate_rnn)
            rnn_inputs_tags, word_rnn_outputs, sentence_rnn_outputs_tags, word_cle_states, word_cle_outputs = enc_out

            # Tagger
            loss_tag, tag_outputs, self.predictions, correct_tag, correct_tags_compositional = tag_decoder(
                self.tags, sentence_rnn_outputs_tags, weights, sum_weights, num_tags, args.tags, args.label_smoothing)

            # Tagger features for lemmatizer
            tag_feats = tag_features(tag_outputs, self.word_indexes, words_count, args.rnn_cell_dim, args.dropout,
                                        self.is_training, args.no_tags_to_lemmas, args.tag_signal_dropout)

            self.current_accuracy_tag, self.update_accuracy_tag = tf.metrics.mean(correct_tag, weights=sum_weights)
            self.current_accuracy_tags_compositional, self.update_accuracy_tags_compositional = tf.metrics.mean(
                correct_tags_compositional)

            # Lemmatizer
            loss_lem, predictions = lemma_decoder(word_rnn_outputs, tag_feats, word_cle_states, word_cle_outputs,
                                                  word_form_len, target_seqs, target_lens, self.charseq_lens,
                                                  words_count, lem_num_chars, rnn_cell, args.rnn_cell,
                                                  args.rnn_cell_dim, args.cle_dim, args.beams, args.beam_len_penalty,
                                                  args.lem_smoothing, bow, eow)
            self.lemma_predictions_training, self.lemma_predictions, self.lemma_prediction_lengths = predictions

            # Lemmatizer sense predictor
            loss_sense, self.sense_prediction = sense_predictor(word_rnn_outputs, tag_feats, target_senses, num_senses,
                                                                words_count, args.predict_sense, args.sense_smoothing)

            # Lemma predictions, loss and accuracy
            self._lemma_stats(target_seqs, target_lens, target_senses)

            # Loss, training and gradients
            # Compute combined weighted loss on tags and lemmas
            loss = loss_tag + loss_lem * args.loss_lem_w + loss_sense * args.loss_sense_w
            self.global_step = tf.train.create_global_step()
            self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(self.update_ops):
                optimizer = tf.contrib.opt.LazyAdamOptimizer(learning_rate=self.learning_rate, beta2=args.beta_2)
                gradients, variables = zip(*optimizer.compute_gradients(loss))
                self.gradient_norm = tf.global_norm(gradients)
                if args.grad_clip:
                    gradients, _ = tf.clip_by_global_norm(gradients, args.grad_clip)
                self.training = optimizer.apply_gradients(zip(gradients, variables), global_step=self.global_step, name="training")

            # Saver
            self.saver = tf.train.Saver(max_to_keep=2)

            # Summaries
            self.current_loss_tag, self.update_loss_tag = tf.metrics.mean(loss_tag, weights=sum_weights)
            self.current_loss_lem, self.update_loss_lem = tf.metrics.mean(loss_lem, weights=sum_weights)
            self.current_loss_sense, self.update_loss_sense = tf.metrics.mean(loss_sense, weights=sum_weights)
            self.current_loss, self.update_loss = tf.metrics.mean(loss, weights=sum_weights)
            self.reset_metrics = tf.variables_initializer(tf.get_collection(tf.GraphKeys.METRIC_VARIABLES))

            summary_writer = tf.contrib.summary.create_file_writer(args.logdir, flush_millis=1 * 1000)
            self.summaries = {}
            with summary_writer.as_default(), tf.contrib.summary.record_summaries_every_n_global_steps(1):
                self.summaries["train"] = [tf.contrib.summary.scalar("train/loss_tag", self.update_loss_tag),
                                           tf.contrib.summary.scalar("train/loss_sense", self.update_loss_sense),
                                           tf.contrib.summary.scalar("train/loss_lem", self.update_loss_lem),
                                           tf.contrib.summary.scalar("train/loss", self.update_loss),
                                           tf.contrib.summary.scalar("train/gradient", self.gradient_norm),
                                           tf.contrib.summary.scalar("train/accuracy_tag", self.update_accuracy_tag),
                                           tf.contrib.summary.scalar("train/accuracy_compositional_tags", self.update_accuracy_tags_compositional),
                                           tf.contrib.summary.scalar("train/accuracy_lem", self.update_accuracy_lem_train),
                                           tf.contrib.summary.scalar("train/accuracy_lemsense", self.update_accuracy_lemsense_train),
                                           tf.contrib.summary.scalar("train/learning_rate", self.learning_rate)]
            with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
                for dataset in ["dev", "test"]:
                    self.summaries[dataset] = [tf.contrib.summary.scalar(dataset + "/loss", self.current_loss),
                                               tf.contrib.summary.scalar(dataset + "/accuracy_tag", self.current_accuracy_tag),
                                               tf.contrib.summary.scalar(dataset + "/accuracy_compositional_tags", self.current_accuracy_tags_compositional),
                                               tf.contrib.summary.scalar(dataset + "/accuracy_lem", self.current_accuracy_lem),
                                               tf.contrib.summary.scalar(dataset + "/accuracy_lemsense", self.current_accuracy_lemsense)]

            # Initialize variables
            self.session.run(tf.global_variables_initializer())
            with summary_writer.as_default():
                tf.contrib.summary.initialize(session=self.session, graph=self.session.graph)
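
The reverse / pad / reverse idiom used for target_seqs above places eow right after each sequence's last valid token rather than at the padded end. A standalone numeric sketch with toy values:

import tensorflow as tf

eow = 9
seqs = tf.constant([[1, 2, 3, 0],
                    [4, 5, 0, 0]])
lens = tf.constant([3, 2])
rev = tf.reverse_sequence(seqs, lens, seq_axis=1)
padded = tf.pad(rev, [[0, 0], [1, 0]], constant_values=eow)
out = tf.reverse_sequence(padded, lens + 1, seq_axis=1)

with tf.Session() as sess:
    print(sess.run(out))  # [[1 2 3 9 0]
                          #  [4 5 9 0 0]]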