Example 1
def cudnn_bidirectional_lstm(cells_fw, cells_bw, inputs, length, is_training):
    """Implements stacked bidirectional LSTM for variable-length inputs."""
    inputs_fw = tf.transpose(inputs, [1, 0, 2])
    for lstm_fw, lstm_bw in zip(cells_fw, cells_bw):
        outputs_fw, _ = lstm_fw(inputs_fw, training=is_training)
        inputs_bw = tf.reverse_sequence(inputs_fw,
                                        length,
                                        seq_axis=0,
                                        batch_axis=1)
        outputs_bw, _ = lstm_bw(inputs_bw, training=is_training)
        outputs_bw = tf.reverse_sequence(outputs_bw,
                                         length,
                                         seq_axis=0,
                                         batch_axis=1)
        inputs_fw = tf.concat([outputs_fw, outputs_bw], axis=2)
    return outputs_fw, outputs_bw
Example 2
def lstm_seq2seq_internal(inputs, targets, hparams, train):
    """The basic LSTM seq2seq model, main step used for training."""
    with tf.variable_scope("lstm_seq2seq"):
        if inputs is not None:
            inputs_length = common_layers.length_from_embedding(inputs)
            # Flatten inputs.
            inputs = common_layers.flatten4d3d(inputs)

            # LSTM encoder.
            inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
            _, final_encoder_state = lstm(inputs, inputs_length, hparams,
                                          train, "encoder")
        else:
            final_encoder_state = None

        # LSTM decoder.
        shifted_targets = common_layers.shift_right(targets)
        # Add 1 to account for the padding added to the left by shift_right.
        targets_length = common_layers.length_from_embedding(
            shifted_targets) + 1
        decoder_outputs, _ = lstm(common_layers.flatten4d3d(shifted_targets),
                                  targets_length,
                                  hparams,
                                  train,
                                  "decoder",
                                  initial_state=final_encoder_state)
        return tf.expand_dims(decoder_outputs, axis=2)
Example 3
def _single_lstm(input_emb, input_len, hidden_size, is_fwd, use_cudnn):
  """Compute the outputs of a single LSTM (subroutine of stacked_bilstm).

  Be careful if used anywhere outside of stacked_bilstm, which converts the
  sequences to the time-major format expected by this function.

  Args:
    input_emb: <float32> [sequence_length, batch_size, emb]
    input_len: <int32> [batch_size]
    hidden_size: Number of units in the LSTM cell.
    is_fwd: Boolean indicating the directionality of the LSTM.
    use_cudnn: Boolean indicating the use of cudnn.

  Returns:
    output_emb: <float32> [sequence_length, batch_size, emb]
  """
  if not is_fwd:
    input_emb = tf.reverse_sequence(
        input_emb,
        input_len,
        seq_axis=0,
        batch_axis=1)
  if use_cudnn:
    lstm = contrib_cudnn_rnn.CudnnLSTM(
        num_layers=1,
        num_units=hidden_size,
        input_mode=cudnn_rnn_ops.CUDNN_INPUT_LINEAR_MODE,
        direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION)
    lstm.build(input_emb.shape)
    output_emb, _ = lstm(input_emb)
  else:
    cell = contrib_cudnn_rnn.CudnnCompatibleLSTMCell(hidden_size)
    cell = contrib_rnn.MultiRNNCell([cell])
    output_emb, _ = tf.nn.dynamic_rnn(
        cell=cell,
        inputs=input_emb,
        sequence_length=input_len,
        dtype=tf.float32,
        time_major=True)
  if not is_fwd:
    output_emb = tf.reverse_sequence(
        output_emb,
        input_len,
        seq_axis=0,
        batch_axis=1)
  return output_emb
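The reverse-run-reverse pattern used above (and in several of the following examples) can be checked in isolation. Below is a minimal sketch, not part of the original code, assuming TF2 eager execution and substituting tf.cumsum for the LSTM; batch-major layout is used here only for readability.

import tensorflow as tf

# Batch-major [batch=1, time=4] input; the last step is padding.
x = tf.constant([[1., 2., 3., 0.]])
lengths = tf.constant([3])

rev = tf.reverse_sequence(x, lengths, seq_axis=1, batch_axis=0)
scanned = tf.cumsum(rev, axis=1)   # left-to-right scan over the reversed input
out = tf.reverse_sequence(scanned, lengths, seq_axis=1, batch_axis=0)
# out[:, :3] == [[6., 5., 3.]]: a right-to-left cumulative sum over the valid
# steps. As with the cuDNN branch above, values at padded positions are not
# meaningful.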
Example 4
def build_graph(parameters):
    """Build the graph for reverse_sequence tests."""
    input_value = tf.compat.v1.placeholder(dtype=parameters["input_dtype"],
                                           name="input",
                                           shape=parameters["input_shape"])
    outs = tf.reverse_sequence(input_value,
                               seq_lengths=parameters["seq_lengths"],
                               batch_axis=parameters["batch_axis"],
                               seq_axis=parameters["seq_axis"])
    return [input_value], [outs]
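For reference, here is a minimal standalone sketch (not part of the test above, assuming TF2 eager execution) of what tf.reverse_sequence computes: for each batch entry i, the first seq_lengths[i] steps along seq_axis are reversed and the padded tail is left untouched.

import tensorflow as tf

x = tf.constant([[1, 2, 3, 0],
                 [4, 5, 6, 7]])   # batch-major: [batch, time]
lengths = tf.constant([3, 4])

y = tf.reverse_sequence(x, lengths, seq_axis=1, batch_axis=0)
# y == [[3, 2, 1, 0],
#       [7, 6, 5, 4]]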
Example 5
def birnn(cell,
          inputs,
          sequence_length,
          initial_state_fw=None,
          initial_state_bw=None,
          ff_keep_prob=1.,
          recur_keep_prob=1.,
          enforce_dropout=False,
          dtype=tf.float32,
          scope=None):
    """Bidirectional RNN over batch-major, variable-length inputs."""

    # Forward direction
    with tf.variable_scope(scope or 'BiRNN_FW') as fw_scope:
        output_fw, output_state_fw = rnn(cell,
                                         inputs,
                                         sequence_length,
                                         initial_state_fw,
                                         ff_keep_prob,
                                         recur_keep_prob,
                                         enforce_dropout,
                                         dtype,
                                         scope=fw_scope)

    # Backward direction
    rev_inputs = tf.reverse_sequence(inputs, sequence_length,
                                     seq_axis=1, batch_axis=0)
    with tf.variable_scope(scope or 'BiRNN_BW') as bw_scope:
        output_bw, output_state_bw = rnn(cell,
                                         rev_inputs,
                                         sequence_length,
                                         initial_state_bw,
                                         ff_keep_prob,
                                         recur_keep_prob,
                                         enforce_dropout,
                                         dtype,
                                         scope=bw_scope)
    output_bw = tf.reverse_sequence(output_bw, sequence_length,
                                    seq_axis=1, batch_axis=0)
    # Concat each of the forward/backward outputs
    outputs = tf.concat([output_fw, output_bw], 2)

    return outputs, tf.tuple([output_state_fw, output_state_bw])
Example 6
    def testScanSumEquivalenceWithSeqLen(self):
        with self.test_session() as sess:
            sequence_lengths = [0, 2]
            bootstrap = tf.constant([0.5, 1.5], dtype=tf.float32)

            sequence = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]
            decays = [[.1, .2, .3, .4, .5], [.6, .7, .8, .9, .10]]

            eq_sequence = [[0, 0, 0, 0, 0], [6, 7, 0, 0, 0]]
            eq_decays = [[0, 0, 0, 0, 0], [.6, .7, 0, 0, 0]]

            eq_reverse_sequence = [[0, 0, 0, 0, 0], [7, 6, 0, 0, 0]]
            eq_reverse_decays = [[0, 0, 0, 0, 0], [.7, .6, 0, 0, 0]]

            # We use transpose because it is easier to define the input data in
            # BxT (batch x time) form, while scan_discounted_sum assumes TxB form.
            sequence_in = tf.transpose(tf.constant(sequence, dtype=tf.float32))
            decays_in = tf.transpose(tf.constant(decays, dtype=tf.float32))
            eq_sequence_in = tf.transpose(
                tf.constant(eq_sequence, dtype=tf.float32))
            eq_decays_in = tf.transpose(
                tf.constant(eq_decays, dtype=tf.float32))
            eq_reverse_sequence_in = tf.transpose(
                tf.constant(eq_reverse_sequence, dtype=tf.float32))
            eq_reverse_decays_in = tf.transpose(
                tf.constant(eq_reverse_decays, dtype=tf.float32))

            eq_result = sequence_ops.scan_discounted_sum(
                sequence_in,
                decays_in,
                bootstrap,
                reverse=False,
                sequence_lengths=sequence_lengths)
            exp_eq_result = sequence_ops.scan_discounted_sum(
                eq_sequence_in, eq_decays_in, bootstrap)

            eq_reverse_result = sequence_ops.scan_discounted_sum(
                sequence_in,
                decays_in,
                bootstrap,
                reverse=True,
                sequence_lengths=sequence_lengths)
            exp_eq_reverse_result = sequence_ops.scan_discounted_sum(
                eq_reverse_sequence_in, eq_reverse_decays_in, bootstrap)
            exp_eq_reverse_result = tf.reverse_sequence(exp_eq_reverse_result,
                                                        sequence_lengths,
                                                        seq_axis=0,
                                                        batch_axis=1)

            self.assertAllClose(sess.run(eq_result), sess.run(exp_eq_result))
            self.assertAllClose(sess.run(eq_reverse_result),
                                sess.run(exp_eq_reverse_result))
Example 7
    def body(self, features):
        if self._hparams.initializer == "orthogonal":
            raise ValueError("LSTM models fail with orthogonal initializer.")
        train = self._hparams.mode == tf_estimator.ModeKeys.TRAIN
        inputs = features.get("inputs")
        inputs_length = common_layers.length_from_embedding(inputs)
        # Flatten inputs.
        inputs = common_layers.flatten4d3d(inputs)
        # LSTM encoder.
        inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
        encoder_output, _ = lstm(inputs, inputs_length, self._hparams, train,
                                 "encoder")
        return tf.expand_dims(encoder_output, axis=2)
Example 8
    def _reverse(self, t, lengths):
        """Time reverse the provided tensor or list of tensors.

        Assumes the top dimension is the time dimension.

        Args:
          t: 3D tensor or list of 2D tensors to be reversed
          lengths: 1D tensor of lengths, or `None`

        Returns:
          A reversed tensor or list of tensors
        """
        if isinstance(t, list):
            return list(reversed(t))
        else:
            if lengths is None:
                return tf.reverse(t, [0])
            else:
                return tf.reverse_sequence(t, lengths, seq_axis=0, batch_axis=1)
Example 9
def lstm_seq2seq_internal_attention(inputs, targets, hparams, train,
                                    inputs_length, targets_length):
    """LSTM seq2seq model with attention, main step used for training."""
    with tf.variable_scope("lstm_seq2seq_attention"):
        # Flatten inputs.
        inputs = common_layers.flatten4d3d(inputs)

        # LSTM encoder.
        inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
        encoder_outputs, final_encoder_state = lstm(inputs, inputs_length,
                                                    hparams, train, "encoder")

        # LSTM decoder with attention.
        shifted_targets = common_layers.shift_right(targets)
        # Add 1 to account for the padding added to the left by shift_right.
        targets_length = targets_length + 1
        decoder_outputs = lstm_attention_decoder(
            common_layers.flatten4d3d(shifted_targets), hparams, train,
            "decoder", final_encoder_state, encoder_outputs, inputs_length,
            targets_length)
        return tf.expand_dims(decoder_outputs, axis=2)
Example 10
def _reverse_seq(sequence, sequence_lengths=None):
    """Reverse sequence along dim 0.

    Args:
      sequence: Tensor of shape [T, B, ...].
      sequence_lengths: (optional) tensor of shape [B]. If `None`, only reverse
        along dim 0.

    Returns:
      Tensor of same shape as sequence with dim 0 reversed up to
      sequence_lengths.
    """
    if sequence_lengths is None:
        return tf.reverse(sequence, [0])

    sequence_lengths = tf.convert_to_tensor(sequence_lengths)
    with tf.control_dependencies(
        [tf.assert_equal(sequence.shape[1], sequence_lengths.shape[0])]):
        return tf.reverse_sequence(sequence,
                                   sequence_lengths,
                                   seq_axis=0,
                                   batch_axis=1)
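A small usage sketch for _reverse_seq (values made up, assuming TF2 eager execution), showing the time-major [T, B] convention it expects:

import tensorflow as tf

seq = tf.constant([[1., 10.],
                   [2., 20.],
                   [3., 30.]])    # time-major: [T=3, B=2]
rev = _reverse_seq(seq, sequence_lengths=[2, 3])
# rev == [[2., 30.],
#         [1., 20.],
#         [3., 10.]]: each column is reversed over its first `length` steps.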
Example 11
def cudnn_lstm_layer(inputs,
                     batch_size,
                     num_units,
                     lengths=None,
                     stack_size=1,
                     rnn_dropout_drop_amt=0,
                     is_training=True,
                     bidirectional=True):
    """Create a LSTM layer that uses cudnn."""
    inputs_t = tf.transpose(inputs, [1, 0, 2])
    if lengths is not None:
        all_outputs = [inputs_t]
        for i in range(stack_size):
            with tf.variable_scope('stack_' + str(i)):
                with tf.variable_scope('forward'):
                    lstm_fw = contrib_cudnn_rnn.CudnnLSTM(
                        num_layers=1,
                        num_units=num_units,
                        direction='unidirectional',
                        dropout=rnn_dropout_drop_amt,
                        kernel_initializer=contrib_layers.
                        variance_scaling_initializer(),
                        bias_initializer=tf.zeros_initializer(),
                    )

                c_fw = tf.zeros([1, batch_size, num_units], tf.float32)
                h_fw = tf.zeros([1, batch_size, num_units], tf.float32)

                outputs_fw, _ = lstm_fw(all_outputs[-1], (h_fw, c_fw),
                                        training=is_training)

                combined_outputs = outputs_fw

                if bidirectional:
                    with tf.variable_scope('backward'):
                        lstm_bw = contrib_cudnn_rnn.CudnnLSTM(
                            num_layers=1,
                            num_units=num_units,
                            direction='unidirectional',
                            dropout=rnn_dropout_drop_amt,
                            kernel_initializer=contrib_layers.
                            variance_scaling_initializer(),
                            bias_initializer=tf.zeros_initializer(),
                        )

                    c_bw = tf.zeros([1, batch_size, num_units], tf.float32)
                    h_bw = tf.zeros([1, batch_size, num_units], tf.float32)

                    inputs_reversed = tf.reverse_sequence(all_outputs[-1],
                                                          lengths,
                                                          seq_axis=0,
                                                          batch_axis=1)
                    outputs_bw, _ = lstm_bw(inputs_reversed, (h_bw, c_bw),
                                            training=is_training)

                    outputs_bw = tf.reverse_sequence(outputs_bw,
                                                     lengths,
                                                     seq_axis=0,
                                                     batch_axis=1)

                    combined_outputs = tf.concat([outputs_fw, outputs_bw],
                                                 axis=2)

                all_outputs.append(combined_outputs)

        # For consistency with cudnn, here we just return the top of the stack,
        # although this can easily be altered to do other things, including
        # being more ResNet-like.
        return tf.transpose(all_outputs[-1], [1, 0, 2])
    else:
        lstm = contrib_cudnn_rnn.CudnnLSTM(
            num_layers=stack_size,
            num_units=num_units,
            direction='bidirectional' if bidirectional else 'unidirectional',
            dropout=rnn_dropout_drop_amt,
            kernel_initializer=contrib_layers.variance_scaling_initializer(),
            bias_initializer=tf.zeros_initializer(),
        )
        stack_multiplier = 2 if bidirectional else 1
        c = tf.zeros([stack_multiplier * stack_size, batch_size, num_units],
                     tf.float32)
        h = tf.zeros([stack_multiplier * stack_size, batch_size, num_units],
                     tf.float32)
        outputs, _ = lstm(inputs_t, (h, c), training=is_training)
        outputs = tf.transpose(outputs, [1, 0, 2])

        return outputs
Example 12
    def _build_lstms(self):
        # now the LSTMs
        # these will collect the initial states for the forward
        #   (and reverse LSTMs if we are doing bidirectional)

        # parse the options
        lstm_dim = self.options['lstm']['dim']
        projection_dim = self.options['lstm']['projection_dim']
        n_lstm_layers = self.options['lstm'].get('n_layers', 1)
        cell_clip = self.options['lstm'].get('cell_clip')
        proj_clip = self.options['lstm'].get('proj_clip')
        use_skip_connections = self.options['lstm']['use_skip_connections']
        # if use_skip_connections:
        #    print("USING SKIP CONNECTIONS", file=sys.stderr)
        # else:
        #    print("NOT USING SKIP CONNECTIONS", file=sys.stderr)

        # the sequence lengths from input mask
        if self.use_character_inputs:
            mask = tf.reduce_any(self.ids_placeholder > 0, axis=2)
        else:
            mask = self.ids_placeholder > 0
        sequence_lengths = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)
        batch_size = tf.shape(sequence_lengths)[0]

        # for each direction, we'll store tensors for each layer
        self.lstm_outputs = {'forward': [], 'backward': []}
        self.lstm_state_sizes = {'forward': [], 'backward': []}
        self.lstm_init_states = {'forward': [], 'backward': []}
        self.lstm_final_states = {'forward': [], 'backward': []}

        update_ops = []
        for direction in ['forward', 'backward']:
            if direction == 'forward':
                layer_input = self.embedding
            else:
                layer_input = tf.reverse_sequence(self.embedding,
                                                  sequence_lengths,
                                                  seq_axis=1,
                                                  batch_axis=0)

            for i in range(n_lstm_layers):
                if projection_dim < lstm_dim:
                    # Project the LSTM output down to projection_dim.
                    lstm_cell = tf.nn.rnn_cell.LSTMCell(
                        lstm_dim,
                        num_proj=projection_dim,
                        cell_clip=cell_clip,
                        proj_clip=proj_clip)
                else:
                    lstm_cell = tf.nn.rnn_cell.LSTMCell(lstm_dim,
                                                        cell_clip=cell_clip,
                                                        proj_clip=proj_clip)

                if use_skip_connections:
                    # ResidualWrapper adds inputs to outputs
                    if i == 0:
                        # don't add skip connection from token embedding to
                        # 1st layer output
                        pass
                    else:
                        # add a skip connection
                        lstm_cell = tf.nn.rnn_cell.ResidualWrapper(lstm_cell)

                # collect the input state, run the dynamic rnn, collect
                # the output
                state_size = lstm_cell.state_size
                # the LSTMs are stateful.  To support multiple batch sizes,
                # we'll allocate size for states up to max_batch_size,
                # then use the first batch_size entries for each batch
                init_states = [
                    tf.Variable(tf.zeros([self._max_batch_size, dim]),
                                trainable=False) for dim in state_size
                ]
                batch_init_states = [
                    state[:batch_size, :] for state in init_states
                ]

                if direction == 'forward':
                    i_direction = 0
                else:
                    i_direction = 1
                f_string = 'RNN_{0}/RNN/MultiRNNCell/Cell{1}'
                variable_scope_name = f_string.format(i_direction, i)
                with tf.variable_scope(variable_scope_name):
                    layer_output, final_state = tf.nn.dynamic_rnn(
                        lstm_cell,
                        layer_input,
                        sequence_length=sequence_lengths,
                        initial_state=tf.nn.rnn_cell.LSTMStateTuple(
                            *batch_init_states),
                    )

                self.lstm_state_sizes[direction].append(state_size)
                self.lstm_init_states[direction].append(init_states)
                self.lstm_final_states[direction].append(final_state)
                if direction == 'forward':
                    self.lstm_outputs[direction].append(layer_output)
                else:
                    self.lstm_outputs[direction].append(
                        tf.reverse_sequence(layer_output,
                                            sequence_lengths,
                                            seq_axis=1,
                                            batch_axis=0))

                with tf.control_dependencies([layer_output]):
                    # update the initial states
                    for in_st in range(2):
                        new_state = tf.concat([
                            final_state[in_st][:batch_size, :],
                            init_states[in_st][batch_size:, :]
                        ],
                                              axis=0)
                        state_update_op = tf.assign(init_states[in_st],
                                                    new_state)
                        update_ops.append(state_update_op)

                layer_input = layer_output

        self.mask = mask
        self.sequence_lengths = sequence_lengths
        self.update_state_op = tf.group(*update_ops)
Example 13
    def _build_ops(lm_graph):
        with tf.control_dependencies([lm_graph.update_state_op]):
            # get the LM embeddings
            token_embeddings = lm_graph.embedding
            layers = [tf.concat([token_embeddings, token_embeddings], axis=2)]

            n_lm_layers = len(lm_graph.lstm_outputs['forward'])
            for i in range(n_lm_layers):
                layers.append(
                    tf.concat([
                        lm_graph.lstm_outputs['forward'][i],
                        lm_graph.lstm_outputs['backward'][i]
                    ],
                              axis=-1))

            # The layers include the BOS/EOS tokens.  Remove them
            sequence_length_wo_bos_eos = lm_graph.sequence_lengths - 2
            layers_without_bos_eos = []
            for layer in layers:
                layer_wo_bos_eos = layer[:, 1:, :]
                layer_wo_bos_eos = tf.reverse_sequence(
                    layer_wo_bos_eos,
                    lm_graph.sequence_lengths - 1,
                    seq_axis=1,
                    batch_axis=0,
                )
                layer_wo_bos_eos = layer_wo_bos_eos[:, 1:, :]
                layer_wo_bos_eos = tf.reverse_sequence(
                    layer_wo_bos_eos,
                    sequence_length_wo_bos_eos,
                    seq_axis=1,
                    batch_axis=0,
                )
                layers_without_bos_eos.append(layer_wo_bos_eos)

            # concatenate the layers
            lm_embeddings = tf.concat(
                [tf.expand_dims(t, axis=1) for t in layers_without_bos_eos],
                axis=1)

            # get the mask op without bos/eos.
            # tf doesn't support reversing boolean tensors, so cast
            # to int then back
            mask_wo_bos_eos = tf.cast(lm_graph.mask[:, 1:], 'int32')
            mask_wo_bos_eos = tf.reverse_sequence(
                mask_wo_bos_eos,
                lm_graph.sequence_lengths - 1,
                seq_axis=1,
                batch_axis=0,
            )
            mask_wo_bos_eos = mask_wo_bos_eos[:, 1:]
            mask_wo_bos_eos = tf.reverse_sequence(
                mask_wo_bos_eos,
                sequence_length_wo_bos_eos,
                seq_axis=1,
                batch_axis=0,
            )
            mask_wo_bos_eos = tf.cast(mask_wo_bos_eos, 'bool')

        return {
            'lm_embeddings': lm_embeddings,
            'lengths': sequence_length_wo_bos_eos,
            'token_embeddings': lm_graph.embedding,
            'mask': mask_wo_bos_eos,
        }
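The slice-plus-double-reverse_sequence pattern above first drops the leading BOS with [:, 1:, :], then removes each row's trailing EOS by reversing the remaining valid prefix, slicing off what is now the leading position, and reversing back. A toy sketch of the same idea on token ids (ids and lengths are made up, assuming TF2 eager execution):

import tensorflow as tf

# Two padded rows: <S> a b </S> <pad>  and  <S> c </S> <pad> <pad>.
tokens = tf.constant([[1, 10, 11, 2, 0],
                      [1, 12, 2, 0, 0]])
lengths = tf.constant([4, 3])   # lengths including BOS and EOS

x = tokens[:, 1:]                                                  # drop BOS
x = tf.reverse_sequence(x, lengths - 1, seq_axis=1, batch_axis=0)
x = x[:, 1:]                                                       # drop EOS
x = tf.reverse_sequence(x, lengths - 2, seq_axis=1, batch_axis=0)
# x == [[10, 11, 0],
#       [12,  0, 0]] with lengths - 2 == [2, 1] valid tokens per row.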