def _build_bidi_rnn_fused(self, inputs, sequence_length, hparams, dtype):
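    """Builds a bi-directional encoder layer from two fused LSTM cells.

    Returns the concatenated forward/backward outputs and a tuple of the
    forward and backward LSTMStateTuples.
    """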
    if (not np.isclose(hparams.dropout, 0.) and
        self.mode == tf.contrib.learn.ModeKeys.TRAIN):
      inputs = tf.nn.dropout(inputs, keep_prob=1-hparams.dropout)

    fwd_cell = block_lstm.LSTMBlockFusedCell(
        hparams.num_units, hparams.forget_bias, dtype=dtype)
    fwd_encoder_outputs, (fwd_final_c, fwd_final_h) = fwd_cell(
        inputs,
        dtype=dtype,
        sequence_length=sequence_length)

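    # LSTMBlockFusedCell only scans forward in time, so emulate the backward
    # direction by reversing the time-major inputs, running a second fused
    # cell, and reversing its outputs back.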
    inputs_r = tf.reverse_sequence(
        inputs, sequence_length, batch_axis=1, seq_axis=0)
    bak_cell = block_lstm.LSTMBlockFusedCell(
        hparams.num_units, hparams.forget_bias, dtype=dtype)
    bak_encoder_outputs, (bak_final_c, bak_final_h) = bak_cell(
        inputs_r,
        dtype=dtype,
        sequence_length=sequence_length)
    bak_encoder_outputs = tf.reverse_sequence(
        bak_encoder_outputs, sequence_length, batch_axis=1, seq_axis=0)
    bi_encoder_outputs = tf.concat(
        [fwd_encoder_outputs, bak_encoder_outputs], axis=-1)
    fwd_state = tf.nn.rnn_cell.LSTMStateTuple(fwd_final_c, fwd_final_h)
    bak_state = tf.nn.rnn_cell.LSTMStateTuple(bak_final_c, bak_final_h)
    bi_encoder_state = (fwd_state, bak_state)

    # Masks aren't applied to the outputs, but the final states are
    # post-masking.
    return bi_encoder_outputs, bi_encoder_state

  def _build_unidi_rnn_fused(self, inputs, state,
                             sequence_length, hparams, dtype):
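    """Builds a single uni-directional layer using a fused LSTM cell."""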
    if (not np.isclose(hparams.dropout, 0.) and
        self.mode == tf.contrib.learn.ModeKeys.TRAIN):
      inputs = tf.nn.dropout(inputs, keep_prob=1-hparams.dropout)

    cell = block_lstm.LSTMBlockFusedCell(
        hparams.num_units, hparams.forget_bias, dtype=dtype)
    outputs, (final_c, final_h) = cell(
        inputs,
        state,
        dtype=dtype,
        sequence_length=sequence_length)

    # Masks aren't applied to the outputs, but the final states are
    # post-masking.
    return outputs, tf.nn.rnn_cell.LSTMStateTuple(final_c, final_h)

  def _build_encoder_layers_unidi(self, inputs, sequence_length,
                                  num_uni_layers, hparams, dtype):
    """Build encoder layers all at once."""
    encoder_outputs = None
    encoder_state = tuple()

    if hparams.use_fused_lstm:
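      # Build each layer manually so that inter-layer dropout and residual
      # connections can be applied around the fused cells.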
      for i in range(num_uni_layers):
        if (not np.isclose(hparams.dropout, 0.) and
            self.mode == tf.contrib.learn.ModeKeys.TRAIN):
          cell_inputs = tf.nn.dropout(inputs, keep_prob=1-hparams.dropout)
        else:
          cell_inputs = inputs

        cell = block_lstm.LSTMBlockFusedCell(
            hparams.num_units, hparams.forget_bias, dtype=dtype)
        encoder_outputs, (final_c, final_h) = cell(
            cell_inputs,
            dtype=dtype,
            sequence_length=sequence_length)
        encoder_state += (tf.nn.rnn_cell.LSTMStateTuple(final_c, final_h),)
        if i >= num_uni_layers - self.num_encoder_residual_layers:
          # Add the pre-dropout inputs. Residual wrapper is applied after
          # dropout wrapper.
          encoder_outputs += inputs
        inputs = encoder_outputs
    elif hparams.use_cudnn_lstm:
      # Build one cudnn layer at a time; dropout isn't applied inside the
      # cudnn kernel, so apply it to the layer inputs here.
      for i in range(num_uni_layers):
        if (not np.isclose(hparams.dropout, 0.) and
            self.mode == tf.contrib.learn.ModeKeys.TRAIN):
          inputs = tf.nn.dropout(inputs, keep_prob=1-hparams.dropout)

        encoder_outputs, encoder_states = self._build_unidi_rnn_cudnn(
            inputs,
            None,  # initial_state
            sequence_length,
            dtype,
            hparams,
            1,  # num_layer
            is_fwd=True)
        encoder_state += (tf.nn.rnn_cell.LSTMStateTuple(encoder_states.c,
                                                        encoder_states.h),)
        if i >= num_uni_layers - self.num_encoder_residual_layers:
          encoder_outputs += inputs
        inputs = encoder_outputs
    else:
      uni_cell = model_helper.create_rnn_cell(
          unit_type=hparams.unit_type,
          num_units=hparams.num_units,
          num_layers=num_uni_layers,
          num_residual_layers=self.num_encoder_residual_layers,
          forget_bias=hparams.forget_bias,
          dropout=hparams.dropout,
          dtype=dtype,
          mode=self.mode,
          single_cell_fn=self.single_cell_fn,
          use_block_lstm=hparams.use_block_lstm)

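      # Run the whole stack in one pass, either with dynamic_rnn or with the
      # functional_rnn implementation from tf.contrib.recurrent.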
      if hparams.use_dynamic_rnn:
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            uni_cell,
            inputs,
            dtype=dtype,
            sequence_length=sequence_length,
            time_major=self.time_major)
      else:
        encoder_outputs, encoder_state = tf.contrib.recurrent.functional_rnn(
            uni_cell,
            inputs,
            dtype=dtype,
            sequence_length=sequence_length,
            time_major=self.time_major,
            use_tpu=False)

    return encoder_state, encoder_outputs