Example #1
    def _process_batch(self, inputs, initial_state):
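        # cuDNN kernels consume time-major input, so batch-major data is
        # transposed first.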
        if not self.time_major:
            inputs = tf.transpose(inputs, perm=(1, 0, 2))
        input_h = initial_state[0]
        input_c = initial_state[1]
        input_h = tf.expand_dims(input_h, axis=0)
        input_c = tf.expand_dims(input_c, axis=0)

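        # Pack the per-gate kernel/recurrent-kernel slices and the eight bias
        # pieces into the single flat parameter buffer cuDNN expects (LSTM
        # gate order: [i, f, c, o]).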
        params = gru_lstm_utils.canonical_to_params(
            weights=[
                self.kernel[:, : self.units],
                self.kernel[:, self.units : self.units * 2],
                self.kernel[:, self.units * 2 : self.units * 3],
                self.kernel[:, self.units * 3 :],
                self.recurrent_kernel[:, : self.units],
                self.recurrent_kernel[:, self.units : self.units * 2],
                self.recurrent_kernel[:, self.units * 2 : self.units * 3],
                self.recurrent_kernel[:, self.units * 3 :],
            ],
            biases=[
                self.bias[: self.units],
                self.bias[self.units : self.units * 2],
                self.bias[self.units * 2 : self.units * 3],
                self.bias[self.units * 3 : self.units * 4],
                self.bias[self.units * 4 : self.units * 5],
                self.bias[self.units * 5 : self.units * 6],
                self.bias[self.units * 6 : self.units * 7],
                self.bias[self.units * 7 :],
            ],
            shape=self._vector_shape,
        )

        args = {
            "input": inputs,
            "input_h": input_h,
            "input_c": input_c,
            "params": params,
            "is_training": True,
        }

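        # rnn_mode is omitted: tf.raw_ops.CudnnRNNV2 defaults to 'lstm'.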
        outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV2(**args)

        if self.stateful or self.return_state:
            h = h[0]
            c = c[0]
        if self.return_sequences:
            if self.time_major:
                output = outputs
            else:
                output = tf.transpose(outputs, perm=(1, 0, 2))
        else:
            output = outputs[-1]
        return output, [h, c]
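
The slicing above assumes the fused CuDNNLSTM parameter layout: self.kernel has
shape (input_dim, 4 * units) with the [i, f, c, o] gates stored side by side,
self.recurrent_kernel has shape (units, 4 * units), and self.bias has shape
(8 * units,) holding the four input biases followed by the four recurrent
biases. A minimal standalone sketch, with hypothetical sizes, shows that the
hand-written slices are just a 4-way (kernel) and 8-way (bias) split:

    import tensorflow as tf

    # Hypothetical sizes for illustration only.
    units, input_dim = 3, 5
    kernel = tf.random.normal((input_dim, 4 * units))  # gate order: [i, f, c, o]
    bias = tf.random.normal((8 * units,))              # 4 input + 4 recurrent biases

    # Equivalent to the four kernel slices in _process_batch above:
    gate_kernels = tf.split(kernel, 4, axis=1)         # each (input_dim, units)
    # Equivalent to the eight bias slices:
    gate_biases = tf.split(bias, 8)                    # each (units,)
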
Example #2
  def _process_batch(self, inputs, initial_state):
    if not self.time_major:
      inputs = tf.transpose(inputs, perm=(1, 0, 2))
    input_h = initial_state[0]
    input_h = tf.expand_dims(input_h, axis=0)

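    # Keras stores the GRU gates as [z, r, h]; cuDNN expects [r, z, h], hence
    # the swapped slice order below.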
    params = gru_lstm_utils.canonical_to_params(
        weights=[
            self.kernel[:, self.units:self.units * 2],
            self.kernel[:, :self.units],
            self.kernel[:, self.units * 2:],
            self.recurrent_kernel[:, self.units:self.units * 2],
            self.recurrent_kernel[:, :self.units],
            self.recurrent_kernel[:, self.units * 2:],
        ],
        biases=[
            self.bias[self.units:self.units * 2],
            self.bias[:self.units],
            self.bias[self.units * 2:self.units * 3],
            self.bias[self.units * 4:self.units * 5],
            self.bias[self.units * 3:self.units * 4],
            self.bias[self.units * 5:],
        ],
        shape=self._vector_shape)

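    # GRU has no cell state; input_c is a dummy value that the 'gru' rnn_mode
    # ignores.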
    args = {
        'input': inputs,
        'input_h': input_h,
        'input_c': 0,
        'params': params,
        'is_training': True,
        'rnn_mode': 'gru',
    }

    outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV2(**args)

    if self.stateful or self.return_state:
      h = h[0]
    if self.return_sequences:
      if self.time_major:
        output = outputs
      else:
        output = tf.transpose(outputs, perm=(1, 0, 2))
    else:
      output = outputs[-1]
    return output, [h]
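
Unlike the LSTM example, the GRU slices above are deliberately out of order:
Keras stores the GRU gates as [z, r, h] (update, reset, candidate), while cuDNN
expects [r, z, h], so the first two slices of each weight and bias group are
swapped. A small sketch with a hypothetical size confirms the hand-written
slice order equals a plain 3-way split with the first two parts exchanged:

    import tensorflow as tf

    units = 4
    kernel = tf.random.normal((6, 3 * units))  # Keras gate order: [z, r, h]

    # The explicit slice order used above ...
    manual = [kernel[:, units:units * 2],      # r
              kernel[:, :units],               # z
              kernel[:, units * 2:]]           # h

    # ... matches a split with the first two gates swapped.
    z, r, h = tf.split(kernel, 3, axis=1)
    for a, b in zip(manual, [r, z, h]):
        tf.debugging.assert_equal(a, b)        # cuDNN gate order: [r, z, h]
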
Example #3
def gpu_gru(
    inputs,
    init_h,
    kernel,
    recurrent_kernel,
    bias,
    mask,
    time_major,
    go_backwards,
    sequence_lengths,
    return_sequences,
):
    """GRU with cuDNN implementation which is only available for GPU."""
    if mask is not None:
        sequence_lengths = gru_lstm_utils.calculate_sequence_by_mask(
            mask, time_major)

    if not time_major and sequence_lengths is None:
        inputs = tf.transpose(inputs, perm=(1, 0, 2))
        seq_axis, batch_axis = (0, 1)
    else:
        seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
    # For init_h, cuDNN expects one extra num_layers dim: before the batch dim
    # for time-major inputs, or after it for batch-major inputs.
    init_h = tf.expand_dims(init_h, axis=seq_axis)

    weights = tf.split(kernel, 3, axis=1)
    weights += tf.split(recurrent_kernel, 3, axis=1)
    # Note that the bias was initialized with shape (2, 3 * units); flatten it
    # into (6 * units).
    bias = tf.split(backend.flatten(bias), 6)

    if tf.sysconfig.get_build_info()["is_cuda_build"]:
        # Note that the gate order for cuDNN differs from the canonical format:
        # canonical is [z, r, h], whereas cuDNN expects [r, z, h]. The swap
        # needs to be done for kernel, recurrent_kernel, input_bias and
        # recurrent_bias.
        # z is the update gate weights.
        # r is the reset gate weights.
        # h is the candidate (output) gate weights.
        weights[0], weights[1] = weights[1], weights[0]
        weights[3], weights[4] = weights[4], weights[3]
        bias[0], bias[1] = bias[1], bias[0]
        bias[3], bias[4] = bias[4], bias[3]

    params = gru_lstm_utils.canonical_to_params(
        weights=weights,
        biases=bias,
        shape=tf.constant([-1]),
        transpose_weights=True,
    )

    if sequence_lengths is not None:
        if go_backwards:
            # Three reversals are required. E.g.,
            # normal input = [1, 2, 3, 0, 0]  # where the 0s need to be masked
            # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
            # output_from_cudnn = [6, 5, 4, 0, 0]
            # expected_output = [0, 0, 6, 5, 4]
            inputs = tf.reverse_sequence(
                inputs,
                sequence_lengths,
                seq_axis=seq_axis,
                batch_axis=batch_axis,
            )
        outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV3(
            input=inputs,
            input_h=init_h,
            input_c=0,
            params=params,
            is_training=True,
            rnn_mode="gru",
            sequence_lengths=sequence_lengths,
            time_major=time_major,
        )
        if go_backwards:
            outputs = tf.reverse_sequence(
                outputs,
                sequence_lengths,
                seq_axis=seq_axis,
                batch_axis=batch_axis,
            )
            outputs = tf.reverse(outputs, axis=[seq_axis])
    else:
        if go_backwards:
            # Reverse axis 0 since the input is already converted to time major.
            inputs = tf.reverse(inputs, axis=[0])
        outputs, h, _, _ = tf.raw_ops.CudnnRNN(
            input=inputs,
            input_h=init_h,
            input_c=0,
            params=params,
            is_training=True,
            rnn_mode="gru",
        )

    last_output = outputs[-1]
    if not time_major and sequence_lengths is None and return_sequences:
        outputs = tf.transpose(outputs, perm=[1, 0, 2])
    h = tf.squeeze(h, axis=seq_axis)

    # In the case of variable-length input, the cuDNN kernel fills zeros in
    # the output, whereas the default Keras behavior is to carry over the
    # output from t-1, so that in the return_sequences=False case the user
    # quickly gets the final effective output instead of just 0s at the last
    # timestep. To mimic the default Keras behavior, we copy the final h state
    # to last_output, since it is numerically the same as that output.
    if sequence_lengths is not None:
        last_output = h

    # Match CPU return format
    if not return_sequences:
        outputs = tf.expand_dims(last_output, axis=0 if time_major else 1)

    return (
        last_output,
        outputs,
        h,
        gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_GPU),
    )
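
For reference, a minimal call sketch for gpu_gru. This assumes a CUDA build of
TensorFlow with a visible GPU and that gru_lstm_utils is importable; the shapes
and sizes below are illustrative assumptions, not values from the source:

    import tensorflow as tf

    batch, timesteps, input_dim, units = 8, 10, 16, 32
    inputs = tf.random.normal((batch, timesteps, input_dim))
    init_h = tf.zeros((batch, units))
    kernel = tf.random.normal((input_dim, 3 * units))
    recurrent_kernel = tf.random.normal((units, 3 * units))
    bias = tf.zeros((2, 3 * units))  # input-bias row and recurrent-bias row

    last_output, outputs, h, runtime = gpu_gru(
        inputs, init_h, kernel, recurrent_kernel, bias,
        mask=None, time_major=False, go_backwards=False,
        sequence_lengths=None, return_sequences=True)
    # outputs: (batch, timesteps, units); last_output and h: (batch, units)
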
Example #4
File: lstm.py  Project: ttigong/keras
def gpu_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask,
             time_major, go_backwards, sequence_lengths):
  """LSTM with either cuDNN or ROCm implementation which is only available for GPU.

  Note that currently only right padded data is supported, or the result will be
  polluted by the unmasked data which should be filtered.

  Args:
    inputs: Input tensor of LSTM layer.
    init_h: Initial state tensor for the cell output.
    init_c: Initial state tensor for the cell hidden state.
    kernel: Weights for cell kernel.
    recurrent_kernel: Weights for cell recurrent kernel.
    bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    mask: Boolean tensor used to mask out steps within the sequence. An
      individual `True` entry indicates that the corresponding timestep should
      be utilized, while a `False` entry indicates that the corresponding
      timestep should be ignored.
    time_major: Boolean, whether the inputs are in the format of [time, batch,
      feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.
    sequence_lengths: The lengths of all sequences coming from a variable length
      input, such as ragged tensors. If the input has a fixed timestep size,
      this should be None.

  Returns:
    last_output: Output tensor for the last timestep, which has shape
      [batch, units].
    outputs: Output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: The cell output, which has same shape as init_h.
    state_1: The cell hidden state, which has same shape as init_c.
    runtime: Constant string tensor which indicates the real runtime hardware.
      This value is for testing purposes and should not be used by the user.
  """
  if mask is not None:
    sequence_lengths = gru_lstm_utils.calculate_sequence_by_mask(
        mask, time_major)

  if not time_major and sequence_lengths is None:
    inputs = tf.transpose(inputs, perm=(1, 0, 2))
    seq_axis, batch_axis = (0, 1)
  else:
    seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
  # For init_h and init_c, cuDNN expects one extra num_layers dim: before the
  # batch dim for time-major inputs, or after it for batch-major inputs.
  init_h = tf.expand_dims(init_h, axis=seq_axis)
  init_c = tf.expand_dims(init_c, axis=seq_axis)

  weights = tf.split(kernel, 4, axis=1)
  weights += tf.split(recurrent_kernel, 4, axis=1)
  # cuDNN has an extra set of biases for the inputs; we disable them (setting
  # them to 0) so that mathematically it is the same as the canonical LSTM
  # implementation.
  full_bias = tf.concat((tf.zeros_like(bias), bias), 0)

  if tf.sysconfig.get_build_info()['is_rocm_build']:
    # ROCm MIOpen's weight sequence for LSTM differs from both the canonical
    # and the cuDNN formats.
    # MIOpen: [i, f, o, c]; cuDNN/canonical: [i, f, c, o]
    # i is input gate weights.
    # f is forget gate weights.
    # o is output gate weights.
    # c is cell gate weights.
    weights = [weights[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)]
    # full_bias is a tensor of shape (8*n,)
    full_bias = tf.split(full_bias, 8, axis=0)
    full_bias = [full_bias[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)]

  params = gru_lstm_utils.canonical_to_params(
      weights=weights,
      biases=tf.split(full_bias, 8),
      shape=tf.constant([-1]),
      transpose_weights=True)

  if sequence_lengths is not None:
    if go_backwards:
      # Three reversals are required. E.g.,
      # normal input = [1, 2, 3, 0, 0]  # where the 0s need to be masked
      # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
      # output_from_cudnn = [6, 5, 4, 0, 0]
      # expected_output = [0, 0, 6, 5, 4]
      inputs = tf.reverse_sequence(
          inputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis)
    outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
        input=inputs,
        input_h=init_h,
        input_c=init_c,
        params=params,
        is_training=True,
        rnn_mode='lstm',
        sequence_lengths=sequence_lengths,
        time_major=time_major)
    if go_backwards:
      outputs = tf.reverse_sequence(
          outputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis)
      outputs = tf.reverse(outputs, axis=[seq_axis])
  else:
    if go_backwards:
      # Reverse axis 0 since the input is already converted to time major.
      inputs = tf.reverse(inputs, axis=[0])
    outputs, h, c, _ = tf.raw_ops.CudnnRNN(
        input=inputs, input_h=init_h, input_c=init_c, params=params,
        is_training=True, rnn_mode='lstm')

  last_output = outputs[-1]
  if not time_major and sequence_lengths is None:
    outputs = tf.transpose(outputs, perm=[1, 0, 2])
  h = tf.squeeze(h, axis=seq_axis)
  c = tf.squeeze(c, axis=seq_axis)

  # In the case of variable-length input, the cuDNN kernel fills zeros in the
  # output, whereas the default Keras behavior is to carry over the output
  # from t-1, so that in the return_sequences=False case the user quickly gets
  # the final effective output instead of just 0s at the last timestep. To
  # mimic the default Keras behavior, we copy the final h state to
  # last_output, since it is numerically the same as that output.
  if sequence_lengths is not None:
    last_output = h
  return last_output, outputs, h, c, gru_lstm_utils.runtime(
      gru_lstm_utils.RUNTIME_GPU)
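
The "three reversals" trick in the comments above can be checked in isolation.
A standalone sketch (toy values, runs on CPU) reproduces the worked example
from the comment:

    import tensorflow as tf

    x = tf.constant([[1, 2, 3, 0, 0]])  # one right-padded sequence of length 3
    seq_len = tf.constant([3])

    # 1) Reverse only the valid prefix before feeding cuDNN: [3, 2, 1, 0, 0]
    rev_in = tf.reverse_sequence(x, seq_len, seq_axis=1, batch_axis=0)

    # Pretend the kernel produced per-step outputs aligned with rev_in:
    y = tf.constant([[6, 5, 4, 0, 0]])

    # 2) Undo the prefix reversal, then 3) flip the whole time axis so the
    # padding moves to the front: [0, 0, 6, 5, 4]
    out = tf.reverse_sequence(y, seq_len, seq_axis=1, batch_axis=0)
    out = tf.reverse(out, axis=[1])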