def cudnn_lstm(inputs, input_h, input_c, kernel, recurrent_kernel, bias,
               time_major):
    """LSTM with CuDNN implementation which is only available for GPU."""
    if not time_major:
        inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
    input_h = array_ops.expand_dims(input_h, axis=0)
    input_c = array_ops.expand_dims(input_c, axis=0)

    weights = array_ops.split(kernel, 4, axis=1)
    weights += array_ops.split(recurrent_kernel, 4, axis=1)
    # cuDNN has an extra set of biases for the inputs; we disable them (set
    # them to 0) so that mathematically it is the same as the canonical LSTM
    # implementation.
    full_bias = array_ops.concat((array_ops.zeros_like(bias), bias), 0)

    params = _canonical_to_params(weights=weights,
                                  biases=array_ops.split(full_bias, 8),
                                  shape=constant_op.constant([-1]),
                                  transpose_weights=True)

    outputs, h, c, _ = gen_cudnn_rnn_ops.cudnn_rnn(inputs,
                                                   input_h=input_h,
                                                   input_c=input_c,
                                                   params=params,
                                                   is_training=True)
    last_output = outputs[-1]
    if not time_major:
        outputs = array_ops.transpose(outputs, perm=[1, 0, 2])
    h = h[0]
    c = c[0]

    return last_output, outputs, h, c, _runtime('cudnn')
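
The comment above about zeroing the extra input bias can be checked numerically: cuDNN computes each gate preactivation with separate input and recurrent biases, W x + b_i + R h + b_r, so setting b_i = 0 makes it identical to the canonical single-bias form W x + R h + b. A minimal NumPy sketch of that equivalence (all names below are illustrative and not part of the snippet above):

import numpy as np

units, input_dim = 3, 4
rng = np.random.default_rng(0)
x = rng.normal(size=(input_dim,))
h = rng.normal(size=(units,))
W = rng.normal(size=(input_dim, 4 * units))   # kernel
R = rng.normal(size=(units, 4 * units))       # recurrent kernel
b = rng.normal(size=(4 * units,))             # canonical single bias

# Canonical Keras LSTM preactivation: a single bias term.
canonical = x @ W + h @ R + b

# cuDNN-style preactivation: separate input and recurrent biases. Using zeros
# for the input bias (the zeros_like(bias) half of full_bias above) reproduces
# the canonical result exactly.
b_input = np.zeros_like(b)
b_recurrent = b
cudnn_style = (x @ W + b_input) + (h @ R + b_recurrent)

np.testing.assert_allclose(canonical, cudnn_style)
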
Example #2
def cudnn_lstm(inputs, input_h, input_c, kernel, recurrent_kernel, bias, units):
  inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
  input_h = array_ops.expand_dims(input_h, axis=0)
  input_c = array_ops.expand_dims(input_c, axis=0)

  params = _canonical_to_params(
      weights=[
          kernel[:, :units],
          kernel[:, units:units * 2],
          kernel[:, units * 2:units * 3],
          kernel[:, units * 3:],
          recurrent_kernel[:, :units],
          recurrent_kernel[:, units:units * 2],
          recurrent_kernel[:, units * 2:units * 3],
          recurrent_kernel[:, units * 3:],
      ],
      biases=[
          bias[:units],
          bias[units:units * 2],
          bias[units * 2:units * 3],
          bias[units * 3:units * 4],
          bias[units * 4:units * 5],
          bias[units * 5:units * 6],
          bias[units * 6:units * 7],
          bias[units * 7:],
      ],
      shape=constant_op.constant([-1]))

  outputs, h, c, _ = gen_cudnn_rnn_ops.cudnn_rnn(
      inputs, input_h=input_h, input_c=input_c, params=params)
  outputs = array_ops.transpose(outputs, perm=[1, 0, 2])
  h = h[0]
  c = c[0]
  return outputs, [h, c], constant_op.constant(
      'cudnn', dtype=dtypes.string, name='runtime')
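
Both snippets above pack the canonical per-gate weights and biases into a single opaque params vector via a _canonical_to_params helper that is not shown on this page. A plausible minimal sketch of such a helper, assuming it only transposes (when requested), flattens, and concatenates the pieces in weights-then-biases order:

from tensorflow.python.ops import array_ops


def _canonical_to_params_sketch(weights, biases, shape, transpose_weights=False):
  """Illustrative sketch only, not the actual TensorFlow helper.

  Each weight is optionally transposed, reshaped to `shape` (typically
  constant_op.constant([-1])), and all pieces are concatenated into the flat
  params vector that gen_cudnn_rnn_ops.cudnn_rnn expects.
  """
  def convert(w):
    return array_ops.transpose(w) if transpose_weights else w

  weights = [array_ops.reshape(convert(w), shape) for w in weights]
  biases = [array_ops.reshape(b, shape) for b in biases]
  return array_ops.concat(weights + biases, axis=0)
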
    def _process_batch(self, inputs, initial_state):
        if not self.time_major:
            inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
        input_h = initial_state[0]
        input_c = initial_state[1]
        input_h = array_ops.expand_dims(input_h, axis=0)
        input_c = array_ops.expand_dims(input_c, axis=0)

        params = recurrent_v2._canonical_to_params(  # pylint: disable=protected-access
            weights=[
                self.kernel[:, :self.units],
                self.kernel[:, self.units:self.units * 2],
                self.kernel[:, self.units * 2:self.units * 3],
                self.kernel[:, self.units * 3:],
                # Elementwise mask applied to the recurrent weights before
                # packing them into the cuDNN params.
                self.recurrent_kernel[:, :self.units] *
                self.mask[:, :self.units],
                self.recurrent_kernel[:, self.units:self.units * 2] *
                self.mask[:, self.units:self.units * 2],
                self.recurrent_kernel[:, self.units * 2:self.units * 3] *
                self.mask[:, self.units * 2:self.units * 3],
                self.recurrent_kernel[:, self.units * 3:] *
                self.mask[:, self.units * 3:],
            ],
            biases=[
                self.bias[:self.units],
                self.bias[self.units:self.units * 2],
                self.bias[self.units * 2:self.units * 3],
                self.bias[self.units * 3:self.units * 4],
                self.bias[self.units * 4:self.units * 5],
                self.bias[self.units * 5:self.units * 6],
                self.bias[self.units * 6:self.units * 7],
                self.bias[self.units * 7:],
            ],
            shape=self._vector_shape)

        outputs, h, c, _ = gen_cudnn_rnn_ops.cudnn_rnn(inputs,
                                                       input_h=input_h,
                                                       input_c=input_c,
                                                       params=params,
                                                       is_training=True)

        if self.stateful or self.return_state:
            h = h[0]
            c = c[0]
        if self.return_sequences:
            if self.time_major:
                output = outputs
            else:
                output = array_ops.transpose(outputs, perm=(1, 0, 2))
        else:
            output = outputs[-1]
        return output, [h, c]
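
The only difference between this variant and the one that follows is that each recurrent weight block is multiplied elementwise by self.mask before being packed. Assuming mask is a binary pruning mask with the same (units, 4 * units) shape as the recurrent kernel (an assumption; these snippets do not show how it is built), a short NumPy sketch shows that masking the whole kernel and then slicing per gate equals slicing and then masking each slice, as done above:

import numpy as np

units = 3
rng = np.random.default_rng(1)
recurrent_kernel = rng.normal(size=(units, 4 * units))
mask = rng.integers(0, 2, size=(units, 4 * units)).astype(float)  # hypothetical binary mask

# Mask first, then slice per gate ...
masked_then_sliced = [(recurrent_kernel * mask)[:, i * units:(i + 1) * units]
                      for i in range(4)]
# ... equals slicing per gate and then masking each slice (what the code above does).
sliced_then_masked = [recurrent_kernel[:, i * units:(i + 1) * units] *
                      mask[:, i * units:(i + 1) * units]
                      for i in range(4)]

for a, b in zip(masked_then_sliced, sliced_then_masked):
  np.testing.assert_allclose(a, b)
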
  def _process_batch(self, inputs, initial_state):
    if not self.time_major:
      inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
    input_h = initial_state[0]
    input_c = initial_state[1]
    input_h = array_ops.expand_dims(input_h, axis=0)
    input_c = array_ops.expand_dims(input_c, axis=0)

    params = recurrent_v2._canonical_to_params(    # pylint: disable=protected-access
        weights=[
            self.kernel[:, :self.units],
            self.kernel[:, self.units:self.units * 2],
            self.kernel[:, self.units * 2:self.units * 3],
            self.kernel[:, self.units * 3:],
            self.recurrent_kernel[:, :self.units],
            self.recurrent_kernel[:, self.units:self.units * 2],
            self.recurrent_kernel[:, self.units * 2:self.units * 3],
            self.recurrent_kernel[:, self.units * 3:],
        ],
        biases=[
            self.bias[:self.units],
            self.bias[self.units:self.units * 2],
            self.bias[self.units * 2:self.units * 3],
            self.bias[self.units * 3:self.units * 4],
            self.bias[self.units * 4:self.units * 5],
            self.bias[self.units * 5:self.units * 6],
            self.bias[self.units * 6:self.units * 7],
            self.bias[self.units * 7:],
        ],
        shape=self._vector_shape)

    outputs, h, c, _ = gen_cudnn_rnn_ops.cudnn_rnn(
        inputs,
        input_h=input_h,
        input_c=input_c,
        params=params,
        is_training=True)

    if self.stateful or self.return_state:
      h = h[0]
      c = c[0]
    if self.return_sequences:
      if self.time_major:
        output = outputs
      else:
        output = array_ops.transpose(outputs, perm=(1, 0, 2))
    else:
      output = outputs[-1]
    return output, [h, c]
Example #5
  def _process_batch(self, inputs, initial_state):
    inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
    input_h = initial_state[0]
    input_c = initial_state[1]
    input_h = array_ops.expand_dims(input_h, axis=0)
    input_c = array_ops.expand_dims(input_c, axis=0)

    params = self._canonical_to_params(
        weights=[
            self.kernel[:, :self.units],
            self.kernel[:, self.units:self.units * 2],
            self.kernel[:, self.units * 2:self.units * 3],
            self.kernel[:, self.units * 3:],
            self.recurrent_kernel[:, :self.units],
            self.recurrent_kernel[:, self.units:self.units * 2],
            self.recurrent_kernel[:, self.units * 2:self.units * 3],
            self.recurrent_kernel[:, self.units * 3:],
        ],
        biases=[
            self.bias[:self.units],
            self.bias[self.units:self.units * 2],
            self.bias[self.units * 2:self.units * 3],
            self.bias[self.units * 3:self.units * 4],
            self.bias[self.units * 4:self.units * 5],
            self.bias[self.units * 5:self.units * 6],
            self.bias[self.units * 6:self.units * 7],
            self.bias[self.units * 7:],
        ],
    )

    outputs, h, c, _ = gen_cudnn_rnn_ops.cudnn_rnn(
        inputs,
        input_h=input_h,
        input_c=input_c,
        params=params,
        is_training=True)

    if self.stateful or self.return_state:
      h = h[0]
      c = c[0]
    if self.return_sequences:
      output = array_ops.transpose(outputs, perm=(1, 0, 2))
    else:
      output = outputs[-1]
    return output, [h, c]
  def _process_batch(self, inputs, initial_state):
    if not self.time_major:
      inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
    input_h = initial_state[0]
    input_h = array_ops.expand_dims(input_h, axis=0)

    params = self._canonical_to_params(
        weights=[
            self.kernel[:, self.units:self.units * 2],
            self.kernel[:, :self.units],
            self.kernel[:, self.units * 2:],
            self.recurrent_kernel[:, self.units:self.units * 2],
            self.recurrent_kernel[:, :self.units],
            self.recurrent_kernel[:, self.units * 2:],
        ],
        biases=[
            self.bias[self.units:self.units * 2],
            self.bias[:self.units],
            self.bias[self.units * 2:self.units * 3],
            self.bias[self.units * 4:self.units * 5],
            self.bias[self.units * 3:self.units * 4],
            self.bias[self.units * 5:],
        ],
    )

    outputs, h, _, _ = gen_cudnn_rnn_ops.cudnn_rnn(
        inputs,
        input_h=input_h,
        input_c=0,
        params=params,
        is_training=True,
        rnn_mode='gru')

    if self.stateful or self.return_state:
      h = h[0]
    if self.return_sequences:
      if self.time_major:
        output = outputs
      else:
        output = array_ops.transpose(outputs, perm=(1, 0, 2))
    else:
      output = outputs[-1]
    return output, [h]
def cudnn_gru(inputs, init_h, kernel, recurrent_kernel, bias, time_major):
    """GRU with CuDNN implementation which is only available for GPU."""
    if not time_major:
        inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
    init_h = array_ops.expand_dims(init_h, axis=0)

    weights = array_ops.split(kernel, 3, axis=1)
    weights += array_ops.split(recurrent_kernel, 3, axis=1)
    # Note that the bias was initialized with shape (2, 3 * units); flatten it
    # into (6 * units,).
    bias = array_ops.split(K.flatten(bias), 6)
    # Note that the gate order for cuDNN is different from the canonical format:
    # the canonical format is [z, r, h], whereas cuDNN uses [r, z, h]. The swap
    # needs to be done for kernel, recurrent_kernel, input_bias and
    # recurrent_bias.
    # z is the update gate weights.
    # r is the reset gate weights.
    # h is the candidate (new) gate weights.
    weights[0], weights[1] = weights[1], weights[0]
    weights[3], weights[4] = weights[4], weights[3]
    bias[0], bias[1] = bias[1], bias[0]
    bias[3], bias[4] = bias[4], bias[3]

    params = _canonical_to_params(weights=weights,
                                  biases=bias,
                                  shape=constant_op.constant([-1]),
                                  transpose_weights=True)

    outputs, h, _, _ = gen_cudnn_rnn_ops.cudnn_rnn(inputs,
                                                   input_h=init_h,
                                                   input_c=0,
                                                   params=params,
                                                   is_training=True,
                                                   rnn_mode='gru')
    last_output = outputs[-1]
    if not time_major:
        outputs = array_ops.transpose(outputs, perm=[1, 0, 2])
    h = h[0]
    return last_output, outputs, h, _runtime('cudnn')
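
The gate reordering above can be sanity-checked with plain NumPy: splitting a kernel laid out as [z | r | h] into three blocks and swapping the first two yields the [r | z | h] layout cuDNN expects, without altering any individual gate's weights. A minimal sketch (names are illustrative):

import numpy as np

units, input_dim = 3, 5
rng = np.random.default_rng(2)
kernel = rng.normal(size=(input_dim, 3 * units))  # canonical [z | r | h] layout

z, r, h = np.split(kernel, 3, axis=1)
blocks = [z, r, h]
blocks[0], blocks[1] = blocks[1], blocks[0]       # same swap as weights[0]/weights[1]
cudnn_kernel = np.concatenate(blocks, axis=1)     # [r | z | h] layout

np.testing.assert_allclose(cudnn_kernel[:, :units], r)
np.testing.assert_allclose(cudnn_kernel[:, units:2 * units], z)
np.testing.assert_allclose(cudnn_kernel[:, 2 * units:], h)
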
Example #8
def gpu_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask,
             time_major, go_backwards, sequence_lengths):
  """LSTM with either CuDNN or ROCm implementation which is only available for GPU.

  Note that currently only right padded data is supported, or the result will be
  polluted by the unmasked data which should be filtered.

  Args:
    inputs: Input tensor of LSTM layer.
    init_h: Initial state tensor for the cell output.
    init_c: Initial state tensor for the cell hidden state.
    kernel: Weights for cell kernel.
    recurrent_kernel: Weights for cell recurrent kernel.
    bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    mask: Boolean tensor used to mask out the steps within the sequence.
    time_major: Boolean, whether the inputs are in the format of [time, batch,
      feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.
    sequence_lengths: The lengths of all sequences coming from a variable-length
      input, such as ragged tensors. If the input has a fixed timestep size,
      this should be None.

  Returns:
    last_output: Output tensor for the last timestep, which has shape
      [batch, units].
    outputs: Output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: The cell output, which has the same shape as init_h.
    state_1: The cell hidden state, which has the same shape as init_c.
    runtime: Constant string tensor which indicates the real runtime hardware.
      This value is for testing purposes and should not be used by users.
  """
  if not time_major and mask is None:
    inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
    seq_axis, batch_axis = (0, 1)
  else:
    seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
  # For init_h and init_c, cuDNN expects one extra num_layers dim: before the
  # batch dim for time-major inputs, or after it for batch-major inputs.
  init_h = array_ops.expand_dims(init_h, axis=seq_axis)
  init_c = array_ops.expand_dims(init_c, axis=seq_axis)

  weights = array_ops.split(kernel, 4, axis=1)
  weights += array_ops.split(recurrent_kernel, 4, axis=1)
  # cuDNN has an extra set of biases for the inputs; we disable them (set them
  # to 0) so that mathematically it is the same as the canonical LSTM
  # implementation.
  full_bias = array_ops.concat((array_ops.zeros_like(bias), bias), 0)

  if build_info.is_rocm_build:
    # ROCm MIOpen's weight sequence for LSTM is different from both the
    # canonical and the cuDNN format:
    # MIOpen: [i, f, o, c];  cuDNN/canonical: [i, f, c, o].
    # i is input gate weights.
    # f is forget gate weights.
    # o is output gate weights.
    # c is cell gate weights.
    weights = [weights[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)]
    # full_bias is a tensor of shape (8*n,)
    full_bias = array_ops.split(full_bias, 8, axis=0)
    full_bias = [full_bias[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)]

  params = _canonical_to_params(
      weights=weights,
      biases=array_ops.split(full_bias, 8),
      shape=constant_op.constant([-1]),
      transpose_weights=True)

  if mask is not None:
    sequence_lengths = calculate_sequence_by_mask(mask, time_major)

  if sequence_lengths is not None:
    if go_backwards:
      # Three reversals are required. E.g.,
      # normal input = [1, 2, 3, 0, 0]  # where 0s need to be masked
      # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
      # output_from_cudnn = [6, 5, 4, 0, 0]
      # expected_output = [0, 0, 6, 5, 4]
      inputs = array_ops.reverse_sequence_v2(
          inputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis)
    outputs, h, c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(
        inputs,
        input_h=init_h,
        input_c=init_c,
        params=params,
        is_training=True,
        rnn_mode='lstm',
        sequence_lengths=sequence_lengths,
        time_major=time_major)
    if go_backwards:
      outputs = array_ops.reverse_sequence_v2(
          outputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis)
      outputs = array_ops.reverse(outputs, axis=[seq_axis])
  else:
    # # Fill the array with shape [batch] with value of max timesteps.
    # sequence_length = array_ops.fill([array_ops.shape(inputs)[1]],
    #                                  array_ops.shape(inputs)[0])
    if go_backwards:
      # Reverse axis 0 since the input has already been converted to time major.
      inputs = array_ops.reverse(inputs, axis=[0])
    outputs, h, c, _ = gen_cudnn_rnn_ops.cudnn_rnn(
        inputs, input_h=init_h, input_c=init_c, params=params, is_training=True,
        rnn_mode='lstm')

  last_output = outputs[-1]
  if not time_major and mask is None:
    outputs = array_ops.transpose(outputs, perm=[1, 0, 2])
  h = array_ops.squeeze(h, axis=seq_axis)
  c = array_ops.squeeze(c, axis=seq_axis)

  # In the case of variable-length input, the cuDNN kernel fills the output
  # with zeros, whereas the default Keras behavior is to carry over the output
  # from t-1, so that in the return_sequences=False case the user can directly
  # get the final effective output instead of just 0s at the last timestep. To
  # mimic the default Keras behavior, we copy the final h state as last_output,
  # since it is numerically the same as that output.
  if mask is not None:
    last_output = h
  return last_output, outputs, h, c, _runtime(_RUNTIME_GPU)
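
The three-reversal scheme in the go_backwards branch can be illustrated with plain NumPy, following the numbers in the comment above. The sequence below is right-padded with zeros, and the cuDNN call is replaced by a stand-in that maps each valid step to an arbitrary output; everything here is illustrative only:

import numpy as np


def reverse_valid_part(x, seq_length):
  """Reverse only the first seq_length entries of a 1-D array (padding stays put)."""
  out = x.copy()
  out[:seq_length] = out[:seq_length][::-1]
  return out


seq_length = 3
inputs = np.array([1, 2, 3, 0, 0])  # right-padded, 0 = padding

# Reversal 1: flip the valid part so cuDNN still sees right-padded data.
reversed_input_to_cudnn = reverse_valid_part(inputs, seq_length)   # [3, 2, 1, 0, 0]

# Stand-in for the cuDNN call: pretend each valid step produces some output.
output_from_cudnn = np.where(reversed_input_to_cudnn > 0,
                             reversed_input_to_cudnn + 3, 0)        # [6, 5, 4, 0, 0]

# Reversals 2 and 3: undo the per-sequence flip, then flip the whole time axis.
outputs = reverse_valid_part(output_from_cudnn, seq_length)         # [4, 5, 6, 0, 0]
expected_output = outputs[::-1]                                     # [0, 0, 6, 5, 4]

print(expected_output.tolist())  # [0, 0, 6, 5, 4]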