Example No. 1
    def call(self, inputs):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        encoder_out_seq, decoder_out_seq = inputs

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """
            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]
            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)
            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # <= batch_size, 1, latent_dim
            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """

            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)

            return c_i, [c_i]

        fake_state_c = K.sum(encoder_out_seq, axis=1)  # <= (batch_size, latent_dim)
        fake_state_e = K.sum(encoder_out_seq,
                             axis=2)  # <= (batch_size, enc_seq_len)
        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step,
            decoder_out_seq,
            [fake_state_e],
        )
        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(
            context_step,
            e_outputs,
            [fake_state_c],
        )

        return c_outputs, e_outputs
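
The two step functions above only make sense together with the K.rnn contract: each step receives the decoder output (or energy vector) for one timestep plus a list of states, and must return (output, [new_states]). The following self-contained sketch reproduces the same Bahdanau-style energy and context computation on toy tensors; the shapes and the randomly initialized W_a, U_a and V_a are assumptions for illustration only, not the layer's trained weights.

import tensorflow as tf
from tensorflow.keras import backend as K

batch, en_len, de_len, units = 2, 6, 4, 8
encoder_out_seq = tf.random.normal((batch, en_len, units))
decoder_out_seq = tf.random.normal((batch, de_len, units))
W_a = tf.random.normal((units, units))
U_a = tf.random.normal((units, units))
V_a = tf.random.normal((units, 1))

def energy_step(dec_t, states):
    # e_t = softmax(V_a . tanh(S . W_a + h_t . U_a)), shape (batch, en_len)
    W_a_dot_s = K.dot(encoder_out_seq, W_a)              # (batch, en_len, units)
    U_a_dot_h = K.expand_dims(K.dot(dec_t, U_a), 1)      # (batch, 1, units)
    e_t = K.squeeze(K.dot(K.tanh(W_a_dot_s + U_a_dot_h), V_a), axis=-1)
    e_t = K.softmax(e_t)
    return e_t, [e_t]

def context_step(e_t, states):
    # c_t = sum_j e_tj * h_j, shape (batch, units)
    c_t = K.sum(encoder_out_seq * K.expand_dims(e_t, -1), axis=1)
    return c_t, [c_t]

_, e_outputs, _ = K.rnn(energy_step, decoder_out_seq, [K.sum(encoder_out_seq, axis=2)])
_, c_outputs, _ = K.rnn(context_step, e_outputs, [K.sum(encoder_out_seq, axis=1)])
print(e_outputs.shape, c_outputs.shape)  # (2, 4, 6) (2, 4, 8)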
Example No. 2
    def call(self, inputs, training=None, mask=None):
        kwargs = {}
        if generic_utils.has_arg(self.layer.call, 'training'):
            kwargs['training'] = training

        input_shape = Kr.int_shape(inputs)

        inputs, row_lengths = Kr.convert_inputs_if_ragged(inputs)
        is_ragged_input = row_lengths is not None

        self.layer.compute_K(inputs)

        # batch size matters, use rnn-based implementation
        def step(x, _):
            output = self.layer(x, **kwargs)
            return output, []

        _, outputs, _ = Kr.rnn(
            step,
            inputs,
            initial_states=[],
            input_length=row_lengths[0] if is_ragged_input else input_shape[1],
            mask=mask,
            unroll=False)

        y = Kr.maybe_convert_to_ragged(is_ragged_input, outputs, row_lengths)

        return y
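
Since the wrapped layer here carries no state between timesteps, the K.rnn call above just applies it once per step with an empty state list. A minimal sketch of that pattern with a plain Dense layer; the inner layer and shapes are assumptions, and the public tf.keras.backend alias is used instead of the Kr alias above.

import tensorflow as tf
from tensorflow.keras import backend as K

inner = tf.keras.layers.Dense(8)                 # stand-in for self.layer
x = tf.random.normal((4, 10, 16))                # (batch, timesteps, features)

def step(x_t, _):
    # apply the wrapped layer to one timestep; no states are carried over
    return inner(x_t), []

_, outputs, _ = K.rnn(step, x, initial_states=[], input_length=10, unroll=False)
print(outputs.shape)  # (4, 10, 8)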
Example No. 3
  def call(self, inputs, training=None, mask=None):
    kwargs = {}
    if generic_utils.has_arg(self.layer.call, 'training'):
      kwargs['training'] = training
    uses_learning_phase = False  # pylint: disable=redefined-outer-name

    input_shape = K.int_shape(inputs)
    if input_shape[0]:
      # batch size matters, use rnn-based implementation
      def step(x, _):
        global uses_learning_phase  # pylint: disable=global-variable-undefined
        output = self.layer.call(x, **kwargs)
        if hasattr(output, '_uses_learning_phase'):
          uses_learning_phase = (output._uses_learning_phase or
                                 uses_learning_phase)
        return output, []

      _, outputs, _ = K.rnn(
          step,
          inputs,
          initial_states=[],
          input_length=input_shape[1],
          unroll=False)
      y = outputs
    else:
      # No batch size specified, therefore the layer will be able
      # to process batches of any size.
      # We can go with reshape-based implementation for performance.
      input_length = input_shape[1]
      if not input_length:
        input_length = array_ops.shape(inputs)[1]
      inner_input_shape = self._get_shape_tuple((-1,), inputs, 2)
      # Shape: (num_samples * timesteps, ...). And track the
      # transformation in self._input_map.
      input_uid = generic_utils.object_list_uid(inputs)
      inputs = array_ops.reshape(inputs, inner_input_shape)
      self._input_map[input_uid] = inputs
      # (num_samples * timesteps, ...)
      if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None:
        inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
        kwargs['mask'] = K.reshape(mask, inner_mask_shape)
      y = self.layer.call(inputs, **kwargs)
      if hasattr(y, '_uses_learning_phase'):
        uses_learning_phase = y._uses_learning_phase
      # Shape: (num_samples, timesteps, ...)
      output_shape = self.compute_output_shape(input_shape).as_list()
      output_shape = self._get_shape_tuple(
          (-1, input_length), y, 1, output_shape[2:])
      y = array_ops.reshape(y, output_shape)

    # Apply activity regularizer if any:
    if (hasattr(self.layer, 'activity_regularizer') and
        self.layer.activity_regularizer is not None):
      regularization_loss = self.layer.activity_regularizer(y)
      self.add_loss(regularization_loss, inputs)

    if uses_learning_phase:
      y._uses_learning_phase = True
    return y
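
When the batch size is not fixed, the code above avoids K.rnn entirely and instead folds the time axis into the batch axis, applies the layer once, and unfolds the result. A minimal sketch of that reshape trick; the inner layer and shapes are assumptions.

import tensorflow as tf

inner = tf.keras.layers.Dense(8)                 # stand-in for self.layer
x = tf.random.normal((4, 10, 16))                # (batch, timesteps, features)

y = inner(tf.reshape(x, (-1, 16)))               # (batch * timesteps, 8)
y = tf.reshape(y, (-1, 10, 8))                   # (batch, timesteps, 8)
print(y.shape)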
Example No. 4
  def call(self, inputs, training=None, mask=None):
    kwargs = {}
    if generic_utils.has_arg(self.layer.call, 'training'):
      kwargs['training'] = training

    input_shape = K.int_shape(inputs)
    if input_shape[0] and not self._always_use_reshape:
      inputs, row_lengths = K.convert_inputs_if_ragged(inputs)
      is_ragged_input = row_lengths is not None

      # batch size matters, use rnn-based implementation
      def step(x, _):
        output = self.layer(x, **kwargs)
        return output, []

      _, outputs, _ = K.rnn(
          step,
          inputs,
          initial_states=[],
          input_length=row_lengths[0] if is_ragged_input else input_shape[1],
          mask=mask,
          unroll=False)
      y = K.maybe_convert_to_ragged(is_ragged_input, outputs, row_lengths)
    else:
      # No batch size specified, therefore the layer will be able
      # to process batches of any size.
      # We can go with reshape-based implementation for performance.
      if isinstance(inputs, ragged_tensor.RaggedTensor):
        y = self.layer(inputs.values, **kwargs)
        y = ragged_tensor.RaggedTensor.from_row_lengths(
            y,
            inputs.nested_row_lengths()[0])
      else:
        input_length = input_shape[1]
        if not input_length:
          input_length = array_ops.shape(inputs)[1]
        inner_input_shape = self._get_shape_tuple((-1,), inputs, 2)
        # Shape: (num_samples * timesteps, ...). And track the
        # transformation in self._input_map.
        inputs = array_ops.reshape(inputs, inner_input_shape)
        # (num_samples * timesteps, ...)
        if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None:
          inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
          kwargs['mask'] = K.reshape(mask, inner_mask_shape)

        y = self.layer(inputs, **kwargs)

        # Shape: (num_samples, timesteps, ...)
        output_shape = self.compute_output_shape(input_shape).as_list()
        output_shape = self._get_shape_tuple((-1, input_length), y, 1,
                                             output_shape[2:])
        y = array_ops.reshape(y, output_shape)
        if not context.executing_eagerly():
          # Set the static shape for the result since it might be lost during
          # array_ops reshape, eg, some `None` dim in the result could be
          # inferred.
          y.set_shape(self.compute_output_shape(input_shape))

    return y
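
For ragged inputs the code above takes yet another shortcut: it runs the wrapped layer on the flat .values tensor and rebuilds the ragged structure from the original row lengths. A toy sketch of that branch; the inner layer and the example values are assumptions.

import tensorflow as tf

inner = tf.keras.layers.Dense(8)
rt = tf.ragged.constant([[[1., 2.], [3., 4.]], [[5., 6.]]], ragged_rank=1)

y = inner(rt.values)                                    # (total_timesteps, 8)
y = tf.RaggedTensor.from_row_lengths(y, rt.nested_row_lengths()[0])
print(y.shape)  # (2, None, 8)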
Example No. 5
  def call(self, inputs, training=None, mask=None):
    kwargs = {}
    if generic_utils.has_arg(self.layer.call, 'training'):
      kwargs['training'] = training
    uses_learning_phase = False  # pylint: disable=redefined-outer-name

    input_shape = K.int_shape(inputs)
    if input_shape[0]:
      # batch size matters, use rnn-based implementation
      def step(x, _):
        global uses_learning_phase  # pylint: disable=global-variable-undefined
        output = self.layer.call(x, **kwargs)
        if hasattr(output, '_uses_learning_phase'):
          uses_learning_phase = (output._uses_learning_phase or
                                 uses_learning_phase)
        return output, []

      _, outputs, _ = K.rnn(
          step,
          inputs,
          initial_states=[],
          input_length=input_shape[1],
          unroll=False)
      y = outputs
    else:
      # No batch size specified, therefore the layer will be able
      # to process batches of any size.
      # We can go with reshape-based implementation for performance.
      input_length = input_shape[1]
      if not input_length:
        input_length = array_ops.shape(inputs)[1]
      inner_input_shape = self._get_shape_tuple((-1,), inputs, 2)
      # Shape: (num_samples * timesteps, ...). And track the
      # transformation in self._input_map.
      input_uid = generic_utils.object_list_uid(inputs)
      inputs = array_ops.reshape(inputs, inner_input_shape)
      self._input_map[input_uid] = inputs
      # (num_samples * timesteps, ...)
      if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None:
        inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
        kwargs['mask'] = K.reshape(mask, inner_mask_shape)
      y = self.layer.call(inputs, **kwargs)
      if hasattr(y, '_uses_learning_phase'):
        uses_learning_phase = y._uses_learning_phase
      # Shape: (num_samples, timesteps, ...)
      output_shape = self.compute_output_shape(input_shape).as_list()
      output_shape = self._get_shape_tuple(
          (-1, input_length), y, 1, output_shape[2:])
      y = array_ops.reshape(y, output_shape)

    # Apply activity regularizer if any:
    if (hasattr(self.layer, 'activity_regularizer') and
        self.layer.activity_regularizer is not None):
      regularization_loss = self.layer.activity_regularizer(y)
      self.add_loss(regularization_loss, inputs)

    if uses_learning_phase:
      y._uses_learning_phase = True
    return y
Example No. 6
    def call(self, inputs, training=None, mask=None):
        kwargs = {}
        if generic_utils.has_arg(self.layer.call, 'training'):
            kwargs['training'] = training

        input_shape = K.int_shape(inputs)
        if input_shape[0] and not self._always_use_reshape:
            # batch size matters, use rnn-based implementation
            def step(x, _):
                output = self.layer.call(x, **kwargs)
                return output, []

            _, outputs, _ = K.rnn(step,
                                  inputs,
                                  initial_states=[],
                                  input_length=input_shape[1],
                                  unroll=False)
            y = outputs
        else:
            # No batch size specified, therefore the layer will be able
            # to process batches of any size.
            # We can go with reshape-based implementation for performance.
            input_length = input_shape[1]
            if not input_length:
                input_length = array_ops.shape(inputs)[1]
            inner_input_shape = self._get_shape_tuple((-1, ), inputs, 2)
            # Shape: (num_samples * timesteps, ...). And track the
            # transformation in self._input_map.
            input_uid = generic_utils.object_list_uid(inputs)
            inputs = array_ops.reshape(inputs, inner_input_shape)
            self._input_map[input_uid] = inputs
            # (num_samples * timesteps, ...)
            if generic_utils.has_arg(self.layer.call,
                                     'mask') and mask is not None:
                inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
                kwargs['mask'] = K.reshape(mask, inner_mask_shape)
            y = self.layer.call(inputs, **kwargs)
            # Shape: (num_samples, timesteps, ...)
            output_shape = self.compute_output_shape(input_shape).as_list()
            output_shape = self._get_shape_tuple((-1, input_length), y, 1,
                                                 output_shape[2:])
            y = array_ops.reshape(y, output_shape)

        # Apply activity regularizer if any:
        if (hasattr(self.layer, 'activity_regularizer')
                and self.layer.activity_regularizer is not None):
            regularization_loss = self.layer.activity_regularizer(y)
            base_layer_utils.check_graph_consistency(
                regularization_loss, method='activity_regularizer')
            self.add_loss(regularization_loss, inputs)
        return y
Example No. 7
  def call(self, inputs, mask=None, training=None, initial_state=None):
    # GRU does not support constants. Ignore it during process.
    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)

    if isinstance(mask, list):
      mask = mask[0]

    input_shape = K.int_shape(inputs)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

    if not self.could_use_cudnn:
      kwargs = {'training': training}
      self.cell.reset_dropout_mask()
      self.cell.reset_recurrent_dropout_mask()

      def step(cell_inputs, cell_states):
        return self.cell.call(cell_inputs, cell_states, **kwargs)

      last_output, outputs, states = K.rnn(
          step,
          inputs,
          initial_state,
          constants=None,
          go_backwards=self.go_backwards,
          mask=mask,
          unroll=self.unroll,
          input_length=timesteps,
          time_major=self.time_major,
          zero_output_for_mask=self.zero_output_for_mask)
      # This is a dummy tensor for testing purposes.
      runtime = _runtime(_RUNTIME_UNKNOWN)
    else:
      last_output, outputs, runtime, states = self._defun_gru_call(
          inputs, initial_state, training, mask)

    if self.stateful:
      updates = [state_ops.assign(self.states[0], states[0])]
      self.add_update(updates)

    if self.return_sequences:
      output = outputs
    else:
      output = last_output

    if self.return_state:
      return [output] + list(states)
    elif self._return_runtime:
      return output, runtime
    else:
      return output
Example No. 8
  def call(self, inputs, mask=None, training=None, initial_state=None):
    # GRU does not support constants. Ignore it during process.
    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)

    if isinstance(mask, list):
      mask = mask[0]

    input_shape = K.int_shape(inputs)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

    if not self.could_use_cudnn:
      # CuDNN does not support masking; fall back to the normal GRU.
      kwargs = {'training': training}

      def step(cell_inputs, cell_states):
        return self.cell.call(cell_inputs, cell_states, **kwargs)

      last_output, outputs, states = K.rnn(
          step,
          inputs,
          initial_state,
          constants=None,
          go_backwards=self.go_backwards,
          mask=mask,
          unroll=self.unroll,
          input_length=timesteps,
          time_major=self.time_major,
          zero_output_for_mask=self.zero_output_for_mask)
      # This is a dummy tensor for testing purposes.
      runtime = _runtime('unknown')
    else:
      last_output, outputs, runtime, states = self._defun_gru_call(
          inputs, initial_state, training, mask)

    if self.stateful:
      updates = [state_ops.assign(self.states[0], states[0])]
      self.add_update(updates, inputs)

    if self.return_sequences:
      output = outputs
    else:
      output = last_output

    if self.return_state:
      return [output] + list(states)
    elif self._return_runtime:
      return output, runtime
    else:
      return output
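
The tail of both GRU variants above maps K.rnn's three return values onto the layer's output options: outputs feeds return_sequences, last_output is the default single output, and states backs return_state and the stateful updates. A toy step function makes that contract visible; the shapes and the cumulative-sum step are assumptions chosen only to keep the demo small.

import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.random.normal((2, 5, 3))
init = tf.zeros((2, 3))

def step(x_t, states):
    # running sum over time, kept as the single state
    s = states[0] + x_t
    return s, [s]

last_output, outputs, new_states = K.rnn(step, x, [init])
print(last_output.shape, outputs.shape, new_states[0].shape)
# (2, 3)    -> returned when return_sequences=False
# (2, 5, 3) -> returned when return_sequences=True
# (2, 3)    -> the final state, used for return_state / stateful updates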
Example No. 9
    def call(self, inputs, training=None, mask=None):
        kwargs = {}
        if generic_utils.has_arg(self.layer.call, 'training'):
            kwargs['training'] = training

        input_shape = K.int_shape(inputs)
        if input_shape[0] and not self._always_use_reshape:
            # batch size matters, use rnn-based implementation
            def step(x, _):
                output = self.layer(x, **kwargs)
                return output, []

            _, outputs, _ = K.rnn(step,
                                  inputs,
                                  initial_states=[],
                                  input_length=input_shape[1],
                                  unroll=False)
            y = outputs
        else:
            # No batch size specified, therefore the layer will be able
            # to process batches of any size.
            # We can go with reshape-based implementation for performance.
            input_length = input_shape[1]
            if not input_length:
                input_length = array_ops.shape(inputs)[1]
            inner_input_shape = self._get_shape_tuple((-1, ), inputs, 2)
            # Shape: (num_samples * timesteps, ...). And track the
            # transformation in self._input_map.
            inputs = array_ops.reshape(inputs, inner_input_shape)
            # (num_samples * timesteps, ...)
            if generic_utils.has_arg(self.layer.call,
                                     'mask') and mask is not None:
                inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
                kwargs['mask'] = K.reshape(mask, inner_mask_shape)
            y = self.layer(inputs, **kwargs)
            # Shape: (num_samples, timesteps, ...)
            output_shape = self.compute_output_shape(input_shape).as_list()
            output_shape = self._get_shape_tuple((-1, input_length), y, 1,
                                                 output_shape[2:])
            y = array_ops.reshape(y, output_shape)

        return y
Example No. 10
def normal_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, units,
                activation, recurrent_activation):
  input_shape = K.int_shape(inputs)
  timesteps = input_shape[1]

  def step(cell_inputs, cell_states):
    h_tm1 = cell_states[0]  # previous memory state
    c_tm1 = cell_states[1]  # previous carry state

    # Only use the second half of the bias weights.
    _, real_bias = array_ops.split(bias, 2)

    z = K.dot(cell_inputs, kernel)
    z += K.dot(h_tm1, recurrent_kernel)
    z = K.bias_add(z, real_bias)

    z0 = z[:, :units]
    z1 = z[:, units:2 * units]
    z2 = z[:, 2 * units:3 * units]
    z3 = z[:, 3 * units:]

    i = recurrent_activation(z0)
    f = recurrent_activation(z1)
    c = f * c_tm1 + i * activation(z2)
    o = recurrent_activation(z3)

    h = o * activation(c)
    return h, [h, c]

  _, outputs, new_states = K.rnn(
      step,
      inputs, [init_h, init_c],
      constants=None,
      unroll=False,
      input_length=timesteps)
  return outputs, new_states, constant_op.constant(
      'cpu', dtype=dtypes.string, name='runtime')
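
The bias handling above is the only unusual part: the function receives a CuDNN-style bias holding an input bias and a recurrent bias stacked into one vector, and keeps only the second half. A toy check of that split; the sizes and the zeros/ones fill values are assumptions.

import tensorflow as tf

units = 4
# stacked [input_bias, recurrent_bias], each of length 4 * units
bias = tf.concat([tf.zeros(4 * units), tf.ones(4 * units)], axis=0)
_, real_bias = tf.split(bias, 2)
print(real_bias.shape)  # (16,), i.e. 4 * units, all ones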
Example No. 11
  def call(self, inputs, mask=None, training=None, initial_state=None):
    # LSTM does not support constants. Ignore it during process.
    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)

    if isinstance(mask, list):
      mask = mask[0]

    input_shape = K.int_shape(inputs)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

    if not self.could_use_cudnn:
      # Fall back to use the normal LSTM.
      kwargs = {'training': training}

      def step(inputs, states):
        return self.cell.call(inputs, states, **kwargs)

      last_output, outputs, states = K.rnn(
          step,
          inputs,
          initial_state,
          constants=None,
          go_backwards=self.go_backwards,
          mask=mask,
          unroll=self.unroll,
          input_length=timesteps,
          time_major=self.time_major,
          zero_output_for_mask=self.zero_output_for_mask)
      runtime = _runtime('unknown')
    else:
      # Use the new defun approach for the backend implementation swap.
      # Note that the different implementations need to have the same function
      # signature, e.g. the tensor parameters need to have the same shapes and
      # dtypes. Since CuDNN has an extra set of biases, those biases will be
      # passed to both the normal and the CuDNN implementations.
      self.reset_dropout_mask()
      dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
      if dropout_mask is not None:
        inputs *= dropout_mask[0]
      cudnn_lstm_kwargs = {
          'inputs': inputs,
          'init_h': initial_state[0],
          'init_c': initial_state[1],
          'kernel': self.cell.kernel,
          'recurrent_kernel': self.cell.recurrent_kernel,
          'bias': self.cell.bias,
          'mask': mask,
          'time_major': self.time_major,
          'go_backwards': self.go_backwards
      }
      normal_lstm_kwargs = cudnn_lstm_kwargs.copy()
      normal_lstm_kwargs.update({
          'activation': self.activation,
          'recurrent_activation': self.recurrent_activation
      })

      if context.executing_eagerly():
        device_type = _get_context_device_type()
        can_use_gpu = (
            # Either user specified GPU or unspecified but GPU is available.
            (device_type == _GPU_DEVICE_NAME
             or (device_type is None and context.num_gpus() > 0))
            and
            (mask is None or is_sequence_right_padded(mask, self.time_major)))
        # Under eager context, check the device placement and prefer the
        # GPU implementation when GPU is available.
        if can_use_gpu:
          last_output, outputs, new_h, new_c, runtime = cudnn_lstm(
              **cudnn_lstm_kwargs)
        else:
          last_output, outputs, new_h, new_c, runtime = standard_lstm(
              **normal_lstm_kwargs)
      else:
        # Each time a `tf.function` is called, we will give it a unique
        # identifiable API name, so that Grappler won't get confused when it
        # sees multiple LSTM layers added into the same graph, and it will be able
        # to pair up the different implementations across them.
        api_name = 'lstm_' + str(uuid.uuid4())
        defun_standard_lstm = _generate_defun_backend(
            api_name, _CPU_DEVICE_NAME, standard_lstm)
        defun_cudnn_lstm = _generate_defun_backend(
            api_name, _GPU_DEVICE_NAME, cudnn_lstm)

        # Call the normal LSTM impl and register the CuDNN impl function. The
        # grappler will kick in during session execution to optimize the graph.
        last_output, outputs, new_h, new_c, runtime = defun_standard_lstm(
            **normal_lstm_kwargs)

        def register_cudnn_defun():
          function.register(defun_cudnn_lstm, **cudnn_lstm_kwargs)
          # Return a dummy value since tf.cond requires a return value.
          return 0
        if mask is None:
          register_cudnn_defun()
        else:
          # The CuDNN kernel can only handle this properly when the sequence is
          # right-padded (seq_right_padded=True).
          control_flow_ops.cond(is_sequence_right_padded(mask, self.time_major),
                                true_fn=register_cudnn_defun,
                                false_fn=lambda: 0)
      states = [new_h, new_c]

    if self.stateful:
      updates = []
      for i in range(len(states)):
        updates.append(state_ops.assign(self.states[i], states[i]))
      self.add_update(updates, inputs)

    if self.return_sequences:
      output = outputs
    else:
      output = last_output

    if self.return_state:
      return [output] + list(states)
    elif self.return_runtime:
      return output, runtime
    else:
      return output
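
In eager mode the code above decides between the CuDNN kernel and the standard kernel by looking at device placement and at whether the mask is right padded. A simplified stand-alone sketch of that decision; the function name and the mask flag here are placeholders, not the real _get_context_device_type / is_sequence_right_padded helpers used by the layer.

import tensorflow as tf

def choose_impl(mask_is_right_padded=True):
    # prefer the GPU (CuDNN) path only when a GPU exists and masking allows it
    gpu_available = bool(tf.config.list_physical_devices('GPU'))
    if gpu_available and mask_is_right_padded:
        return 'cudnn_lstm'
    return 'standard_lstm'

print(choose_impl())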
Example No. 12
  def call(self, inputs, mask=None, training=None, initial_state=None):
    # LSTM does not support constants. Ignore it during process.
    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)

    if isinstance(mask, list):
      mask = mask[0]

    input_shape = K.int_shape(inputs)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

    if not self.could_use_cudnn:
      # Fall back to use the normal LSTM.
      kwargs = {'training': training}
      self.cell.reset_dropout_mask()
      self.cell.reset_recurrent_dropout_mask()

      def step(inputs, states):
        return self.cell.call(inputs, states, **kwargs)

      last_output, outputs, states = K.rnn(
          step,
          inputs,
          initial_state,
          constants=None,
          go_backwards=self.go_backwards,
          mask=mask,
          unroll=self.unroll,
          input_length=timesteps,
          time_major=self.time_major,
          zero_output_for_mask=self.zero_output_for_mask)
      runtime = _runtime(_RUNTIME_UNKNOWN)
    else:
      # Use the new defun approach for the backend implementation swap.
      # Note that the different implementations need to have the same function
      # signature, e.g. the tensor parameters need to have the same shapes and
      # dtypes. Since CuDNN has an extra set of biases, those biases will be
      # passed to both the normal and the CuDNN implementations.
      self.reset_dropout_mask()
      dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
      if dropout_mask is not None:
        inputs = inputs * dropout_mask[0]
      cudnn_lstm_kwargs = {
          'inputs': inputs,
          'init_h': initial_state[0],
          'init_c': initial_state[1],
          'kernel': self.cell.kernel,
          'recurrent_kernel': self.cell.recurrent_kernel,
          'bias': self.cell.bias,
          'mask': mask,
          'time_major': self.time_major,
          'go_backwards': self.go_backwards
      }
      normal_lstm_kwargs = cudnn_lstm_kwargs.copy()
      normal_lstm_kwargs.update({
          'activation': self.activation,
          'recurrent_activation': self.recurrent_activation
      })

      if context.executing_eagerly():
        device_type = _get_context_device_type()
        can_use_gpu = (
            # Either user specified GPU or unspecified but GPU is available.
            (device_type == _GPU_DEVICE_NAME
             or (device_type is None and context.num_gpus() > 0))
            and
            (mask is None or is_sequence_right_padded(mask, self.time_major)))
        # Under eager context, check the device placement and prefer the
        # GPU implementation when GPU is available.
        if can_use_gpu:
          last_output, outputs, new_h, new_c, runtime = cudnn_lstm(
              **cudnn_lstm_kwargs)
        else:
          last_output, outputs, new_h, new_c, runtime = standard_lstm(
              **normal_lstm_kwargs)
      else:
        # Each time a `tf.function` is called, we will give it a unique
        # identifiable API name, so that Grappler won't get confused when it
        # sees multiple LSTM layers added into the same graph, and it will be able
        # to pair up the different implementations across them.
        api_name = 'lstm_' + str(uuid.uuid4())
        defun_standard_lstm = _generate_defun_backend(
            api_name, _CPU_DEVICE_NAME, standard_lstm)
        defun_cudnn_lstm = _generate_defun_backend(
            api_name, _GPU_DEVICE_NAME, cudnn_lstm)

        # Call the normal LSTM impl and register the CuDNN impl function. The
        # grappler will kick in during session execution to optimize the graph.
        last_output, outputs, new_h, new_c, runtime = defun_standard_lstm(
            **normal_lstm_kwargs)

        def register_cudnn_defun():
          function.register(defun_cudnn_lstm, **cudnn_lstm_kwargs)
          # Return a dummy value since tf.cond requires a return value.
          return 0
        if mask is None:
          register_cudnn_defun()
        else:
          # The CuDNN kernel can only handle this properly when the sequence is
          # right-padded (seq_right_padded=True).
          control_flow_ops.cond(is_sequence_right_padded(mask, self.time_major),
                                true_fn=register_cudnn_defun,
                                false_fn=lambda: 0)
      states = [new_h, new_c]

    if self.stateful:
      updates = []
      for i in range(len(states)):
        updates.append(state_ops.assign(self.states[i], states[i]))
      self.add_update(updates)

    if self.return_sequences:
      output = outputs
    else:
      output = last_output

    if self.return_state:
      return [output] + list(states)
    elif self.return_runtime:
      return output, runtime
    else:
      return output
Example No. 13
    def call(self, inputs, verbose=False):

        assert type(inputs) == list

        encoder_out_seq, decoder_out_seq = inputs

        if verbose:
            print('encoder_out_seq > ', encoder_out_seq.shape)
            print('decoder_out_seq > ', decoder_out_seq.shape)

        def energy_step(inputs, states):

            assert_msg = "States must be a list. However states {} is of type {}".format(
                states, type(states))

            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg

            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))
            if verbose:
                print('wa.s > ', W_a_dot_s.shape)

            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # (batch_size, 1, latent_dim)
            if verbose:
                print('Ua.h > ', U_a_dot_h.shape)

            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            if verbose:
                print('Ws+Uh > ', reshaped_Ws_plus_Uh.shape)

            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a),
                            (-1, en_seq_len))
            e_i = K.softmax(e_i)
            if verbose:
                print('ei > ', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci > ', c_i.shape)

            return c_i, [c_i]

        def create_inital_state(inputs, hidden_size):
            fake_state = K.zeros_like(
                inputs)  # (batch_size, enc_seq_len, latent_dim)
            fake_state = K.sum(fake_state, axis=[1, 2])  # (batch_size)
            fake_state = K.expand_dims(fake_state)  # (batch_size, 1)
            fake_state = K.tile(fake_state,
                                [1, hidden_size])  # (batch_size, latent_dim)

            return fake_state

        fake_state_c = create_inital_state(encoder_out_seq,
                                           encoder_out_seq.shape[-1])
        fake_state_e = create_inital_state(
            encoder_out_seq,
            encoder_out_seq.shape[1])  # (batch_size, enc_seq_len)

        # (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step,
            decoder_out_seq,
            [fake_state_e],
        )

        last_out, c_outputs, _ = K.rnn(
            context_step,
            e_outputs,
            [fake_state_c],
        )

        return c_outputs, e_outputs
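
create_inital_state only exists because K.rnn insists on receiving an initial state even though attention itself is stateless: it builds an all-zero tensor of the requested width from whatever batch size the encoder output happens to have. A standalone check of that trick on toy shapes; the shapes are assumptions.

import tensorflow as tf
from tensorflow.keras import backend as K

enc = tf.random.normal((2, 6, 8))               # (batch, enc_seq_len, latent_dim)
fake = K.sum(K.zeros_like(enc), axis=[1, 2])    # (batch,)
fake = K.expand_dims(fake)                      # (batch, 1)
fake = K.tile(fake, [1, 6])                     # (batch, enc_seq_len)
print(fake.shape)  # (2, 6)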
Example No. 14
    def call(
            self,
            inputs,
            targets,  #add targets + batch_size?
            mask=None,
            training=None,
            initial_state=None,
            constants=None):
        inputs, initial_state, constants = self._process_inputs(
            inputs, initial_state, constants)

        if mask is not None:
            # Time step masks must be the same for each input.
            # TODO(scottzhu): Should we accept multiple different masks?
            mask = nest.flatten(mask)[0]

        if nest.is_sequence(inputs):
            # In the case of nested input, use the first element for shape check.
            input_shape = K.int_shape(nest.flatten(inputs)[0])
        else:
            input_shape = K.int_shape(inputs)
        timesteps = input_shape[0] if self.time_major else input_shape[1]
        if self.unroll and timesteps is None:
            raise ValueError('Cannot unroll a RNN if the '
                             'time dimension is undefined. \n'
                             '- If using a Sequential model, '
                             'specify the time dimension by passing '
                             'an `input_shape` or `batch_input_shape` '
                             'argument to your first layer. If your '
                             'first layer is an Embedding, you can '
                             'also use the `input_length` argument.\n'
                             '- If using the functional API, specify '
                             'the time dimension by passing a `shape` '
                             'or `batch_shape` argument to your Input layer.')

        kwargs = {}
        if generic_utils.has_arg(self.cell.call, 'training'):
            kwargs['training'] = training

        # TF RNN cells expect single tensor as state instead of list wrapped tensor.
        is_tf_rnn_cell = getattr(self.cell, '_is_tf_rnn_cell',
                                 None) is not None
        if constants:
            if not generic_utils.has_arg(self.cell.call, 'constants'):
                raise ValueError('RNN cell does not support constants')

            def step(inputs, states):
                constants = states[-self._num_constants:]  # pylint: disable=invalid-unary-operand-type
                states = states[:-self._num_constants]  # pylint: disable=invalid-unary-operand-type

                states = states[0] if len(
                    states) == 1 and is_tf_rnn_cell else states

                ### LINE TO CHANGE!!!! adapt with the function call of the SMC_LSTMCell
                output, new_states = self.cell.call(inputs,
                                                    states,
                                                    constants=constants,
                                                    **kwargs)
                if not nest.is_sequence(new_states):
                    new_states = [new_states]
                return output, new_states
        else:

            def step(inputs, states):
                states = states[0] if len(
                    states) == 1 and is_tf_rnn_cell else states
                output, new_states = self.cell.call(inputs, states, **kwargs)
                if not nest.is_sequence(new_states):
                    new_states = [new_states]
                return output, new_states

        # BACK TO TENSORFLOW code with K.rnn???

        last_output, outputs, states = K.rnn(
            step,
            inputs,
            initial_state,
            constants=constants,
            go_backwards=self.go_backwards,
            mask=mask,
            unroll=self.unroll,
            input_length=timesteps,
            time_major=self.time_major,
            zero_output_for_mask=self.zero_output_for_mask)
        if self.stateful:
            updates = []
            for state_, state in zip(nest.flatten(self.states),
                                     nest.flatten(states)):
                updates.append(state_ops.assign(state_, state))
            self.add_update(updates, inputs)

        if self.return_sequences:
            output = outputs
        else:
            output = last_output

        if self.return_state:
            if not isinstance(states, (list, tuple)):
                states = [states]
            else:
                states = list(states)
            return generic_utils.to_list(output) + states
        else:
            return output
Example No. 15
def standard_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias,
                  activation, recurrent_activation, mask, time_major,
                  go_backwards):
  """LSTM with standard kernel implementation.

  This implementation can be run on all types of hardware.

  This implementation lifts out all the layer weights and makes them function
  parameters. It has the same number of tensor input params as the CuDNN
  counterpart. The RNN step logic has been simplified, e.g. dropout and masking
  are removed since the CuDNN implementation does not support them.

  Note that the first half of the bias tensor should be ignored by this impl.
  The CuDNN impl needs an extra set of input gate bias. In order to make both
  functions take parameters of the same shape, that extra set of bias is also
  fed here.

  Args:
    inputs: input tensor of LSTM layer.
    init_h: initial state tensor for the cell output.
    init_c: initial state tensor for the cell hidden state.
    kernel: weights for cell kernel.
    recurrent_kernel: weights for cell recurrent kernel.
    bias: weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    activation: Activation function to use for output.
    recurrent_activation: Activation function to use for hidden recurrent state.
    mask: Boolean tensor used to mask out steps within the sequence.
    time_major: boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.

  Returns:
    last_output: output tensor for the last timestep, which has shape
      [batch, units].
    outputs: output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: the cell output, which has the same shape as init_h.
    state_1: the cell hidden state, which has the same shape as init_c.
    runtime: constant string tensor which indicates the real runtime hardware.
      This value is for testing purposes and should not be used by users.
  """
  input_shape = K.int_shape(inputs)
  timesteps = input_shape[0] if time_major else input_shape[1]

  def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_states[0]  # previous memory state
    c_tm1 = cell_states[1]  # previous carry state

    z = K.dot(cell_inputs, kernel)
    z += K.dot(h_tm1, recurrent_kernel)
    z = K.bias_add(z, bias)

    z0, z1, z2, z3 = array_ops.split(z, 4, axis=1)

    i = recurrent_activation(z0)
    f = recurrent_activation(z1)
    c = f * c_tm1 + i * activation(z2)
    o = recurrent_activation(z3)

    h = o * activation(c)
    return h, [h, c]

  last_output, outputs, new_states = K.rnn(
      step,
      inputs, [init_h, init_c],
      constants=None,
      unroll=False,
      time_major=time_major,
      mask=mask,
      go_backwards=go_backwards,
      input_length=timesteps)
  return last_output, outputs, new_states[0], new_states[1], _runtime('cpu')
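
The step function above is the whole of the LSTM math; everything else is plumbing for K.rnn. Below is a self-contained toy run of the same gate equations, using public ops only; the use of tf.split instead of array_ops.split, the fixed sigmoid/tanh activations, and the random weights are assumptions for the sketch.

import tensorflow as tf
from tensorflow.keras import backend as K

batch, timesteps, features, units = 2, 5, 3, 4
inputs = tf.random.normal((batch, timesteps, features))
init_h = tf.zeros((batch, units))
init_c = tf.zeros((batch, units))
kernel = tf.random.normal((features, 4 * units))
recurrent_kernel = tf.random.normal((units, 4 * units))
bias = tf.zeros((4 * units,))

def step(x_t, states):
    h_tm1, c_tm1 = states                       # previous output and carry
    z = K.dot(x_t, kernel) + K.dot(h_tm1, recurrent_kernel)
    z = K.bias_add(z, bias)
    z0, z1, z2, z3 = tf.split(z, 4, axis=1)
    i, f, o = K.sigmoid(z0), K.sigmoid(z1), K.sigmoid(z3)
    c = f * c_tm1 + i * K.tanh(z2)
    h = o * K.tanh(c)
    return h, [h, c]

last_output, outputs, new_states = K.rnn(step, inputs, [init_h, init_c])
print(last_output.shape, outputs.shape)  # (2, 4) (2, 5, 4)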
Example No. 16
    def call(self, inputs, verbose=False, mask=None):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        # Note: encoder_out_seq is an array of length seq; decoder_out_seq is a single output.
        encoder_out_seq, decoder_out_seq = inputs

        logger.debug("encoder_out_seq.shape:%r", encoder_out_seq.shape)
        logger.debug("decoder_out_seq.shape:%r", decoder_out_seq.shape)

        encoder_out_seq = _p(encoder_out_seq,
                             "attention call: input encoder output sequence: encoder_out_seq")
        decoder_out_seq = _p(decoder_out_seq,
                             "attention call: input decoder output sequence: decoder_out_seq")

        # Implements the energy function: e_tj = V * tanh(W * h_j + U * S_t-1 + b)
        # inputs: I first thought this was all of the h_j, but that was wrong! This
        #         argument is the input at a single time step t, not the whole sequence.
        #         The trailing "s" (input + s) is only there because of the batch.
        # states: as I understand it, this is S_t-1
        # decode_outs has no seq axis: it is decode_out, not decode_out_seq; the
        #         trailing "s" again comes from the batch dimension.
        # At every step, however, the entire encoder_out_seq takes part in the
        # computation: one decoder_out against the whole encoder_out_seq.
        def energy_step(decode_outs, states):  # decode_outs(batch,dim)

            # decoder_seq [N,30,512]; 30 is the string length
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]  # 30, 512
            de_hidden = decode_outs.shape[-1]
            #  W * h_j
            reshaped_enc_outputs = K.reshape(
                encoder_out_seq, (-1, en_hidden))  #[b,64,512]=> [b*64,512]

            # W_a[512x512],reshaped_enc_outputs[b*64,512] => [b*64,512] => [b,64,512]
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))

            # U * S_t - 1,decode_outs[b,512],U_a[512,512] => [b,512]    => [b,1,512]
            U_a_dot_h = K.expand_dims(K.dot(decode_outs, self.U_a),
                                      axis=1)  # <= batch_size, 1, latent_dim

            # This detail is subtle: it effectively copies the decoder output time (64)
            # times and adds it to the encoder outputs [64,512].

            # tanh ( W * h_j + U * S_t-1 + b ),[b,64,512] = [b*64,512]
            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))

            # V * tanh ( W * h_j + U * S_t-1 + b ), [b*64,512]*[512,1] => [b*64,1] => [b,64]
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a),
                            (-1, en_seq_len))

            e_i = K.softmax(e_i)

            return e_i, [e_i]

        # This step function is interesting; pay close attention to its arguments:
        # encoder_out_seq: the encoder output h_i for every time step, [batch,ts,dim]
        # states:
        # inputs: the rnn input at one time step; here it happens to be the probability
        #         produced by the energy function e_ij for that time step
        # ----------------------------
        # "The step_do function takes two inputs: step_in and states.
        #   step_in is a (batch_size, input_dim) tensor (the seq axis of
        #   (batch,seq,input_dim) is dropped), representing the sample x_t at the
        #   current step, while states is a list representing y_{t-1} plus some
        #   intermediate variables."
        # e is one of the 30 steps: a 64-dimensional probability vector
        def context_step(e, states):  # e (batch,dim); each input is simply one e

            # encoder_out_seq[b,64,512] * e[64,1]
            # dot is matrix multiplication; * is element-wise multiplication
            # [b,64,512] * e[64,1] have different shapes yet can still be multiplied
            # (I tried it, no problem) ===> [b,512,1]
            # In effect this is a weighted sum of the encoder output under the
            # softmax probability distribution
            c_i = K.sum(encoder_out_seq * K.expand_dims(e, -1), axis=1)
            c_i = _p(c_i, "context_step: c_i, the expectation of h, i.e. the attention")
            return c_i, [c_i]

        #    (batch_size, enc_seq_len, hidden_size) (b,64,512)
        # => (batch_size, hidden_size)
        # This function builds the initial state value for the GRU.
        def create_inital_state(inputs, hidden_size):  # hidden_size=64
            # We are not using initial states, but need to pass something to the K.rnn function
            fake_state = K.zeros_like(
                inputs)  # [b,64,512]<= (batch_size, enc_seq_len, latent_dim)
            fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
            fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
            fake_state = K.tile(
                fake_state,
                [1, hidden_size])  # <= (batch_size, latent_dim) (b,64)
            return fake_state

        # encoder_out_seq = (batch_size, enc_seq_len, latent_dim)
        # fake_state_c ==   (batch_size, latent_dim)
        # fake_state_e ==   (batch_size, enc_seq); this last dimension is a bit hard
        #   to grasp: it is the sequence of attention values the decoder assigns to
        #   each encoder step, one scalar per step
        # K.rnn(step function, input x, initial state): K.rnn takes three basic
        #   arguments: the step_do function written above, the input time sequence,
        #   and the initial state
        # This rnn plays the decoder role: it feeds e_ij = a(s_i-1, h_j), iterating
        #   over j, i.e. K.rnn computes the e_ij for every h_j
        # The output e_outputs is a sequence of probabilities
        # e_ij (i fixed, j indexing the encoder h) is then fed into another rnn that
        #   computes the corresponding outputs; that one is the real Decoder!!!
        # shape[1] is seq = 64, the sequence length
        fake_state_e = create_inital_state(
            encoder_out_seq, encoder_out_seq.shape[1]
        )  # encoder_out_seq.shape[1]) , fake_state_e (batch,enc_seq_len)
        fake_state_e = _p(fake_state_e, "fake_state_e")

        # The output is a sequence of e values, one per decoder time step
        # K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|
        # This step runs 30 times (the decoder, i.e. the string length); each run
        # yields a 64-dimensional probability vector (the encoder time sequence)
        last_out, e_outputs, _ = K.rnn(
            step_function=energy_step,
            inputs=decoder_out_seq,
            initial_states=[fake_state_e],  #[b,64]
        )
        # e_outputs [30,64]

        e_outputs = _p(e_outputs, "energy function e output")
        # shape[-1] is the encoder hidden size

        fake_state_c = create_inital_state(encoder_out_seq,
                                           encoder_out_seq.shape[-1])  #

        # K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|K.rnn|
        last_out, c_outputs, _ = K.rnn(  # context_step computes the attention expectation, sum(eij*encoder_out); output is (batch,encoder_seq,)
            step_function=context_step,
            inputs=e_outputs,
            initial_states=[fake_state_c],  # [b,1024]
        )
        # c_outputs [b,64,512]
        c_outputs = _p(c_outputs, "attention c output shape")

        # Outputs:
        # attention vector c_outputs: (batch, image seq, 512),
        # attention weights e_outputs: (batch, image seq),
        # energy function e output:::: [3 29 64]
        # attention c output::::   [3 29 1024]
        return c_outputs, e_outputs
Example No. 17
    def call(self, inputs):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs

        logger.debug(f"encoder_out_seq.shape = {encoder_out_seq.shape}")
        logger.debug(f"decoder_out_seq.shape = {decoder_out_seq.shape}")

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """

            logger.debug("Running energy computation step")

            if not isinstance(states, (list, tuple)):
                raise TypeError(
                    f"States must be an iterable. Got {states} of type {type(states)}"
                )

            encoder_full_seq = states[-1]
            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_full_seq, self.W_a)
            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # <= batch_size, 1, latent_dim

            logger.debug(f"U_a_dot_h.shape = {U_a_dot_h.shape}")
            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)

            logger.debug(f"Ws_plus_Uh.shape = {Ws_plus_Uh.shape}")
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            logger.debug(f"ei.shape = {e_i.shape}")

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """

            logger.debug("Running attention vector computation step")

            if not isinstance(states, (list, tuple)):
                raise TypeError(
                    f"States must be an iterable. Got {states} of type {type(states)}"
                )

            encoder_full_seq = states[-1]

            # <= batch_size, hidden_size
            c_i = K.sum(encoder_full_seq * K.expand_dims(inputs, -1), axis=1)

            logger.debug(f"ci.shape = {c_i.shape}")

            return c_i, [c_i]

        # we don't maintain states between steps when computing attention
        # attention is stateless, so we're passing a fake state for RNN step function
        fake_state_c = K.sum(encoder_out_seq, axis=1)  # <= (batch_size, latent_dim)
        fake_state_e = K.sum(encoder_out_seq,
                             axis=2)  # <= (batch_size, enc_seq_len)
        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(energy_step,
                                       decoder_out_seq, [fake_state_e],
                                       constants=[encoder_out_seq])
        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(context_step,
                                       e_outputs, [fake_state_c],
                                       constants=[encoder_out_seq])

        return c_outputs, e_outputs
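
Unlike the earlier attention variants that simply close over encoder_out_seq, this version hands it to K.rnn through constants, so each step function receives it appended after its own states and reads it back as states[-1]. A toy sketch of that mechanism with a simplified dot-product score; the shapes and the scoring function are assumptions, not the layer's tanh energy.

import tensorflow as tf
from tensorflow.keras import backend as K

batch, en_len, de_len, units = 2, 6, 4, 8
enc = tf.random.normal((batch, en_len, units))
dec = tf.random.normal((batch, de_len, units))

def step(dec_t, states):
    enc_seq = states[-1]                                   # the constant
    scores = K.sum(enc_seq * K.expand_dims(dec_t, 1), axis=-1)
    scores = K.softmax(scores)                             # (batch, en_len)
    return scores, [scores]

fake_state = K.sum(enc, axis=2)                            # (batch, en_len)
_, score_seq, _ = K.rnn(step, dec, [fake_state], constants=[enc])
print(score_seq.shape)  # (2, 4, 6)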
Example No. 18
def standard_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias,
                  activation, recurrent_activation, mask, time_major,
                  go_backwards, sequence_lengths, zero_output_for_mask):
  """LSTM with standard kernel implementation.

  This implementation can be run on all types of hardware.

  This implementation lifts out all the layer weights and makes them function
  parameters. It has the same number of tensor input params as the CuDNN
  counterpart. The RNN step logic has been simplified, e.g. dropout and masking
  are removed since the CuDNN implementation does not support them.

  Note that the first half of the bias tensor should be ignored by this impl.
  The CuDNN impl needs an extra set of input gate bias. In order to make both
  functions take parameters of the same shape, that extra set of bias is also
  fed here.

  Args:
    inputs: input tensor of LSTM layer.
    init_h: initial state tensor for the cell output.
    init_c: initial state tensor for the cell hidden state.
    kernel: weights for cell kernel.
    recurrent_kernel: weights for cell recurrent kernel.
    bias: weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    activation: Activation function to use for output.
    recurrent_activation: Activation function to use for hidden recurrent state.
    mask: Boolean tensor used to mask out steps within the sequence.
    time_major: boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.
    sequence_lengths: The lengths of all sequences coming from a variable length
      input, such as ragged tensors. If the input has a fixed timestep size,
      this should be None.
    zero_output_for_mask: Boolean, whether to output zero for masked timestep.

  Returns:
    last_output: output tensor for the last timestep, which has shape
      [batch, units].
    outputs: output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: the cell output, which has the same shape as init_h.
    state_1: the cell hidden state, which has the same shape as init_c.
    runtime: constant string tensor which indicates the real runtime hardware.
      This value is for testing purposes and should not be used by users.
  """
  input_shape = K.int_shape(inputs)
  timesteps = input_shape[0] if time_major else input_shape[1]

  def step(cell_inputs, cell_states):
    return LSTM_step(cell_inputs, cell_states, kernel, recurrent_kernel, bias)

  last_output, outputs, new_states = K.rnn(
      step,
      inputs, [init_h, init_c],
      constants=None,
      unroll=False,
      time_major=time_major,
      mask=mask,
      go_backwards=go_backwards,
      input_length=(sequence_lengths
                    if sequence_lengths is not None else timesteps),
      zero_output_for_mask=zero_output_for_mask)
  return (last_output, outputs, new_states[0], new_states[1],
          _runtime(_RUNTIME_CPU))
Example No. 19
    def call(self, inputs, training=None, mask=None):
        kwargs = {}
        if generic_utils.has_arg(self.layer.call, 'training'):
            kwargs['training'] = training

        input_shape = nest.map_structure(
            lambda x: tensor_shape.TensorShape(backend.int_shape(x)), inputs)
        batch_size = tf_utils.convert_shapes(input_shape)
        batch_size = nest.flatten(batch_size)[0]
        if batch_size and not self._always_use_reshape:
            inputs, row_lengths = backend.convert_inputs_if_ragged(inputs)
            is_ragged_input = row_lengths is not None
            input_length = tf_utils.convert_shapes(input_shape)
            input_length = nest.flatten(input_length)[1]

            # batch size matters, use rnn-based implementation
            def step(x, _):
                output = self.layer(x, **kwargs)
                return output, []

            _, outputs, _ = backend.rnn(step,
                                        inputs,
                                        initial_states=[],
                                        input_length=row_lengths[0]
                                        if is_ragged_input else input_length,
                                        mask=mask,
                                        unroll=False)
            # pylint: disable=g-long-lambda
            y = nest.map_structure(
                lambda output: backend.maybe_convert_to_ragged(
                    is_ragged_input, output, row_lengths), outputs)
        else:
            # No batch size specified, therefore the layer will be able
            # to process batches of any size.
            # We can go with reshape-based implementation for performance.
            is_ragged_input = nest.map_structure(
                lambda x: isinstance(x, ragged_tensor.RaggedTensor), inputs)
            is_ragged_input = nest.flatten(is_ragged_input)
            if all(is_ragged_input):
                input_values = nest.map_structure(lambda x: x.values, inputs)
                input_row_lenghts = nest.map_structure(
                    lambda x: x.nested_row_lengths()[0], inputs)
                y = self.layer(input_values, **kwargs)
                y = nest.map_structure(
                    ragged_tensor.RaggedTensor.from_row_lengths, y,
                    input_row_lenghts)
            elif any(is_ragged_input):
                raise ValueError(
                    'All inputs has to be either ragged or not, '
                    'but not mixed. You passed: {}'.format(inputs))
            else:
                input_length = tf_utils.convert_shapes(input_shape)
                input_length = nest.flatten(input_length)[1]
                if not input_length:
                    input_length = nest.map_structure(
                        lambda x: array_ops.shape(x)[1], inputs)
                    input_length = generic_utils.to_list(
                        nest.flatten(input_length))[0]

                inner_input_shape = nest.map_structure(
                    lambda x: self._get_shape_tuple((-1, ), x, 2), inputs)
                # Shape: (num_samples * timesteps, ...). And track the
                # transformation in self._input_map.
                inputs = nest.map_structure_up_to(inputs, array_ops.reshape,
                                                  inputs, inner_input_shape)
                # (num_samples * timesteps, ...)
                if generic_utils.has_arg(self.layer.call,
                                         'mask') and mask is not None:
                    inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
                    kwargs['mask'] = backend.reshape(mask, inner_mask_shape)

                y = self.layer(inputs, **kwargs)

                # Shape: (num_samples, timesteps, ...)
                output_shape = self.compute_output_shape(input_shape)
                # pylint: disable=g-long-lambda
                output_shape = nest.map_structure(
                    lambda tensor, int_shape: self._get_shape_tuple(
                        (-1, input_length), tensor, 1, int_shape[2:]), y,
                    output_shape)
                y = nest.map_structure_up_to(y, array_ops.reshape, y,
                                             output_shape)
                if not context.executing_eagerly():
                    # Set the static shape for the result since it might be lost during
                    # array_ops reshape, eg, some `None` dim in the result could be
                    # inferred.
                    nest.map_structure_up_to(
                        y, lambda tensor, shape: tensor.set_shape(shape), y,
                        self.compute_output_shape(input_shape))

        return y
Example No. 20
0
def standard_gru(inputs, init_h, kernel, recurrent_kernel, bias, activation,
                 recurrent_activation, mask, time_major, go_backwards):
  """GRU with standard kernel implementation.

  This implementation can be run on all types of hardware.

  This implementation lifts out all the layer weights and makes them function
  parameters. It has the same number of tensor input parameters as the CuDNN
  counterpart. The RNN step logic has been simplified, e.g. dropout and masking
  are removed since the CuDNN implementation does not support them.

  Arguments:
    inputs: Input tensor of GRU layer.
    init_h: Initial state tensor for the cell output.
    kernel: Weights for cell kernel.
    recurrent_kernel: Weights for cell recurrent kernel.
    bias: Weights for cell kernel bias and recurrent bias. The bias contains the
      combined input_bias and recurrent_bias.
    activation: Activation function to use for output.
    recurrent_activation: Activation function to use for hidden recurrent state.
    mask: Binary tensor of shape `(samples, timesteps)` indicating whether
      a given timestep should be masked.
    time_major: Boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.

  Returns:
    last_output: output tensor for the last timestep, which has shape
      [batch, units].
    outputs: output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: the cell output, which has the same shape as init_h.
    runtime: constant string tensor which indicates the real runtime hardware.
      This value is intended for testing purposes only.
  """
  input_shape = K.int_shape(inputs)
  timesteps = input_shape[0] if time_major else input_shape[1]

  input_bias, recurrent_bias = array_ops.unstack(bias)

  def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_states[0]

    # inputs projected by all gate matrices at once
    matrix_x = K.dot(cell_inputs, kernel)
    matrix_x = K.bias_add(matrix_x, input_bias)

    x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=1)

    # hidden state projected by all gate matrices at once
    matrix_inner = K.dot(h_tm1, recurrent_kernel)
    matrix_inner = K.bias_add(matrix_inner, recurrent_bias)

    recurrent_z, recurrent_r, recurrent_h = array_ops.split(matrix_inner, 3,
                                                            axis=1)
    z = recurrent_activation(x_z + recurrent_z)
    r = recurrent_activation(x_r + recurrent_r)
    hh = activation(x_h + r * recurrent_h)

    # previous and candidate state mixed by update gate
    h = z * h_tm1 + (1 - z) * hh
    return h, [h]

  last_output, outputs, new_states = K.rnn(
      step,
      inputs, [init_h],
      constants=None,
      unroll=False,
      time_major=time_major,
      mask=mask,
      go_backwards=go_backwards,
      input_length=timesteps)
  return last_output, outputs, new_states[0], _runtime('cpu')
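
As a hedged, self-contained sketch (assuming TensorFlow 2.x), the same fused-gate GRU step can be driven through `tf.keras.backend.rnn` with random weights; all sizes and names below are illustrative, not taken from the snippet.

import tensorflow as tf
from tensorflow.keras import backend as K

batch, timesteps, features, units = 4, 7, 5, 8
inputs = tf.random.normal([batch, timesteps, features])
init_h = tf.zeros([batch, units])
kernel = tf.random.normal([features, 3 * units])
recurrent_kernel = tf.random.normal([units, 3 * units])
input_bias = tf.zeros([3 * units])
recurrent_bias = tf.zeros([3 * units])

def step(cell_inputs, cell_states):
    # One GRU step: fused projections, three-way split, update/reset gates.
    h_tm1 = cell_states[0]
    x = K.bias_add(K.dot(cell_inputs, kernel), input_bias)
    x_z, x_r, x_h = tf.split(x, 3, axis=1)
    inner = K.bias_add(K.dot(h_tm1, recurrent_kernel), recurrent_bias)
    r_z, r_r, r_h = tf.split(inner, 3, axis=1)
    z = tf.sigmoid(x_z + r_z)
    r = tf.sigmoid(x_r + r_r)
    hh = tf.tanh(x_h + r * r_h)
    h = z * h_tm1 + (1 - z) * hh
    return h, [h]

last_output, outputs, new_states = K.rnn(step, inputs, [init_h])
print(last_output.shape)  # (4, 8)
print(outputs.shape)      # (4, 7, 8)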
Ejemplo n.º 21
0
    def call(self,
             inputs,
             mask=None,
             training=None,
             initial_state=None,
             constants=None):
        # note that the .build() method of subclasses MUST define
        # self.input_spec and self.state_spec with complete input shapes.
        inputs, initial_state, constants = self._process_inputs(
            inputs, initial_state, constants)

        if isinstance(mask, list):
            mask = mask[0]
        timesteps = K.int_shape(inputs)[1]

        kwargs = {}
        if generic_utils.has_arg(self.cell.call, 'training'):
            kwargs['training'] = training

        if constants:
            if not generic_utils.has_arg(self.cell.call, 'constants'):
                raise ValueError('RNN cell does not support constants')

            def step(inputs, states):
                constants = states[-self._num_constants:]  # pylint: disable=invalid-unary-operand-type
                states = states[:-self._num_constants]  # pylint: disable=invalid-unary-operand-type
                return self.cell.call(inputs,
                                      states,
                                      constants=constants,
                                      **kwargs)
        else:

            def step(inputs, states):
                return self.cell.call(inputs, states, **kwargs)

        last_output, outputs, states = K.rnn(step,
                                             inputs,
                                             initial_state,
                                             constants=constants,
                                             go_backwards=self.go_backwards,
                                             mask=mask,
                                             input_length=timesteps)
        if self.stateful:
            updates = [
                K.update(self_state, state)
                for self_state, state in zip(self.states, states)
            ]
            self.add_update(updates)

        if self.return_sequences:
            output = outputs
        else:
            output = last_output

        if self.return_state:
            if not isinstance(states, (list, tuple)):
                states = [states]
            else:
                states = list(states)
            return [output] + states
        else:
            return output
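
A small sketch of the `constants` plumbing used above, under the assumption that `tf.keras.backend.rnn` appends the constants to the state list it passes to the step function (which is why the code slices them back out). Every tensor and size here is made up for illustration.

import tensorflow as tf
from tensorflow.keras import backend as K

batch, timesteps, units = 2, 5, 3
inputs = tf.random.normal([batch, timesteps, units])
init_state = tf.zeros([batch, units])
constant = tf.ones([batch, units])  # e.g. a fixed context vector

def step(x, states_and_constants):
    constants = states_and_constants[-1:]   # constants ride at the end
    states = states_and_constants[:-1]
    h = tf.tanh(x + states[0] + constants[0])
    return h, [h]

last_output, outputs, _ = K.rnn(step, inputs, [init_state], constants=[constant])
print(outputs.shape)  # (2, 5, 3)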
Ejemplo n.º 22
0
    def call(self, inputs, initial_state=None, training=None, **kwargs):
        memory, dec_inputs = inputs

        dec_original_shape = K.shape(dec_inputs)

        dec_inputs_reshaped = K.reshape(
            dec_inputs,
            [dec_original_shape[0], -1, self.n_mels * self.output_per_step])
        go_frame = K.zeros_like(dec_inputs_reshaped[:, 1, :])
        dec_inputs_with_goframe = K.concatenate(
            [K.expand_dims(go_frame, axis=1), dec_inputs_reshaped[:, :-1, :]],
            axis=1)

        values = memory  # TODO mask option for memory
        keys = self.attention_mechanism.memory_layer(memory)

        if training is None:
            training = K.learning_phase()

        if initial_state is None:
            initial_state = [
                go_frame,
                K.sum(K.zeros_like(memory), axis=1),
                K.sum(K.zeros_like(memory), axis=2),
                self.attn_rnn_cell.get_initial_state(
                    batch_size=dec_original_shape[0], dtype=dec_inputs.dtype),
                self.decoderRNNCell1.get_initial_state(
                    batch_size=dec_original_shape[0], dtype=dec_inputs.dtype),
                self.decoderRNNCell2.get_initial_state(
                    batch_size=dec_original_shape[0], dtype=dec_inputs.dtype)
            ]

        def step(dec_input, states):
            (prev_output, prev_attention, prev_alignment, prev_attn_rnn_state,
             prev_dec_rnn1_state, prev_dec_rnn2_state) = states

            dec_input = K.switch(training, dec_input, prev_output)

            prenet_out = self.prenet(dec_input)
            cell_inputs = K.concatenate([prenet_out, prev_attention], axis=-1)
            cell_out, next_attn_rnn_state = self.attn_rnn_cell(
                cell_inputs, [prev_attn_rnn_state])
            next_attention, next_alignment = self.attention_mechanism(
                [cell_out, values, keys])
            concatenated = K.concatenate([next_attention, cell_out], axis=-1)
            projected = self.projection(concatenated)
            dec_rnn1_out, next_dec_rnn1_state = self.decoderRNNCell1(
                projected, [prev_dec_rnn1_state])
            res_conn1 = projected + dec_rnn1_out
            dec_rnn2_out, next_dec_rnn2_state = self.decoderRNNCell2(
                res_conn1, [prev_dec_rnn2_state])
            res_conn2 = res_conn1 + dec_rnn2_out
            next_output = self.output_projection(res_conn2)

            return [next_output, next_alignment], [
                next_output, next_attention, next_alignment,
                next_attn_rnn_state, next_dec_rnn1_state, next_dec_rnn2_state
            ]

        _, all_outputs, _ = K.rnn(step, dec_inputs_with_goframe, initial_state)
        dec_outputs = K.reshape(
            all_outputs[0],
            (dec_original_shape[0], dec_original_shape[1], self.n_mels))
        alignments = all_outputs[1]

        return dec_outputs, alignments
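
The decoder above relies on `K.rnn` accepting a step function that returns a list of outputs (the mel frame and the alignment) and stacking each one over time. A hedged toy sketch of that behaviour, with made-up shapes:

import tensorflow as tf
from tensorflow.keras import backend as K

batch, timesteps, dim = 2, 5, 3
inputs = tf.random.normal([batch, timesteps, dim])
init_state = tf.zeros([batch, dim])

def step(x, states):
    h = tf.tanh(x + states[0])
    extra = tf.reduce_sum(h, axis=-1, keepdims=True)  # a second per-step output
    return [h, extra], [h]

_, all_outputs, _ = K.rnn(step, inputs, [init_state])
print(all_outputs[0].shape)  # (2, 5, 3)  main output sequence
print(all_outputs[1].shape)  # (2, 5, 1)  auxiliary sequence, like the alignments above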
Ejemplo n.º 23
0
def standard_gru(inputs, init_h, kernel, recurrent_kernel, bias, activation,
                 recurrent_activation, mask, time_major, go_backwards):
  """GRU with standard kernel implementation.

  This implementation can be run on all types of hardware.

  This implementation lifts out all the layer weights and makes them function
  parameters. It has the same number of tensor input parameters as the CuDNN
  counterpart. The RNN step logic has been simplified, e.g. dropout and masking
  are removed since the CuDNN implementation does not support them.

  Arguments:
    inputs: Input tensor of GRU layer.
    init_h: Initial state tensor for the cell output.
    kernel: Weights for cell kernel.
    recurrent_kernel: Weights for cell recurrent kernel.
    bias: Weights for cell kernel bias and recurrent bias. The bias contains the
      combined input_bias and recurrent_bias.
    activation: Activation function to use for output.
    recurrent_activation: Activation function to use for hidden recurrent state.
    mask: Binary tensor of shape `(samples, timesteps)` indicating whether
      a given timestep should be masked.
    time_major: Boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.

  Returns:
    last_output: output tensor for the last timestep, which has shape
      [batch, units].
    outputs: output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: the cell output, which has the same shape as init_h.
    runtime: constant string tensor which indicates the real runtime hardware.
      This value is intended for testing purposes only.
  """
  input_shape = K.int_shape(inputs)
  timesteps = input_shape[0] if time_major else input_shape[1]

  input_bias, recurrent_bias = array_ops.unstack(bias)

  def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_states[0]

    # inputs projected by all gate matrices at once
    matrix_x = K.dot(cell_inputs, kernel)
    matrix_x = K.bias_add(matrix_x, input_bias)

    x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=1)

    # hidden state projected by all gate matrices at once
    matrix_inner = K.dot(h_tm1, recurrent_kernel)
    matrix_inner = K.bias_add(matrix_inner, recurrent_bias)

    recurrent_z, recurrent_r, recurrent_h = array_ops.split(matrix_inner, 3,
                                                            axis=1)
    z = recurrent_activation(x_z + recurrent_z)
    r = recurrent_activation(x_r + recurrent_r)
    hh = activation(x_h + r * recurrent_h)

    # previous and candidate state mixed by update gate
    h = z * h_tm1 + (1 - z) * hh
    return h, [h]

  last_output, outputs, new_states = K.rnn(
      step,
      inputs, [init_h],
      constants=None,
      unroll=False,
      time_major=time_major,
      mask=mask,
      go_backwards=go_backwards,
      input_length=timesteps)
  return last_output, outputs, new_states[0], _runtime(_RUNTIME_CPU)
Ejemplo n.º 24
0
  def call(self,
           inputs,
           mask=None,
           training=None,
           initial_state=None,
           constants=None):
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if isinstance(inputs, list):
      inputs = inputs[0]
    if initial_state is not None:
      pass
    elif self.stateful:
      initial_state = self.states
    else:
      initial_state = self.get_initial_state(inputs)

    if isinstance(mask, list):
      mask = mask[0]

    if len(initial_state) != len(self.states):
      raise ValueError('Layer has ' + str(len(self.states)) +
                       ' states but was passed ' +
                       str(len(initial_state)) +
                       ' initial states.')
    timesteps = K.int_shape(inputs)[1]

    kwargs = {}
    if generic_utils.has_arg(self.cell.call, 'training'):
      kwargs['training'] = training

    if constants:
      if not generic_utils.has_arg(self.cell.call, 'constants'):
        raise ValueError('RNN cell does not support constants')

      def step(inputs, states):
        constants = states[-self._num_constants:]
        states = states[:-self._num_constants]
        return self.cell.call(inputs, states, constants=constants,
                              **kwargs)
    else:
      def step(inputs, states):
        return self.cell.call(inputs, states, **kwargs)

    last_output, outputs, states = K.rnn(step,
                                         inputs,
                                         initial_state,
                                         constants=constants,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         input_length=timesteps)
    if self.stateful:
      updates = []
      for i in range(len(states)):
        updates.append(K.update(self.states[i], states[i]))
      self.add_update(updates, inputs=True)

    if self.return_sequences:
      output = outputs
    else:
      output = last_output

    # Properly set learning phase
    if getattr(last_output, '_uses_learning_phase', False):
      output._uses_learning_phase = True

    if self.return_state:
      if not isinstance(states, (list, tuple)):
        states = [states]
      else:
        states = list(states)
      return [output] + states
    else:
      return output
Ejemplo n.º 25
0
def standard_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias,
                  activation, recurrent_activation, mask, time_major,
                  go_backwards):
  """LSTM with standard kernel implementation.

  This implementation can be run on all types of hardware.

  This implementation lifts out all the layer weights and makes them function
  parameters. It has the same number of tensor input parameters as the CuDNN
  counterpart. The RNN step logic has been simplified, e.g. dropout and masking
  are removed since the CuDNN implementation does not support them.

  Note that the first half of the bias tensor should be ignored by this
  implementation. The CuDNN implementation needs an extra set of input gate
  biases. In order to make both functions take parameters of the same shape,
  that extra set of biases is also fed here.

  Args:
    inputs: input tensor of LSTM layer.
    init_h: initial state tensor for the cell output.
    init_c: initial state tensor for the cell hidden state.
    kernel: weights for cell kernel.
    recurrent_kernel: weights for cell recurrent kernel.
    bias: weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    activation: Activation function to use for output.
    recurrent_activation: Activation function to use for hidden recurrent state.
    mask: Boolean tensor for masking out steps within the sequence.
    time_major: boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.

  Returns:
    last_output: output tensor for the last timestep, which has shape
      [batch, units].
    outputs: output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: the cell output, which has the same shape as init_h.
    state_1: the cell hidden state, which has the same shape as init_c.
    runtime: constant string tensor which indicates the real runtime hardware.
      This value is intended for testing purposes only.
  """
  input_shape = K.int_shape(inputs)
  timesteps = input_shape[0] if time_major else input_shape[1]

  def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_states[0]  # previous memory state
    c_tm1 = cell_states[1]  # previous carry state

    z = K.dot(cell_inputs, kernel)
    z += K.dot(h_tm1, recurrent_kernel)
    z = K.bias_add(z, bias)

    z0, z1, z2, z3 = array_ops.split(z, 4, axis=1)

    i = recurrent_activation(z0)
    f = recurrent_activation(z1)
    c = f * c_tm1 + i * activation(z2)
    o = recurrent_activation(z3)

    h = o * activation(c)
    return h, [h, c]

  last_output, outputs, new_states = K.rnn(
      step,
      inputs, [init_h, init_c],
      constants=None,
      unroll=False,
      time_major=time_major,
      mask=mask,
      go_backwards=go_backwards,
      input_length=timesteps)
  return (last_output, outputs, new_states[0], new_states[1],
          _runtime(_RUNTIME_CPU))
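
For reference, a single step of the fused LSTM math above, written as a standalone sketch with random weights (all names and sizes are assumptions); it highlights the four-way gate split and the two states (h, c) that come back from each step.

import tensorflow as tf
from tensorflow.keras import backend as K

batch, features, units = 2, 5, 4
x_t = tf.random.normal([batch, features])
h_tm1 = tf.zeros([batch, units])
c_tm1 = tf.zeros([batch, units])
kernel = tf.random.normal([features, 4 * units])
recurrent_kernel = tf.random.normal([units, 4 * units])
bias = tf.zeros([4 * units])

# Fused projection, then split into input / forget / cell / output gates.
z = K.bias_add(K.dot(x_t, kernel) + K.dot(h_tm1, recurrent_kernel), bias)
z_i, z_f, z_c, z_o = tf.split(z, 4, axis=1)
c = tf.sigmoid(z_f) * c_tm1 + tf.sigmoid(z_i) * tf.tanh(z_c)
h = tf.sigmoid(z_o) * tf.tanh(c)
print(h.shape, c.shape)  # (2, 4) (2, 4): two states per timestep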
Ejemplo n.º 26
0
  def call(self,
           inputs,
           mask=None,
           training=None,
           initial_state=None,
           constants=None):
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if isinstance(inputs, list):
      inputs = inputs[0]
    if initial_state is not None:
      pass
    elif self.stateful:
      initial_state = self.states
    else:
      initial_state = self.get_initial_state(inputs)

    if isinstance(mask, list):
      mask = mask[0]

    if len(initial_state) != len(self.states):
      raise ValueError('Layer has ' + str(len(self.states)) +
                       ' states but was passed ' +
                       str(len(initial_state)) +
                       ' initial states.')
    timesteps = K.int_shape(inputs)[1]

    kwargs = {}
    if generic_utils.has_arg(self.cell.call, 'training'):
      kwargs['training'] = training

    if constants:
      if not generic_utils.has_arg(self.cell.call, 'constants'):
        raise ValueError('RNN cell does not support constants')

      def step(inputs, states):
        constants = states[-self._num_constants:]
        states = states[:-self._num_constants]
        return self.cell.call(inputs, states, constants=constants,
                              **kwargs)
    else:
      def step(inputs, states):
        return self.cell.call(inputs, states, **kwargs)
    
    last_output, outputs, states = K.rnn(step,
                                         inputs,
                                         initial_state,
                                         constants=constants,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         input_length=timesteps)
    if self.stateful:
      updates = []
      for i in range(len(states)):
        updates.append(K.update(self.states[i], states[i]))
      self.add_update(updates)

    if self.return_sequences:
      output = outputs
    else:
      output = last_output

    if self.return_state:
      if not isinstance(states, (list, tuple)):
        states = [states]
      else:
        states = list(states)
      return [output] + states
    else:
      return output
Ejemplo n.º 27
0
    def call(self, inputs, verbose=False):
        """
        inputs: {"values": [encoder_output_sequence], "query": decoder_output_sequence}
        """
        assert isinstance(inputs, dict)
        assert 'values' in inputs
        assert 'query' in inputs

        if not isinstance(inputs["values"], list):
            inputs["values"] = [inputs["values"]]

        encoder_out_seq, decoder_out_seq = inputs["values"], inputs["query"]
        if verbose:
            print('encoder_out_seq>',
                  [K.int_shape(eos) for eos in encoder_out_seq])
            print('decoder_out_seq>', K.int_shape(decoder_out_seq))

        # [h . W_h] of shape [(batch_size, hi, d3)]
        W_hi = [
            K.dot(eos, self.W_h[i]) for i, eos in enumerate(encoder_out_seq)
        ]

        def broadcast_sum(a, b):
            # Going from (batch_size, a1, d3) to (batch_size, a1, 1, d3)
            a = K.expand_dims(a, 2)
            # Going from (batch_size, b1, d3) to (batch_size, 1, b1, d3)
            b = K.expand_dims(b, 1)
            #  Will be broadcast to (batch_size, a1, b1, d3)
            c = a + b
            # Going from (batch_size, a1, b1, d3) to (batch_size, a1*b1, d3)
            cs = K.shape(c)
            new_shape = K.concatenate([cs[0:1], cs[1:2] * cs[2:3], cs[3:4]])
            return K.reshape(c, new_shape)

        # (batch_size, h1*h2*...*hn, d3)
        W_hi = functools.reduce(broadcast_sum, W_hi)

        def broadcast_concat(a, b):
            a_shape = K.shape(a)
            b_shape = K.shape(b)

            # Going from (batch_size, a1, lat_a) to (batch_size, a1, 1, lat_a)
            a = K.expand_dims(a, 2)
            # Going from (batch_size, b1, lat_b) to (batch_size, 1, b1, lat_b)
            b = K.expand_dims(b, 1)

            # (batch_size, b1, lat_a)
            a_complement = tf.zeros(
                K.concatenate([
                    a_shape[0:1],
                    b_shape[1:2],
                    a_shape[2:3],
                ]))
            # (batch_size, a1, lat_b)
            b_complement = tf.zeros(
                K.concatenate([
                    b_shape[0:1],
                    a_shape[1:2],
                    b_shape[2:3],
                ]))

            # Going from (batch_size, b1, lat_a) to (batch_size, 1, b1, lat_a)
            a_complement = K.expand_dims(a_complement, 1)
            # Going from (batch_size, a1, lat_b) to (batch_size, a1, 1, lat_b)
            b_complement = K.expand_dims(b_complement, 2)

            # Just to broadcast
            a = a + a_complement
            b = b + b_complement

            # (batch_size, a1, b1, lat_a+lat_b)
            c = K.concatenate([a, b])
            c_shape = K.shape(c)
            # (batch_size, a1*b1, lat_a+lat_b)
            r = K.reshape(
                c,
                K.concatenate([
                    c_shape[0:1],
                    c_shape[1:2] * c_shape[2:3],
                    c_shape[3:4],
                ]))
            print("r.shape: ", r.shape)
            return r

        # (batch_size, h1*h2*...*hn, lat1+lat2+...+latn) for later
        hiddens_combined = functools.reduce(broadcast_concat, encoder_out_seq)

        if verbose:
            print('wa.s>', K.int_shape(W_hi))

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            # input: (batch_size, latent_dim)
            assert_msg = "States must be a list. However states {} is of type {}".format(
                states, type(states))
            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg
            """ Computing sj.Ua """
            # (batch_size, 1, d3)
            U_a_dot_s = K.expand_dims(K.dot(inputs, self.U_a), 1)
            if verbose:
                print('Ua.h>', K.int_shape(U_a_dot_s))
            """ tanh(h.Wa + s.Ua) """
            # (batch_size, h1*h2*...*hn, d3) = (batch_size, h1*h2*...*hn, d3) + (batch_size, 1, d3)
            Wh_plus_Us = K.tanh(W_hi + U_a_dot_s)
            # (batch_size, d3, h1*h2*...*hn)
            Wh_plus_Us = K.permute_dimensions(Wh_plus_Us, (0, 2, 1))
            if verbose:
                print('Wh+Us>', K.int_shape(Wh_plus_Us))
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # (1, batch_size, h1*h2*...*hn) = (1, d3) . (batch_size, d3, h1*h2*...*hn)
            Wh_plus_Us_dot_Va = K.dot(self.V_a, Wh_plus_Us)
            # (batch_size, h1*h2*...*hn)
            e_i = K.squeeze(Wh_plus_Us_dot_Va, 0)
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', K.int_shape(e_i))

            # (batch_size, h1*h2*...*hn)
            return e_i, states

        def context_step(inputs, states):
            """ Step function for computing ci using ei """
            # (batch_size, lat1+lat2+...+latn) = (batch_size, h1*h2*...*hn, lat1+lat2+...+latn) * (batch_size, h1*h2*...*hn, 1)
            c_i = K.sum(hiddens_combined * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', K.int_shape(c_i))
            return c_i, states

        # Fake initial states: the step functions above return their states
        # unchanged, so K.rnn only needs tensors of a consistent shape to thread.
        fake_state_e = K.zeros_like(K.placeholder(shape=(1, 1)))
        fake_state_c = K.zeros_like(K.placeholder(shape=(1, 1)))
        """ Computing energy outputs """
        # e_outputs: (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step,
            decoder_out_seq,
            [fake_state_e],
        )
        """ Computing context vectors """
        # c_outputs: (batch_size, de_seq_len, lat1+lat2+...+latn)
        last_out, c_outputs, _ = K.rnn(
            context_step,
            e_outputs,
            [fake_state_c],
        )

        return c_outputs, e_outputs
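
To make the flattening in `broadcast_sum` concrete, here is a hedged toy run with two fake encoder outputs; it only reproduces the helper above outside the layer, so the tensors and sizes are illustrative.

import tensorflow as tf
from tensorflow.keras import backend as K

def broadcast_sum(a, b):
    a = K.expand_dims(a, 2)  # (batch, a1, 1, d)
    b = K.expand_dims(b, 1)  # (batch, 1, b1, d)
    c = a + b                # (batch, a1, b1, d)
    cs = K.shape(c)
    return K.reshape(c, K.concatenate([cs[0:1], cs[1:2] * cs[2:3], cs[3:4]]))

a = tf.random.normal([2, 3, 4])   # first encoder: 3 positions
b = tf.random.normal([2, 5, 4])   # second encoder: 5 positions
print(broadcast_sum(a, b).shape)  # (2, 15, 4): every pairwise combination, flattened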
Ejemplo n.º 28
0
    def call(self, inputs, verbose=False):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            if verbose:
                print('Ws+Uh>', Ws_plus_Uh.shape)

            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        fake_state_c = K.sum(encoder_out_seq, axis=1)  # <= (batch_size, latent_dim)
        fake_state_e = K.sum(encoder_out_seq, axis=2)  # <= (batch_size, enc_seq_len)

        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )

        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )

        return c_outputs, e_outputs
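
One `energy_step` from the layer above can be reproduced in isolation with random weights. In this sketch `W_a`, `U_a` and `V_a` are stand-ins for the layer's trained weights, and the sizes are arbitrary; the point is the shape flow of the Bahdanau energies.

import tensorflow as tf
from tensorflow.keras import backend as K

batch, en_len, en_dim, de_dim, latent = 2, 6, 4, 4, 5
encoder_out_seq = tf.random.normal([batch, en_len, en_dim])
decoder_state = tf.random.normal([batch, de_dim])
W_a = tf.random.normal([en_dim, latent])
U_a = tf.random.normal([de_dim, latent])
V_a = tf.random.normal([latent, 1])

W_a_dot_s = K.dot(encoder_out_seq, W_a)                  # (batch, en_len, latent)
U_a_dot_h = K.expand_dims(K.dot(decoder_state, U_a), 1)  # (batch, 1, latent)
e_i = K.softmax(
    K.squeeze(K.dot(K.tanh(W_a_dot_s + U_a_dot_h), V_a), axis=-1))
print(e_i.shape)  # (2, 6): one weight per encoder position, rows sum to 1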
Ejemplo n.º 29
0
  def call(self, inputs, mask=None, training=None, initial_state=None):
    # The input should be dense, padded with zeros. If a ragged input is fed
    # into the layer, it is padded and the row lengths are used for masking.
    inputs, row_lengths = K.convert_inputs_if_ragged(inputs)
    is_ragged_input = (row_lengths is not None)
    self._validate_args_if_ragged(is_ragged_input, mask)

    # LSTM does not support constants. Ignore it during process.
    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)

    if isinstance(mask, list):
      mask = mask[0]

    input_shape = K.int_shape(inputs)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

    if not self._could_use_gpu_kernel:
      # Fall back to use the normal LSTM.
      kwargs = {'training': training}
      self._maybe_reset_cell_dropout_mask(self.cell)

      def step(inputs, states):
        return self.cell(inputs, states, **kwargs)

      last_output, outputs, states = K.rnn(
          step,
          inputs,
          initial_state,
          constants=None,
          go_backwards=self.go_backwards,
          mask=mask,
          unroll=self.unroll,
          input_length=row_lengths if row_lengths is not None else timesteps,
          time_major=self.time_major,
          zero_output_for_mask=self.zero_output_for_mask)
      runtime = _runtime(_RUNTIME_UNKNOWN)
    else:
      # Use the new defun approach for backend implementation swap.
      # Note that different implementations need to have same function
      # signature, eg, the tensor parameters need to have same shape and dtypes.
      # Since the CuDNN has an extra set of bias, those bias will be passed to
      # both normal and CuDNN implementations.
      self.reset_dropout_mask()
      dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
      if dropout_mask is not None:
        inputs = inputs * dropout_mask[0]
      gpu_lstm_kwargs = {
          'inputs': inputs,
          'init_h': initial_state[0],
          'init_c': initial_state[1],
          'kernel': self.cell.kernel,
          'recurrent_kernel': self.cell.recurrent_kernel,
          'bias': self.cell.bias,
          'mask': mask,
          'time_major': self.time_major,
          'go_backwards': self.go_backwards,
          'sequence_lengths': row_lengths
      }
      normal_lstm_kwargs = gpu_lstm_kwargs.copy()
      normal_lstm_kwargs.update({
          'activation': self.activation,
          'recurrent_activation': self.recurrent_activation,
          'zero_output_for_mask': self.zero_output_for_mask,
      })

      if context.executing_eagerly():
        device_type = _get_context_device_type()
        can_use_gpu = (
            # Either user specified GPU or unspecified but GPU is available.
            (device_type == _GPU_DEVICE_NAME
             or (device_type is None and context.num_gpus() > 0))
            and
            (mask is None or is_sequence_right_padded(mask, self.time_major)))
        # Under eager context, check the device placement and prefer the
        # GPU implementation when GPU is available.
        if can_use_gpu:
          last_output, outputs, new_h, new_c, runtime = gpu_lstm(
              **gpu_lstm_kwargs)
        else:
          last_output, outputs, new_h, new_c, runtime = standard_lstm(
              **normal_lstm_kwargs)
      else:
        (last_output, outputs, new_h, new_c,
         runtime) = lstm_with_backend_selection(**normal_lstm_kwargs)

      states = [new_h, new_c]

    if self.stateful:
      updates = []
      for i in range(len(states)):
        updates.append(state_ops.assign(self.states[i], states[i]))
      self.add_update(updates)

    if self.return_sequences:
      output = K.maybe_convert_to_ragged(is_ragged_input, outputs, row_lengths)
    else:
      output = last_output

    if self.return_state:
      return [output] + list(states)
    elif self.return_runtime:
      return output, runtime
    else:
      return output
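
Given the `convert_inputs_if_ragged` path above, a hedged usage sketch (assuming a TensorFlow 2.x release where the recurrent layers accept ragged inputs) is simply to feed a `tf.RaggedTensor`; the layer derives the row lengths and uses them as the mask.

import tensorflow as tf

# Two sequences of different lengths, feature size 2.
ragged = tf.ragged.constant(
    [[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
     [[0.7, 0.8]]],
    ragged_rank=1)
lstm = tf.keras.layers.LSTM(4)
print(lstm(ragged).shape)  # (2, 4)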
Ejemplo n.º 30
0
    def call(self, inputs, verbose=False):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            assert_msg = "States must be a list. However states {} is of type {}".format(
                states, type(states))
            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg
            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]
            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= (batch_size*en_seq_len, en_hidden)
            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
            # <= (batch_size, en_seq_len, latent_dim)
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))
            if verbose:
                print('wa.s>', W_a_dot_s.shape)
            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)
            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            if verbose:
                print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a),
                            (-1, en_seq_len))
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """
            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        def create_initial_state(inputs, hidden_size):
            # We are not using initial states, but we need to pass something to
            # the K.rnn function.
            fake_state = K.zeros_like(
                inputs)  # <= (batch_size, enc_seq_len, latent_dim)
            fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size,)
            fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
            fake_state = K.tile(fake_state,
                                [1, hidden_size])  # <= (batch_size, hidden_size)
            return fake_state

        fake_state_c = create_initial_state(encoder_out_seq,
                                            encoder_out_seq.shape[-1])
        fake_state_e = create_initial_state(
            encoder_out_seq,
            encoder_out_seq.shape[1])  # <= (batch_size, enc_seq_len)
        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step,
            decoder_out_seq,
            [fake_state_e],
        )
        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(
            context_step,
            e_outputs,
            [fake_state_c],
        )

        return c_outputs, e_outputs
    def call(self, inputs, mask=None, training=None, initial_state=None):
        # LSTM does not support constants. Ignore it during process.
        inputs, initial_state, _ = self._process_inputs(
            inputs, initial_state, None)

        if isinstance(mask, list):
            mask = mask[0]

        input_shape = K.int_shape(inputs)
        timesteps = input_shape[0] if self.time_major else input_shape[1]

        if mask is not None or not self.could_use_cudnn:
            # CuDNN does not support masking, fall back to use the normal LSTM.
            kwargs = {'training': training}

            def step(inputs, states):
                return self.cell.call(inputs, states, **kwargs)

            last_output, outputs, states = K.rnn(
                step,
                inputs,
                initial_state,
                constants=None,
                go_backwards=self.go_backwards,
                mask=mask,
                unroll=self.unroll,
                input_length=timesteps,
                time_major=self.time_major,
                zero_output_for_mask=self.zero_output_for_mask)
            runtime = _runtime('unknown')
        else:
            # Use the new defun approach for backend implementation swap.
            # Note that different implementations need to have same function
            # signature, eg, the tensor parameters need to have same shape and dtypes.
            # Since the CuDNN has an extra set of bias, those bias will be passed to
            # both normal and CuDNN implementations.
            if self.go_backwards:
                # Reverse time axis.
                inputs = K.reverse(inputs, 0 if self.time_major else 1)

            self.reset_dropout_mask()
            dropout_mask = self.get_dropout_mask_for_cell(inputs,
                                                          training,
                                                          count=4)
            if dropout_mask is not None:
                inputs *= dropout_mask[0]

            if context.executing_eagerly():
                device_type = _get_context_device_type()
                if device_type == _GPU_DEVICE_NAME or (device_type is None and
                                                       context.num_gpus() > 0):
                    # Under eager context, check the device placement and prefer the
                    # GPU implementation when GPU is available.
                    last_output, outputs, new_h, new_c, runtime = cudnn_lstm(
                        inputs, initial_state[0], initial_state[1],
                        self.cell.kernel, self.cell.recurrent_kernel,
                        self.cell.bias, self.time_major)
                else:
                    last_output, outputs, new_h, new_c, runtime = standard_lstm(
                        inputs, initial_state[0], initial_state[1],
                        self.cell.kernel, self.cell.recurrent_kernel,
                        self.cell.bias, self.activation,
                        self.recurrent_activation, self.time_major)
            else:
                # Each time a `tf.function` is called, we will give it a unique
                # identifiable API name, so that Grappler won't get confused when it
                # sees multiple LSTM layers added into same graph, and it will be able
                # to pair up the different implementations across them.
                api_name = 'lstm_' + str(uuid.uuid4())
                defun_standard_lstm = _generate_defun_backend(
                    api_name, _CPU_DEVICE_NAME, standard_lstm)
                defun_cudnn_lstm = _generate_defun_backend(
                    api_name, _GPU_DEVICE_NAME, cudnn_lstm)

                # Call the normal LSTM impl and register the CuDNN impl function. The
                # grappler will kick in during session execution to optimize the graph.
                last_output, outputs, new_h, new_c, runtime = defun_standard_lstm(
                    inputs, initial_state[0], initial_state[1],
                    self.cell.kernel, self.cell.recurrent_kernel,
                    self.cell.bias, self.activation, self.recurrent_activation,
                    self.time_major)

                function.register(defun_cudnn_lstm, inputs, initial_state[0],
                                  initial_state[1], self.cell.kernel,
                                  self.cell.recurrent_kernel, self.cell.bias,
                                  self.time_major)
            states = [new_h, new_c]

        if self.stateful:
            updates = []
            for i in range(len(states)):
                updates.append(state_ops.assign(self.states[i], states[i]))
            self.add_update(updates, inputs)

        if self.return_sequences:
            output = outputs
        else:
            output = last_output

        if self.return_state:
            return [output] + list(states)
        elif self.return_runtime:
            return output, runtime
        else:
            return output
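
The eager branch above boils down to a device check before choosing between the CuDNN-backed and the generic implementation. A minimal sketch of that decision, using the public device-listing API rather than the internal helpers, with a hypothetical function name:

import tensorflow as tf

def pick_lstm_impl():
    # Hypothetical helper mirroring the eager branch: prefer the GPU kernel
    # when at least one GPU is visible, otherwise fall back to the CPU path.
    if tf.config.list_physical_devices('GPU'):
        return 'cudnn_lstm'
    return 'standard_lstm'

print(pick_lstm_impl())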