Code example #1
    def _defun_gru_call(self, inputs, initial_state, training, mask,
                        sequence_lengths):
        # Use the new defun approach for backend implementation swap.
        # Note that different implementations need to have the same function
        # signature, e.g. the tensor parameters need to have the same shapes
        # and dtypes.

        self.reset_dropout_mask()
        dropout_mask = self.get_dropout_mask_for_cell(inputs,
                                                      training,
                                                      count=3)
        if dropout_mask is not None:
            inputs = inputs * dropout_mask[0]

        if gru_lstm_utils.use_new_gru_lstm_impl():
            gru_kwargs = {
                "inputs": inputs,
                "init_h": gru_lstm_utils.read_variable_value(initial_state[0]),
                "kernel": gru_lstm_utils.read_variable_value(self.cell.kernel),
                "recurrent_kernel": gru_lstm_utils.read_variable_value(
                    self.cell.recurrent_kernel),
                "bias": gru_lstm_utils.read_variable_value(self.cell.bias),
                "mask": mask,
                "time_major": self.time_major,
                "go_backwards": self.go_backwards,
                "sequence_lengths": sequence_lengths,
                "zero_output_for_mask": self.zero_output_for_mask,
            }
            (
                last_output,
                outputs,
                new_h,
                runtime,
            ) = self._defun_wrapper.defun_layer(**gru_kwargs)
        else:
            gpu_gru_kwargs = {
                "inputs": inputs,
                "init_h": gru_lstm_utils.read_variable_value(initial_state[0]),
                "kernel": gru_lstm_utils.read_variable_value(self.cell.kernel),
                "recurrent_kernel": gru_lstm_utils.read_variable_value(
                    self.cell.recurrent_kernel),
                "bias": gru_lstm_utils.read_variable_value(self.cell.bias),
                "mask": mask,
                "time_major": self.time_major,
                "go_backwards": self.go_backwards,
                "sequence_lengths": sequence_lengths,
                "return_sequences": self.return_sequences,
            }
            normal_gru_kwargs = gpu_gru_kwargs.copy()
            normal_gru_kwargs.update({
                "zero_output_for_mask": self.zero_output_for_mask,
            })

            if tf.executing_eagerly():
                device_type = gru_lstm_utils.get_context_device_type()
                can_use_gpu = (
                    # Either user specified GPU or unspecified but GPU is available.
                    (device_type == gru_lstm_utils.GPU_DEVICE_NAME or
                     (device_type is None
                      and tf.config.list_logical_devices("GPU"))) and
                    (mask is None or gru_lstm_utils.is_cudnn_supported_inputs(
                        mask, self.time_major)))
                # Under eager context, check the device placement and prefer
                # the GPU implementation when GPU is available.
                if can_use_gpu:
                    last_output, outputs, new_h, runtime = gpu_gru(
                        **gpu_gru_kwargs)
                else:
                    last_output, outputs, new_h, runtime = standard_gru(
                        **normal_gru_kwargs)
            else:
                (
                    last_output,
                    outputs,
                    new_h,
                    runtime,
                ) = gru_with_backend_selection(**normal_gru_kwargs)

        states = [new_h]
        return last_output, outputs, runtime, states
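
A minimal usage sketch of the eager dispatch above, assuming the public tf.keras API: with the default activations and a strictly right-padded mask, the layer is eligible for the gpu_gru path on a GPU machine; any other mask pattern falls back to standard_gru.

import numpy as np
import tensorflow as tf

x = np.random.rand(4, 10, 8).astype("float32")
x[:, 6:, :] = 0.0  # zero the tail so the implied mask is strictly right padded

masked = tf.keras.layers.Masking(mask_value=0.0)(x)
gru = tf.keras.layers.GRU(16, return_sequences=True)
outputs = gru(masked)  # the mask propagated by Masking gates the timesteps
print(outputs.shape)   # (4, 10, 16)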
Code example #2
    def __init__(
        self,
        units,
        activation="tanh",
        recurrent_activation="sigmoid",
        use_bias=True,
        kernel_initializer="glorot_uniform",
        recurrent_initializer="orthogonal",
        bias_initializer="zeros",
        kernel_regularizer=None,
        recurrent_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        recurrent_constraint=None,
        bias_constraint=None,
        dropout=0.0,
        recurrent_dropout=0.0,
        return_sequences=False,
        return_state=False,
        go_backwards=False,
        stateful=False,
        unroll=False,
        time_major=False,
        reset_after=True,
        **kwargs,
    ):
        # return_runtime is a flag for testing, which shows the real backend
        # implementation chosen by grappler in graph mode.
        self._return_runtime = kwargs.pop("return_runtime", False)
        implementation = kwargs.pop("implementation", 2)
        if implementation == 0:
            logging.warning("`implementation=0` has been deprecated, "
                            "and now defaults to `implementation=2`. "
                            "Please update your layer call.")
        if "enable_caching_device" in kwargs:
            cell_kwargs = {
                "enable_caching_device": kwargs.pop("enable_caching_device")
            }
        else:
            cell_kwargs = {}
        cell = GRUCell(
            units,
            activation=activation,
            recurrent_activation=recurrent_activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            recurrent_initializer=recurrent_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            recurrent_regularizer=recurrent_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            recurrent_constraint=recurrent_constraint,
            bias_constraint=bias_constraint,
            dropout=dropout,
            recurrent_dropout=recurrent_dropout,
            implementation=implementation,
            reset_after=reset_after,
            dtype=kwargs.get("dtype"),
            trainable=kwargs.get("trainable", True),
            **cell_kwargs,
        )
        super().__init__(
            cell,
            return_sequences=return_sequences,
            return_state=return_state,
            go_backwards=go_backwards,
            stateful=stateful,
            unroll=unroll,
            time_major=time_major,
            **kwargs,
        )
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.input_spec = [InputSpec(ndim=3)]

        # The GPU kernel uses the following settings by default and they are
        # not configurable.
        self._could_use_gpu_kernel = (
            self.activation in (activations.tanh, tf.tanh)
            and self.recurrent_activation in (activations.sigmoid, tf.sigmoid)
            and recurrent_dropout == 0 and not unroll and use_bias
            and reset_after
            and tf.compat.v1.executing_eagerly_outside_functions())
        if tf.config.list_logical_devices("GPU"):
            # Only show the message when a GPU is available; the user will not
            # care about cuDNN if there isn't any GPU.
            if self._could_use_gpu_kernel:
                logging.debug(gru_lstm_utils.CUDNN_AVAILABLE_MSG % self.name)
            else:
                logging.warning(gru_lstm_utils.CUDNN_NOT_AVAILABLE_MSG %
                                self.name)

        if gru_lstm_utils.use_new_gru_lstm_impl():
            self._defun_wrapper = gru_lstm_utils.DefunWrapper(
                time_major, go_backwards, "gru")
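
A usage sketch, assuming the public tf.keras.layers.GRU API: this configuration keeps every condition in `_could_use_gpu_kernel` above satisfied (tanh/sigmoid activations, use_bias=True, recurrent_dropout=0, unroll=False, reset_after=True), so the layer can select the cuDNN kernel when a GPU is present.

import tensorflow as tf

gru = tf.keras.layers.GRU(
    units=32,
    activation="tanh",
    recurrent_activation="sigmoid",
    use_bias=True,
    recurrent_dropout=0.0,
    unroll=False,
    reset_after=True,
)
# Changing any of these (e.g. recurrent_dropout=0.1) disables the cuDNN path.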
Code example #3
File: lstm.py  Project: ttigong/keras
  def call(self, inputs, mask=None, training=None, initial_state=None):
    # The input should be dense, padded with zeros. If a ragged input is fed
    # into the layer, it is padded and the row lengths are used for masking.
    inputs, row_lengths = backend.convert_inputs_if_ragged(inputs)
    is_ragged_input = (row_lengths is not None)
    self._validate_args_if_ragged(is_ragged_input, mask)

    # LSTM does not support constants. Ignore them during processing.
    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)

    if isinstance(mask, list):
      mask = mask[0]

    input_shape = backend.int_shape(inputs)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

    if not self._could_use_gpu_kernel:
      # Fall back to use the normal LSTM.
      kwargs = {'training': training}
      self._maybe_reset_cell_dropout_mask(self.cell)

      def step(inputs, states):
        return self.cell(inputs, states, **kwargs)

      last_output, outputs, states = backend.rnn(
          step,
          inputs,
          initial_state,
          constants=None,
          go_backwards=self.go_backwards,
          mask=mask,
          unroll=self.unroll,
          input_length=row_lengths if row_lengths is not None else timesteps,
          time_major=self.time_major,
          zero_output_for_mask=self.zero_output_for_mask)
      runtime = gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_UNKNOWN)
    else:
      # Use the new defun approach for backend implementation swap.
      # Note that different implementations need to have the same function
      # signature, e.g. the tensor parameters need to have the same shapes and
      # dtypes.
      # Since cuDNN has an extra set of biases, those biases will be passed to
      # both the normal and cuDNN implementations.
      self.reset_dropout_mask()
      dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
      if dropout_mask is not None:
        inputs = inputs * dropout_mask[0]
      if gru_lstm_utils.use_new_gru_lstm_impl():
        lstm_kwargs = {
            'inputs': inputs,
            'init_h': gru_lstm_utils.read_variable_value(initial_state[0]),
            'init_c': gru_lstm_utils.read_variable_value(initial_state[1]),
            'kernel': gru_lstm_utils.read_variable_value(self.cell.kernel),
            'recurrent_kernel': gru_lstm_utils.read_variable_value(
                self.cell.recurrent_kernel),
            'bias': gru_lstm_utils.read_variable_value(self.cell.bias),
            'mask': mask,
            'time_major': self.time_major,
            'go_backwards': self.go_backwards,
            'sequence_lengths': row_lengths,
            'zero_output_for_mask': self.zero_output_for_mask,
        }
        (last_output, outputs, new_h, new_c,
         runtime) = self._defun_wrapper.defun_layer(**lstm_kwargs)
      else:
        gpu_lstm_kwargs = {
            'inputs': inputs,
            'init_h': gru_lstm_utils.read_variable_value(initial_state[0]),
            'init_c': gru_lstm_utils.read_variable_value(initial_state[1]),
            'kernel': gru_lstm_utils.read_variable_value(self.cell.kernel),
            'recurrent_kernel': gru_lstm_utils.read_variable_value(
                self.cell.recurrent_kernel),
            'bias': gru_lstm_utils.read_variable_value(self.cell.bias),
            'mask': mask,
            'time_major': self.time_major,
            'go_backwards': self.go_backwards,
            'sequence_lengths': row_lengths,
        }
        normal_lstm_kwargs = gpu_lstm_kwargs.copy()
        normal_lstm_kwargs.update({
            'zero_output_for_mask': self.zero_output_for_mask,
        })

        if tf.executing_eagerly():
          device_type = gru_lstm_utils.get_context_device_type()
          can_use_gpu = (
              # Either user specified GPU or unspecified but GPU is available.
              (device_type == gru_lstm_utils.GPU_DEVICE_NAME or
               (device_type is None
                and tf.config.list_logical_devices('GPU'))) and
              (mask is None or
               gru_lstm_utils.is_cudnn_supported_inputs(mask, self.time_major)))
          # Under eager context, check the device placement and prefer the
          # GPU implementation when GPU is available.
          if can_use_gpu:
            last_output, outputs, new_h, new_c, runtime = gpu_lstm(
                **gpu_lstm_kwargs)
          else:
            last_output, outputs, new_h, new_c, runtime = standard_lstm(
                **normal_lstm_kwargs)
        else:
          (last_output, outputs, new_h, new_c,
           runtime) = lstm_with_backend_selection(**normal_lstm_kwargs)

      states = [new_h, new_c]

    if self.stateful:
      updates = [
          tf.compat.v1.assign(self_state, tf.cast(state, self_state.dtype))
          for self_state, state in zip(self.states, states)
      ]
      self.add_update(updates)

    if self.return_sequences:
      output = backend.maybe_convert_to_ragged(
          is_ragged_input, outputs, row_lengths, go_backwards=self.go_backwards)
    else:
      output = last_output

    if self.return_state:
      return [output] + list(states)
    elif self.return_runtime:
      return output, runtime
    else:
      return output
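
A short sketch of the ragged-input path handled at the top of call() above, assuming the public tf.keras API: the ragged batch is densified and its row lengths are carried along as `sequence_lengths` and used for masking.

import tensorflow as tf

# Two sequences of different lengths (3 and 1 timesteps), 2 features each.
ragged = tf.ragged.constant(
    [[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
     [[7.0, 8.0]]],
    ragged_rank=1,
)
lstm = tf.keras.layers.LSTM(4)
print(lstm(ragged).shape)  # (2, 4): last valid output per sequence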
Code example #4
def gru_with_backend_selection(
    inputs,
    init_h,
    kernel,
    recurrent_kernel,
    bias,
    mask,
    time_major,
    go_backwards,
    sequence_lengths,
    zero_output_for_mask,
    return_sequences,
):
    """Call the GRU with optimized backend kernel selection.

    Under the hood, this function will create two TF functions: one with the
    most generic kernel that can run on all device conditions, and a second
    one with the cuDNN-specific kernel, which can only run on GPU.

    The first function will be called with the standard-kernel parameters,
    while the second function is not called but only registered in the graph.
    Grappler will do the proper graph rewrite and swap in the optimized TF
    function based on the device placement.

    Args:
      inputs: Input tensor of GRU layer.
      init_h: Initial state tensor for the cell output.
      kernel: Weights for cell kernel.
      recurrent_kernel: Weights for cell recurrent kernel.
      bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias
        is used in this case.
      mask: Boolean tensor used to mask out the steps within the sequence.
        An individual `True` entry indicates that the corresponding timestep
        should be utilized, while a `False` entry indicates that the
        corresponding timestep should be ignored.
      time_major: Boolean, whether the inputs are in the format of
        [time, batch, feature] or [batch, time, feature].
      go_backwards: Boolean (default False). If True, process the input sequence
        backwards and return the reversed sequence.
      sequence_lengths: The lengths of all sequences coming from a variable length
        input, such as ragged tensors. If the input has a fixed timestep size,
        this should be None.
      zero_output_for_mask: Boolean, whether to output zero for masked timestep.
      return_sequences: Boolean. If True, return the recurrent outputs for all
        timesteps in the sequence. If False, only return the output for the
        last timestep (which consumes less memory).

    Returns:
      List of output tensors, same as standard_gru.
    """
    params = {
        "inputs": inputs,
        "init_h": init_h,
        "kernel": kernel,
        "recurrent_kernel": recurrent_kernel,
        "bias": bias,
        "mask": mask,
        "time_major": time_major,
        "go_backwards": go_backwards,
        "sequence_lengths": sequence_lengths,
        "zero_output_for_mask": zero_output_for_mask,
        "return_sequences": return_sequences,
    }

    def gpu_gru_with_fallback(
        inputs,
        init_h,
        kernel,
        recurrent_kernel,
        bias,
        mask,
        time_major,
        go_backwards,
        sequence_lengths,
        zero_output_for_mask,
        return_sequences,
    ):
        """Use cuDNN kernel when mask is none or strictly right padded."""
        if mask is None:
            return gpu_gru(
                inputs=inputs,
                init_h=init_h,
                kernel=kernel,
                recurrent_kernel=recurrent_kernel,
                bias=bias,
                mask=mask,
                time_major=time_major,
                go_backwards=go_backwards,
                sequence_lengths=sequence_lengths,
                return_sequences=return_sequences,
            )

        def cudnn_gru_fn():
            return gpu_gru(
                inputs=inputs,
                init_h=init_h,
                kernel=kernel,
                recurrent_kernel=recurrent_kernel,
                bias=bias,
                mask=mask,
                time_major=time_major,
                go_backwards=go_backwards,
                sequence_lengths=sequence_lengths,
                return_sequences=return_sequences,
            )

        def standard_gru_fn():
            return standard_gru(
                inputs=inputs,
                init_h=init_h,
                kernel=kernel,
                recurrent_kernel=recurrent_kernel,
                bias=bias,
                mask=mask,
                time_major=time_major,
                go_backwards=go_backwards,
                sequence_lengths=sequence_lengths,
                zero_output_for_mask=zero_output_for_mask,
                return_sequences=return_sequences,
            )

        return tf.cond(
            gru_lstm_utils.is_cudnn_supported_inputs(mask, time_major),
            true_fn=cudnn_gru_fn,
            false_fn=standard_gru_fn,
        )

    if gru_lstm_utils.use_new_gru_lstm_impl():
        # Chooses the implementation dynamically based on the running device.
        (
            last_output,
            outputs,
            new_h,
            runtime,
        ) = tf.__internal__.execute_fn_for_device(
            {
                gru_lstm_utils.CPU_DEVICE_NAME: lambda: standard_gru(**params),
                gru_lstm_utils.GPU_DEVICE_NAME: lambda: gpu_gru_with_fallback(
                    **params),
            },
            lambda: standard_gru(**params),
        )
    else:
        # Each time a `tf.function` is called, we will give it a unique
        # identifiable API name, so that Grappler won't get confused when it
        # sees multiple GRU layers added into the same graph, and it will be
        # able to pair up the different implementations across them.
        api_name = "gru_" + str(uuid.uuid4())
        supportive_attribute = {
            "time_major": time_major,
            "go_backwards": go_backwards,
        }
        defun_standard_gru = gru_lstm_utils.generate_defun_backend(
            api_name,
            gru_lstm_utils.CPU_DEVICE_NAME,
            standard_gru,
            supportive_attribute,
        )
        defun_gpu_gru = gru_lstm_utils.generate_defun_backend(
            api_name,
            gru_lstm_utils.GPU_DEVICE_NAME,
            gpu_gru_with_fallback,
            supportive_attribute,
        )

        # Call the normal GRU impl and register the cuDNN impl function.
        # Grappler will kick in during session execution to optimize the graph.
        last_output, outputs, new_h, runtime = defun_standard_gru(**params)
        gru_lstm_utils.function_register(defun_gpu_gru, **params)

    return last_output, outputs, new_h, runtime
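
As a rough illustration of the "strictly right padded" condition gating the cuDNN branch above, here is a minimal sketch (not the library's `is_cudnn_supported_inputs` implementation) of how such a check can be expressed: a batch-major boolean mask is cuDNN-compatible when each row's `True` entries form a contiguous prefix.

import tensorflow as tf

def mask_is_right_padded(mask):
    """mask: [batch, time] boolean tensor (batch-major)."""
    lengths = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)
    # Rebuild the ideal right-padded mask from each row's length and compare.
    ideal = tf.sequence_mask(lengths, maxlen=tf.shape(mask)[1])
    return tf.reduce_all(tf.equal(mask, ideal))

print(mask_is_right_padded(
    tf.constant([[True, True, False], [True, False, False]])))  # True
print(mask_is_right_padded(
    tf.constant([[True, False, True]])))  # False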
Code example #5
File: lstm.py  Project: ttigong/keras
  def __init__(self,
               units,
               activation='tanh',
               recurrent_activation='sigmoid',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               unit_forget_bias=True,
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               dropout=0.,
               recurrent_dropout=0.,
               return_sequences=False,
               return_state=False,
               go_backwards=False,
               stateful=False,
               time_major=False,
               unroll=False,
               **kwargs):
    # return_runtime is a flag for testing, which shows the real backend
    # implementation chosen by grappler in graph mode.
    self.return_runtime = kwargs.pop('return_runtime', False)

    super(LSTM, self).__init__(
        units,
        activation=activation,
        recurrent_activation=recurrent_activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        unit_forget_bias=unit_forget_bias,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        implementation=kwargs.pop('implementation', 2),
        return_sequences=return_sequences,
        return_state=return_state,
        go_backwards=go_backwards,
        stateful=stateful,
        time_major=time_major,
        unroll=unroll,
        **kwargs)

    self.state_spec = [
        InputSpec(shape=(None, dim)) for dim in (self.units, self.units)
    ]
    self._could_use_gpu_kernel = (
        self.activation in (activations.tanh, tf.tanh) and
        self.recurrent_activation in (activations.sigmoid, tf.sigmoid) and
        recurrent_dropout == 0 and not unroll and use_bias and
        tf.compat.v1.executing_eagerly_outside_functions())
    if tf.config.list_logical_devices('GPU'):
      # Only show the message when a GPU is available; the user will not care
      # about cuDNN if there isn't any GPU.
      if self._could_use_gpu_kernel:
        logging.debug(gru_lstm_utils.CUDNN_AVAILABLE_MSG % self.name)
      else:
        logging.warning(gru_lstm_utils.CUDNN_NOT_AVAILABLE_MSG % self.name)

    if gru_lstm_utils.use_new_gru_lstm_impl():
      self._defun_wrapper = gru_lstm_utils.DefunWrapper(
          time_major, go_backwards, 'lstm')
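
A usage sketch, assuming the public tf.keras.layers.LSTM API: setting recurrent_dropout above zero breaks one of the `_could_use_gpu_kernel` conditions above, so on a machine with a GPU the layer logs CUDNN_NOT_AVAILABLE_MSG and runs the generic kernel instead.

import tensorflow as tf

# Falls back to the generic kernel even on GPU (recurrent_dropout != 0).
lstm = tf.keras.layers.LSTM(64, recurrent_dropout=0.2)

# Satisfies the cuDNN criteria (default activations, use_bias=True,
# recurrent_dropout=0, unroll=False).
cudnn_ok = tf.keras.layers.LSTM(64)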