Example #1
  def __init__(self,
               num_layers,
               num_units=512,
               num_heads=8,
               ffn_inner_dim=2048,
               dropout=0.1,
               attention_dropout=0.1,
               relu_dropout=0.1,
               position_encoder=SinusoidalPositionEncoder()):
    """Initializes the parameters of the encoder.

    Args:
      num_layers: The number of layers.
      num_units: The number of hidden units.
      num_heads: The number of heads in the multi-head attention.
      ffn_inner_dim: The number of units of the inner linear transformation
        in the feed forward layer.
      dropout: The probability to drop units from the outputs.
      attention_dropout: The probability to drop units from the attention.
      relu_dropout: The probability to drop units from the ReLU activation in
        the feed forward layer.
      position_encoder: The :class:`opennmt.layers.position.PositionEncoder` to
        apply on inputs or ``None``.
    """
    self.num_layers = num_layers
    self.num_units = num_units
    self.num_heads = num_heads
    self.ffn_inner_dim = ffn_inner_dim
    self.dropout = dropout
    self.attention_dropout = attention_dropout
    self.relu_dropout = relu_dropout
    self.position_encoder = position_encoder
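
To make the signature above concrete, here is a minimal usage sketch. The import paths are an assumption inferred from the opennmt package layout referenced in the docstring and may differ between OpenNMT-tf versions; the argument values are simply the documented defaults.

# Minimal sketch (assumed import paths): construct the encoder with the
# defaults documented above.
from opennmt.encoders.self_attention_encoder import SelfAttentionEncoder
from opennmt.layers.position import SinusoidalPositionEncoder

encoder = SelfAttentionEncoder(
    6,                                  # num_layers is the only required argument
    num_units=512,                      # hidden size shared by all layers
    num_heads=8,                        # attention heads per layer
    ffn_inner_dim=2048,                 # inner size of the feed-forward sublayer
    dropout=0.1,
    attention_dropout=0.1,
    relu_dropout=0.1,
    position_encoder=SinusoidalPositionEncoder())
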
Example #2
    def __init__(self,
                 source_inputter,
                 target_inputter,
                 num_layers,
                 num_units,
                 num_heads,
                 ffn_inner_dim,
                 dropout=0.1,
                 attention_dropout=0.1,
                 relu_dropout=0.1,
                 position_encoder=SinusoidalPositionEncoder(),
                 decoder_self_attention_type="scaled_dot",
                 name="transformer"):
        """Initializes a Transformer model.

        Args:
          source_inputter: A :class:`opennmt.inputters.inputter.Inputter` to process
            the source data.
          target_inputter: A :class:`opennmt.inputters.inputter.Inputter` to process
            the target data. Currently, only the
            :class:`opennmt.inputters.text_inputter.WordEmbedder` is supported.
          num_layers: The shared number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in each self-attention layer.
          ffn_inner_dim: The inner dimension of the feed forward layers.
          dropout: The probability to drop units in each layer output.
          attention_dropout: The probability to drop units from the attention.
          relu_dropout: The probability to drop units from the ReLU activation in
            the feed forward layer.
          position_encoder: A :class:`opennmt.layers.position.PositionEncoder` to
            apply on the inputs.
          decoder_self_attention_type: Type of self attention in the decoder,
            "scaled_dot" or "average" (case insensitive).
          name: The name of this model.
        """
        encoder = SelfAttentionEncoder(num_layers,
                                       num_units=num_units,
                                       num_heads=num_heads,
                                       ffn_inner_dim=ffn_inner_dim,
                                       dropout=dropout,
                                       attention_dropout=attention_dropout,
                                       relu_dropout=relu_dropout,
                                       position_encoder=position_encoder)
        decoder = SelfAttentionDecoder(
            num_layers,
            num_units=num_units,
            num_heads=num_heads,
            ffn_inner_dim=ffn_inner_dim,
            dropout=dropout,
            attention_dropout=attention_dropout,
            relu_dropout=relu_dropout,
            position_encoder=position_encoder,
            self_attention_type=decoder_self_attention_type)

        super(Transformer, self).__init__(source_inputter,
                                          target_inputter,
                                          encoder,
                                          decoder,
                                          daisy_chain_variables=True,
                                          name=name)
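
This constructor simply wires a SelfAttentionEncoder and a SelfAttentionDecoder into the base sequence-to-sequence model, so a typical call only provides the two inputters and the size hyperparameters. A minimal sketch follows; the import paths and the WordEmbedder arguments are assumptions (they are not shown in this example) and may need adjusting to your OpenNMT-tf version.

# Minimal sketch (assumed import paths and WordEmbedder arguments).
from opennmt.inputters.text_inputter import WordEmbedder
from opennmt.models.transformer import Transformer

model = Transformer(
    source_inputter=WordEmbedder("source_words_vocabulary", embedding_size=512),
    target_inputter=WordEmbedder("target_words_vocabulary", embedding_size=512),
    num_layers=6,
    num_units=512,
    num_heads=8,
    ffn_inner_dim=2048)                 # the remaining arguments keep their defaults
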
Example #3
    def __init__(self,
                 num_layers,
                 num_units=512,
                 num_heads=8,
                 ffn_inner_dim=2048,
                 dropout=0.1,
                 attention_dropout=0.1,
                 ffn_dropout=0.1,
                 ffn_activation=tf.nn.relu,
                 position_encoder=SinusoidalPositionEncoder(),
                 num_sources=1,
                 **kwargs):
        """Initializes the parameters of the decoder.

        Args:
          num_layers: The number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in the multi-head attention.
          ffn_inner_dim: The number of units of the inner linear transformation
            in the feed forward layer.
          dropout: The probability to drop units from the outputs.
          attention_dropout: The probability to drop units from the attention.
          ffn_dropout: The probability to drop units from the activation output in
            the feed forward layer.
          ffn_activation: The activation function to apply between the two linear
            transformations of the feed forward layer.
          position_encoder: The :class:`opennmt.layers.position.PositionEncoder` to
            apply on inputs.
          num_sources: The number of source contexts expected by this decoder.
          **kwargs: Additional layer arguments.
        """
        super(SelfAttentionDecoderV2, self).__init__(num_sources=num_sources,
                                                     **kwargs)
        self.num_units = num_units
        self.num_heads = num_heads
        self.dropout = dropout
        self.position_encoder = position_encoder
        self.layer_norm = common.LayerNorm(name="output_norm")
        self.layers = [
            _SelfAttentionDecoderLayer(self.num_units,
                                       self.num_heads,
                                       ffn_inner_dim,
                                       num_sources=num_sources,
                                       dropout=dropout,
                                       attention_dropout=attention_dropout,
                                       ffn_dropout=ffn_dropout,
                                       ffn_activation=ffn_activation,
                                       name="layer_%d" % i)
            for i in range(num_layers)
        ]
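
Note that the V2 decoder builds its layer stack eagerly: instantiating it already creates num_layers _SelfAttentionDecoderLayer objects. A minimal sketch, assuming the import path below (it is not shown in this example and may differ by version):

# Minimal sketch (assumed import path for the V2 decoder).
import tensorflow as tf
from opennmt.decoders.self_attention_decoder import SelfAttentionDecoderV2

decoder = SelfAttentionDecoderV2(
    6,                                  # num_layers
    num_units=512,
    num_heads=8,
    ffn_inner_dim=2048,
    ffn_activation=tf.nn.relu,          # activation between the two FFN projections
    num_sources=1)                      # expect a single encoder context
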
Example #4
    def __init__(self,
                 num_layers,
                 num_units=512,
                 num_heads=8,
                 ffn_inner_dim=2048,
                 dropout=0.1,
                 attention_dropout=0.1,
                 relu_dropout=0.1,
                 position_encoder=SinusoidalPositionEncoder(),
                 self_attention_type="scaled_dot"):
        """Initializes the parameters of the decoder.

        Args:
          num_layers: The number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in the multi-head attention.
          ffn_inner_dim: The number of units of the inner linear transformation
            in the feed forward layer.
          dropout: The probability to drop units from the outputs.
          attention_dropout: The probability to drop units from the attention.
          relu_dropout: The probability to drop units from the ReLU activation in
            the feed forward layer.
          position_encoder: A :class:`opennmt.layers.position.PositionEncoder` to
            apply on inputs or ``None``.
          self_attention_type: Type of self attention, "scaled_dot" or "average"
            (case insensitive).

        Raises:
          ValueError: if :obj:`self_attention_type` is invalid.
        """
        self.num_layers = num_layers
        self.num_units = num_units
        self.num_heads = num_heads
        self.ffn_inner_dim = ffn_inner_dim
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.relu_dropout = relu_dropout
        self.position_encoder = position_encoder
        self.self_attention_type = self_attention_type.lower()
        if self.self_attention_type not in ("scaled_dot", "average"):
            raise ValueError("invalid attention type %s" %
                             self.self_attention_type)
        if self.self_attention_type == "average":
            tf.logging.warning(
                "Support for average attention network is experimental "
                "and may change in future versions.")
Example #5
    def __init__(self,
                 num_layers,
                 num_units=512,
                 num_heads=8,
                 ffn_inner_dim=2048,
                 dropout=0.1,
                 attention_dropout=0.1,
                 relu_dropout=0.1,
                 position_encoder=None,
                 **kwargs):
        """Initializes the parameters of the encoder.

        Args:
          num_layers: The number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in the multi-head attention.
          ffn_inner_dim: The number of units of the inner linear transformation
            in the feed forward layer.
          dropout: The probability to drop units from the outputs.
          attention_dropout: The probability to drop units from the attention.
          relu_dropout: The probability to drop units from the ReLU activation in
            the feed forward layer.
          position_encoder: The :class:`opennmt.layers.position.PositionEncoder` to
            apply on inputs. If ``None``, defaults to
            :class:`opennmt.layers.position.SinusoidalPositionEncoder`.
        """
        super(SelfAttentionEncoderV2, self).__init__(**kwargs)
        self.num_units = num_units
        self.dropout = dropout
        self.position_encoder = position_encoder
        if self.position_encoder is None:
            self.position_encoder = SinusoidalPositionEncoder()
        self.layer_norm = common.LayerNorm()
        self.layers = [
            _SelfAttentionEncoderLayer(num_units,
                                       num_heads,
                                       ffn_inner_dim,
                                       dropout=dropout,
                                       attention_dropout=attention_dropout,
                                       relu_dropout=relu_dropout,
                                       name="layer_%d" % i)
            for i in range(num_layers)
        ]
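
Unlike example #1, this constructor defaults position_encoder to None and substitutes a SinusoidalPositionEncoder itself, which avoids sharing a single mutable default instance between encoders. A minimal sketch, assuming the import paths below:

# Minimal sketch (assumed import paths): the None default falls back to
# SinusoidalPositionEncoder inside the constructor.
from opennmt.encoders.self_attention_encoder import SelfAttentionEncoderV2
from opennmt.layers.position import SinusoidalPositionEncoder

encoder = SelfAttentionEncoderV2(6, num_units=512, num_heads=8, ffn_inner_dim=2048)
assert isinstance(encoder.position_encoder, SinusoidalPositionEncoder)
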
Example #6
    def __init__(self,
                 source_inputter,
                 target_inputter,
                 num_layers,
                 num_units,
                 num_heads,
                 ffn_inner_dim,
                 dropout=0.1,
                 attention_dropout=0.1,
                 relu_dropout=0.1,
                 position_encoder=SinusoidalPositionEncoder(),
                 decoder_self_attention_type="scaled_dot",
                 share_embeddings=EmbeddingsSharingLevel.NONE,
                 share_encoders=False,
                 alignment_file_key="train_alignments",
                 name="transformer"):
        """Initializes a Transformer model.

        Args:
          source_inputter: A :class:`opennmt.inputters.inputter.Inputter` to process
            the source data. If this inputter returns parallel inputs, a
            multi-source Transformer architecture will be constructed.
          target_inputter: A :class:`opennmt.inputters.inputter.Inputter` to process
            the target data. Currently, only the
            :class:`opennmt.inputters.text_inputter.WordEmbedder` is supported.
          num_layers: The shared number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in each self-attention layer.
          ffn_inner_dim: The inner dimension of the feed forward layers.
          dropout: The probability to drop units in each layer output.
          attention_dropout: The probability to drop units from the attention.
          relu_dropout: The probability to drop units from the ReLU activation in
            the feed forward layer.
          position_encoder: A :class:`opennmt.layers.position.PositionEncoder` to
            apply on the inputs.
          decoder_self_attention_type: Type of self attention in the decoder,
            "scaled_dot" or "average" (case insensitive).
          share_embeddings: Level of embeddings sharing, see
            :class:`opennmt.models.sequence_to_sequence.EmbeddingsSharingLevel`
            for possible values.
          share_encoders: In a multi-source architecture, whether to share the
            parameters of the separate encoders.
          alignment_file_key: The data configuration key of the training alignment
            file to support guided alignment.
          name: The name of this model.
        """
        encoders = [
            SelfAttentionEncoder(num_layers,
                                 num_units=num_units,
                                 num_heads=num_heads,
                                 ffn_inner_dim=ffn_inner_dim,
                                 dropout=dropout,
                                 attention_dropout=attention_dropout,
                                 relu_dropout=relu_dropout,
                                 position_encoder=position_encoder)
            for _ in range(source_inputter.num_outputs)
        ]
        if len(encoders) > 1:
            encoder = ParallelEncoder(encoders,
                                      outputs_reducer=None,
                                      states_reducer=None,
                                      share_parameters=share_encoders)
        else:
            encoder = encoders[0]
        decoder = SelfAttentionDecoder(
            num_layers,
            num_units=num_units,
            num_heads=num_heads,
            ffn_inner_dim=ffn_inner_dim,
            dropout=dropout,
            attention_dropout=attention_dropout,
            relu_dropout=relu_dropout,
            position_encoder=position_encoder,
            self_attention_type=decoder_self_attention_type)

        self._num_units = num_units
        super(Transformer,
              self).__init__(source_inputter,
                             target_inputter,
                             encoder,
                             decoder,
                             share_embeddings=share_embeddings,
                             alignment_file_key=alignment_file_key,
                             daisy_chain_variables=True,
                             name=name)
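
The multi-source branch is driven entirely by source_inputter.num_outputs: an inputter returning parallel inputs yields one SelfAttentionEncoder per source, wrapped in a ParallelEncoder whose parameters can be shared via share_encoders. A minimal sketch, assuming the import paths and inputter arguments below:

# Minimal sketch (assumed import paths and inputter arguments) of the
# multi-source configuration.
from opennmt.inputters.inputter import ParallelInputter
from opennmt.inputters.text_inputter import WordEmbedder
from opennmt.models.transformer import Transformer

source_inputter = ParallelInputter([
    WordEmbedder("source_1_vocabulary", embedding_size=512),
    WordEmbedder("source_2_vocabulary", embedding_size=512)])

model = Transformer(
    source_inputter,                    # num_outputs == 2, so two encoders are built
    WordEmbedder("target_words_vocabulary", embedding_size=512),
    num_layers=6,
    num_units=512,
    num_heads=8,
    ffn_inner_dim=2048,
    share_encoders=True)                # share the parameters of the per-source encoders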