Code example #1
    def __init__(self,
                 num_layers,
                 num_units=512,
                 num_heads=8,
                 ffn_inner_dim=2048,
                 dropout=0.1,
                 attention_dropout=0.1,
                 ffn_dropout=0.1,
                 ffn_activation=tf.nn.relu,
                 position_encoder_class=SinusoidalPositionEncoder,
                 maximum_relative_position=None,
                 pre_norm=True,
                 **kwargs):
        """Initializes the parameters of the encoder.

        Args:
          num_layers: The number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in the multi-head attention.
          ffn_inner_dim: The number of units of the inner linear transformation
            in the feed forward layer.
          dropout: The probability to drop units from the outputs.
          attention_dropout: The probability to drop units from the attention.
          ffn_dropout: The probability to drop units from the activation output in
            the feed forward layer.
          ffn_activation: The activation function to apply between the two linear
            transformations of the feed forward layer.
          position_encoder_class: The :class:`opennmt.layers.PositionEncoder`
            class to use for position encoding (or a callable that returns an
            instance).
          maximum_relative_position: Maximum relative position representation
            (from https://arxiv.org/abs/1803.02155).
          pre_norm: If ``True``, layer normalization is applied before each
            sub-layer. Otherwise it is applied after.
          **kwargs: Additional layer arguments.
        """
        super().__init__(**kwargs)
        self.num_units = num_units
        self.dropout = dropout
        self.position_encoder = None
        if position_encoder_class is not None:
            self.position_encoder = position_encoder_class()
        self.layer_norm = common.LayerNorm() if pre_norm else None
        self.layers = [
            transformer.SelfAttentionEncoderLayer(
                num_units,
                num_heads,
                ffn_inner_dim,
                dropout=dropout,
                attention_dropout=attention_dropout,
                ffn_dropout=ffn_dropout,
                ffn_activation=ffn_activation,
                maximum_relative_position=maximum_relative_position,
                pre_norm=pre_norm,
            ) for i in range(num_layers)
        ]
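
A minimal usage sketch for this variant (the `from opennmt.encoders import SelfAttentionEncoder` import path is an assumption based on OpenNMT-tf conventions and is not part of the snippet above):

    from opennmt.encoders import SelfAttentionEncoder  # assumed import path

    # Build a post-norm stack: with pre_norm=False, layer normalization is
    # applied after each sub-layer instead of before it, and the constructor
    # creates no final LayerNorm for the encoder output.
    encoder = SelfAttentionEncoder(
        num_layers=6,
        num_units=512,
        num_heads=8,
        ffn_inner_dim=2048,
        pre_norm=False,
    )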
Code example #2
    def __init__(self,
                 num_layers,
                 num_units=512,
                 num_heads=8,
                 ffn_inner_dim=2048,
                 dropout=0.1,
                 attention_dropout=0.1,
                 ffn_dropout=0.1,
                 ffn_activation=tf.nn.relu,
                 position_encoder_class=SinusoidalPositionEncoder,
                 **kwargs):
        """Initializes the parameters of the encoder.

        Args:
          num_layers: The number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in the multi-head attention.
          ffn_inner_dim: The number of units of the inner linear transformation
            in the feed forward layer.
          dropout: The probability to drop units from the outputs.
          attention_dropout: The probability to drop units from the attention.
          ffn_dropout: The probability to drop units from the activation output in
            the feed forward layer.
          ffn_activation: The activation function to apply between the two linear
            transformations of the feed forward layer.
          position_encoder_class: The :class:`opennmt.layers.PositionEncoder`
            class to use for position encoding (or a callable that returns an
            instance).
          **kwargs: Additional layer arguments.
        """
        super(SelfAttentionEncoder, self).__init__(**kwargs)
        self.num_units = num_units
        self.dropout = dropout
        self.position_encoder = None
        if position_encoder_class is not None:
            self.position_encoder = position_encoder_class()
        self.layer_norm = common.LayerNorm()
        self.layers = [
            transformer.SelfAttentionEncoderLayer(
                num_units,
                num_heads,
                ffn_inner_dim,
                dropout=dropout,
                attention_dropout=attention_dropout,
                ffn_dropout=ffn_dropout,
                ffn_activation=ffn_activation) for i in range(num_layers)
        ]
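
This earlier variant has no `pre_norm` or relative-position options, but position encoding can still be disabled through the constructor. A usage sketch (the import path is again an assumption, not shown in the snippet):

    from opennmt.encoders import SelfAttentionEncoder  # assumed import path

    # With position_encoder_class=None the constructor skips creating a
    # position encoder (self.position_encoder stays None), so no sinusoidal
    # position encoding is built for this encoder.
    encoder = SelfAttentionEncoder(
        num_layers=6,
        num_units=512,
        num_heads=8,
        ffn_inner_dim=2048,
        position_encoder_class=None,
    )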
Code example #3
  def __init__(self,
               num_layers,
               num_units=512,
               num_heads=8,
               ffn_inner_dim=2048,
               dropout=0.1,
               attention_dropout=0.1,
               ffn_dropout=0.1,
               ffn_activation=tf.nn.relu,
               position_encoder_class=SinusoidalPositionEncoder,
               maximum_relative_position=None,
               attention_span=None,
               num_attended_heads=1,
               **kwargs):
    """Initializes the parameters of the encoder.

    Args:
      num_layers: The number of layers.
      num_units: The number of hidden units.
      num_heads: The number of heads in the multi-head attention.
      ffn_inner_dim: The number of units of the inner linear transformation
        in the feed forward layer.
      dropout: The probability to drop units from the outputs.
      attention_dropout: The probability to drop units from the attention.
      ffn_dropout: The probability to drop units from the activation output in
        the feed forward layer.
      ffn_activation: The activation function to apply between the two linear
        transformations of the feed forward layer.
      position_encoder_class: The :class:`opennmt.layers.PositionEncoder`
        class to use for position encoding (or a callable that returns an
        instance).
      maximum_relative_position: Maximum relative position representation
        (from https://arxiv.org/abs/1803.02155).
      attention_span: Maximum relative position to attend to
        (from https://arxiv.org/abs/1904.03107).
      num_attended_heads: How many heads should be attended. Defaults to 1
        as each head only attends to itself in vanilla Transformer. Increase to
        an odd number < `num_heads` to also model head interaction.
        (from https://arxiv.org/abs/1904.03107).
      **kwargs: Additional layer arguments.
    """
    super(SelfAttentionEncoder, self).__init__(**kwargs)
    self.num_units = num_units
    self.dropout = dropout
    self.position_encoder = None
    if position_encoder_class is not None:
      self.position_encoder = position_encoder_class()
    self.layer_norm = common.LayerNorm()

    if attention_span is None:
      num_unconstrained_layers = num_layers
    else:
      num_unconstrained_layers = math.floor(num_layers / 2)
    num_constrained_layers = num_layers - num_unconstrained_layers
    self.layers = [
        transformer.SelfAttentionEncoderLayer(
            num_units,
            num_heads,
            ffn_inner_dim,
            dropout=dropout,
            attention_dropout=attention_dropout,
            ffn_dropout=ffn_dropout,
            ffn_activation=ffn_activation,
            maximum_relative_position=maximum_relative_position,
            attention_span=attention_span,
            num_attended_heads=num_attended_heads)
        for _ in range(num_constrained_layers)]
    self.layers += [
        transformer.SelfAttentionEncoderLayer(
            num_units,
            num_heads,
            ffn_inner_dim,
            dropout=dropout,
            attention_dropout=attention_dropout,
            ffn_dropout=ffn_dropout,
            ffn_activation=ffn_activation,
            maximum_relative_position=maximum_relative_position)
        for _ in range(num_unconstrained_layers)]
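
This third variant adds the convolutional self-attention options (`attention_span`, `num_attended_heads`) from https://arxiv.org/abs/1904.03107: when `attention_span` is set, the lower `num_layers - floor(num_layers / 2)` layers are built with the constraint and the remaining upper layers without it (note the snippet also relies on `math` being imported at module level). A usage sketch, assuming this modified class is exposed under the same `opennmt.encoders.SelfAttentionEncoder` name, which is an assumption since this is not the stock OpenNMT-tf constructor:

    from opennmt.encoders import SelfAttentionEncoder  # assumed export of the modified class

    # With num_layers=6 and attention_span set, the first 3 entries of
    # self.layers are constrained (attention limited to attention_span=16
    # relative positions, each head attending num_attended_heads=3 heads),
    # while the last 3 layers use unconstrained self-attention.
    encoder = SelfAttentionEncoder(
        num_layers=6,
        num_units=512,
        num_heads=8,
        ffn_inner_dim=2048,
        attention_span=16,
        num_attended_heads=3,
    )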