Example #1
    def __init__(self, size, self_attn, feed_forward, dropout_rate,
                 normalize_before=True, concat_after=False):
        """Construct an EncoderLayer object."""
        super(EncoderLayer, self).__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.norm1 = LayerNorm(size)
        self.norm2 = LayerNorm(size)
        self.dropout = nn.Dropout(dropout_rate)
        self.size = size
        self.normalize_before = normalize_before
        self.concat_after = concat_after
        if self.concat_after:
            self.concat_linear = nn.Linear(size + size, size)
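
Below is a minimal construction sketch for this layer. It assumes EncoderLayer, MultiHeadedAttention, and PositionwiseFeedForward from the same code base are already imported (their definitions are not shown above), and all values are illustrative; the forward interface noted in the comment is an assumption, not taken from the snippet.

    # Hedged sketch: names follow the snippet above; values are illustrative.
    size = 256
    self_attn = MultiHeadedAttention(4, size, 0.1)            # heads, model dim, dropout
    feed_forward = PositionwiseFeedForward(size, 2048, 0.1)   # model dim, hidden units, dropout
    layer = EncoderLayer(size, self_attn, feed_forward, dropout_rate=0.1,
                         normalize_before=True, concat_after=False)
    # Assumed usage (not shown above): x is (batch, time, size), mask masks padding.
    # x, mask = layer(x, mask)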
Example #2
    def __init__(self,
                 odim,
                 attention_dim=256,
                 attention_heads=4,
                 linear_units=2048,
                 num_blocks=6,
                 dropout_rate=0.1,
                 death_rate=0.0,
                 positional_dropout_rate=0.1,
                 self_attention_dropout_rate=0.0,
                 src_attention_dropout_rate=0.0,
                 input_layer="embed",
                 use_output_layer=True,
                 pos_enc_class=PositionalEncoding,
                 normalize_before=True,
                 concat_after=False):
        """Construct a Decoder object."""
        torch.nn.Module.__init__(self)
        if input_layer == "embed":
            self.embed = torch.nn.Sequential(
                torch.nn.Embedding(odim, attention_dim),
                pos_enc_class(attention_dim, positional_dropout_rate))
        elif input_layer == "linear":
            self.embed = torch.nn.Sequential(
                torch.nn.Linear(odim, attention_dim),
                torch.nn.LayerNorm(attention_dim),
                torch.nn.Dropout(dropout_rate),
                torch.nn.ReLU(),
                pos_enc_class(attention_dim, positional_dropout_rate))
        elif isinstance(input_layer, torch.nn.Module):
            self.embed = torch.nn.Sequential(
                input_layer,
                pos_enc_class(attention_dim, positional_dropout_rate))
        else:
            raise NotImplementedError(
                "only `embed`, `linear`, or torch.nn.Module is supported.")
        self.normalize_before = normalize_before
        # Each block's death (layer-drop) rate scales linearly with its index,
        # so deeper blocks are dropped more often (stochastic depth).
        self.decoders = repeat_i(
            num_blocks, lambda i: StochasticDecoderLayer(
                attention_dim,
                MultiHeadedAttention(attention_heads, attention_dim,
                                     self_attention_dropout_rate),
                MultiHeadedAttention(attention_heads, attention_dim,
                                     src_attention_dropout_rate),
                PositionwiseFeedForward(attention_dim, linear_units,
                                        dropout_rate),
                dropout_rate,
                death_rate * i / num_blocks,
                normalize_before,
                concat_after))
        if self.normalize_before:
            self.after_norm = LayerNorm(attention_dim)
        if use_output_layer:
            self.output_layer = torch.nn.Linear(attention_dim, odim)
        else:
            self.output_layer = None
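
For context, a construction sketch follows. The enclosing class name is assumed to be Decoder (only its __init__ appears above), and every argument value is illustrative rather than taken from any recipe.

    # Hedged sketch: `Decoder` is the assumed name of the class whose
    # __init__ is shown above; values are illustrative.
    odim = 5000                      # output vocabulary size
    decoder = Decoder(
        odim,
        attention_dim=256,
        attention_heads=4,
        num_blocks=6,
        dropout_rate=0.1,
        death_rate=0.1,              # deepest blocks get the largest drop rate
        input_layer="embed")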
Example #3
    def __init__(self,
                 idim,
                 attention_dim=256,
                 attention_heads=4,
                 linear_units=2048,
                 num_blocks=6,
                 dropout_rate=0.1,
                 death_rate=0.0,
                 positional_dropout_rate=0.1,
                 attention_dropout_rate=0.0,
                 input_layer="conv2d",
                 pos_enc_class=PositionalEncoding,
                 normalize_before=True,
                 concat_after=False,
                 positionwise_layer_type="linear",
                 positionwise_conv_kernel_size=1,
                 padding_idx=-1):
        """Construct an Encoder object."""
        super(Encoder, self).__init__()

        if input_layer == "linear":
            self.embed = torch.nn.Sequential(
                torch.nn.Linear(idim, attention_dim),
                torch.nn.LayerNorm(attention_dim),
                torch.nn.Dropout(dropout_rate), torch.nn.ReLU(),
                pos_enc_class(attention_dim, positional_dropout_rate))
        elif input_layer == "conv2d":
            self.embed = Conv2dSubsampling(idim, attention_dim, dropout_rate)
        elif input_layer == "conv2d_1layer_with_deltas":
            self.embed = Conv2dSubsampling_1layer_with_deltas(idim,
                                                              attention_dim,
                                                              dropout_rate,
                                                              delta=True)
        elif input_layer == "conv2d_with_deltas":
            self.embed = Conv2dSubsampling_with_deltas(idim, attention_dim,
                                                       dropout_rate)
        elif input_layer == "conv2d_yingbo":
            self.embed = Conv2dSubsampling_yingbo(idim, attention_dim,
                                                  dropout_rate)
        elif input_layer == "embed":
            self.embed = torch.nn.Sequential(
                torch.nn.Embedding(idim,
                                   attention_dim,
                                   padding_idx=padding_idx),
                pos_enc_class(attention_dim, positional_dropout_rate))
        elif isinstance(input_layer, torch.nn.Module):
            self.embed = torch.nn.Sequential(
                input_layer,
                pos_enc_class(attention_dim, positional_dropout_rate),
            )
        elif input_layer is None:
            self.embed = torch.nn.Sequential(
                pos_enc_class(attention_dim, positional_dropout_rate))
        else:
            raise ValueError("unknown input_layer: " + input_layer)
        self.normalize_before = normalize_before
        if positionwise_layer_type == "linear":
            positionwise_layer = PositionwiseFeedForward
            positionwise_layer_args = (attention_dim, linear_units,
                                       dropout_rate)
        elif positionwise_layer_type == "conv1d":
            positionwise_layer = MultiLayeredConv1d
            positionwise_layer_args = (attention_dim, linear_units,
                                       positionwise_conv_kernel_size,
                                       dropout_rate)
        elif positionwise_layer_type == "conv1d-linear":
            positionwise_layer = Conv1dLinear
            positionwise_layer_args = (attention_dim, linear_units,
                                       positionwise_conv_kernel_size,
                                       dropout_rate)
        else:
            raise NotImplementedError(
                "Support only linear, conv1d, or conv1d-linear.")
        self.encoders = repeat_i(
            num_blocks, lambda i: StochasticEncoderLayer(
                attention_dim,
                MultiHeadedAttention(attention_heads, attention_dim,
                                     attention_dropout_rate),
                positionwise_layer(*positionwise_layer_args), dropout_rate,
                death_rate * i / num_blocks, normalize_before, concat_after))
        if self.normalize_before:
            self.after_norm = LayerNorm(attention_dim)
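
A matching construction sketch for this encoder: the input dimension and the remaining values are illustrative, and the forward interface is not shown above, so it is omitted here.

    # Hedged sketch: builds the Encoder above for 80-dimensional input features;
    # all values are illustrative.
    idim = 80
    encoder = Encoder(
        idim,
        attention_dim=256,
        attention_heads=4,
        num_blocks=12,
        dropout_rate=0.1,
        death_rate=0.1,              # stochastic-depth rate, scaled per block
        input_layer="conv2d")        # convolutional subsampling front end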