Example #1
    def __init__(
        self, conv_layers_before=None, input_size=83, hidden_size=512,
        num_layers=1, dropout_in=0.1, dropout_out=0.1, bidirectional=False,
        residual=False, left_pad=False, padding_value=0., src_bucketed=False,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
    ):
        super().__init__(None)  # no src dictionary
        self.conv_layers_before = conv_layers_before
        self.num_layers = num_layers
        self.dropout_in_module = FairseqDropout(dropout_in, module_name=self.__class__.__name__)
        self.dropout_out_module = FairseqDropout(dropout_out, module_name=self.__class__.__name__)
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.residual = residual
        self.max_source_positions = max_source_positions

        self.lstm = nn.ModuleList([
            LSTM(
                input_size=input_size if layer == 0 else 2 * hidden_size if self.bidirectional else hidden_size,
                hidden_size=hidden_size,
                bidirectional=bidirectional,
            )
            for layer in range(num_layers)
        ])
        self.left_pad = left_pad
        self.padding_value = padding_value
        self.src_bucketed = src_bucketed

        self.output_units = hidden_size
        if bidirectional:
            self.output_units *= 2
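A quick aside on the sizing rule above: every LSTM after the first consumes the previous layer's output, which has 2 * hidden_size features when that layer is bidirectional. A standalone sketch of the same rule with plain torch.nn.LSTM (the LSTM helper in the snippet is assumed to be a thin wrapper around it); all dimensions are illustrative:

import torch
import torch.nn as nn

def build_lstm_stack(input_size, hidden_size, num_layers, bidirectional):
    # Layer 0 consumes the raw features; later layers consume the previous
    # layer's output, which is doubled when the stack is bidirectional.
    return nn.ModuleList([
        nn.LSTM(
            input_size=input_size if layer == 0
            else (2 * hidden_size if bidirectional else hidden_size),
            hidden_size=hidden_size,
            bidirectional=bidirectional,
        )
        for layer in range(num_layers)
    ])

stack = build_lstm_stack(input_size=83, hidden_size=512, num_layers=3, bidirectional=True)
x = torch.randn(20, 4, 83)  # (seq_len, batch, features)
for lstm in stack:
    x, _ = lstm(x)
print(x.shape)  # torch.Size([20, 4, 1024]), i.e. 2 * hidden_size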
Example #2
    def __init__(self, args, no_encoder_attn=False, kernel_size=0):
        super().__init__()
        self.embed_dim = args.decoder_embed_dim
        self.conv_dim = args.decoder_conv_dim
        if args.decoder_glu:
            self.linear1 = Linear(self.embed_dim, 2 * self.conv_dim)
            self.act = nn.GLU()
        else:
            self.linear1 = Linear(self.embed_dim, self.conv_dim)
            self.act = None
        if args.decoder_conv_type == "lightweight":
            self.conv = LightweightConv(
                self.conv_dim,
                kernel_size,
                padding_l=kernel_size - 1,
                weight_softmax=args.weight_softmax,
                num_heads=args.decoder_attention_heads,
                weight_dropout=args.weight_dropout,
            )
        elif args.decoder_conv_type == "dynamic":
            self.conv = DynamicConv(
                self.conv_dim,
                kernel_size,
                padding_l=kernel_size - 1,
                weight_softmax=args.weight_softmax,
                num_heads=args.decoder_attention_heads,
                weight_dropout=args.weight_dropout,
            )
        else:
            raise NotImplementedError
        self.linear2 = Linear(self.conv_dim, self.embed_dim)

        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__)
        self.relu_dropout_module = FairseqDropout(
            args.relu_dropout, module_name=self.__class__.__name__)
        self.input_dropout_module = FairseqDropout(
            args.input_dropout, module_name=self.__class__.__name__)
        self.normalize_before = args.decoder_normalize_before

        self.conv_layer_norm = LayerNorm(self.embed_dim)

        if no_encoder_attn:
            self.encoder_attn = None
            self.encoder_attn_layer_norm = None
        else:
            self.encoder_attn = MultiheadAttention(
                self.embed_dim,
                args.decoder_attention_heads,
                dropout=args.attention_dropout,
                encoder_decoder_attention=True,
            )
            self.encoder_attn_layer_norm = LayerNorm(self.embed_dim)

        self.fc1 = Linear(self.embed_dim, args.decoder_ffn_embed_dim)
        self.fc2 = Linear(args.decoder_ffn_embed_dim, self.embed_dim)

        self.final_layer_norm = LayerNorm(self.embed_dim)
        self.need_attn = True
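When decoder_glu is enabled, linear1 projects to 2 * conv_dim because nn.GLU splits its input in half along the last dimension and gates one half with a sigmoid of the other, so the activation lands back at conv_dim. A standalone shape check (sizes are illustrative, not taken from any particular config):

import torch
import torch.nn as nn

embed_dim, conv_dim = 512, 512
linear1 = nn.Linear(embed_dim, 2 * conv_dim)
act = nn.GLU()  # halves the last dimension: a * sigmoid(b)

x = torch.randn(10, 4, embed_dim)  # (time, batch, embed_dim)
h = act(linear1(x))
print(h.shape)  # torch.Size([10, 4, 512]), back to conv_dim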
Example #3
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        bidirectional=False,
        left_pad_source=True,
        pretrained_embed=None,
        padding_idx=None,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
        rnn_type="gru"
    ):
        super().__init__(dictionary)
        self.num_layers = num_layers
        self.dropout_in_module = FairseqDropout(
            dropout_in, module_name=self.__class__.__name__
        )
        self.dropout_out_module = FairseqDropout(
            dropout_out, module_name=self.__class__.__name__
        )
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.max_source_positions = max_source_positions

        num_embeddings = len(dictionary)
        self.padding_idx = padding_idx if padding_idx is not None else dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = torch.nn.Embedding(num_embeddings, embed_dim, self.padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.rnn_type = rnn_type
        if rnn_type == "gru":
            self.hidden = GRU(
                input_size=embed_dim,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=self.dropout_out_module.p if num_layers > 1 else 0.0,
                bidirectional=bidirectional,
            )
        elif rnn_type == "lstm":
            self.hidden = LSTM(
                input_size=embed_dim,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=self.dropout_out_module.p if num_layers > 1 else 0.0,
                bidirectional=bidirectional,
            )

        self.left_pad_source = left_pad_source

        self.output_units = hidden_size
        if bidirectional:
            self.bidir_dense = torch.nn.Linear(2, 1)
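Both branches pass inter-layer dropout only when num_layers > 1: torch.nn.GRU and torch.nn.LSTM apply dropout between stacked layers, so a nonzero value with a single layer has no effect and triggers a PyTorch warning. A standalone sketch of the same guard using the plain torch modules (the GRU/LSTM helpers above are assumed to wrap them):

import torch.nn as nn

def build_rnn(rnn_type, embed_dim, hidden_size, num_layers, dropout, bidirectional):
    rnn_cls = {"gru": nn.GRU, "lstm": nn.LSTM}[rnn_type]
    return rnn_cls(
        input_size=embed_dim,
        hidden_size=hidden_size,
        num_layers=num_layers,
        # inter-layer dropout only applies between layers, so pass 0.0
        # for a single-layer RNN to avoid the PyTorch warning
        dropout=dropout if num_layers > 1 else 0.0,
        bidirectional=bidirectional,
    )

rnn = build_rnn("gru", 512, 512, 1, 0.1, bidirectional=True)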
Example #4
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__
        )
        self.encoder_layerdrop = args.encoder_layerdrop

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim)

        self.embed_positions = (
            PositionalEmbedding(
                args.max_source_positions,
                embed_dim,
                self.padding_idx,
                learned=args.encoder_learned_pos,
            )
            if not args.no_token_positional_embeddings
            else None 
        )

        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None

        if not args.adaptive_input and args.quant_noise_pq > 0:
            self.quant_noise = apply_quant_noise_(
                nn.Linear(embed_dim, embed_dim, bias=False),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            self.quant_noise = None

        if self.encoder_layerdrop > 0.0:
            self.layers = LayerDropModuleList(p=self.encoder_layerdrop)
        else:
            self.layers = nn.ModuleList([])
        self.layers.extend(
            [self.build_encoder_layer(args) for i in range(args.encoder_layers)]
        )
        self.num_layers = len(self.layers)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
Example #5
    def __init__(self, args, kernel_size=0):
        super().__init__()
        self.embed_dim = args.encoder_embed_dim
        self.conv_dim = args.encoder_conv_dim
        padding_l = (
            kernel_size // 2
            if kernel_size % 2 == 1
            else ((kernel_size - 1) // 2, kernel_size // 2)
        )

        if args.encoder_glu:
            self.linear1 = Linear(self.embed_dim, 2 * self.conv_dim)
            self.act = nn.GLU()
        else:
            self.linear1 = Linear(self.embed_dim, self.conv_dim)
            self.act = None
        if args.encoder_conv_type == "lightweight":
            self.conv = LightweightConv(
                self.conv_dim,
                kernel_size,
                padding_l=padding_l,
                weight_softmax=args.weight_softmax,
                num_heads=args.encoder_attention_heads,
                weight_dropout=args.weight_dropout,
            )
        elif args.encoder_conv_type == "dynamic":
            self.conv = DynamicConv(
                self.conv_dim,
                kernel_size,
                padding_l=padding_l,
                weight_softmax=args.weight_softmax,
                num_heads=args.encoder_attention_heads,
                weight_dropout=args.weight_dropout,
            )
        else:
            raise NotImplementedError
        self.linear2 = Linear(self.conv_dim, self.embed_dim)

        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__
        )
        self.relu_dropout_module = FairseqDropout(
            args.relu_dropout, module_name=self.__class__.__name__
        )
        self.input_dropout_module = FairseqDropout(
            args.input_dropout, module_name=self.__class__.__name__
        )
        self.normalize_before = args.encoder_normalize_before
        self.fc1 = Linear(self.embed_dim, args.encoder_ffn_embed_dim)
        self.fc2 = Linear(args.encoder_ffn_embed_dim, self.embed_dim)
        self.layer_norms = nn.ModuleList([LayerNorm(self.embed_dim) for _ in range(2)])
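The padding_l expression above keeps the convolution output as long as its input: symmetric padding for odd kernels, one element less on the left than on the right for even kernels. A standalone check of that arithmetic with a plain conv1d and explicit F.pad (LightweightConv/DynamicConv consume padding_l themselves, so this only illustrates the length bookkeeping):

import torch
import torch.nn.functional as F

def same_length_padding(kernel_size):
    # odd kernels: pad (k - 1) // 2 on each side
    # even kernels: pad one element less on the left than on the right
    if kernel_size % 2 == 1:
        return (kernel_size // 2, kernel_size // 2)
    return ((kernel_size - 1) // 2, kernel_size // 2)

x = torch.randn(4, 8, 37)  # (batch, channels, time)
for k in (3, 4, 7, 8):
    left, right = same_length_padding(k)
    weight = torch.randn(8, 8, k)
    y = F.conv1d(F.pad(x, (left, right)), weight)
    assert y.shape[-1] == x.shape[-1], (k, y.shape)
print("output length preserved for every kernel size")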
Example #6
    def __init__(
        self,
        rnn_type: Union[str, Namespace],
        dictionary,
        embed_dim=512,
        hidden_size=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        bidirectional=False,
        left_pad=True,
        pretrained_embed=None,
        padding_idx=None,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
    ):
        super().__init__(dictionary)
        rnn_type = rnn_type.rnn_type if isinstance(rnn_type,
                                                   Namespace) else rnn_type
        self.rnn_type = rnn_type.lower().strip()
        self.is_lstm = self.rnn_type == 'lstm'  # convenience flag for later checks, so the LSTM cell state can be handled
        self.num_layers = num_layers
        self.dropout_in_module = FairseqDropout(
            dropout_in, module_name=self.__class__.__name__)
        self.dropout_out_module = FairseqDropout(
            dropout_out, module_name=self.__class__.__name__)
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.max_source_positions = max_source_positions
        num_embeddings = len(dictionary)
        self.padding_idx = padding_idx if padding_idx is not None else dictionary.pad(
        )
        if pretrained_embed is None:
            self.embed_tokens = JqEmbedding(num_embeddings, embed_dim,
                                            self.padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        JQRNN, _ = get_rnn_cell(self.rnn_type)  # returns the RNN constructor (a callable)
        self.rnn = JQRNN(
            input_size=embed_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=self.dropout_out_module.p if num_layers > 1 else 0.0,
            bidirectional=bidirectional,
        )
        self.left_pad = left_pad

        self.output_units = hidden_size
        if bidirectional:
            self.output_units *= 2
Example #7
    def __init__(
        self, input_size, output_size, hidden_sizes=256, kernel_sizes=3, strides=1,
        dilations=3, num_layers=1, dropout_in=0.0, dropout_out=0.0, residual=False,
        chunk_width=None, chunk_left_context=0, training_stage=True,
    ):
        super().__init__(None)  # no src dictionary
        self.num_layers = num_layers
        if isinstance(hidden_sizes, int):
            hidden_sizes = [hidden_sizes] * num_layers
        else:
            assert len(hidden_sizes) == num_layers
        if isinstance(kernel_sizes, int):
            kernel_sizes = [kernel_sizes] * num_layers
        else:
            assert len(kernel_sizes) == num_layers
        if isinstance(strides, int):
            strides = [strides] * num_layers
        else:
            assert len(strides) == num_layers
        if isinstance(dilations, int):
            dilations = [dilations] * num_layers
        else:
            assert len(dilations) == num_layers
        self.dropout_in_module = FairseqDropout(dropout_in, module_name=self.__class__.__name__)
        self.dropout_out_module = FairseqDropout(dropout_out, module_name=self.__class__.__name__)
        self.residual = residual

        self.tdnn = nn.ModuleList([
            TdnnBNReLU(
                in_channels=input_size if layer == 0 else hidden_sizes[layer - 1],
                out_channels=hidden_sizes[layer], kernel_size=kernel_sizes[layer],
                stride=strides[layer], dilation=dilations[layer],
            )
            for layer in range(num_layers)
        ])

        receptive_field_radius = sum(layer.padding for layer in self.tdnn)
        assert chunk_width is None or (chunk_width > 0 and chunk_left_context >= receptive_field_radius)
        if (
            chunk_width is not None and chunk_width > 0
            and chunk_left_context > receptive_field_radius
        ):
            logger.warning("chunk_{{left,right}}_context can be reduced to {}".format(receptive_field_radius))
        self.out_chunk_begin = self.output_lengths(chunk_left_context + 1) - 1
        self.out_chunk_end = self.output_lengths(chunk_left_context + chunk_width) \
            if chunk_width is not None else None
        self.training_stage = training_stage

        self.fc_out = Linear(hidden_sizes[-1], output_size, dropout=self.dropout_out_module.p)
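The first block of this constructor broadcasts every scalar hyperparameter into a per-layer list, or validates a list that was passed explicitly. A small hypothetical helper that captures the pattern (per_layer is not part of the original code):

def per_layer(value, num_layers, name="value"):
    """Broadcast an int to num_layers entries, or validate an explicit list."""
    if isinstance(value, int):
        return [value] * num_layers
    assert len(value) == num_layers, f"{name} must have {num_layers} entries"
    return list(value)

hidden_sizes = per_layer(256, 3, "hidden_sizes")        # [256, 256, 256]
kernel_sizes = per_layer([3, 3, 5], 3, "kernel_sizes")  # [3, 3, 5]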
Example #8
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__)

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(
            embed_dim)

        self.embed_positions = (PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None)
        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None

        self.upsample_scale = args.upsample_scale
        self.upsampler = nn.Linear(embed_dim, self.upsample_scale * embed_dim)
Example #9
    def __init__(self, args):
        super().__init__(None)

        self.dropout_module = FairseqDropout(
            p=args.dropout, module_name=self.__class__.__name__)
        self.embed_scale = math.sqrt(args.encoder_embed_dim)
        if args.no_scale_embedding:
            self.embed_scale = 1.0
        self.padding_idx = 1

        self.subsample = Conv1dSubsampler(
            args.input_feat_per_channel * args.input_channels,
            args.conv_channels,
            args.encoder_embed_dim,
            [int(k) for k in args.conv_kernel_sizes.split(",")],
        )

        self.embed_positions = PositionalEmbedding(args.max_source_positions,
                                                   args.encoder_embed_dim,
                                                   self.padding_idx)

        self.transformer_layers = nn.ModuleList([
            TransformerEncoderLayer(args) for _ in range(args.encoder_layers)
        ])
        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(args.encoder_embed_dim)
        else:
            self.layer_norm = None
Example #10
    def __init__(self, args):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(
                args.encoder_embed_dim,
                args.var_pred_hidden_dim,
                kernel_size=args.var_pred_kernel_size,
                padding=(args.var_pred_kernel_size - 1) // 2,
            ),
            nn.ReLU(),
        )
        self.ln1 = nn.LayerNorm(args.var_pred_hidden_dim)
        self.dropout_module = FairseqDropout(
            p=args.var_pred_dropout, module_name=self.__class__.__name__)
        self.conv2 = nn.Sequential(
            nn.Conv1d(
                args.var_pred_hidden_dim,
                args.var_pred_hidden_dim,
                kernel_size=args.var_pred_kernel_size,
                padding=1,
            ),
            nn.ReLU(),
        )
        self.ln2 = nn.LayerNorm(args.var_pred_hidden_dim)
        self.proj = nn.Linear(args.var_pred_hidden_dim, 1)
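The forward pass is not shown, but two details are worth noting: the hard-coded padding=1 in conv2 only preserves the sequence length when var_pred_kernel_size is 3 (where it equals (kernel_size - 1) // 2), and nn.Conv1d expects (batch, channels, time), so modules like this typically transpose around the convolution. A generic sketch of that convention with illustrative shapes:

import torch
import torch.nn as nn

conv = nn.Sequential(
    nn.Conv1d(256, 256, kernel_size=3, padding=1),  # padding=(k - 1) // 2 keeps the length for k=3
    nn.ReLU(),
)
x = torch.randn(4, 50, 256)  # (batch, time, channels), the usual encoder layout
y = conv(x.transpose(1, 2)).transpose(1, 2)  # Conv1d wants (batch, channels, time)
print(y.shape)  # torch.Size([4, 50, 256])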
Example #11
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        self.dropout_module = FairseqDropout(args.dropout, module_name=self.__class__.__name__)
        self.dropword_module = FairseqFeatureDropout(args.word_dropout, module_name=self.__class__.__name__)
        self.encoder_layerdrop = args.encoder_layerdrop

        embed_dim = embed_tokens.embedding_dim
        assert embed_dim == args.encoder_embed_dim, 'encoder embedding dim mismatch.'
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim)

        self.embed_positions = (
            PositionalEmbedding(
                args.max_source_positions,
                embed_dim,
                self.padding_idx,
                learned=args.encoder_learned_pos,
            )
            if not args.no_token_positional_embeddings
            else None
        )

        assert not args.layernorm_embedding or not args.encoder_normalize_before

        if args.layernorm_embedding:
            self.layernorm_embedding = LayerNorm(embed_dim)
            self.layernorm_porjected_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None
            self.layernorm_porjected_embedding = None

        self.proj_len = args.projection_length
        self.dynamic_projection = not args.fix_projection_length
        self.projected_embeddings = Parameter(torch.Tensor(self.proj_len, embed_dim))
        nn.init.normal_(self.projected_embeddings, mean=0., std=embed_dim ** -0.5)
        if not args.no_token_positional_embeddings and not args.encoder_learned_pos:
            projected_positions = get_sinusoidal_positional_embedding(self.proj_len, embed_dim)
        else:
            projected_positions = None
        self.register_buffer('projected_positions', projected_positions)

        if self.encoder_layerdrop > 0.0:
            self.layers = LayerDropModuleList(p=self.encoder_layerdrop)
        else:
            self.layers = nn.ModuleList([])
        self.layers.extend([self.build_encoder_layer(i, args) for i in range(args.encoder_layers)])
        self.num_layers = len(self.layers)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
            self.proj_layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
            self.proj_layer_norm = None
Example #12
    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__)

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = (PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None)

        self.layers = nn.ModuleList([])
        self.layers.extend([
            LightConvEncoderLayer(args,
                                  kernel_size=args.encoder_kernel_size_list[i])
            for i in range(args.encoder_layers)
        ])
        self.register_buffer("version", torch.Tensor([2]))
        self.normalize = args.encoder_normalize_before
        if self.normalize:
            self.layer_norm = LayerNorm(embed_dim)
Example #13
    def __init__(
            self,
            args,  # Passing args alone supplies all the required command-line parameters; exactly which ones are needed is not listed here
            dictionary,
            embed_tokens):
        self.args = args
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__)
        self.encoder_layerdrop = args.encoder_layerdrop

        embed_dim = embed_tokens.embedding_dim  # number of dimensions once the tokens have been encoded
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(
            embed_dim)

        # self.src_word_emb = embed_tokens  # edited

        self.embed_positions = (PositionalEmbedding(
            args.max_source_positions,
            embed_dim,
            self.padding_idx,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None)

        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None

        if not args.adaptive_input and args.quant_noise_pq > 0:
            self.quant_noise = apply_quant_noise_(
                nn.Linear(embed_dim, embed_dim, bias=False),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            self.quant_noise = None

        if self.encoder_layerdrop > 0.0:
            self.layers = LayerDropModuleList(p=self.encoder_layerdrop)
        else:
            self.layers = nn.ModuleList([])
        self.layers.extend([
            self.build_encoder_layer(args)
            for i in range(args.encoder_layers)
        ])
        self.num_layers = len(self.layers)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
Example #14
    def __init__(self, embed_dim, proj_dim, dropout):
        super().__init__()
        self.layer_norm = LayerNorm(embed_dim)
        self.down_proj = nn.Linear(embed_dim, proj_dim)
        self.up_proj = nn.Linear(proj_dim, embed_dim)
        self.dropout_module = FairseqDropout(
            dropout, module_name=self.__class__.__name__
        )
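The forward pass of this bottleneck module is not shown; below is a self-contained sketch of the usual adapter wiring with the same submodules. The ReLU non-linearity and the residual connection are assumptions, and plain torch modules stand in for LayerNorm and FairseqDropout:

import torch
import torch.nn as nn
import torch.nn.functional as F

class BottleneckAdapter(nn.Module):
    """Sketch only: normalize, project down, activate, project up, add residual."""

    def __init__(self, embed_dim, proj_dim, dropout):
        super().__init__()
        self.layer_norm = nn.LayerNorm(embed_dim)
        self.down_proj = nn.Linear(embed_dim, proj_dim)
        self.up_proj = nn.Linear(proj_dim, embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        residual = x
        x = self.up_proj(F.relu(self.down_proj(self.layer_norm(x))))
        return residual + self.dropout(x)

y = BottleneckAdapter(512, 64, 0.1)(torch.randn(4, 20, 512))
print(y.shape)  # torch.Size([4, 20, 512])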
Example #15
    def __init__(self, args, dictionary, embed_tokens, embed_bytes):
        self.args = args
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__)
        self.encoder_layerdrop = args.encoder_layerdrop

        embed_dim = embed_bytes.embedding_dim

        # assume the padding will be the same for all sequences
        self.padding_idx = embed_bytes.padding_idx

        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens

        if not self.args.input_combine == 'drop_bytes':
            self.embed_bytes = embed_bytes

        if self.args.input_combine == 'cnn':
            self.byte_combine = ByteCombineCNN(embed_dim, embed_dim)
        elif self.args.input_combine == 'drop_bytes':
            self.byte_combine = None
        else:  # input_combine == 'sum'
            self.byte_combine = ByteCombineSUM()

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(
            embed_dim)

        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None

        if not args.adaptive_input and args.quant_noise_pq > 0:
            self.quant_noise = apply_quant_noise_(
                nn.Linear(embed_dim, embed_dim, bias=False),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            self.quant_noise = None

        if self.encoder_layerdrop > 0.0:
            self.layers = LayerDropModuleList(p=self.encoder_layerdrop)
        else:
            self.layers = nn.ModuleList([])
        self.layers.extend([
            self.build_encoder_layer(args) for _ in range(args.encoder_layers)
        ])
        self.num_layers = len(self.layers)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
Example #16
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        bidirectional=False,
        left_pad=True,
        pretrained_embed=None,
        padding_idx=None,
        max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS,
    ):
        super().__init__(dictionary)
        self.num_layers = num_layers
        self.dropout_in_module = FairseqDropout(
            dropout_in, module_name=self.__class__.__name__)
        self.dropout_out_module = FairseqDropout(
            dropout_out, module_name=self.__class__.__name__)
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.max_source_positions = max_source_positions

        num_embeddings = len(dictionary)
        self.padding_idx = padding_idx if padding_idx is not None else dictionary.pad(
        )
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim // 2,
                                          self.padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.lstm = LSTM(
            input_size=embed_dim,  # TODO(hainan): adding the delta field
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=self.dropout_out_module.p if num_layers > 1 else 0.0,
            bidirectional=bidirectional,
        )
        self.left_pad = left_pad

        self.output_units = hidden_size
        if bidirectional:
            self.output_units *= 2
Example #17
    def __init__(self, args, embed_dim):

        super(TransformerEncoder, self).__init__()

        self.dropout_module = FairseqDropout(args.dropout,
                                             module_name=self.__class__.__name__)
        self.encoder_layerdrop = args.encoder_layerdrop

        # embed_dim = embed_tokens.embedding_dim
        # self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        # self.embed_tokens = embed_tokens

        # self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim)

        # self.embed_positions = (
        #     PositionalEmbedding(
        #         args.max_source_positions,
        #         embed_dim,
        #         self.padding_idx,
        #         learned=args.encoder_learned_pos,
        #     )
        #     if not args.no_token_positional_embeddings
        #     else None
        # )
        self.embed_positions = None

        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None

        if not args.adaptive_input and args.quant_noise_pq > 0:
            self.quant_noise = apply_quant_noise_(
                nn.Linear(embed_dim, embed_dim, bias=False),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            self.quant_noise = None

        if self.encoder_layerdrop > 0.0:
            self.layers = LayerDropModuleList(p=self.encoder_layerdrop)
        else:
            self.layers = nn.ModuleList([])
        self.layers.extend(
            [self.build_encoder_layer(args) for i in range(args.encoder_layers)]
        )
        self.num_layers = len(self.layers)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
Example #18
    def __init__(self,
                 args,
                 conv_layers_before=None,
                 input_size=83,
                 transformer_context=None):
        self.args = args
        super(TransformerEncoder, self).__init__(None)  # no src dictionary
        self.register_buffer("version", torch.Tensor([3]))

        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__)
        self.encoder_layerdrop = args.encoder_layerdrop

        embed_dim = args.encoder_embed_dim
        self.max_source_positions = args.max_source_positions

        self.conv_layers_before = conv_layers_before
        self.fc0 = Linear(input_size,
                          embed_dim) if input_size != embed_dim else None

        self.embed_positions = (PositionalEmbedding(
            self.output_lengths(self.max_source_positions),
            embed_dim,
            0,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None)

        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None

        if not args.adaptive_input and args.quant_noise_pq > 0:
            self.quant_noise = apply_quant_noise_(
                nn.Linear(embed_dim, embed_dim, bias=False),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            self.quant_noise = None

        if self.encoder_layerdrop > 0.0:
            self.layers = LayerDropModuleList(p=self.encoder_layerdrop)
        else:
            self.layers = nn.ModuleList([])
        self.layers.extend([
            self.build_encoder_layer(args) for i in range(args.encoder_layers)
        ])
        self.num_layers = len(self.layers)

        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None

        self.transformer_context = transformer_context
Example #19
    def __init__(self, cfg, dictionary, embed_tokens, return_fc=False):
        self.cfg = cfg
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        self.dropout_module = FairseqDropout(
            cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__)
        )
        self.encoder_layerdrop = cfg.encoder.layerdrop
        self.return_fc = return_fc

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = cfg.max_source_positions

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim)

        self.embed_positions = (
            PositionalEmbedding(
                cfg.max_source_positions,
                embed_dim,
                self.padding_idx,
                learned=cfg.encoder.learned_pos,
            )
            if not cfg.no_token_positional_embeddings
            else None
        )
        if cfg.layernorm_embedding:
            self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export)
        else:
            self.layernorm_embedding = None

        if not cfg.adaptive_input and cfg.quant_noise.pq > 0:
            self.quant_noise = apply_quant_noise_(
                nn.Linear(embed_dim, embed_dim, bias=False),
                cfg.quant_noise.pq,
                cfg.quant_noise.pq_block_size,
            )
        else:
            self.quant_noise = None

        if self.encoder_layerdrop > 0.0:
            self.layers = LayerDropModuleList(p=self.encoder_layerdrop)
        else:
            self.layers = nn.ModuleList([])
        self.layers.extend(
            [self.build_encoder_layer(cfg) for i in range(cfg.encoder.layers)]
        )
        self.num_layers = len(self.layers)

        if cfg.encoder.normalize_before:
            self.layer_norm = LayerNorm(embed_dim, export=cfg.export)
        else:
            self.layer_norm = None
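Several of these encoders swap nn.ModuleList for LayerDropModuleList when encoder_layerdrop > 0. The idea is to skip each layer with probability p during training while keeping every layer at inference time; the following is a simplified standalone sketch of that behaviour, not fairseq's implementation:

import torch
import torch.nn as nn

class SimpleLayerDropList(nn.ModuleList):
    """Iterate over layers, randomly skipping each with probability p while training."""

    def __init__(self, p, modules=None):
        super().__init__(modules)
        self.p = p

    def __iter__(self):
        drop = torch.empty(len(self)).uniform_()
        for i, layer in enumerate(super().__iter__()):
            if not self.training or drop[i] > self.p:
                yield layer

layers = SimpleLayerDropList(p=0.2, modules=[nn.Linear(8, 8) for _ in range(6)])
layers.train()
x = torch.randn(3, 8)
for layer in layers:  # roughly 20% of the layers are skipped on each pass
    x = layer(x)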
Example #20
    def __init__(self, args, src_dict, embed_speaker):
        super().__init__(src_dict)
        self.args = args
        self.padding_idx = src_dict.pad()
        self.n_frames_per_step = args.n_frames_per_step
        self.out_dim = args.output_frame_dim * args.n_frames_per_step

        self.embed_speaker = embed_speaker
        self.spk_emb_proj = None
        if embed_speaker is not None:
            self.spk_emb_proj = nn.Linear(
                args.encoder_embed_dim + args.speaker_embed_dim,
                args.encoder_embed_dim)

        self.dropout_module = FairseqDropout(
            p=args.dropout, module_name=self.__class__.__name__)
        self.embed_tokens = Embedding(len(src_dict),
                                      args.encoder_embed_dim,
                                      padding_idx=self.padding_idx)

        self.embed_positions = PositionalEmbedding(args.max_source_positions,
                                                   args.encoder_embed_dim,
                                                   self.padding_idx)
        self.pos_emb_alpha = nn.Parameter(torch.ones(1))
        self.dec_pos_emb_alpha = nn.Parameter(torch.ones(1))

        self.encoder_fft_layers = nn.ModuleList(
            FFTLayer(args.encoder_embed_dim,
                     args.encoder_attention_heads,
                     args.fft_hidden_dim,
                     args.fft_kernel_size,
                     dropout=args.dropout,
                     attention_dropout=args.attention_dropout)
            for _ in range(args.encoder_layers))

        self.var_adaptor = VarianceAdaptor(args)

        self.decoder_fft_layers = nn.ModuleList(
            FFTLayer(args.decoder_embed_dim,
                     args.decoder_attention_heads,
                     args.fft_hidden_dim,
                     args.fft_kernel_size,
                     dropout=args.dropout,
                     attention_dropout=args.attention_dropout)
            for _ in range(args.decoder_layers))

        self.out_proj = nn.Linear(args.decoder_embed_dim, self.out_dim)

        self.postnet = None
        if args.add_postnet:
            self.postnet = Postnet(self.out_dim, args.postnet_conv_dim,
                                   args.postnet_conv_kernel_size,
                                   args.postnet_layers, args.postnet_dropout)

        self.apply(model_init)
Example #21
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        max_positions=1024,
        convolutions=((512, 3), ) * 20,
        dropout=0.1,
        attention=False,
        attention_nheads=1,
    ):
        super().__init__(dictionary)
        self.dropout_module = FairseqDropout(
            dropout, module_name=self.__class__.__name__)
        self.num_attention_layers = None

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                      self.padding_idx)
        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            self.padding_idx,
        )

        def expand_bool_array(val):
            if isinstance(val, bool):
                # expand True into [True, True, ...] and do the same with False
                return [val] * len(convolutions)
            return val

        attention = expand_bool_array(attention)

        in_channels = convolutions[0][0]
        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.attention = nn.ModuleList()
        self.attproj = nn.ModuleList()
        for i, (out_channels, kernel_size) in enumerate(convolutions):
            self.projections.append(
                Linear(in_channels, out_channels
                       ) if in_channels != out_channels else None)
            self.convolutions.append(
                ConvTBC(in_channels,
                        out_channels * 2,
                        kernel_size,
                        dropout=dropout))

            self.attention.append(
                SelfAttention(out_channels, embed_dim, attention_nheads
                              ) if attention[i] else None)
            in_channels = out_channels

        self.fc2 = Linear(in_channels, embed_dim)
Example #22
    def __init__(self, in_dim, hidden_dim, kernel_size, dropout):
        super().__init__()
        self.ffn = nn.Sequential(
            nn.Conv1d(in_dim,
                      hidden_dim,
                      kernel_size=kernel_size,
                      padding=(kernel_size - 1) // 2), nn.ReLU(),
            nn.Conv1d(hidden_dim,
                      in_dim,
                      kernel_size=kernel_size,
                      padding=(kernel_size - 1) // 2))
        self.layer_norm = LayerNorm(in_dim)
        self.dropout = self.dropout_module = FairseqDropout(
            p=dropout, module_name=self.__class__.__name__)
Example #23
    def __init__(
        self, dictionary, embed_dim=512, embed_dict=None, max_positions=1024,
        convolutions=((512, 3),) * 20, dropout=0.1,
    ):
        super().__init__(dictionary)
        self.dropout_module = FairseqDropout(
            dropout, module_name=self.__class__.__name__
        )
        self.num_attention_layers = None

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            self.padding_idx,
        )

        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.residuals = []

        layer_in_channels = [in_channels]
        for _, (out_channels, kernel_size, residual) in enumerate(convolutions):
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            self.projections.append(Linear(residual_dim, out_channels)
                                    if residual_dim != out_channels else None)
            if kernel_size % 2 == 1:
                padding = kernel_size // 2
            else:
                padding = 0
            self.convolutions.append(
                ConvTBC(in_channels, out_channels * 2, kernel_size,
                        dropout=dropout, padding=padding)
            )
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)
        self.fc2 = Linear(in_channels, embed_dim)
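Each convolution spec in the loop above carries a residual offset: 0 means no residual connection, and a value r means the residual is taken from the layer r steps back, so residual_dim is looked up in layer_in_channels to decide whether a channel-matching projection is needed. A standalone sketch of that bookkeeping (extend_conv_spec is assumed to default the third field to 1 for plain (channels, kernel_size) tuples):

convolutions = ((256, 3, 1), (256, 3, 1), (512, 3, 2))  # (out_channels, kernel_size, residual)

layer_in_channels = [convolutions[0][0]]
for out_channels, kernel_size, residual in convolutions:
    # residual == 0: no skip connection, so no projection is ever needed;
    # otherwise look up the channel count of the layer `residual` steps back.
    residual_dim = out_channels if residual == 0 else layer_in_channels[-residual]
    needs_projection = residual_dim != out_channels
    print(out_channels, residual_dim, needs_projection)
    layer_in_channels.append(out_channels)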
Example #24
    def __init__(self, args, src_dict, padding_idx=1):
        super().__init__(None)
        self._future_mask = torch.empty(0)

        self.args = args
        self.padding_idx = src_dict.pad() if src_dict else padding_idx
        self.n_frames_per_step = args.n_frames_per_step
        self.out_dim = args.output_frame_dim * args.n_frames_per_step

        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__
        )
        self.embed_positions = PositionalEmbedding(
            args.max_target_positions, args.decoder_embed_dim, self.padding_idx
        )
        self.pos_emb_alpha = nn.Parameter(torch.ones(1))
        self.prenet = nn.Sequential(
            Prenet(
                self.out_dim, args.prenet_layers, args.prenet_dim, args.prenet_dropout
            ),
            nn.Linear(args.prenet_dim, args.decoder_embed_dim),
        )

        self.n_transformer_layers = args.decoder_transformer_layers
        self.transformer_layers = nn.ModuleList(
            TransformerDecoderLayer(args) for _ in range(self.n_transformer_layers)
        )
        if args.decoder_normalize_before:
            self.layer_norm = LayerNorm(args.decoder_embed_dim)
        else:
            self.layer_norm = None

        self.feat_proj = nn.Linear(args.decoder_embed_dim, self.out_dim)
        self.eos_proj = nn.Linear(args.decoder_embed_dim, 1)

        self.postnet = Postnet(
            self.out_dim,
            args.postnet_conv_dim,
            args.postnet_conv_kernel_size,
            args.postnet_layers,
            args.postnet_dropout,
        )

        self.ctc_proj = None
        if getattr(args, "ctc_weight", 0.0) > 0.0:
            self.ctc_proj = nn.Linear(self.out_dim, len(src_dict))

        self.apply(decoder_init)
Example #25
    def __init__(self, args, src_dict, embed_speaker):
        super().__init__(src_dict)
        self.padding_idx = src_dict.pad()
        self.embed_speaker = embed_speaker
        self.spk_emb_proj = None
        if embed_speaker is not None:
            self.spk_emb_proj = nn.Linear(
                args.encoder_embed_dim + args.speaker_embed_dim, args.encoder_embed_dim
            )

        self.dropout_module = FairseqDropout(
            p=args.dropout, module_name=self.__class__.__name__
        )
        self.embed_tokens = nn.Embedding(
            len(src_dict), args.encoder_embed_dim, padding_idx=self.padding_idx
        )
        assert args.encoder_conv_kernel_size % 2 == 1
        self.prenet = nn.ModuleList(
            nn.Sequential(
                nn.Conv1d(
                    args.encoder_embed_dim,
                    args.encoder_embed_dim,
                    kernel_size=args.encoder_conv_kernel_size,
                    padding=((args.encoder_conv_kernel_size - 1) // 2),
                ),
                nn.BatchNorm1d(args.encoder_embed_dim),
                nn.ReLU(),
                nn.Dropout(args.encoder_dropout),
            )
            for _ in range(args.encoder_conv_layers)
        )
        self.prenet_proj = nn.Linear(args.encoder_embed_dim, args.encoder_embed_dim)
        self.embed_positions = PositionalEmbedding(
            args.max_source_positions, args.encoder_embed_dim, self.padding_idx
        )
        self.pos_emb_alpha = nn.Parameter(torch.ones(1))

        self.transformer_layers = nn.ModuleList(
            TransformerEncoderLayer(args)
            for _ in range(args.encoder_transformer_layers)
        )
        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(args.encoder_embed_dim)
        else:
            self.layer_norm = None

        self.apply(encoder_init)
Example #26
    def __init__(self, args, dictionary):
        super().__init__(dictionary)

        self.dropout_module = FairseqDropout(
            p=args.dropout, module_name=self.__class__.__name__)
        self.embed_scale = math.sqrt(args.encoder_embed_dim)
        if args.no_scale_embedding:
            self.embed_scale = 1.0
        self.padding_idx = 1

        self.subsample = Conv1dSubsampler(
            args.input_feat_per_channel * args.input_channels,
            args.conv_channels,
            args.encoder_embed_dim,
            [int(k) for k in args.conv_kernel_sizes.split(",")],
        )

        self.embed_positions = PositionalEmbedding(args.max_source_positions,
                                                   args.encoder_embed_dim,
                                                   self.padding_idx)

        self.transformer_layers = nn.ModuleList([
            TransformerEncoderLayer(args) for _ in range(args.encoder_layers)
        ])
        if args.encoder_normalize_before:
            self.layer_norm = LayerNorm(args.encoder_embed_dim)
        else:
            self.layer_norm = None
        # ctc
        self.ctc_compress_out = args.ctc_compress_out
        if self.ctc_compress_out:
            self.ctc_fc = nn.Linear(args.encoder_embed_dim, len(dictionary))
            assert args.criterion == "ctc_multi_loss"
            self.ctc_layer = args.ctc_encoder_layer
            self.ctc_compress_method = getattr(CTCCompressStrategy,
                                               args.ctc_compress_strategy)
Example #27
    def __init__(self, args):
        super().__init__(None)  # no dictionary
        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__)
        self.max_source_positions = args.max_source_positions
        self.embed_positions = (PositionalEmbedding(
            args.max_source_positions,
            args.encoder_embed_dim,
            padding_idx=0,
            learned=args.encoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None)
        if args.encoder_layers < 2:
            raise ValueError(
                "encoder_layers should be larger than 2, got: {}".format(
                    args.encoder_layers))
        vgg_config = json.loads(getattr(args, "vgg_config", "{}") or "{}")
        logger.info(vgg_config)
        self.vggblocks = nn.ModuleList([
            VGGNet(
                **vgg_config[0]
                # in_channels=3,
                # subsample=3,
                # conv_kernel_size=5,
                # activation_fn="gelu"
            )
        ])
        for i in range(1, args.encoder_layers):
            self.vggblocks.append(
                VGGNet(
                    **vgg_config[i]
                    # in_channels=64,
                    # subsample=2,
                    # out_channels=args.encoder_embed_dim if i==args.encoder_layers-1 else 64,
                    # activation_fn="gelu"
                ))
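vgg_config arrives as a JSON string on the command line and is decoded into one keyword-argument dict per block, which the loop splats into VGGNet(**...). A hypothetical example of such a string (the keys follow the commented-out hints above; the real fields depend on the VGGNet signature):

import json

raw = '[{"in_channels": 3, "subsample": 3, "conv_kernel_size": 5}, {"in_channels": 64, "subsample": 2}]'
vgg_config = json.loads(raw)
for i, block_kwargs in enumerate(vgg_config):
    print(i, block_kwargs)  # each dict would be passed as VGGNet(**block_kwargs)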
Example #28
    def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False):
        self.args = args
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))
        self._future_mask = torch.empty(0)

        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__)
        self.decoder_layerdrop = args.decoder_layerdrop
        self.only_drop_topk = args.only_drop_topk
        self.share_input_output_embed = args.share_decoder_input_output_embed

        input_embed_dim = embed_tokens.embedding_dim
        embed_dim = args.decoder_embed_dim
        self.embed_dim = embed_dim
        self.output_embed_dim = args.decoder_output_dim

        self.padding_idx = embed_tokens.padding_idx
        self.max_target_positions = args.max_target_positions

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(
            embed_dim)

        if not args.adaptive_input and args.quant_noise_pq > 0:
            self.quant_noise = apply_quant_noise_(
                nn.Linear(embed_dim, embed_dim, bias=False),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            self.quant_noise = None

        self.project_in_dim = (Linear(input_embed_dim, embed_dim, bias=False)
                               if embed_dim != input_embed_dim else None)

        self.embed_positions = (PositionalEmbedding(
            args.max_target_positions,
            embed_dim,
            self.padding_idx,
            learned=args.decoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None)

        if getattr(args, "layernorm_embedding", False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None

        self.cross_self_attention = getattr(args, "cross_self_attention",
                                            False)

        if self.decoder_layerdrop > 0.0:
            if self.only_drop_topk > 0:
                self.layers = PartLayerDropModuleList(
                    p=self.decoder_layerdrop,
                    top_k=self.only_drop_topk,
                    layer_num=args.decoder_layers)
            else:
                self.layers = LayerDropModuleList(p=self.decoder_layerdrop)
        else:
            self.layers = nn.ModuleList([])
        self.layers.extend([
            self.build_decoder_layer(args, no_encoder_attn)
            for _ in range(args.decoder_layers)
        ])
        self.num_layers = len(self.layers)

        if args.decoder_normalize_before and not getattr(
                args, "no_decoder_final_norm", False):
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None

        self.project_out_dim = (Linear(
            embed_dim, self.output_embed_dim, bias=False)
                                if embed_dim != self.output_embed_dim
                                and not args.tie_adaptive_weights else None)

        self.adaptive_softmax = None
        self.output_projection = None
        if args.adaptive_softmax_cutoff is not None:
            self.adaptive_softmax = AdaptiveSoftmax(
                len(dictionary),
                self.output_embed_dim,
                utils.eval_str_list(args.adaptive_softmax_cutoff, type=int),
                dropout=args.adaptive_softmax_dropout,
                adaptive_inputs=embed_tokens
                if args.tie_adaptive_weights else None,
                factor=args.adaptive_softmax_factor,
                tie_proj=args.tie_adaptive_proj,
            )
        elif self.share_input_output_embed:
            self.output_projection = nn.Linear(
                self.embed_tokens.weight.shape[1],
                self.embed_tokens.weight.shape[0],
                bias=False,
            )
            self.output_projection.weight = self.embed_tokens.weight
        else:
            self.output_projection = nn.Linear(self.output_embed_dim,
                                               len(dictionary),
                                               bias=False)
            nn.init.normal_(self.output_projection.weight,
                            mean=0,
                            std=self.output_embed_dim**-0.5)
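When share_decoder_input_output_embed is set, the output projection above reuses the embedding matrix, so logits are computed against the same vectors that embed the input tokens; otherwise a fresh matrix is created and initialised with std = output_embed_dim ** -0.5. A minimal standalone sketch of the tied case:

import torch
import torch.nn as nn

vocab_size, embed_dim = 1000, 512
embed_tokens = nn.Embedding(vocab_size, embed_dim, padding_idx=1)

output_projection = nn.Linear(embed_dim, vocab_size, bias=False)
output_projection.weight = embed_tokens.weight  # one parameter tensor serves both roles

hidden = torch.randn(4, 7, embed_dim)
logits = output_projection(hidden)
print(logits.shape)  # torch.Size([4, 7, 1000])
assert output_projection.weight.data_ptr() == embed_tokens.weight.data_ptr()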
Example #29
    def __init__(
        self, args, dictionary, embed_tokens, no_encoder_attn=False, final_norm=True
    ):
        super().__init__(dictionary)
        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__
        )
        self.share_input_output_embed = args.share_decoder_input_output_embed

        input_embed_dim = embed_tokens.embedding_dim
        embed_dim = args.decoder_embed_dim
        output_embed_dim = args.decoder_output_dim

        padding_idx = embed_tokens.padding_idx
        self.max_target_positions = args.max_target_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)  # todo: try with input_embed_dim

        self.project_in_dim = (
            Linear(input_embed_dim, embed_dim, bias=False)
            if embed_dim != input_embed_dim
            else None
        )

        self.embed_positions = (
            PositionalEmbedding(
                args.max_target_positions,
                embed_dim,
                padding_idx,
                learned=args.decoder_learned_pos,
            )
            if not args.no_token_positional_embeddings
            else None
        )

        self.layers = nn.ModuleList([])
        self.layers.extend(
            [
                LightConvDecoderLayer(
                    args, no_encoder_attn, kernel_size=args.decoder_kernel_size_list[i]
                )
                for i in range(args.decoder_layers)
            ]
        )

        self.adaptive_softmax = None

        self.project_out_dim = (
            Linear(embed_dim, output_embed_dim, bias=False)
            if embed_dim != output_embed_dim and not args.tie_adaptive_weights
            else None
        )

        if args.adaptive_softmax_cutoff is not None:
            self.adaptive_softmax = AdaptiveSoftmax(
                len(dictionary),
                output_embed_dim,
                utils.eval_str_list(args.adaptive_softmax_cutoff, type=int),
                dropout=args.adaptive_softmax_dropout,
                adaptive_inputs=embed_tokens if args.tie_adaptive_weights else None,
                factor=args.adaptive_softmax_factor,
                tie_proj=args.tie_adaptive_proj,
            )
        elif not self.share_input_output_embed:
            self.embed_out = nn.Parameter(
                torch.Tensor(len(dictionary), output_embed_dim)
            )
            nn.init.normal_(self.embed_out, mean=0, std=output_embed_dim ** -0.5)
        self.register_buffer("version", torch.Tensor([2]))
        self.normalize = args.decoder_normalize_before and final_norm
        if self.normalize:
            self.layer_norm = LayerNorm(embed_dim)
Example #30
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        attention=True,
        encoder_output_units=512,
        pretrained_embed=None,
        share_input_output_embed=False,
        adaptive_softmax_cutoff=None,
        max_target_positions=DEFAULT_MAX_TARGET_POSITIONS,
        residuals=False,
    ):
        super().__init__(dictionary)
        self.dropout_in_module = FairseqDropout(
            dropout_in, module_name=self.__class__.__name__
        )
        self.dropout_out_module = FairseqDropout(
            dropout_out, module_name=self.__class__.__name__
        )
        self.hidden_size = hidden_size
        self.share_input_output_embed = share_input_output_embed
        self.need_attn = True
        self.max_target_positions = max_target_positions
        self.residuals = residuals
        self.num_layers = num_layers

        self.adaptive_softmax = None
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        self.encoder_output_units = encoder_output_units
        if encoder_output_units != hidden_size and encoder_output_units != 0:
            self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size)
            self.encoder_cell_proj = Linear(encoder_output_units, hidden_size)
        else:
            self.encoder_hidden_proj = self.encoder_cell_proj = None

        # disable input feeding if there is no encoder
        # input feeding is described in arxiv.org/abs/1508.04025
        input_feed_size = 0 if encoder_output_units == 0 else hidden_size
        self.layers = nn.ModuleList(
            [
                LSTMCell(
                    input_size=input_feed_size + embed_dim
                    if layer == 0
                    else hidden_size,
                    hidden_size=hidden_size,
                )
                for layer in range(num_layers)
            ]
        )

        if attention:
            # TODO make bias configurable
            self.attention = AttentionLayer(
                hidden_size, encoder_output_units, hidden_size, bias=False
            )
        else:
            self.attention = None

        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)

        if adaptive_softmax_cutoff is not None:
            # setting adaptive_softmax dropout to dropout_out for now but can be redefined
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings,
                hidden_size,
                adaptive_softmax_cutoff,
                dropout=dropout_out,
            )
        elif not self.share_input_output_embed:
            self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
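The first LSTMCell above consumes the embedded target token concatenated with the previous attention context (input feeding), which is why its input size is input_feed_size + embed_dim, while later cells only take the previous cell's hidden state. A standalone sketch of one decoding step under that wiring; all dimensions are illustrative and the concatenation order is an assumption:

import torch
import torch.nn as nn

embed_dim, hidden_size, num_layers = 512, 512, 2
cells = nn.ModuleList([
    nn.LSTMCell(
        input_size=hidden_size + embed_dim if layer == 0 else hidden_size,
        hidden_size=hidden_size,
    )
    for layer in range(num_layers)
])

x = torch.randn(4, embed_dim)              # embedded target token for one step
input_feed = torch.zeros(4, hidden_size)   # attention context from the previous step
states = [(torch.zeros(4, hidden_size), torch.zeros(4, hidden_size))
          for _ in range(num_layers)]

inp = torch.cat([x, input_feed], dim=1)
for i, cell in enumerate(cells):
    h, c = cell(inp, states[i])
    states[i] = (h, c)
    inp = h  # the next layer consumes this layer's hidden state
print(h.shape)  # torch.Size([4, 512])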