Example #1
 def build_embedding(dictionary,
                     embed_dim,
                     path=None,
                     num_embed_chunks=1):
     assert embed_dim % num_embed_chunks == 0, (
         f"Number of embedding chunks = {num_embed_chunks} should be " +
         f"divisible by the embedding dimension = {embed_dim}")
     assert path is None or num_embed_chunks == 1, (
         "Loading embedding from a path with number of embedding chunks > 1"
         + " is not yet supported")
     num_embeddings = len(dictionary)
     padding_idx = dictionary.pad()
     # if provided, load pretrained embeddings from file
     if path:
         emb = Embedding(num_embeddings, embed_dim, padding_idx)
         embed_dict = utils.parse_embedding(path)
         utils.load_embedding(embed_dict, dictionary, emb)
     else:
         embed_chunk_dim = embed_dim // num_embed_chunks
         emb = nn.ModuleList()
         for i in range(num_embed_chunks):
             emb.append(
                 Embedding(num_embeddings, embed_chunk_dim,
                           padding_idx))
     return emb
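A minimal usage sketch for the helper above, assuming it lives next to fairseq's `Embedding` helper and `utils` as in the snippet (the vocabulary and sizes are made up): one call builds a single table, the other builds an `nn.ModuleList` of equally sized chunks.
 from fairseq.data import Dictionary

 dictionary = Dictionary()
 for word in ("hello", "world"):
     dictionary.add_symbol(word)

 # One 512-dim table (optionally initialized from `path`).
 emb = build_embedding(dictionary, embed_dim=512)

 # Four 128-dim chunks; only supported when no pretrained path is given.
 emb_chunks = build_embedding(dictionary, embed_dim=512, num_embed_chunks=4)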
Example #2
 def build_embedding(dictionary, embed_dim, path=None):
     num_embeddings = len(dictionary)
     padding_idx = dictionary.pad()
     emb = Embedding(num_embeddings, embed_dim, padding_idx)
     # if provided, load pretrained embeddings from file
     if path:
         embed_dict = utils.parse_embedding(path)
         utils.load_embedding(embed_dict, dictionary, emb)
     return emb
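To exercise the `path` branch above, `utils.parse_embedding` (in the fairseq versions these snippets come from) skips a header line and then reads `word v1 v2 ...` rows; the toy file name and 4-dimensional vectors below are purely illustrative.
 from fairseq.data import Dictionary

 dictionary = Dictionary()
 dictionary.add_symbol("hello")

 # Hypothetical toy embedding file: a header line, then one word per row.
 with open("toy.vec", "w") as f:
     f.write("1 4\n")
     f.write("hello 0.1 0.2 0.3 0.4\n")

 emb = build_embedding(dictionary, embed_dim=4, path="toy.vec")
 # Rows for words found in the file are overwritten by utils.load_embedding.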
Example #3
 def load_pretrained_embedding_from_file(embed_path, dictionary,
                                         embed_dim):
     num_embeddings = len(dictionary)
     padding_idx = dictionary.pad()
     embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
     embed_dict = utils.parse_embedding(embed_path)
     utils.print_embed_overlap(embed_dict, dictionary)
     return utils.load_embedding(embed_dict, dictionary, embed_tokens)
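In fairseq models this helper is usually called from `build_model` when a flag such as `--encoder-embed-path` is set; for a standalone check you can point it at a small embedding text file (the `toy.vec` name below reuses the hypothetical file from the previous sketch).
 from fairseq.data import Dictionary

 dictionary = Dictionary()
 dictionary.add_symbol("hello")

 # Prints the dictionary/file overlap, then returns the loaded nn.Embedding.
 embed_tokens = load_pretrained_embedding_from_file("toy.vec", dictionary, 4)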
Example #4
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        embed_dict=None,
        out_embed_dim=256,
        max_positions=1024,
        convolutions=((512, 3), ) * 20,
        attention=True,
        dropout=0.1,
        share_embed=False,
        positional_embeddings=True,
        adaptive_softmax_cutoff=None,
        adaptive_softmax_dropout=0.0,
    ):
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([2]))
        self.dropout_module = FairseqDropout(
            dropout, module_name=self.__class__.__name__)
        self.need_attn = True

        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        if isinstance(attention, bool):
            # expand True into [True, True, ...] and do the same with False
            attention = [attention] * len(convolutions)
        if (not isinstance(attention, list)
                or len(attention) != len(convolutions)):
            raise ValueError(
                "Attention is expected to be a list of booleans of "
                "length equal to the number of layers.")

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.embed_positions = (PositionalEmbedding(
            max_positions,
            embed_dim,
            padding_idx,
        ) if positional_embeddings else None)

        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.attention = nn.ModuleList()
        self.residuals = []

        layer_in_channels = [in_channels]
        for i, (out_channels, kernel_size,
                residual) in enumerate(convolutions):
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            self.projections.append(
                Linear(residual_dim, out_channels)
                if residual_dim != out_channels else None)
            self.convolutions.append(
                LinearizedConv1d(
                    in_channels,
                    out_channels * 2,
                    kernel_size,
                    padding=(kernel_size - 1),
                    dropout=dropout,
                ))
            self.attention.append(
                AttentionLayer(out_channels, embed_dim)
                if attention[i] else None)
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)

        self.adaptive_softmax = None
        self.fc2 = self.fc3 = None

        if adaptive_softmax_cutoff is not None:
            assert not share_embed
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings,
                in_channels,
                adaptive_softmax_cutoff,
                dropout=adaptive_softmax_dropout,
            )
        else:
            self.fc2 = Linear(in_channels, out_embed_dim)
            if share_embed:
                assert out_embed_dim == embed_dim, (
                    "Shared embed weights implies same dimensions "
                    " out_embed_dim={} vs embed_dim={}".format(
                        out_embed_dim, embed_dim))
                self.fc3 = nn.Linear(out_embed_dim, num_embeddings)
                self.fc3.weight = self.embed_tokens.weight
            else:
                self.fc3 = Linear(out_embed_dim,
                                  num_embeddings,
                                  dropout=dropout)
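This constructor appears to be fairseq's `FConvDecoder` (`fairseq.models.fconv`); assuming that, a small instance can be built directly for experimentation (the toy dictionary and reduced layer count are arbitrary).
 from fairseq.data import Dictionary
 from fairseq.models.fconv import FConvDecoder

 dictionary = Dictionary()
 for word in ("hello", "world"):
     dictionary.add_symbol(word)

 decoder = FConvDecoder(
     dictionary,
     embed_dim=256,
     out_embed_dim=256,
     convolutions=((256, 3),) * 4,  # 4 layers instead of the default 20
     attention=True,
     dropout=0.1,
 )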
Example #5
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        embed_dict=None,
        max_positions=1024,
        convolutions=((512, 3), ) * 20,
        dropout=0.1,
    ):
        super().__init__(dictionary)
        self.dropout_module = FairseqDropout(
            dropout, module_name=self.__class__.__name__)
        self.num_attention_layers = None

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                      self.padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            self.padding_idx,
        )

        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.residuals = []

        layer_in_channels = [in_channels]
        for out_channels, kernel_size, residual in convolutions:
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            self.projections.append(
                Linear(residual_dim, out_channels)
                if residual_dim != out_channels else None)
            # symmetric padding keeps odd kernels length-preserving; even
            # kernels are padded asymmetrically in forward()
            if kernel_size % 2 == 1:
                padding = kernel_size // 2
            else:
                padding = 0
            self.convolutions.append(
                ConvTBC(
                    in_channels,
                    out_channels * 2,
                    kernel_size,
                    dropout=dropout,
                    padding=padding,
                ))
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)
        self.fc2 = Linear(in_channels, embed_dim)
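This is the encoder-side counterpart (it appears to be fairseq's `FConvEncoder`). `num_attention_layers` is left as `None` above; fairseq's `FConvModel` fills it in from the decoder's attention layers, and it is used to rescale gradients in `forward()`, so a standalone sketch should set it explicitly.
 from fairseq.data import Dictionary
 from fairseq.models.fconv import FConvEncoder

 dictionary = Dictionary()
 for word in ("hello", "world"):
     dictionary.add_symbol(word)

 encoder = FConvEncoder(
     dictionary,
     embed_dim=256,
     convolutions=((256, 3),) * 4,
     dropout=0.1,
 )
 encoder.num_attention_layers = 1  # normally set by FConvModel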