Example #1
import torch
import torch.nn as nn

from fairseq import utils

# Helpers referenced below (Embedding, Linear, PositionalEmbedding, ConvTBC,
# LinearizedConv1d, AttentionLayer, AdaptiveSoftmax, extend_conv_spec) are
# defined or imported elsewhere in the original module; illustrative sketches
# of a few of them are given further down.


def build_embedding(dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb


def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    # like build_embedding, but the pretrained-vector file is mandatory and
    # the overlap between it and the vocabulary is printed
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens)
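

# The two loaders above call an Embedding() factory that is not shown in this
# excerpt. A minimal sketch of what such a factory typically does is below;
# the initialization constants are assumptions, not necessarily what the
# original module uses.
def Embedding(num_embeddings, embedding_dim, padding_idx):
    """Token embedding table with a zeroed row for the padding symbol."""
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.normal_(m.weight, mean=0, std=0.1)   # init scale is an assumption
    nn.init.constant_(m.weight[padding_idx], 0)  # keep the pad row at zero
    return m

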
    # Encoder constructor: a ConvS2S-style convolutional encoder (this appears
    # to be fairseq's FConvEncoder.__init__; the enclosing class definition is
    # not part of this excerpt).
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        embed_dict=None,
        max_positions=1024,
        convolutions=((512, 3), ) * 20,
        dropout=0.1,
    ):
        super().__init__(dictionary)
        self.dropout = dropout
        self.num_attention_layers = None

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                      self.padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            self.padding_idx,
        )

        # normalize the spec to (out_channels, kernel_size, residual) triples;
        # see the extend_conv_spec sketch after this constructor
        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.residuals = []

        layer_in_channels = [in_channels]
        for (out_channels, kernel_size, residual) in convolutions:
            # residual == 0 disables the skip connection; residual == k takes
            # it from k entries back in layer_in_channels (k == 1 is the
            # layer's own input), with a projection if the widths differ
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            self.projections.append(
                Linear(residual_dim, out_channels)
                if residual_dim != out_channels else None)
            # "same" padding for odd kernels; even kernel sizes get no
            # implicit padding here
            if kernel_size % 2 == 1:
                padding = kernel_size // 2
            else:
                padding = 0
            self.convolutions.append(
                ConvTBC(in_channels,
                        out_channels * 2,  # doubled channels for the GLU gate
                        kernel_size,
                        dropout=dropout,
                        padding=padding))
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)
        self.fc2 = Linear(in_channels, embed_dim)
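

# Both constructors normalize their `convolutions` argument with
# extend_conv_spec(), which is not shown in this excerpt. A sketch that is
# consistent with how the loops unpack its output (3-tuples of out_channels,
# kernel_size and residual, with residual defaulting to 1):
def extend_conv_spec(convolutions):
    """Pad 2-tuples (out_channels, kernel_size) to 3-tuples
    (out_channels, kernel_size, residual) with a default residual of 1."""
    extended = []
    for spec in convolutions:
        if len(spec) == 3:
            extended.append(spec)
        elif len(spec) == 2:
            extended.append(spec + (1,))
        else:
            raise ValueError(
                'invalid convolution spec {}: expected 2 or 3 values'.format(
                    spec))
    return tuple(extended)

# For example, the default ((512, 3),) * 20 becomes ((512, 3, 1),) * 20:
# twenty layers of 512 channels with kernel size 3, each with a residual
# connection to its own input.
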
    # Decoder constructor: a ConvS2S-style decoder with per-layer attention
    # (this appears to be fairseq's FConvDecoder.__init__; the enclosing class
    # definition is not part of this excerpt).
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        embed_dict=None,
        out_embed_dim=256,
        max_positions=1024,
        convolutions=((512, 3), ) * 20,
        attention=True,
        dropout=0.1,
        share_embed=False,
        positional_embeddings=True,
        adaptive_softmax_cutoff=None,
        adaptive_softmax_dropout=0,
    ):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([2]))
        self.dropout = dropout
        self.need_attn = True

        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        if isinstance(attention, bool):
            # expand True into [True, True, ...] and do the same with False
            attention = [attention] * len(convolutions)
        if (not isinstance(attention, list)
                or len(attention) != len(convolutions)):
            raise ValueError(
                'Attention is expected to be a list of booleans of '
                'length equal to the number of layers.')

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            padding_idx,
        ) if positional_embeddings else None

        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.attention = nn.ModuleList()
        self.residuals = []

        layer_in_channels = [in_channels]
        for i, (out_channels, kernel_size,
                residual) in enumerate(convolutions):
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            self.projections.append(
                Linear(residual_dim, out_channels)
                if residual_dim != out_channels else None)
            # padding of (kernel_size - 1) keeps the decoder convolution
            # causal: positions never depend on future time steps
            self.convolutions.append(
                LinearizedConv1d(in_channels,
                                 out_channels * 2,  # doubled for the GLU gate
                                 kernel_size,
                                 padding=(kernel_size - 1),
                                 dropout=dropout))
            # optional encoder-decoder attention after this layer
            self.attention.append(
                AttentionLayer(out_channels, embed_dim)
                if attention[i] else None)
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)

        self.adaptive_softmax = None
        self.fc2 = self.fc3 = None

        if adaptive_softmax_cutoff is not None:
            # the adaptive softmax provides its own output projection and
            # cannot be combined with shared embedding weights
            assert not share_embed
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings,
                in_channels,
                adaptive_softmax_cutoff,
                dropout=adaptive_softmax_dropout)
        else:
            self.fc2 = Linear(in_channels, out_embed_dim)
            if share_embed:
                assert out_embed_dim == embed_dim, \
                    "Shared embed weights imply the same dimensions: " \
                    "out_embed_dim={} vs embed_dim={}".format(out_embed_dim, embed_dim)
                # tie the output projection to the input embedding matrix
                self.fc3 = nn.Linear(out_embed_dim, num_embeddings)
                self.fc3.weight = self.embed_tokens.weight
            else:
                self.fc3 = Linear(out_embed_dim,
                                  num_embeddings,
                                  dropout=dropout)
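

# Tiny self-contained illustration of the residual bookkeeping used in both
# constructors above: layer_in_channels[-residual] looks up the channel width
# of the activations the skip connection will be added to, and a projection is
# only needed when that width differs from the layer's output width.
if __name__ == '__main__':
    layer_in_channels = [256]  # width produced by fc1 feeds the first layer
    for out_channels, _kernel_size, residual in ((512, 3, 1), (512, 3, 1),
                                                 (768, 3, 2)):
        residual_dim = (out_channels if residual == 0
                        else layer_in_channels[-residual])
        needs_projection = residual_dim != out_channels
        print(out_channels, residual_dim, needs_projection)
        layer_in_channels.append(out_channels)
    # prints: 512 256 True / 512 512 False / 768 512 True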