Example #1
def build_embedding(dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
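This and the variants below all follow the same fairseq-style pattern: build a plain token embedding with a zeroed padding row, then optionally overwrite rows with vectors parsed from a text file. As a point of reference, a minimal sketch of the Embedding helper these snippets rely on, modeled on fairseq's transformer model; the exact initialization may differ from fork to fork:

import torch.nn as nn


def Embedding(num_embeddings, embedding_dim, padding_idx):
    # fairseq-style token embedding: normal init scaled by the embedding
    # dimension, with the padding row zeroed out.
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5)
    nn.init.constant_(m.weight[padding_idx], 0)
    return m

With such a helper in place, build_embedding(dictionary, 512) returns a randomly initialized table, and passing path= additionally copies in any pretrained vectors whose words appear in the dictionary.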
Example #2
def build_embedding(dictionary, embed_dim, path=None):
    padding_idx = dictionary.pad()
    eos_index = dictionary.eos()
    emb = Embedding(len(dictionary), embed_dim, padding_idx, eos_index)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
Example #3
def build_embedding(dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
Example #4
def build_embedding(dictionary, embed_dim, path=None, freeze=False):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = TransformerTokenEmbedding(num_embeddings, embed_dim, padding_idx,
                                    freeze)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
Example #5
def build_embedding(dictionary, embed_dim, path=None, sde=False):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if sde:
        emb = SDEembedding(char_vsize=num_embeddings, d_vec=embed_dim, padding_idx=padding_idx)
    else:
        # NOTE: `args` is a free variable here; it is presumably available
        # from the enclosing model-building scope in the original source.
        emb = Embedding(num_embeddings, embed_dim, padding_idx, fix_norm=args.fix_norm)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
Example #6
def build_embedding(dictionary, embed_dim, args, mask_file=None, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if args.one_emb:
        emb = OneEmbedding(
            num_embeddings, embed_dim, padding_idx, args.one_emb,
            args.one_emb_dropout, args.one_emb_std, args.codenum,
            args.codebooknum, args.one_emb_layernum, args.one_emb_inter_dim,
            args.one_emb_relu_dropout, mask_file)
    else:
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
Example #7
    def build_embedding(cls, args, dictionary, embed_dim, path=None):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()

        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        # if provided, load from preloaded dictionaries
        if path:
            embed_dict = utils.parse_embedding(path)
            utils.load_embedding(embed_dict, dictionary, emb)
        '''for i in range(0,10):
            print(dictionary[i])
            print("********")'''
        return emb
Example #8
def build_embedding(dictionary, embed_dim, path=None):
    # construct and return an embedding layer;
    # load pretrained embedding path if specified.
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
        logging.info(
            'Loaded pretrained embeddings from {}'.format(path))
    return emb
Example #9
def build_embedding(dictionary, embed_dim, path=None, feat=False):

    if feat:
        emb = Embeddings([Embedding(len(vocab), embed_dim, vocab.pad())
                          for _, vocab in dictionary.items()])
    else:
        padding_idx = dictionary.pad()
        num_embeddings = len(dictionary)
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        # if provided, load from preloaded dictionaries
        if path:
            embed_dict = utils.parse_embedding(path)
            utils.load_embedding(embed_dict, dictionary, emb)
    return emb
Example #10
        def build_embedding(dictionary, embed_dim, path=None):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            emb = Embedding(num_embeddings, embed_dim, padding_idx)

            # if provided, load from preloaded dictionaries
            if path:
                embed_dict = utils.parse_embedding(path)
                utils.load_embedding(embed_dict, dictionary, emb)
            # if not path and args.disable_training_embeddings:
            #     raise ValueError('Do not set --disable_training_embeddings when pretrained embeddings are not provided.')

            # if args.disable_training_embeddings:
            #     emb.weight.requires_grad = False
            return emb
Example #11
        def build_embedding(dictionary, embed_dim, is_encoder, path=None):

            if path is not None:
                if path.startswith('elmo:'):
                    lm_path = path[5:]
                    task = LanguageModelingTask(args, dictionary, dictionary)
                    models, _ = utils.load_ensemble_for_inference(
                        [lm_path], task, {'remove_head': True})
                    assert len(
                        models
                    ) == 1, 'ensembles are currently not supported for elmo embeddings'

                    embedder = ElmoTokenEmbedder(
                        models[0],
                        dictionary.eos(),
                        dictionary.pad(),
                        add_bos=is_encoder,
                        remove_bos=is_encoder,
                        combine_tower_states=is_encoder,
                        projection_dim=embed_dim,
                        add_final_predictive=is_encoder,
                        add_final_context=is_encoder)
                    return embedder, 1
                elif path.startswith('bilm:'):
                    lm_path = path[5:]
                    task = LanguageModelingTask(args, dictionary, dictionary)
                    models, _ = utils.load_ensemble_for_inference(
                        [lm_path], task, {
                            'remove_head': True,
                            'dropout': args.bilm_model_dropout,
                            'attention_dropout': args.bilm_attention_dropout,
                            'relu_dropout': args.bilm_relu_dropout,
                        })
                    assert len(
                        models
                    ) == 1, 'ensembles are currently not supported for elmo embeddings'

                    return BILMEmbedder(models[0], args, args.encoder_embed_dim) if is_encoder \
                        else LMEmbedder(models[0], args.decoder_embed_dim)

            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            emb = nn.Embedding(num_embeddings, embed_dim, padding_idx)
            # if provided, load from preloaded dictionaries
            if path:
                embed_dict = utils.parse_embedding(path)
                utils.load_embedding(embed_dict, dictionary, emb)
            return emb
Example #12
    def __init__(self,
                 dictionary,
                 encoder_embed_dim=512,
                 embed_dim=512,
                 embed_dict=None,
                 out_embed_dim=512,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1,
                 attention=True):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.layers = nn.ModuleList([
            LSTMCell(
                encoder_embed_dim + embed_dim if layer == 0 else embed_dim,
                embed_dim) for layer in range(num_layers)
        ])
        self.attention = AttentionLayer(encoder_embed_dim,
                                        embed_dim) if attention else None
        if embed_dim != out_embed_dim:
            self.additional_fc = Linear(embed_dim, out_embed_dim)
        self.fc_out = Linear(out_embed_dim,
                             num_embeddings,
                             dropout=dropout_out)
Example #13
    def __init__(self,
                 dictionary,
                 embed_dim=512,
                 embed_dict=None,
                 num_layers=1,
                 dropout_in=0.1,
                 dropout_out=0.1):
        super().__init__(dictionary)
        self.num_layers = num_layers
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                      self.padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.lstm = LSTM(
            input_size=embed_dim,
            hidden_size=embed_dim,
            num_layers=num_layers,
            dropout=self.dropout_out,
            bidirectional=False,
        )
Example #14
    def __init__(self, dictionary, embed_dim=512, embed_dict=None, max_positions=1024, convolutions=((512, 3),) * 20, dropout=0.1):
        super().__init__(dictionary)
        self.dropout = dropout
        self.num_attention_layers = None

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            padding_idx,
            left_pad=LanguagePairDataset.LEFT_PAD_SOURCE,
        )

        in_channels = convolutions[0][0]
        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        for (out_channels, kernel_size) in convolutions:
            self.projections.append(Linear(in_channels, out_channels)
                                    if in_channels != out_channels else None)
            if kernel_size % 2 == 1:
                padding = kernel_size // 2
            else:
                padding = 0
            self.convolutions.append(
                ConvTBC(in_channels, out_channels * 2, kernel_size,
                        dropout=dropout, padding=padding)
            )
            in_channels = out_channels
        self.fc2 = Linear(in_channels, embed_dim)
Example #15
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens)
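In fairseq, utils.parse_embedding reads a whitespace-separated text file in the usual GloVe/word2vec layout: a header line (typically vocabulary size and dimension, which the standard parser skips) followed by one line per word with its vector. A small illustrative sketch of preparing such a file and loading it with the function above; the file name, its contents and the dictionary variable are placeholders:

# Illustrative only: write a tiny embedding file in the layout that
# utils.parse_embedding is commonly expected to read, then load it.
rows = [
    "2 4",                      # header line: vocab size and dimension
    "hello 0.1 0.2 0.3 0.4",
    "world 0.5 0.6 0.7 0.8",
]
with open("toy.vec", "w") as f:
    f.write("\n".join(rows) + "\n")

# dictionary is assumed to be a fairseq Dictionary that contains these words.
embed_tokens = load_pretrained_embedding_from_file("toy.vec", dictionary, embed_dim=4)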
Example #16
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        embed_dict=None,
        max_positions=1024,
        convolutions=((512, 3), ) * 20,
        dropout=0.1,
    ):
        super().__init__(dictionary)
        self.dropout = dropout
        self.num_attention_layers = None
        self.pad = dictionary.pad

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                      self.padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            self.padding_idx,
        )

        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.residuals = []

        layer_in_channels = [in_channels]
        for _, (out_channels, kernel_size,
                residual) in enumerate(convolutions):
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            self.projections.append(
                Linear(residual_dim, out_channels
                       ) if residual_dim != out_channels else None)
            if kernel_size % 2 == 1:
                padding = kernel_size // 2
            else:
                padding = 0
            self.convolutions.append(
                ConvTBC(in_channels,
                        out_channels * 2,
                        kernel_size,
                        dropout=dropout,
                        padding=padding))
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)
        self.fc2 = Linear(in_channels, embed_dim)
Example #17
def build_embedding(dictionary, embed_dim, path=None):
    """
    Copied from fairseq.models.transformer
    :param dictionary:
    :param embed_dim:
    :param path:
    :return:
    """
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
Example #18
def build_embedding(dictionary, embed_dim, path=None):
    # The dictionary may include additional items that can be used in
    # place of the normal OOV token and that all map to the same
    # embedding. Using a different token for each input position allows
    # one to restore the word identities from the original source text.
    num_embeddings = len(dictionary) - args.source_position_markers
    padding_idx = dictionary.pad()
    unk_idx = dictionary.unk()
    logger.info(
        "dictionary indices from {0} to {1} will be mapped to {2}".format(
            num_embeddings, len(dictionary) - 1, unk_idx))
    emb = Embedding(num_embeddings, embed_dim, padding_idx, unk_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
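The Embedding call above takes a fourth unk_idx argument, which implies a custom embedding class that folds the extra position-marker indices back onto the unknown token, exactly as the comment describes (args and logger are free variables, presumably defined in the enclosing scope of the original source). A minimal sketch of how such a remapping embedding could look; this is an assumption-based illustration, not the original class:

import torch
import torch.nn as nn


class Embedding(nn.Embedding):
    """Token embedding that maps indices beyond the table size to unk_idx.

    Sketch only: source-position markers sit at the top of the dictionary
    and all share the <unk> vector, so the original word can still be
    recovered from the index at generation time.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx, unk_idx):
        super().__init__(num_embeddings, embedding_dim, padding_idx=padding_idx)
        self.unk_idx = unk_idx
        nn.init.normal_(self.weight, mean=0, std=embedding_dim ** -0.5)
        nn.init.constant_(self.weight[padding_idx], 0)

    def forward(self, input):
        # Replace any index that falls outside the embedding table with <unk>.
        input = torch.where(
            input >= self.num_embeddings,
            torch.full_like(input, self.unk_idx),
            input,
        )
        return super().forward(input)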
Example #19
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    num_embedding = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embedding, embed_dim, padding_idx)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    # embed_keys = set(embed_dict.keys())
    # vocab_keys = set(dictionary.symbols))
    # print(vocab_keys - embed_keys)
    return utils.load_embedding(embed_dict, dictionary,
                                embed_tokens), embed_dict
Example #20
    def build_embedding(cls, args, dictionary, embed_dim, path=None):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()

        if args.no_embed:
            one_hot_matrix = F.one_hot(torch.arange(num_embeddings)).float()
            one_hot_embed = torch.cat((one_hot_matrix, torch.zeros((num_embeddings, embed_dim - num_embeddings))),
                                      dim=1)
            one_hot_embed[padding_idx] = torch.zeros(embed_dim).unsqueeze(0)
            emb = nn.Embedding(num_embeddings, embed_dim, padding_idx=padding_idx)
            emb.weight = torch.nn.parameter.Parameter(one_hot_embed, requires_grad=False)
        else:
            emb = Embedding(num_embeddings, embed_dim, padding_idx)


        # if provided, load from preloaded dictionaries
        if path:
            embed_dict = utils.parse_embedding(path)
            utils.load_embedding(embed_dict, dictionary, emb)
        return emb
Example #21
    def __init__(
        self, dictionary, args, embed_dim=512, embed_dict=None, max_positions=1024,
        convolutions=((512, 3),) * 20, dropout=0.1, normalization_constant=0.5,
        left_pad=True,
    ):
        super().__init__(dictionary)
        self.args = args
        self.dropout = dropout
        self.normalization_constant = normalization_constant
        self.left_pad = left_pad
        self.num_attention_layers = None

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()

        self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            self.padding_idx,
            left_pad=self.left_pad
            #left_pad=False, #TODO: check LearnedPositionalEmbedding.forward() for the case of True
        )

        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.residuals = []

        layer_in_channels = [in_channels]
        for i, (out_channels, kernel_size, residual) in enumerate(convolutions):
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            self.projections.append(Linear(residual_dim, out_channels)
                                    if residual_dim != out_channels else None)
            if kernel_size % 2 == 1:
                padding = kernel_size // 2
            else:
                padding = 0
            self.convolutions.append(
                ConvTBC(in_channels, out_channels * 2, kernel_size,
                        dropout=dropout, padding=padding)
            )
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)
        self.fc2 = Linear(in_channels, embed_dim)
Example #22
def load_embedding(embedding, dictionary, pretrained_embed):
    """Loads pretrained embeddings.

    Loads pretrained embeddings into a nn.Embedding layer. pretrained_embed
    can either be a nn.Embedding layer, in which case the embedding is set
    to the pretrained_embed argument, or a path to an embedding file.

    Arguments:
        embedding (nn.Embedding): Embedding layer whose weights are to be set.
        dictionary (fairseq.data.dictionary.Dictionary): dictionary with the
            same vocabulary size as the embedding argument.
        pretrained_embed (Union(string, nn.Embedding)): source of the
            weights to be loaded.
    """
    if pretrained_embed is None:
        pass
    elif isinstance(pretrained_embed, torch.nn.Embedding):
        embedding.weight = pretrained_embed.weight
    else:
        embed_dict = utils.parse_embedding(pretrained_embed)
        utils.load_embedding(embed_dict, dictionary, embedding)
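A brief usage sketch of this wrapper covering the three cases it handles; the variable names and the file path are placeholders:

# Copy weights from an existing nn.Embedding of the same shape...
load_embedding(embedding, dictionary, pretrained_embed=other_embedding)

# ...or parse a text embedding file and load only the overlapping words.
load_embedding(embedding, dictionary, pretrained_embed="embeddings.vec")

# Passing None leaves the randomly initialized weights untouched.
load_embedding(embedding, dictionary, pretrained_embed=None)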
Example #23
    def __init__(self, dictionary, embed_dim=512,
                 embed_dict=None, out_embed_dim=256,
                 max_positions=1024, convolutions=((512, 3),) * 20,
                 attention=True, dropout=0.1, share_embed=False):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([2]))
        self.dropout = dropout

        in_channels = convolutions[0][0]
        if isinstance(attention, bool):
            # expand True into [True, True, ...] and do the same with False
            attention = [attention] * len(convolutions)
        if not isinstance(attention, list) or len(attention) != len(convolutions):
            raise ValueError('Attention is expected to be a list of booleans of '
                             'length equal to the number of layers.')

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            padding_idx,
            left_pad=LanguagePairDataset.LEFT_PAD_TARGET,
        )

        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.attention = nn.ModuleList()
        for i, (out_channels, kernel_size) in enumerate(convolutions):
            self.projections.append(Linear(in_channels, out_channels)
                                    if in_channels != out_channels else None)
            self.convolutions.append(
                LinearizedConv1d(in_channels, out_channels * 2, kernel_size,
                                 padding=(kernel_size - 1), dropout=dropout)
            )
            self.attention.append(AttentionLayer(out_channels, embed_dim)
                                  if attention[i] else None)
            in_channels = out_channels
        self.fc2 = Linear(in_channels, out_embed_dim)
        if share_embed:
            assert out_embed_dim == embed_dim, \
                "Shared embed weights implies same dimensions " \
                " out_embed_dim={} vs embed_dim={}".format(out_embed_dim, embed_dim)
            self.fc3 = nn.Linear(out_embed_dim, num_embeddings)
            self.fc3.weight = self.embed_tokens.weight
        else:
            self.fc3 = Linear(out_embed_dim, num_embeddings, dropout=dropout)
Example #24
def copy_prev_embedding(embed_path, dictionary, embed_dim, prev_embedded_tokens_path, prev_dict):
	num_embeddings = len(dictionary)
	padding_idx = dictionary.pad()
	embed_tokens = nn.Embedding(num_embeddings, embed_dim, padding_idx)
	prev_embedded_tokens = load_random_embedding(prev_embedded_tokens_path)
	for i in range(5, num_embeddings):
		if prev_dict.index(dictionary.symbols[i])!= prev_dict.unk() and i!=dictionary.unk():
			embed_tokens.weight.data[i] = prev_embedded_tokens[prev_dict.index(dictionary.symbols[i])]

	#embed_tokens.weight = nn.Parameter(prev_embedded_tokens)
	embed_dict = utils.parse_embedding(embed_path)
	utils.print_embed_overlap(embed_dict, dictionary)
	return utils.load_embedding(embed_dict, dictionary, embed_tokens)
Example #25
def build_embedding(dictionary, embed_dim, path=None, num_embed_chunks=1):
    assert embed_dim % num_embed_chunks == 0, (
        f"Embedding dimension = {embed_dim} should be "
        + f"divisible by the number of embedding chunks = {num_embed_chunks}"
    )
    assert path is None or num_embed_chunks == 1, (
        "Loading embedding from a path with number of embedding chunks > 1"
        + " is not yet supported"
    )
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    # if provided, load from preloaded dictionaries
    if path:
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    else:
        embed_chunk_dim = embed_dim // num_embed_chunks
        emb = nn.ModuleList()
        for i in range(num_embed_chunks):
            emb.append(Embedding(num_embeddings, embed_chunk_dim, padding_idx))
    return emb
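Because this variant returns either a single embedding module or an nn.ModuleList of chunks, the caller has to branch on the type. A small sketch (assumed, not from the original code) of consuming both cases by concatenating per-chunk lookups along the feature dimension:

import torch
import torch.nn as nn


def embed_tokens(emb, tokens):
    # Chunked case: look up each chunk and concatenate along the feature
    # dimension so the result again has embed_dim features.
    if isinstance(emb, nn.ModuleList):
        return torch.cat([chunk(tokens) for chunk in emb], dim=-1)
    # Single-table case: ordinary embedding lookup.
    return emb(tokens)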
Example #26
    def __init__(
        self,
        dictionary,
        embed_dim=512,
        embed_dict=None,
        out_embed_dim=256,
        max_positions=1024,
        convolutions=((512, 3), ) * 20,
        attention=True,
        dropout=0.1,
        share_embed=False,
        positional_embeddings=True,
        adaptive_softmax_cutoff=None,
        adaptive_softmax_dropout=0,
    ):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([2]))
        self.dropout = dropout
        self.need_attn = True

        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        if isinstance(attention, bool):
            # expand True into [True, True, ...] and do the same with False
            attention = [attention] * len(convolutions)
        if not isinstance(attention,
                          list) or len(attention) != len(convolutions):
            raise ValueError(
                'Attention is expected to be a list of booleans of '
                'length equal to the number of layers.')

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            padding_idx,
        ) if positional_embeddings else None

        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.attention = nn.ModuleList()
        self.residuals = []

        layer_in_channels = [in_channels]
        for i, (out_channels, kernel_size,
                residual) in enumerate(convolutions):
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            self.projections.append(
                Linear(residual_dim, out_channels
                       ) if residual_dim != out_channels else None)
            self.convolutions.append(
                LinearizedConv1d(in_channels,
                                 out_channels * 2,
                                 kernel_size,
                                 padding=(kernel_size - 1),
                                 dropout=dropout))
            self.attention.append(
                AttentionLayer(out_channels, embed_dim
                               ) if attention[i] else None)
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)

        self.adaptive_softmax = None
        self.fc2 = self.fc3 = None

        if adaptive_softmax_cutoff is not None:
            assert not share_embed
            self.adaptive_softmax = AdaptiveSoftmax(
                num_embeddings,
                in_channels,
                adaptive_softmax_cutoff,
                dropout=adaptive_softmax_dropout)
        else:
            self.fc2 = Linear(in_channels, out_embed_dim)
            if share_embed:
                assert out_embed_dim == embed_dim, \
                    "Shared embed weights implies same dimensions " \
                    " out_embed_dim={} vs embed_dim={}".format(out_embed_dim, embed_dim)
                self.fc3 = nn.Linear(out_embed_dim, num_embeddings)
                self.fc3.weight = self.embed_tokens.weight
            else:
                self.fc3 = Linear(out_embed_dim,
                                  num_embeddings,
                                  dropout=dropout)
Example #27
    def __init__(
            self,
            dictionary,
            embed_dim,
            embed_dict,
            max_positions,
            dropout,
            num_inputs,
            num_units,
            num_labels,
            num_layers=1,
            in_arcs=True,
            out_arcs=True,
            batch_first=False,
            residual='',
            use_gates=True,
            use_glus=False,
            # morph_embeddings=None,
            left_pad=True):
        super(GCNEncoder, self).__init__(dictionary)
        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.left_pad = left_pad
        self.dropout = dropout
        self.batch_first = batch_first
        self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                      self.padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            self.padding_idx,
            left_pad=self.left_pad
            # left_pad=False, #TODO: check LearnedPositionalEmbedding.forward() for the case of True
        )

        self.num_layers = num_layers
        self.num_inputs = num_inputs
        self.num_units = num_units
        self.residual = residual
        self.use_gates = use_gates
        self.use_glus = use_glus

        # if morph_embeddings is not None:
        #     self.morph_embeddings = morph_embeddings
        #     self.emb_morph_emb = nn.Linear(num_inputs+morph_embeddings.embedding_size, num_inputs)

        # self.H_1 = nn.parameter.Parameter(torch.Tensor(self.num_units, self.num_units))
        # nn.init.xavier_normal_(self.H_1)
        # self.H_2 = nn.parameter.Parameter(torch.Tensor(self.num_units, self.num_units))
        # nn.init.xavier_normal_(self.H_2)
        # self.H_3 = nn.parameter.Parameter(torch.Tensor(self.num_units, self.num_units))
        # nn.init.xavier_normal_(self.H_3)
        # self.H_4 = nn.parameter.Parameter(torch.Tensor(self.num_units, self.num_units))
        # nn.init.xavier_normal_(self.H_4)

        self.gcn_layers = []
        if residual == '' or residual == 'residual':

            for i in range(self.num_layers):
                gcn = GCNLayer(num_inputs,
                               num_units,
                               num_labels,
                               in_arcs=in_arcs,
                               out_arcs=out_arcs,
                               batch_first=self.batch_first,
                               use_gates=self.use_gates,
                               use_glus=self.use_glus)
                self.gcn_layers.append(gcn)

            self.gcn_seq = nn.Sequential(*self.gcn_layers)

        elif residual == 'dense':
            for i in range(self.num_layers):
                input_size = num_inputs + (i * num_units)
                gcn = GCNLayer(input_size,
                               num_units,
                               num_labels,
                               in_arcs=in_arcs,
                               out_arcs=out_arcs,
                               batch_first=self.batch_first,
                               use_gates=self.use_gates,
                               use_glus=self.use_glus)
                self.gcn_layers.append(gcn)

            self.gcn_seq = nn.Sequential(*self.gcn_layers)
Example #28
    def __init__(self,
                 dictionary,
                 args,
                 encoder_embed_dim=512,
                 embed_dict=None,
                 max_positions=1024,
                 convolutions=((512, 3), ) * 20,
                 dropout=0.1,
                 left_pad=True):
        super().__init__(dictionary)
        self.elmo = Elmo(options_file,
                         weight_file,
                         args.num_output_repr,
                         dropout=args.elmo_dropout,
                         do_layer_norm=args.elmo_do_layer_norm)
        self.args = args
        if self.args.merge_mode == 'sum':
            # just use in `sum` mode
            self.elmo_projection = Linear(args.elmo_repr_dim,
                                          encoder_embed_dim)
        self.id2token = {v: k for k, v in dictionary.indices.items()}
        self.dropout = dropout
        self.left_pad = left_pad
        self.num_attention_layers = None

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, args.token_embed_dim,
                                      self.padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            args.token_embed_dim,
            self.padding_idx,
            left_pad=self.left_pad,
        )

        convolutions = extend_conv_spec(convolutions)
        in_channels = convolutions[0][0]
        self.fc1 = Linear(encoder_embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.residuals = []

        layer_in_channels = [in_channels]
        for _, (out_channels, kernel_size,
                residual) in enumerate(convolutions):
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            self.projections.append(
                Linear(residual_dim, out_channels
                       ) if residual_dim != out_channels else None)
            if kernel_size % 2 == 1:
                padding = kernel_size // 2
            else:
                padding = 0
            self.convolutions.append(
                ConvTBC(in_channels,
                        out_channels * 2,
                        kernel_size,
                        dropout=dropout,
                        padding=padding))
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)
        if args.num_output_repr == 2 and args.merge_mode == 'concat':
            self.fc2 = Linear(in_channels + args.elmo_repr_dim,
                              encoder_embed_dim)
        else:
            self.fc2 = Linear(in_channels, encoder_embed_dim)
Example #29
    def __init__(self,
                 dictionary,
                 embed_dim=512,
                 embed_dict=None,
                 max_positions=1024,
                 convolutions=((512, 3), ) * 20,
                 dropout=0.1,
                 batch_norm=False,
                 use_linear_se=False):
        super().__init__(dictionary)
        self.dropout = dropout
        self.num_attention_layers = None
        self.batch_norm = batch_norm

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim,
                                      self.padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(embed_dict,
                                                     self.dictionary,
                                                     self.embed_tokens)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            self.padding_idx,
        )

        convolutions = extend_conv_spec_extended(convolutions)
        in_channels = convolutions[0][0]
        if use_linear_se:
            self.fc1 = LinearSE(embed_dim, in_channels, dropout=dropout)
        else:
            self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.inner_convolutions = nn.ModuleList()
        #self.se_layers = nn.ModuleList()
        self.residuals = []
        self.kernel_sizes = 0

        layer_in_channels = [in_channels]
        for idx, (out_channels, kernel_sizes,
                  residual) in enumerate(convolutions):
            self.kernel_sizes = len(kernel_sizes)
            self.inner_convolutions.append(nn.ModuleList())
            if residual == 0:
                residual_dim = out_channels
            else:
                residual_dim = layer_in_channels[-residual]
            if use_linear_se:
                self.projections.append(
                    LinearSE(residual_dim, out_channels
                             ) if residual_dim != out_channels else None)
            else:
                self.projections.append(
                    Linear(residual_dim, out_channels
                           ) if residual_dim != out_channels else None)
            for kernel_size in kernel_sizes:
                if kernel_size % 2 == 1:
                    padding = kernel_size // 2
                else:
                    padding = 0
                self.inner_convolutions[idx].append(
                    ConvTBC(in_channels,
                            out_channels * 2,
                            kernel_size,
                            dropout=dropout,
                            padding=padding))
            # TODO(naetherm): Combine the outputs of the convolution to one single instance max_pooling
            #self.convolutions.append(torch.stack(self.inner_convolutions[idx], dim=0).sum(dim=0))
            #self.se_layers.append(SqueezeExcitationLayer(n_features=16))
            self.residuals.append(residual)
            in_channels = out_channels
            layer_in_channels.append(out_channels)
        self.mp2d = torch.nn.MaxPool2d(kernel_size=(self.kernel_sizes, 1))
        if use_linear_se:
            self.fc2 = LinearSE(in_channels, embed_dim)
        else:
            self.fc2 = Linear(in_channels, embed_dim)