Example #1
0
    def load_embeddings(self, src_embeddings, tgt_embeddings,
                        vocabulary: Vocabulary, embedding_dim: int = 300):
        """Initialize encoder/decoder embedding weights from pretrained vectors.

        Each vocabulary index is looked up in the pretrained model matching
        its language ("src" or "tgt"): exact-case match first, then a
        lowercase fallback. Words absent from both keep a small random
        initialization (randn / 10).

        Args:
            src_embeddings: pretrained model exposing a ``.wv`` word-vector
                mapping for source-language words (gensim-style — confirm).
            tgt_embeddings: same, for target-language words.
            vocabulary: joint vocabulary; maps index -> (word, language).
            embedding_dim: width of the embedding vectors. Defaults to 300
                (the original hard-coded value); must match the pretrained
                vectors' dimensionality.
        """
        # Small random init so out-of-vocabulary words are not all-zero.
        aligned_embeddings = torch.div(
            torch.randn(vocabulary.size(), embedding_dim), 10)
        found_count = 0
        wv_by_language = {"src": src_embeddings.wv, "tgt": tgt_embeddings.wv}
        for i in range(len(vocabulary.index2word)):
            word = vocabulary.get_word(i)
            wv = wv_by_language.get(vocabulary.get_language(i))
            if wv is None:
                # Index belongs to neither language (e.g. special tokens);
                # keep its random init, matching the original behavior.
                continue
            # Exact-case match takes precedence over the lowercase fallback.
            for candidate in (word, word.lower()):
                if candidate in wv:
                    aligned_embeddings[i] = torch.FloatTensor(wv[candidate])
                    found_count += 1
                    break
        # Lazy %-formatting: the message is only built if INFO is enabled.
        logger.info("Embeddings filled: %d of %d",
                    found_count, vocabulary.size())

        # Preserve the trainability flag the encoder embedding already had.
        enable_training = self.encoder.embedding.weight.requires_grad
        # NOTE(review): both Parameters wrap the SAME underlying tensor, so
        # in-place optimizer updates to one are visible in the other (weight
        # tying). If independent embeddings were intended, one side should
        # receive aligned_embeddings.clone() — confirm intent with authors.
        self.encoder.embedding.weight = nn.Parameter(
            aligned_embeddings, requires_grad=enable_training)
        self.decoder.embedding.weight = nn.Parameter(
            aligned_embeddings, requires_grad=enable_training)
Example #2
0
 def __init__(self, vocabulary: Vocabulary, use_cuda):
     """Build a summed NLL loss that ignores both languages' padding tokens.

     Args:
         vocabulary: joint vocabulary; supplies the pad indices for the
             "src" and "tgt" languages and the total vocabulary size.
         use_cuda: when True, move the per-class weight vector to the GPU.
     """
     # Zero weight on both pad indices so padding positions contribute
     # nothing to the loss.
     weight = torch.ones(vocabulary.size())
     weight[vocabulary.get_pad("src")] = 0
     weight[vocabulary.get_pad("tgt")] = 0
     if use_cuda:
         weight = weight.cuda()
     # size_average=False was deprecated in PyTorch 0.4 and later removed;
     # reduction="sum" is its exact replacement (sum over the batch).
     self.criterion = nn.NLLLoss(weight, reduction="sum")