Ejemplo n.º 1
0
    def mlm(self, memory_bank, tgt_mlm):
        """Compute the masked language model (MLM) loss over the full vocabulary.

        Args:
            memory_bank: Encoder output fed to the MLM head. It is projected
                and then flattened to rows of ``self.vocab_size`` scores.
            tgt_mlm: Integer target ids, one per row of the flattened scores.
                Entries ``<= 0`` are treated as "not masked" and ignored.
                Must be an index tensor on the same device as the scores
                (required by ``gather``, as it was by ``scatter_``).

        Returns:
            A tuple ``(loss_mlm, correct_mlm, denominator)``: the mean negated
            target score over labelled positions, the number of labelled
            positions whose argmax equals the target, and the number of
            labelled positions plus ``1e-6``.
        """
        output_mlm = gelu(self.mlm_linear_1(memory_bank))
        output_mlm = self.layer_norm(output_mlm)
        output_mlm = self.mlm_linear_2(output_mlm)
        output_mlm = output_mlm.contiguous().view(-1, self.vocab_size)
        # NOTE(review): presumably a log-softmax, since the result is negated
        # and averaged as a loss below -- confirm where self.softmax is built.
        output_mlm = self.softmax(output_mlm)

        tgt_mlm = tgt_mlm.contiguous().view(-1)
        # 1.0 where a prediction is required, 0.0 elsewhere.
        label_mask = (tgt_mlm > 0).float().to(output_mlm.device)

        # Pick each row's score at its target id directly. This replaces the
        # former multiply-by-one-hot, which materialized a dense
        # (rows, vocab_size) tensor only to read one value per row.
        target_scores = output_mlm.gather(1, tgt_mlm.unsqueeze(1)).squeeze(1)

        numerator = torch.sum(label_mask * -target_scores)
        # The epsilon keeps the division finite when nothing is labelled.
        denominator = torch.sum(label_mask) + 1e-6
        loss_mlm = numerator / denominator
        correct_mlm = torch.sum(
            label_mask * (output_mlm.argmax(dim=-1).eq(tgt_mlm)).float())

        return loss_mlm, correct_mlm, denominator
Ejemplo n.º 2
0
    def mlm(self, memory_bank, tgt_mlm):
        """Compute the masked language model (MLM) loss on labelled positions only.

        Rows whose target is ``<= 0`` are dropped *before* the vocabulary
        projection, so ``self.mlm_linear_2`` only runs on labelled positions.

        Args:
            memory_bank: Encoder output fed to the MLM head. After the first
                projection it is flattened to rows of ``self.emb_size`` (when
                ``self.factorized_embedding_parameterization`` is truthy) or
                ``self.hidden_size`` features.
            tgt_mlm: Integer target ids, one per flattened row. Entries
                ``<= 0`` mark positions that need no prediction.

        Returns:
            A tuple ``(loss_mlm, correct_mlm, denominator)``: the mean negated
            target score, the number of rows whose argmax equals the target,
            and the number of labelled rows plus ``1e-6``. All three are
            placed on the device of the projected scores.
        """
        output_mlm = gelu(self.mlm_linear_1(memory_bank))
        output_mlm = self.layer_norm(output_mlm)
        if self.factorized_embedding_parameterization:
            output_mlm = output_mlm.contiguous().view(-1, self.emb_size)
        else:
            output_mlm = output_mlm.contiguous().view(-1, self.hidden_size)
        tgt_mlm = tgt_mlm.contiguous().view(-1)

        # Evaluate the selection mask once and reuse it for both tensors.
        labelled = tgt_mlm > 0
        output_mlm = output_mlm[labelled, :]
        tgt_mlm = tgt_mlm[labelled]

        output_mlm = self.mlm_linear_2(output_mlm)
        # NOTE(review): presumably a log-softmax, since the result is negated
        # and averaged as a loss below -- confirm where self.softmax is built.
        output_mlm = self.softmax(output_mlm)

        device = output_mlm.device
        num_labelled = output_mlm.size(0)

        # Read each row's score at its target id directly instead of
        # multiplying by a dense (rows, vocab_size) one-hot tensor.
        target_scores = output_mlm.gather(1, tgt_mlm.unsqueeze(1)).squeeze(1)
        # The float32 one-hot used to promote low-precision scores before the
        # reduction; keep accumulating in at least float32.
        sum_dtype = torch.promote_types(target_scores.dtype, torch.float32)
        numerator = -target_scores.to(sum_dtype)

        # The epsilon keeps the division finite when no row is labelled.
        denominator = torch.tensor(num_labelled + 1e-6, device=device)
        loss_mlm = torch.sum(numerator) / denominator
        if num_labelled == 0:
            correct_mlm = torch.tensor(0.0, device=device)
        else:
            correct_mlm = torch.sum(
                (output_mlm.argmax(dim=-1).eq(tgt_mlm)).float())

        return loss_mlm, correct_mlm, denominator
Ejemplo n.º 3
0
 def forward(self, src, seg):
     """Return the MLM head's output normalized with a softmax over the last dim.

     ``src`` and ``seg`` are passed to ``self.embedding``; ``seg`` is also
     passed to ``self.encoder``. The encoder output then goes through the
     target's ``mlm_linear_1`` -> ``gelu`` -> ``layer_norm`` ->
     ``mlm_linear_2`` stack.
     """
     hidden = self.encoder(self.embedding(src, seg), seg)
     hidden = self.target.layer_norm(gelu(self.target.mlm_linear_1(hidden)))
     logits = self.target.mlm_linear_2(hidden)
     normalize = torch.nn.Softmax(dim=-1)
     return normalize(logits)
Ejemplo n.º 4
0
 def forward(self, src, seg):
     """Return the softmax (over the last dim) of the activated output layer.

     ``src`` and ``seg`` are passed to ``self.embedding``; ``seg`` is also
     passed to ``self.encoder``. The encoder output is projected by
     ``self.target.output_layer`` and passed through ``gelu`` before the
     softmax is applied.
     """
     encoded = self.encoder(self.embedding(src, seg), seg)
     activated = gelu(self.target.output_layer(encoded))
     normalize = torch.nn.Softmax(dim=-1)
     return normalize(activated)
Ejemplo n.º 5
0
 def forward(self, src, seg):
     """Return the GELU-activated output of the target's output layer.

     ``src`` and ``seg`` are passed to ``self.embedding``; ``seg`` is also
     passed to ``self.encoder``, whose result feeds
     ``self.target.output_layer``.
     """
     embedded = self.embedding(src, seg)
     encoded = self.encoder(embedded, seg)
     projected = self.target.output_layer(encoded)
     return gelu(projected)
Ejemplo n.º 6
0
 def forward(self, x):
     """Apply ``linear_1``, the configured activation, then ``linear_2``.

     Args:
         x: Input passed to ``self.linear_1``.

     Returns:
         ``self.linear_2(self.act(self.linear_1(x)))``.
     """
     # The activation comes from self.act. An earlier hard-coded gelu pass
     # over the same projection was overwritten before use, so it only
     # duplicated the linear_1 computation and has been removed.
     inter = self.act(self.linear_1(x))
     output = self.linear_2(inter)
     return output