def mlm(self, memory_bank, tgt_mlm):
    # Masked language model (MLM) with full softmax prediction.
    output_mlm = gelu(self.mlm_linear_1(memory_bank))
    output_mlm = self.layer_norm(output_mlm)
    output_mlm = self.mlm_linear_2(output_mlm)
    output_mlm = output_mlm.contiguous().view(-1, self.vocab_size)
    # Full distribution over the vocabulary; self.softmax is expected to be
    # nn.LogSoftmax(dim=-1), so these are log-probabilities.
    output_mlm = self.softmax(output_mlm)
    tgt_mlm = tgt_mlm.contiguous().view(-1, 1)
    # Positions with label 0 are unmasked/padding and are excluded from the loss.
    label_mask = (tgt_mlm > 0).float().to(output_mlm.device)
    one_hot = torch.zeros(label_mask.size(0), self.vocab_size). \
        to(output_mlm.device). \
        scatter_(1, tgt_mlm, 1.0)
    # Negative log-likelihood of the gold token at each position.
    numerator = -torch.sum(output_mlm * one_hot, 1)
    label_mask = label_mask.contiguous().view(-1)
    tgt_mlm = tgt_mlm.contiguous().view(-1)
    numerator = torch.sum(label_mask * numerator)
    denominator = torch.sum(label_mask) + 1e-6
    loss_mlm = numerator / denominator
    correct_mlm = torch.sum(
        label_mask * (output_mlm.argmax(dim=-1).eq(tgt_mlm)).float())
    return loss_mlm, correct_mlm, denominator
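# The masked negative log-likelihood above can be written more compactly with
# torch.nn.functional.nll_loss; a minimal sketch under the assumption that
# self.softmax is nn.LogSoftmax(dim=-1) and label 0 marks ignored positions:
import torch
import torch.nn.functional as F

def masked_nll(log_probs, targets):
    # log_probs: [batch * seq_length, vocab_size]; targets: [batch * seq_length].
    # ignore_index=0 plays the role of label_mask = (tgt_mlm > 0) above, and
    # the default mean reduction averages over the non-ignored positions.
    return F.nll_loss(log_probs, targets, ignore_index=0)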
def mlm(self, memory_bank, tgt_mlm):
    # Masked language modeling (MLM) with full softmax prediction.
    output_mlm = gelu(self.mlm_linear_1(memory_bank))
    output_mlm = self.layer_norm(output_mlm)
    if self.factorized_embedding_parameterization:
        output_mlm = output_mlm.contiguous().view(-1, self.emb_size)
    else:
        output_mlm = output_mlm.contiguous().view(-1, self.hidden_size)
    tgt_mlm = tgt_mlm.contiguous().view(-1)
    # Keep only the masked positions; label 0 marks unmasked/padding tokens.
    output_mlm = output_mlm[tgt_mlm > 0, :]
    tgt_mlm = tgt_mlm[tgt_mlm > 0]
    output_mlm = self.mlm_linear_2(output_mlm)
    output_mlm = self.softmax(output_mlm)
    one_hot = torch.zeros(output_mlm.size(0), self.vocab_size). \
        to(output_mlm.device). \
        scatter_(1, tgt_mlm.contiguous().view(-1, 1), 1.0)
    numerator = -torch.sum(output_mlm * one_hot, 1)
    denominator = torch.tensor(output_mlm.size(0) + 1e-6)
    loss_mlm = torch.sum(numerator) / denominator
    if output_mlm.size(0) == 0:
        correct_mlm = torch.tensor(0.0)
    else:
        correct_mlm = torch.sum(
            (output_mlm.argmax(dim=-1).eq(tgt_mlm)).float())
    return loss_mlm, correct_mlm, denominator
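# Tiny runnable demonstration of the boolean-mask filtering used above
# (shapes and ids are illustrative assumptions; 0 marks unmasked positions):
import torch

hidden = torch.randn(2 * 4, 8)                 # [batch * seq_length, hidden_size]
tgt = torch.tensor([0, 5, 0, 2, 0, 0, 7, 0])   # hypothetical MLM labels
masked_hidden = hidden[tgt > 0, :]             # only masked positions reach mlm_linear_2
masked_tgt = tgt[tgt > 0]
print(masked_hidden.shape, masked_tgt)         # torch.Size([3, 8]) tensor([5, 2, 7])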
def forward(self, src, seg):
    emb = self.embedding(src, seg)
    output = self.encoder(emb, seg)
    output = gelu(self.target.mlm_linear_1(output))
    output = self.target.layer_norm(output)
    output = self.target.mlm_linear_2(output)
    prob = torch.nn.Softmax(dim=-1)(output)
    return prob
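# Given the prob tensor returned above, a cloze prediction at a masked position
# is a top-k lookup over the vocabulary dimension; a runnable sketch with a
# dummy distribution standing in for the model output (vocab size is assumed):
import torch

prob = torch.softmax(torch.randn(1, 5, 30522), dim=-1)  # [batch, seq_length, vocab_size]
mask_pos = 2                                            # hypothetical masked index
topk = prob[0, mask_pos].topk(5)
print(topk.indices)                                     # the 5 most likely token ids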
def forward(self, src, seg):
    emb = self.embedding(src, seg)
    output = self.encoder(emb, seg)
    output = gelu(self.target.output_layer(output))
    prob = torch.nn.Softmax(dim=-1)(output)
    return prob
def forward(self, src, seg):
    emb = self.embedding(src, seg)
    output = self.encoder(emb, seg)
    output = gelu(self.target.output_layer(output))
    return output
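# The three forward variants above differ only in the head that is applied and
# in whether a softmax turns logits into probabilities; a quick check that
# Softmax(dim=-1) yields a distribution over the vocabulary (shapes assumed):
import torch

logits = torch.randn(1, 5, 30522)                       # [batch, seq_length, vocab_size]
prob = torch.nn.Softmax(dim=-1)(logits)
print(torch.allclose(prob.sum(-1), torch.ones(1, 5)))   # True: each row sums to 1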
def forward(self, x):
    # self.act is the configured activation (gelu in the snippets above).
    inter = self.act(self.linear_1(x))
    output = self.linear_2(inter)
    return output
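# A minimal self-contained sketch of the feed-forward block above; the names
# linear_1/linear_2 follow the original, while the class name, the sizes, and
# the gelu default are assumptions:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PositionwiseFeedForward(nn.Module):
    def __init__(self, hidden_size, feedforward_size, act=F.gelu):
        super().__init__()
        self.linear_1 = nn.Linear(hidden_size, feedforward_size)
        self.linear_2 = nn.Linear(feedforward_size, hidden_size)
        self.act = act  # gelu by default, matching the snippets above

    def forward(self, x):
        inter = self.act(self.linear_1(x))
        return self.linear_2(inter)

# Usage: ffn = PositionwiseFeedForward(768, 3072); y = ffn(torch.randn(2, 5, 768))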