def negative_sampling_loss(
    target: torch.Tensor, context: torch.Tensor, noise: torch.Tensor
) -> torch.Tensor:
    """Return the mean negative-sampling loss over a batch of embeddings.

    For every batch row the loss is::

        -( log sigmoid(context . target)
           + sum_k log sigmoid(-(noise_k . target)) )

    and the result is the mean of that quantity over the batch.

    Args:
        target: Embeddings of shape ``(batch_size, embed_size)``.
        context: Embeddings with ``batch_size * embed_size`` elements; they
            are reshaped to ``(batch_size, 1, embed_size)``.
        noise: Noise (negative-sample) embeddings of shape
            ``(batch_size, n_noise, embed_size)``.

    Returns:
        A 0-dimensional tensor holding the batch-averaged loss.
    """
    batch_size, embed_size = target.shape

    # Column / row vectors so that bmm yields one dot product per batch row.
    target_col = target.reshape(batch_size, embed_size, 1)
    context_row = context.reshape(batch_size, 1, embed_size)

    def _log_sigmoid(scores: torch.Tensor) -> torch.Tensor:
        # logsigmoid is numerically stable where sigmoid().log() underflows
        # to log(0) = -inf for strongly negative scores. Integer scores are
        # upcast first because logsigmoid is a floating-point op.
        if not scores.is_floating_point():
            scores = scores.to(torch.get_default_dtype())
        return torch.nn.functional.logsigmoid(scores)

    # (B, 1, 1) -> (B,). Explicit reshape instead of a bare squeeze() so a
    # batch of one keeps its batch axis.
    output_loss = _log_sigmoid(torch.bmm(context_row, target_col)).reshape(
        batch_size
    )

    # (B, K, 1) -> (B, K) -> (B,). Only the trailing axis is squeezed, so
    # batch_size == 1 or a single noise sample no longer breaks sum(1).
    noise_scores = torch.bmm(noise.neg(), target_col)
    noise_loss = _log_sigmoid(noise_scores).squeeze(2).sum(1)

    return -(output_loss + noise_loss).mean()