from typing import Optional, Tuple

import torch
import torch.nn as nn

# Imports below assume the DPR repository layout (github.com/facebookresearch/DPR).
from dpr.models.biencoder import BiEncoderBatch, BiEncoderNllLoss
from dpr.utils.data_utils import Tensorizer
from dpr.utils.model_utils import move_to_device


def _do_biencoder_fwd_pass(
    model: nn.Module,
    input: BiEncoderBatch,
    tensorizer: Tensorizer,
    cfg,
    encoder_type: str,
    rep_positions=0,
    loss_scale: Optional[float] = None,
) -> Tuple[torch.Tensor, int]:
    # Move the whole batch to the target device before the forward pass.
    input = BiEncoderBatch(**move_to_device(input._asdict(), cfg.device))

    q_attn_mask = tensorizer.get_attn_mask(input.question_ids)
    ctx_attn_mask = tensorizer.get_attn_mask(input.context_ids)

    if model.training:
        model_out = model(
            input.question_ids,
            input.question_segments,
            q_attn_mask,
            input.context_ids,
            input.ctx_segments,
            ctx_attn_mask,
            encoder_type=encoder_type,
            representation_token_pos=rep_positions,
        )
    else:
        # Evaluation mode: skip autograd bookkeeping.
        with torch.no_grad():
            model_out = model(
                input.question_ids,
                input.question_segments,
                q_attn_mask,
                input.context_ids,
                input.ctx_segments,
                ctx_attn_mask,
                encoder_type=encoder_type,
                representation_token_pos=rep_positions,
            )

    local_q_vector, local_ctx_vectors = model_out

    loss_function = BiEncoderNllLoss()

    # _calc_loss is defined alongside this helper in DPR's train_dense_encoder.py.
    loss, is_correct = _calc_loss(
        cfg,
        loss_function,
        local_q_vector,
        local_ctx_vectors,
        input.is_positive,
        input.hard_negatives,
        loss_scale=loss_scale,
    )

    is_correct = is_correct.sum().item()

    if cfg.n_gpu > 1:
        loss = loss.mean()
    if cfg.train.gradient_accumulation_steps > 1:
        # Scale the loss by the same config value checked above so the
        # accumulated gradient matches a single full-batch update.
        loss = loss / cfg.train.gradient_accumulation_steps

    return loss, is_correct
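
# Illustration (not from the DPR source): the in-batch-negatives NLL that
# BiEncoderNllLoss computes under the hood. Each question is scored against
# every context vector in the batch by dot product, and the loss is the
# negative log-softmax mass on that question's positive context. Names here
# are illustrative, not DPR's exact implementation.
def _in_batch_nll_sketch(
    q_vectors: torch.Tensor,    # (n_questions, dim)
    ctx_vectors: torch.Tensor,  # (n_contexts, dim): positives + (hard) negatives
    positive_idx_per_question: list,
) -> Tuple[torch.Tensor, torch.Tensor]:
    import torch.nn.functional as F

    # Similarity of every question against every in-batch context.
    scores = torch.matmul(q_vectors, ctx_vectors.t())
    softmax_scores = F.log_softmax(scores, dim=1)

    target = torch.tensor(positive_idx_per_question, device=scores.device)
    loss = F.nll_loss(softmax_scores, target, reduction="mean")

    # A question counts as correct when its positive context has the top score.
    correct = (softmax_scores.argmax(dim=1) == target).sum()
    return loss, correct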
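
# Illustration (not from the DPR source): how a training loop might drive
# _do_biencoder_fwd_pass with gradient accumulation. The iterator, optimizer,
# and scheduler wiring is assumed here; DPR's BiEncoderTrainer handles this.
def _train_epoch_sketch(model, tensorizer, cfg, train_iterator, optimizer, scheduler):
    model.train()
    optimizer.zero_grad()
    for step, batch in enumerate(train_iterator):
        loss, correct_cnt = _do_biencoder_fwd_pass(
            model, batch, tensorizer, cfg, encoder_type=None
        )
        # The helper already divides the loss by gradient_accumulation_steps,
        # so gradients accumulated over k small batches match one large batch.
        loss.backward()
        if (step + 1) % cfg.train.gradient_accumulation_steps == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.train.max_grad_norm)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()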