def validation_step(self, batch, batch_idx):
    """Run one validation step: score every candidate response against the query.

    Encodes the query once, then encodes each candidate response, computes a
    cosine similarity between the two rescaled mean-pooled embeddings, and
    accumulates an MSE loss against the per-candidate labels. Per-query label
    orderings (labels sorted by descending predicted score) are appended to
    ``self.val_res`` for downstream ranking metrics.

    Args:
        batch: dict with 'query_text', 'response_text' (list per candidate),
            and 'label' (list of per-candidate label tensors).
            NOTE(review): exact schema inferred from usage here — confirm
            against the DataLoader/collate_fn.
        batch_idx: index of this batch (unused; Lightning convention).

    Returns:
        Accumulated MSE validation loss over all candidates.
    """
    batch = transfer_batch_to_device(batch, self.device)
    all_preds = []
    val_loss = 0

    # Encode the query once; mean-pool the contextual token embeddings.
    query_text = batch['query_text']
    b = next(self.batcher.iter_batches(query_text, verbose=False))
    b = transfer_batch_to_device(b, self.device)
    query_output = self.bert(**b)['contextual_embeddings']
    query_pooler_output = torch.mean(query_output, 1)

    for i in range(len(batch['label'])):
        labels = batch['label'][i].float()
        response_text = batch['response_text'][i]
        # BUG FIX: the original re-encoded query_text here, so every
        # "response" embedding was actually the query embedding and all
        # similarities were identical. Encode the i-th response instead
        # (matching training_step, which does this correctly).
        b = next(self.batcher.iter_batches(response_text, verbose=False))
        b = transfer_batch_to_device(b, self.device)
        response_output = self.bert(**b)['contextual_embeddings']
        response_pooler_output = torch.mean(response_output, 1)

        # Both sides go through the same rescale layer before cosine sim.
        preds = self.cosine_sim(
            self.query_rescale_layer(query_pooler_output),
            self.query_rescale_layer(response_pooler_output))
        preds = preds.to('cpu')
        labels = labels.to('cpu')
        val_loss += mse_loss(preds, labels)
        all_preds.append(torch.unsqueeze(preds, 1))
        self.log('val_acc_step', self.accuracy(preds, labels))

    self.log('val_loss_step', val_loss / len(batch['label']))

    # Build per-query rankings: for each query in the batch, sort its
    # candidate labels by descending predicted similarity.
    all_preds = torch.cat(all_preds, 1)
    all_preds_np = all_preds.cpu().numpy()
    np_labels = [x.cpu().numpy() for x in batch['label']]
    np_labels = np.array(np_labels).transpose()
    res = []
    for i in range(len(batch['label'][0])):
        b_i = all_preds_np[i, :]
        idxs = (-b_i).argsort()
        labels_idx = np_labels[i, idxs]
        res.append(labels_idx)
    self.val_res.extend(res)
    return val_loss
def test_step(self, batch, batch_idx):
    """Run one test step: score candidate responses and record rankings.

    Same structure as validation, but uses pre-tokenized encodings
    ('query_enc_dict' / 'response_enc_dict') and passes entity embeddings
    ('q_retro' / 'r_retro' — presumably retrofitted entity vectors; verify
    against the model definition) into the encoder. Per-query label
    orderings are appended to ``self.eval_res``.

    Args:
        batch: dict with 'query_enc_dict', 'response_enc_dict' (list per
            candidate), 'q_retro', 'r_retro', and 'label'.
        batch_idx: index of this batch (unused; Lightning convention).

    Returns:
        Accumulated MSE test loss over all candidates.
    """
    batch = transfer_batch_to_device(batch, self.device)
    all_preds = []
    val_loss = 0

    # Encode the query once; mean-pool the last hidden state over tokens.
    # (The original also read out.pooler_output but immediately overwrote
    # it with the mean-pool — that dead assignment is removed.)
    query_dict = batch['query_enc_dict']
    out = self.bert(query_dict['input_ids'],
                    token_type_ids=query_dict['token_type_ids'],
                    attention_mask=query_dict['attention_mask'],
                    entity_embeds=batch["q_retro"])
    query_pooler_output = torch.mean(out.last_hidden_state, 1)

    for i in range(len(batch['label'])):
        labels = batch['label'][i].float()
        response_dict = batch['response_enc_dict'][i]
        out = self.bert(response_dict['input_ids'],
                        token_type_ids=response_dict['token_type_ids'],
                        attention_mask=response_dict['attention_mask'],
                        entity_embeds=batch["r_retro"][i])
        response_pooler_output = torch.mean(out.last_hidden_state, 1)

        # Both sides go through the same rescale layer before cosine sim.
        preds = self.cosine_sim(
            self.query_rescale_layer(query_pooler_output),
            self.query_rescale_layer(response_pooler_output))
        preds = preds.to('cpu')
        labels = labels.to('cpu')
        val_loss += mse_loss(preds, labels)
        all_preds.append(torch.unsqueeze(preds, 1))
        # Accuracy expects scores in [0, 1] and integer labels.
        self.log('test_acc_step',
                 self.testaccuracy(torch.clamp(preds, 0, 1), labels.int()))

    self.log('test_loss_step', val_loss / len(batch['label']))

    # Build per-query rankings: for each query in the batch, sort its
    # candidate labels by descending predicted similarity.
    all_preds = torch.cat(all_preds, 1)
    all_preds_np = all_preds.cpu().numpy()
    np_labels = [x.cpu().numpy() for x in batch['label']]
    np_labels = np.array(np_labels).transpose()
    res = []
    for i in range(len(batch['label'][0])):
        b_i = all_preds_np[i, :]
        idxs = (-b_i).argsort()
        labels_idx = np_labels[i, idxs]
        res.append(labels_idx)
    self.eval_res.extend(res)
    return val_loss
def training_step(self, batch, batch_idx):
    """Run one training step.

    Embeds the query and the response with the shared encoder, rescales
    both mean-pooled embeddings through the same linear layer, and trains
    the cosine similarity toward the batch labels with an MSE loss.

    Args:
        batch: dict with 'query_text', 'response_text', and 'label'.
        batch_idx: index of this batch (unused; Lightning convention).

    Returns:
        The MSE training loss for this batch.
    """

    def _encode(texts):
        # Tokenize, move to device, run the encoder, mean-pool over tokens.
        enc = next(self.batcher.iter_batches(texts, verbose=False))
        enc = transfer_batch_to_device(enc, self.device)
        hidden = self.bert(**enc)['contextual_embeddings']
        return torch.mean(hidden, 1)

    target = batch['label'].float()
    query_vec = _encode(batch['query_text'])
    response_vec = _encode(batch['response_text'])

    similarity = self.cosine_sim(
        self.query_rescale_layer(query_vec),
        self.query_rescale_layer(response_vec))
    loss = mse_loss(similarity, target)
    self.log('train_loss', loss, on_step=True)
    return loss