def test_submission(self, num_folds=5):
    """Ensemble the per-fold checkpoints on ``test.csv`` and write ``sub.csv``.

    For each fold ``i`` in ``range(num_folds)`` the checkpoint
    ``pytorch_model_{i}.bin`` is loaded from ``self.output_dir`` and run over
    the test set. The per-fold logits are averaged, the arg-max over axis 1
    is taken as the predicted label, and ``(guid, label)`` rows are written
    to ``<self.output_dir>/sub.csv`` without index or header.

    Args:
        num_folds: Number of fold checkpoints to average. Defaults to 5,
            which matches the previously hard-coded behaviour.

    Raises:
        ValueError: If ``num_folds`` is smaller than 1.
    """
    if num_folds < 1:
        raise ValueError("num_folds must be at least 1")

    data = DATADOUBAN(debug=False, data_dir=self.data_dir)
    test_examples = data.read_examples_test(
        os.path.join(self.data_dir, 'test.csv'))
    print('eval_examples的数量', len(test_examples))

    test_features = data.convert_examples_to_features(
        test_examples, self.tokenizer, self.max_seq_length)
    all_input_ids = torch.tensor(
        data.select_field(test_features, 'input_ids'), dtype=torch.long)
    all_input_mask = torch.tensor(
        data.select_field(test_features, 'input_mask'), dtype=torch.long)
    all_segment_ids = torch.tensor(
        data.select_field(test_features, 'segment_ids'), dtype=torch.long)
    # The label column is kept in the dataset so its layout is unchanged,
    # even though labels are not needed to produce a submission.
    all_label = torch.tensor([f.label for f in test_features],
                             dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_input_mask,
                              all_segment_ids, all_label)

    # Sequential order keeps prediction rows aligned with test_examples.
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=self.eval_batch_size)

    # The configuration does not depend on the fold, so load it once.
    config = BertConfig.from_pretrained(self.model_name_or_path,
                                        num_labels=self.num_labels)

    # Running mean of the logits over all folds.
    prediction = np.zeros((len(test_examples), self.num_labels))

    for fold in range(num_folds):
        model = BertForSequenceClassification.from_pretrained(
            os.path.join(self.output_dir,
                         "pytorch_model_{}.bin".format(fold)),
            self.args, config=config)
        model.to(self.device)
        model.eval()

        fold_logits = []
        for input_ids, input_mask, segment_ids, _ in eval_dataloader:
            input_ids = input_ids.to(self.device)
            input_mask = input_mask.to(self.device)
            segment_ids = segment_ids.to(self.device)

            with torch.no_grad():
                # NOTE(review): assumes the model's forward pass returns a
                # single logits tensor (not a tuple) -- confirm against the
                # project's BertForSequenceClassification.
                logits = model(
                    input_ids=input_ids,
                    token_type_ids=segment_ids,
                    attention_mask=input_mask).detach().cpu().numpy()
            fold_logits.append(logits)

        logits = np.concatenate(fold_logits, 0)
        print(logits.shape)
        print(prediction.shape)
        prediction += logits / num_folds

    test_id = [x.guid for x in test_examples]
    assert len(test_id) == len(prediction)

    predicted_labels = np.argmax(prediction, axis=1)
    submission = pd.DataFrame({'id': test_id, 'predict': predicted_labels})
    submission.to_csv(os.path.join(self.output_dir, "sub.csv"),
                      index=False, header=False)
def create_dataloader(self):
    """Build the training and validation data loaders.

    Reads ``train.csv`` and ``dev.csv`` from ``self.data_dir``, converts the
    examples to features with ``self.tokenizer`` / ``self.max_seq_length``
    and wraps them in ``TensorDataset`` objects. The training loader draws
    batches with a ``RandomSampler``; the validation loader iterates with a
    ``SequentialSampler``.

    Returns:
        A 4-tuple ``(train_dataloader, eval_dataloader, train_examples,
        eval_examples)``.
    """
    data = DATADOUBAN(
        debug=False,
        data_dir=self.data_dir,
    )

    def _as_tensor_dataset(features):
        # Pack the four per-example fields into one TensorDataset, in the
        # order (input_ids, input_mask, segment_ids, label).
        field_tensors = [
            torch.tensor(data.select_field(features, field_name),
                         dtype=torch.long)
            for field_name in ('input_ids', 'input_mask', 'segment_ids')
        ]
        label_tensor = torch.tensor([feat.label for feat in features],
                                    dtype=torch.long)
        return TensorDataset(*field_tensors, label_tensor)

    # Training split: shuffled batches.
    train_examples = data.read_examples(
        os.path.join(self.data_dir, 'train.csv'))
    train_data = _as_tensor_dataset(
        data.convert_examples_to_features(
            train_examples, self.tokenizer, self.max_seq_length))
    train_dataloader = DataLoader(
        train_data,
        sampler=RandomSampler(train_data),
        batch_size=self.train_batch_size)

    # Validation split: fixed order.
    eval_examples = data.read_examples(
        os.path.join(self.data_dir, 'dev.csv'))
    eval_data = _as_tensor_dataset(
        data.convert_examples_to_features(
            eval_examples, self.tokenizer, self.max_seq_length))
    eval_dataloader = DataLoader(
        eval_data,
        sampler=SequentialSampler(eval_data),
        batch_size=self.eval_batch_size)

    return train_dataloader, eval_dataloader, train_examples, eval_examples
def test_eval(self):
    """Score the single ``pytorch_model.bin`` checkpoint on ``test.csv``.

    Loads the checkpoint from ``self.output_dir``, runs it over the test
    examples in their original order, and prints the values returned by
    ``compute_DOUBAN`` (two MRR variants, MAP, Precision@1) and
    ``compute_5R20``. Nothing is returned.
    """
    data = DATADOUBAN(debug=False, data_dir=self.data_dir)
    test_examples = data.read_examples(
        os.path.join(self.data_dir, 'test.csv'))
    print('eval_examples的数量', len(test_examples))

    questions = [x.text_a for x in test_examples]
    example_ids = [x.guid for x in test_examples]

    test_features = data.convert_examples_to_features(
        test_examples, self.tokenizer, self.max_seq_length)
    all_input_ids = torch.tensor(
        data.select_field(test_features, 'input_ids'), dtype=torch.long)
    all_input_mask = torch.tensor(
        data.select_field(test_features, 'input_mask'), dtype=torch.long)
    all_segment_ids = torch.tensor(
        data.select_field(test_features, 'segment_ids'), dtype=torch.long)
    all_label = torch.tensor([f.label for f in test_features],
                             dtype=torch.long)
    test_data = TensorDataset(all_input_ids, all_input_mask,
                              all_segment_ids, all_label)

    # Sequential order keeps scores aligned with example_ids and questions.
    test_sampler = SequentialSampler(test_data)
    test_dataloader = DataLoader(test_data, sampler=test_sampler,
                                 batch_size=self.eval_batch_size)

    config = BertConfig.from_pretrained(self.model_name_or_path,
                                        num_labels=self.num_labels)
    model = BertForSequenceClassification.from_pretrained(
        os.path.join(self.output_dir, "pytorch_model.bin"),
        self.args, config=config)
    model.to(self.device)
    model.eval()

    gold_labels = []
    scores = []
    for input_ids, input_mask, segment_ids, label_ids in test_dataloader:
        input_ids = input_ids.to(self.device)
        input_mask = input_mask.to(self.device)
        segment_ids = segment_ids.to(self.device)

        with torch.no_grad():
            # NOTE(review): assumes the model's forward pass returns a
            # single logits tensor (not a tuple) -- confirm against the
            # project's BertForSequenceClassification.
            logits = model(
                input_ids=input_ids,
                token_type_ids=segment_ids,
                attention_mask=input_mask).detach().cpu().numpy()
        scores.append(logits)
        # Labels are only needed on the host, so skip the device round-trip.
        gold_labels.append(label_ids.numpy())

    gold_labels = np.concatenate(gold_labels, 0)
    scores = np.concatenate(scores, 0)

    # Compute the evaluation metrics. Ids, scores and gold labels must
    # describe the same set of examples (the previous check compared
    # `scores` with itself and never looked at the labels).
    assert len(example_ids) == scores.shape[0] == gold_labels.shape[0]

    eval_5R20 = compute_5R20(scores, gold_labels, questions)
    eval_DOUBAN_MRR, eval_DOUBAN_mrr, eval_DOUBAN_MAP, eval_Precision1 = \
        compute_DOUBAN(example_ids, scores, gold_labels)

    print('eval_MRR', eval_DOUBAN_MRR, eval_DOUBAN_mrr,
          'eval_MAP', eval_DOUBAN_MAP,
          'eval_Precision1', eval_Precision1)
    print('eval_5R20', eval_5R20)