def run_experiment(args):
    args.run_id = str(ex.current_run._id)

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    tokenizer = BertTokenizerFast.from_pretrained(args.transformer_model)
    # Conversation Response Ranking datasets need special tokens
    if args.task in ["mantis", "msdialog", "ubuntu_dstc8"]:
        special_tokens_dict = {'additional_special_tokens': ['[UTTERANCE_SEP]', '[TURN_SEP]']}
        tokenizer.add_special_tokens(special_tokens_dict)

    # Load datasets
    train = pd.read_csv(args.data_folder + args.task + "/train.tsv", sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)
    valid = pd.read_csv(args.data_folder + args.task + "/valid.tsv", sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)

    # Choose the negative candidate sampler
    document_col = train.columns[1]
    if args.train_negative_sampler == 'random':
        ns_train = negative_sampling.RandomNegativeSampler(list(train[document_col].values), args.num_ns_train)
    elif args.train_negative_sampler == 'bm25':
        ns_train = negative_sampling.BM25NegativeSamplerPyserini(list(train[document_col].values), args.num_ns_train,
            args.data_folder + args.task + "/anserini_train/", args.sample_data, args.anserini_folder)
    elif args.train_negative_sampler == 'sentenceBERT':
        ns_train = negative_sampling.SentenceBERTNegativeSampler(list(train[document_col].values), args.num_ns_train,
            args.data_folder + args.task + "/train_sentenceBERTembeds", args.sample_data, args.bert_sentence_model)

    if args.test_negative_sampler == 'random':
        ns_val = negative_sampling.RandomNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval)
    elif args.test_negative_sampler == 'bm25':
        ns_val = negative_sampling.BM25NegativeSamplerPyserini(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval,
            args.data_folder + args.task + "/anserini_valid/", args.sample_data, args.anserini_folder)
    elif args.test_negative_sampler == 'sentenceBERT':
        ns_val = negative_sampling.SentenceBERTNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval,
            args.data_folder + args.task + "/valid_sentenceBERTembeds", args.sample_data, args.bert_sentence_model)

    # Aggregate all relevant documents per query and build one row of weak-supervision labels:
    # 1.0 for every relevant document, followed by the sampler scores of the sampled negatives.
    train = train.groupby(train.columns[0]).agg(list).reset_index()
    labels = []
    sample = 10000
    max_labels = 0
    for idx, row in enumerate(tqdm(train[0:sample].itertuples(index=False), total=sample)):
        query = row[0]
        relevant_documents = row[1]
        query_labels = []
        for relevant_document in relevant_documents:
            query_labels.append(1.0)
        ns_candidates, ns_scores, _, _, _ = ns_train.sample(query, relevant_documents)
        for i, ns in enumerate(ns_candidates):
            query_labels.append(ns_scores[i])
        labels.append(query_labels)
        if max_labels < len(query_labels):
            max_labels = len(query_labels)

    df_labels = pd.DataFrame(labels, columns=["candidate_{}".format(i) for i in range(max_labels)])
    df_labels.to_csv(args.output_dir + "/{}_weak_supervision.csv".format(args.task), index=False)
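# Illustrative example (not part of the original script): a minimal sketch of how the
# weak-supervision file written above could be consumed. The path below is hypothetical;
# the file has one row per query with columns candidate_0..candidate_k, holding 1.0 for
# each relevant document followed by the negative sampler's scores for the sampled negatives.
import pandas as pd


def load_weak_supervision_as_soft_labels(path="output/mantis_weak_supervision.csv"):
    df = pd.read_csv(path)
    # Min-max normalise each row so sampler scores become soft labels in [0, 1];
    # rows with fewer candidates than the widest row simply keep NaN in the extra columns.
    row_min = df.min(axis=1)
    row_range = (df.max(axis=1) - row_min).replace(0, 1)
    return df.sub(row_min, axis=0).div(row_range, axis=0)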
def run_experiment(args):
    args.run_id = str(ex.current_run._id)

    tokenizer = BertTokenizer.from_pretrained(args.transformer_model)

    # Load datasets
    ## Conversation Response Ranking
    if args.task in ["mantis", "msdialog", "ubuntu_dstc8"]:
        add_turn_separator = (args.task != "ubuntu_dstc8")  # Ubuntu data has several utterances from the same user in the context.
        train = preprocess_crr.read_crr_tsv_as_df(args.data_folder + args.task + "/train.tsv",
                                                  args.sample_data, add_turn_separator)
        valid = preprocess_crr.read_crr_tsv_as_df(args.data_folder + args.task + "/valid.tsv",
                                                  args.sample_data, add_turn_separator)
        special_tokens_dict = {'additional_special_tokens': ['[UTTERANCE_SEP]', '[TURN_SEP]']}
        tokenizer.add_special_tokens(special_tokens_dict)
    ## Similar Question Retrieval and Passage Retrieval
    elif args.task in ["qqp", "linkso", "trec2020pr"]:
        if args.sample_data == -1:
            args.sample_data = None
        train = pd.read_csv(args.data_folder + args.task + "/train.tsv", sep="\t", nrows=args.sample_data)
        valid = pd.read_csv(args.data_folder + args.task + "/valid.tsv", sep="\t", nrows=args.sample_data)
    elif args.task == "scisumm":
        train, valid = preprocess_scisumm.transform_to_dfs(
            "../data/Training-Set-2019/Task1/From-Training-Set-2018/")

    # Choose the negative candidate sampler
    document_col = train.columns[1]
    if args.train_negative_sampler == 'random':
        ns_train = negative_sampling.RandomNegativeSampler(list(train[document_col].values), args.num_ns_train)
    elif args.train_negative_sampler == 'bm25':
        ns_train = negative_sampling.BM25NegativeSamplerPyserini(list(train[document_col].values), args.num_ns_train,
            args.data_folder + "/" + args.task + "/anserini_train/", args.sample_data, args.anserini_folder)
    elif args.train_negative_sampler == 'sentenceBERT':
        ns_train = negative_sampling.SentenceBERTNegativeSampler(list(train[document_col].values), args.num_ns_train,
            args.data_folder + "/" + args.task + "/train_sentenceBERTembeds", args.sample_data, args.bert_sentence_model)

    if args.test_negative_sampler == 'random':
        ns_val = negative_sampling.RandomNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval)
    elif args.test_negative_sampler == 'bm25':
        ns_val = negative_sampling.BM25NegativeSamplerPyserini(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval,
            args.data_folder + "/" + args.task + "/anserini_valid/", args.sample_data, args.anserini_folder)
    elif args.test_negative_sampler == 'sentenceBERT':
        ns_val = negative_sampling.SentenceBERTNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval,
            args.data_folder + "/" + args.task + "/valid_sentenceBERTembeds", args.sample_data, args.bert_sentence_model)

    # Create the loaders for the datasets, with the respective negative samplers
    dataloader = dataset.QueryDocumentDataLoader(train, valid, valid,
        tokenizer, ns_train, ns_val, 'classification',
        args.train_batch_size, args.val_batch_size, args.max_seq_len,
        args.sample_data, args.data_folder + args.task)
    train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders()

    # Instantiate transformer model to be used
    model = BertForSequenceClassification.from_pretrained(args.transformer_model)
    model.resize_token_embeddings(len(dataloader.tokenizer))

    # Instantiate trainer that handles fitting.
    trainer = transformer_trainer.TransformerTrainer(model, train_loader, val_loader, test_loader,
        args.num_ns_eval, "classification", tokenizer,
        args.validate_every_epochs, args.num_validation_instances,
        args.num_epochs, args.lr, args.sacred_ex)

    # Train
    model_name = model.__class__.__name__
    logging.info("Fitting {} for {}{}".format(model_name, args.data_folder, args.task))
    trainer.fit()

    # Predict for test
    logging.info("Predicting")
    preds, labels = trainer.test()
    res = results_analyses_tools.evaluate_and_aggregate(preds, labels,
        ['R_10@1', 'R_10@2', 'R_10@5', 'R_2@1'])
    for metric, v in res.items():
        logging.info("Test {} : {:.4f}".format(metric, v))

    # Saving predictions and labels to a file
    max_preds_column = max([len(l) for l in preds])
    preds_df = pd.DataFrame(preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
    preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions.csv", index=False)
    labels_df = pd.DataFrame(labels, columns=["label_" + str(i) for i in range(max_preds_column)])
    labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv", index=False)

    # Saving model to a file
    if args.save_model:
        torch.save(model.state_dict(), args.output_dir + "/" + args.run_id + "/model")

    # In case we want to get uncertainty estimations at prediction time
    if args.predict_with_uncertainty_estimation:
        logging.info("Predicting with dropout.")
        preds, uncertainties, labels, foward_passes_preds = trainer.test_with_dropout(
            args.num_foward_prediction_passes)
        res = results_analyses_tools.evaluate_and_aggregate(preds, labels, ['R_10@1'])
        for metric, v in res.items():
            logging.info("Test (w. dropout and {} forward passes) {} : {:.4f}".format(
                args.num_foward_prediction_passes, metric, v))

        max_preds_column = max([len(l) for l in preds])
        preds_df = pd.DataFrame(preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
        preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout.csv", index=False)
        for i, f_pass_preds in enumerate(foward_passes_preds):
            preds_df = pd.DataFrame(f_pass_preds, columns=["prediction_" + str(j) for j in range(max_preds_column)])
            preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout_f_pass_{}.csv".format(i), index=False)
        labels_df = pd.DataFrame(labels, columns=["label_" + str(i) for i in range(max_preds_column)])
        labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv", index=False)
        uncertainties_df = pd.DataFrame(uncertainties, columns=["uncertainty_" + str(i) for i in range(max_preds_column)])
        uncertainties_df.to_csv(args.output_dir + "/" + args.run_id + "/uncertainties.csv", index=False)

    return trainer.best_ndcg
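# Illustrative example (not part of the repository): the metric names passed to
# evaluate_and_aggregate follow the R_n@k convention, i.e. recall at cutoff k when each
# query has n candidates. A rough standalone re-implementation for intuition only; the
# repository's own implementation may differ in details such as tie handling.
import numpy as np


def recall_n_at_k(preds, labels, k):
    """Fraction of queries whose relevant candidate appears in the top-k by score."""
    hits = 0
    for scores, rels in zip(preds, labels):
        order = np.argsort(scores)[::-1]           # candidate indices, best score first
        hits += int(any(rels[i] for i in order[:k]))
    return hits / len(preds)


# Toy check with 10 candidates per query (R_10@1): first query hit, second missed -> 0.5
_toy_preds = [[0.9, 0.1] + [0.0] * 8, [0.2, 0.8] + [0.0] * 8]
_toy_labels = [[1, 0] + [0] * 8, [1, 0] + [0] * 8]
assert recall_n_at_k(_toy_preds, _toy_labels, k=1) == 0.5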
def run_experiment(args):
    args.run_id = str(ex.current_run._id)

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    tokenizer = BertTokenizer.from_pretrained(args.transformer_model)
    # Conversation Response Ranking datasets need special tokens
    if args.task in ["mantis", "msdialog", "ubuntu_dstc8"]:
        special_tokens_dict = {'additional_special_tokens': ['[UTTERANCE_SEP]', '[TURN_SEP]']}
        tokenizer.add_special_tokens(special_tokens_dict)

    # Load datasets
    train = pd.read_csv(args.data_folder + args.task + "/train.tsv", sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)
    valid = pd.read_csv(args.data_folder + args.task + "/valid.tsv", sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)

    # Choose the negative candidate sampler
    document_col = train.columns[1]
    if args.train_negative_sampler == 'random':
        ns_train = negative_sampling.RandomNegativeSampler(list(train[document_col].values), args.num_ns_train)
    elif args.train_negative_sampler == 'bm25':
        ns_train = negative_sampling.BM25NegativeSamplerPyserini(list(train[document_col].values), args.num_ns_train,
            args.data_folder + args.task + "/anserini_train/", args.sample_data, args.anserini_folder)
    elif args.train_negative_sampler == 'sentenceBERT':
        ns_train = negative_sampling.SentenceBERTNegativeSampler(list(train[document_col].values), args.num_ns_train,
            args.data_folder + args.task + "/train_sentenceBERTembeds", args.sample_data, args.bert_sentence_model)

    if args.test_negative_sampler == 'random':
        ns_val = negative_sampling.RandomNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval)
    elif args.test_negative_sampler == 'bm25':
        ns_val = negative_sampling.BM25NegativeSamplerPyserini(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval,
            args.data_folder + args.task + "/anserini_valid/", args.sample_data, args.anserini_folder)
    elif args.test_negative_sampler == 'sentenceBERT':
        ns_val = negative_sampling.SentenceBERTNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval,
            args.data_folder + args.task + "/valid_sentenceBERTembeds", args.sample_data, args.bert_sentence_model)

    # Create the loaders for the datasets, with the respective negative samplers
    dataloader = dataset.QueryDocumentDataLoader(train, valid, valid,
        tokenizer, ns_train, ns_val, 'classification',
        args.train_batch_size, args.val_batch_size, args.max_seq_len,
        args.sample_data, args.data_folder + args.task)
    train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders()

    # Instantiate transformer model to be used
    model = pointwise_bert.BertForPointwiseLearning.from_pretrained(args.transformer_model,
        loss_function=args.loss_function, smoothing=args.smoothing)
    model.resize_token_embeddings(len(dataloader.tokenizer))

    # Instantiate trainer that handles fitting.
    trainer = transformer_trainer.TransformerTrainer(model, train_loader, val_loader, test_loader,
        args.num_ns_eval, "classification", tokenizer,
        args.validate_every_epochs, args.num_validation_batches,
        args.num_epochs, args.lr, args.sacred_ex, args.validate_every_steps,
        validation_metric='R_10@1', num_training_instances=args.num_training_instances)

    # Train
    model_name = model.__class__.__name__
    logging.info("Fitting {} for {}{}".format(model_name, args.data_folder, args.task))
    trainer.fit()

    # Predict for the validation set
    logging.info("Predicting for the validation set.")
    preds, labels, softmax_logits = trainer.test()
    res = results_analyses_tools.evaluate_and_aggregate(preds, labels, ['R_10@1'])
    for metric, v in res.items():
        logging.info("Test {} : {:.3f}".format(metric, v))
        wandb.log({'step': 0, "dev_" + metric: v})

    # Saving predictions and labels to a file
    max_preds_column = max([len(l) for l in preds])
    preds_df = pd.DataFrame(preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
    preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions.csv", index=False)
    softmax_df = pd.DataFrame(softmax_logits, columns=["prediction_" + str(i) for i in range(max_preds_column)])
    softmax_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_softmax.csv", index=False)
    labels_df = pd.DataFrame(labels, columns=["label_" + str(i) for i in range(max_preds_column)])
    labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv", index=False)

    # Saving model to a file
    if args.save_model:
        torch.save(model.state_dict(), args.output_dir + "/" + args.run_id + "/model")

    # In case we want to get uncertainty estimations at prediction time
    if args.predict_with_uncertainty_estimation:
        logging.info("Predicting with MC dropout for the validation set.")
        preds, labels, softmax_logits, foward_passes_preds, uncertainties = trainer.test_with_dropout(
            args.num_foward_prediction_passes)
        res = results_analyses_tools.evaluate_and_aggregate(preds, labels, ['R_10@1'])
        for metric, v in res.items():
            logging.info("Test (w. dropout and {} forward passes) {} : {:.3f}".format(
                args.num_foward_prediction_passes, metric, v))

        max_preds_column = max([len(l) for l in preds])
        preds_df = pd.DataFrame(preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
        preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout.csv", index=False)
        softmax_df = pd.DataFrame(softmax_logits, columns=["prediction_" + str(i) for i in range(max_preds_column)])
        softmax_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout_softmax.csv", index=False)
        for i, f_pass_preds in enumerate(foward_passes_preds):
            preds_df = pd.DataFrame(f_pass_preds, columns=["prediction_" + str(j) for j in range(max_preds_column)])
            preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout_f_pass_{}.csv".format(i), index=False)
        labels_df = pd.DataFrame(labels, columns=["label_" + str(i) for i in range(max_preds_column)])
        labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv", index=False)
        uncertainties_df = pd.DataFrame(uncertainties, columns=["uncertainty_" + str(i) for i in range(max_preds_column)])
        uncertainties_df.to_csv(args.output_dir + "/" + args.run_id + "/uncertainties.csv", index=False)

    return trainer.best_eval_metric
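# Illustrative example (not part of the trainer): test_with_dropout above returns
# per-candidate uncertainties. A generic MC dropout sketch showing where such numbers
# typically come from -- dropout stays active at prediction time and the variance over
# several stochastic forward passes is used as the uncertainty estimate. Assumes any
# HuggingFace-style classifier whose output exposes .logits; not the exact trainer code.
import torch


def mc_dropout_scores(model, batch, n_passes=10):
    model.train()  # keep dropout layers active during prediction
    with torch.no_grad():
        passes = torch.stack([
            torch.softmax(model(**batch).logits, dim=-1)[:, 1]  # relevance probability per pass
            for _ in range(n_passes)
        ])
    return passes.mean(dim=0), passes.var(dim=0)  # mean prediction and its uncertainty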
def main():
    parser = argparse.ArgumentParser()

    # Input and output configs
    parser.add_argument("--task", default=None, type=str, required=True,
                        help="the task to run bert ranker for")
    parser.add_argument("--data_folder", default=None, type=str, required=True,
                        help="the folder containing data")
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="the folder to output raw negative_samples")
    parser.add_argument("--anserini_folder", default="", type=str, required=False,
                        help="Path containing the anserini bin <anserini_folder>/target/appassembler/bin/IndexCollection")
    parser.add_argument("--sample_data", default=-1, type=int, required=False,
                        help="Amount of data to sample for training and eval. If no sampling required use -1.")
    parser.add_argument("--seed", default=42, type=str, required=False,
                        help="random seed")
    parser.add_argument("--num_ns_train", default=1, type=int, required=False,
                        help="Number of negatively sampled documents to use during training")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s [%(levelname)s] %(message)s",
                        handlers=[logging.StreamHandler()])

    # Load datasets
    add_turn_separator = (args.task != "ubuntu_dstc8")  # Ubuntu data has several utterances from same user in the context.
    train = preprocess_crr.read_crr_tsv_as_df(args.data_folder + args.task + "/train.tsv",
                                              args.sample_data, add_turn_separator)
    valid = preprocess_crr.read_crr_tsv_as_df(args.data_folder + args.task + "/valid.tsv",
                                              args.sample_data, add_turn_separator)

    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

    ns_valid_random = negative_sampling.RandomNegativeSampler(
        list(train["response"].values) + list(valid["response"].values), args.num_ns_train)
    ns_valid_bm25 = negative_sampling.BM25NegativeSamplerPyserini(
        list(train["response"].values) + list(valid["response"].values), args.num_ns_train,
        args.data_folder + args.task + "/anserini_valid/", args.sample_data, args.anserini_folder,
        set_rm3=True)
    ns_valid_sentenceBERT = negative_sampling.SentenceBERTNegativeSampler(
        list(train["response"].values) + list(valid["response"].values), args.num_ns_train,
        args.data_folder + args.task + "/valid_sentenceBERTembeds", args.sample_data,
        args.data_folder + args.task + "/bert-base-cased_{}".format(args.task))  # pre-trained embedding

    examples = []
    examples_cols = ["context", "relevant_response"] + \
        ["cand_random_{}".format(i) for i in range(args.num_ns_train)] + \
        ["random_retrieved_relevant", "random_rank"] + \
        ["cand_bm25_{}".format(i) for i in range(args.num_ns_train)] + \
        ["bm25_retrieved_relevant", "bm25_rank"] + \
        ["cand_sentenceBERT_{}".format(i) for i in range(args.num_ns_train)] + \
        ["sentenceBERT_retrieved_relevant", "sentenceBERT_rank"]

    logging.info("Retrieving candidates using random, bm25 and sentenceBERT.")
    for idx, row in enumerate(tqdm(valid.itertuples(index=False), total=len(valid))):
        context = row[0]
        relevant_response = row[1]
        instance = [context, relevant_response]
        for ns_name, ns in [("random", ns_valid_random),
                            ("bm25", ns_valid_bm25),
                            ("sentenceBERT", ns_valid_sentenceBERT)]:
            ns_candidates, had_relevant, rank_relevant = ns.sample(context, relevant_response)
            for cand in ns_candidates:
                instance.append(cand)
            instance.append(had_relevant)
            instance.append(rank_relevant)
        examples.append(instance)

    examples_df = pd.DataFrame(examples, columns=examples_cols)
    examples_df.to_csv(args.output_dir + "/_all_negative_samples_{}.csv".format(args.task),
                       index=False, sep="\t")
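# Illustrative example (not part of the original script): a quick way to inspect the
# candidate file written above and compare how often each sampler retrieved the relevant
# response. The path is hypothetical; the column names match the examples_cols built above.
import pandas as pd


def summarize_sampler_recall(path="output/_all_negative_samples_mantis.csv"):
    df = pd.read_csv(path, sep="\t")
    for sampler in ["random", "bm25", "sentenceBERT"]:
        in_candidates = df["{}_retrieved_relevant".format(sampler)].astype(float).mean()
        r_at_1 = (df["{}_rank".format(sampler)] == 0).mean()
        print("{}: relevant-in-candidates {:.3f}, R@1 {:.3f}".format(sampler, in_candidates, r_at_1))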
def run_experiment(args):
    args.run_id = str(ex.current_run._id)

    tokenizer = BertTokenizer.from_pretrained(args.transformer_model)

    # Load datasets
    ## Conversation Response Ranking
    if args.task in ["mantis", "msdialog", "ubuntu_dstc8"]:
        add_turn_separator = (args.task != "ubuntu_dstc8")  # Ubuntu data has several utterances from the same user in the context.
        train = preprocess_crr.read_crr_tsv_as_df(args.data_folder + args.task + "/train.tsv",
                                                  args.sample_data, add_turn_separator)
        valid = preprocess_crr.read_crr_tsv_as_df(args.data_folder + args.task + "/valid.tsv",
                                                  args.sample_data, add_turn_separator)
        special_tokens_dict = {'additional_special_tokens': ['[UTTERANCE_SEP]', '[TURN_SEP]']}
        tokenizer.add_special_tokens(special_tokens_dict)
    ## Similar Question Retrieval and Passage Retrieval
    elif args.task in ["qqp", "linkso", "trec2020pr"]:
        if args.sample_data == -1:
            args.sample_data = None
        train = pd.read_csv(args.data_folder + args.task + "/train.tsv", sep="\t", nrows=args.sample_data)
        valid = pd.read_csv(args.data_folder + args.task + "/valid.tsv", sep="\t", nrows=args.sample_data)
    elif args.task == "scisumm":
        train, valid = preprocess_scisumm.transform_to_dfs(
            "../data/Training-Set-2019/Task1/From-Training-Set-2018/")
    elif args.task == "scisumm_ranked":
        train, valid, test = preprocess_scisumm_ranked.transform_to_dfs(
            args.path_to_ranked_file, args.path_to_ranked_test, args.path_to_ranked_dev)

    # Choose the negative candidate sampler
    document_col = train.columns[1]
    ns_train = None
    ns_val = None
    if args.train_negative_sampler == 'random':
        ns_train = negative_sampling.RandomNegativeSampler(list(train[document_col].values), args.num_ns_train)
    elif args.train_negative_sampler == 'bm25':
        ns_train = negative_sampling.BM25NegativeSamplerPyserini(list(train[document_col].values), args.num_ns_train,
            args.data_folder + "/" + args.task + "/anserini_train/", args.sample_data, args.anserini_folder)
    elif args.train_negative_sampler == 'sentenceBERT':
        ns_train = negative_sampling.SentenceBERTNegativeSampler(list(train[document_col].values), args.num_ns_train,
            args.data_folder + "/" + args.task + "/train_sentenceBERTembeds", args.sample_data, args.bert_sentence_model)

    if args.test_negative_sampler == 'random':
        ns_val = negative_sampling.RandomNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval)
    elif args.test_negative_sampler == 'bm25':
        ns_val = negative_sampling.BM25NegativeSamplerPyserini(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval,
            args.data_folder + "/" + args.task + "/anserini_valid/", args.sample_data, args.anserini_folder)
    elif args.test_negative_sampler == 'sentenceBERT':
        ns_val = negative_sampling.SentenceBERTNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval,
            args.data_folder + "/" + args.task + "/valid_sentenceBERTembeds", args.sample_data, args.bert_sentence_model)

    # Create the loaders for the datasets, with the respective negative samplers.
    # Note: `test` is only produced by the scisumm_ranked preprocessing above.
    dataloader = dataset.QueryDocumentDataLoader(train, valid, test,
        tokenizer, ns_train, ns_val, 'classification',
        args.train_batch_size, args.val_batch_size, args.max_seq_len,
        args.sample_data, args.data_folder + "/" + args.task)
    with_ranked_list = (args.task == "scisumm_ranked")
    train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders(with_ranked_list)

    # Instantiate transformer model to be used
    model = BertForSequenceClassification.from_pretrained(args.transformer_model)
    model.resize_token_embeddings(len(dataloader.tokenizer))

    # Instantiate trainer that handles fitting.
    trainer = transformer_trainer.TransformerTrainer(model, train_loader, val_loader, test_loader,
        args.num_ns_eval, "classification", tokenizer,
        args.validate_every_epochs, args.num_validation_instances,
        args.num_epochs, args.lr, args.sacred_ex)

    # Train
    model_name = model.__class__.__name__
    logging.info("Fitting {} for {}{}".format(model_name, args.data_folder, args.task))
    trainer.fit()

    def aggregate_doc_ids(all_queries, doc_ids, preds_without_acc, threshold=0.3):
        """Keep, per query, the candidates scoring above the threshold; if none passes,
        fall back to the single highest-scoring candidate."""
        binary_preds = list((np.array(preds_without_acc) > threshold).astype(int))
        df_doc_ids = pd.DataFrame({'query': all_queries, 'doc_id': doc_ids,
                                   'label': binary_preds, 'similarity': preds_without_acc})
        df_doc_ids_ones = df_doc_ids[df_doc_ids['label'] == 1].groupby('query').agg(list).reset_index()
        df_doc_ids_all = df_doc_ids.groupby('query').agg(list).reset_index()
        fallback_rows = []
        for _, row in df_doc_ids_all.iterrows():
            if all(v == 0 for v in row['label']):
                ranked = sorted(zip(row['similarity'], row['doc_id']),
                                key=lambda pair: pair[0], reverse=True)
                row['label'] = [1]
                row['doc_id'] = [ranked[0][1]]
                row['similarity'] = [ranked[0][0]]
                fallback_rows.append(row)
        return pd.concat([df_doc_ids_ones, pd.DataFrame(fallback_rows)])

    # Predict for the dev set
    logging.info("Predicting")
    preds, labels, doc_ids, all_queries, preds_without_acc = trainer.validate()
    res = results_analyses_tools.evaluate_and_aggregate(preds, labels, [
        'R_10@1', 'R_10@2', 'R_10@5', 'R_2@1',
        'accuracy_0.3', 'accuracy_0.3_upto_1', 'precision_0.3', 'recall_0.3', 'f_score_0.3',
        'accuracy_0.4', 'accuracy_0.4_upto_1', 'precision_0.4', 'recall_0.4', 'f_score_0.4',
        'accuracy_0.5', 'accuracy_0.5_upto_1', 'precision_0.5', 'recall_0.5', 'f_score_0.5'])
    for metric, v in res.items():
        logging.info("Test {} : {:.4f}".format(metric, v))

    # Saving predictions and labels to a file
    max_preds_column = max([len(l) for l in preds])
    preds_df = pd.DataFrame(preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
    preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions.csv", index=False)
    labels_df = pd.DataFrame(labels, columns=["label_" + str(i) for i in range(max_preds_column)])
    labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv", index=False)

    result = aggregate_doc_ids(all_queries, doc_ids, preds_without_acc)
    result.to_csv(args.output_dir + "/" + args.run_id + "/doc_ids_dev.csv", index=False, sep='\t')

    # Predict on the test set
    preds, labels, doc_ids, all_queries, preds_without_acc = trainer.test()
    result = aggregate_doc_ids(all_queries, doc_ids, preds_without_acc)
    result.to_csv(args.output_dir + "/" + args.run_id + "/doc_ids_test.csv", index=False, sep='\t')

    # Saving model to a file
    if args.save_model:
        torch.save(model.state_dict(), args.output_dir + "/" + args.run_id + "/model")

    # In case we want to get uncertainty estimations at prediction time
    if args.predict_with_uncertainty_estimation:
        logging.info("Predicting with dropout.")
        preds, uncertainties, labels, foward_passes_preds = trainer.test_with_dropout(
            args.num_foward_prediction_passes)
        res = results_analyses_tools.evaluate_and_aggregate(preds, labels, ['R_10@1'])
        for metric, v in res.items():
            logging.info("Test (w. dropout and {} forward passes) {} : {:.4f}".format(
                args.num_foward_prediction_passes, metric, v))

        max_preds_column = max([len(l) for l in preds])
        preds_df = pd.DataFrame(preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
        preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout.csv", index=False)
        for i, f_pass_preds in enumerate(foward_passes_preds):
            preds_df = pd.DataFrame(f_pass_preds, columns=["prediction_" + str(j) for j in range(max_preds_column)])
            preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout_f_pass_{}.csv".format(i), index=False)
        labels_df = pd.DataFrame(labels, columns=["label_" + str(i) for i in range(max_preds_column)])
        labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv", index=False)
        uncertainties_df = pd.DataFrame(uncertainties, columns=["uncertainty_" + str(i) for i in range(max_preds_column)])
        uncertainties_df.to_csv(args.output_dir + "/" + args.run_id + "/uncertainties.csv", index=False)

    return trainer.best_ndcg
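# Illustrative example (toy data, not part of the experiment): the dev/test aggregation
# above keeps candidates scoring above 0.3 and, when no candidate passes for a query,
# falls back to the single highest-scoring one. The same rule on a tiny DataFrame:
import pandas as pd


def select_docs(scores, threshold=0.3):
    selected = []
    for _, group in scores.groupby("query"):
        passing = group[group["similarity"] > threshold]
        if passing.empty:
            passing = group.nlargest(1, "similarity")  # fallback: best-scoring candidate
        selected.append(passing)
    return pd.concat(selected)


_toy = pd.DataFrame({"query": ["q1", "q1", "q2", "q2"],
                     "doc_id": ["d1", "d2", "d3", "d4"],
                     "similarity": [0.45, 0.10, 0.20, 0.25]})
# q1 keeps d1 (0.45 > 0.3); q2 has no candidate above 0.3, so d4 (0.25) is kept as fallback.
print(select_docs(_toy))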
def main():
    parser = argparse.ArgumentParser()

    # Input and output configs
    parser.add_argument("--task", default=None, type=str, required=True,
                        help="the task to run bert ranker for")
    parser.add_argument("--data_folder", default=None, type=str, required=True,
                        help="the folder containing data")
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="the folder to output raw negative_samples")
    parser.add_argument("--anserini_folder", default="", type=str, required=True,
                        help="Path containing the anserini bin <anserini_folder>/target/appassembler/bin/IndexCollection")
    parser.add_argument("--sample_data", default=-1, type=int, required=False,
                        help="Amount of data to sample for training and eval. If no sampling required use -1.")
    parser.add_argument("--seed", default=42, type=str, required=False,
                        help="random seed")
    parser.add_argument("--num_ns", default=1, type=int, required=False,
                        help="Number of negatively sampled documents to use during training")
    parser.add_argument("--sentence_bert_model", type=str, required=False, default="all-MiniLM-L6-v2",
                        help="Model to calculate sentence embeddings with for sentenceBERT negative sampling.")
    parser.add_argument("--num_expansion_terms", default=10, type=int, required=False,
                        help="expansion terms for rm3")
    parser.add_argument("--num_expansion_docs", default=10, type=int, required=False,
                        help="expansion docs for rm3")
    parser.add_argument("--original_query_weight", default=0.5, type=float, required=False,
                        help="original query weight for rm3")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s [%(levelname)s] %(message)s",
                        handlers=[logging.StreamHandler()])

    # Load datasets
    train = pd.read_csv(args.data_folder + args.task + "/train.tsv", sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)
    test = pd.read_csv(args.data_folder + args.task + "/test.tsv", sep="\t",
                       nrows=args.sample_data if args.sample_data != -1 else None)

    # ns_test_random = negative_sampling.RandomNegativeSampler(
    #     list(train["response"].values) + list(test["response"].values), args.num_ns)
    # ns_test_bm25 = negative_sampling.BM25NegativeSamplerPyserini(
    #     list(train["response"].values) + list(test["response"].values), args.num_ns,
    #     args.data_folder + args.task + "/anserini_test_{}/".format(args.sample_data),
    #     args.sample_data, args.anserini_folder)
    ns_test_bm25_rm3 = negative_sampling.BM25NegativeSamplerPyserini(
        list(train["response"].values) + list(test["response"].values), args.num_ns,
        args.data_folder + args.task + "/anserini_test_{}/".format(args.sample_data),
        args.sample_data, args.anserini_folder,
        set_rm3=True, num_expansion_docs=args.num_expansion_docs,
        num_expansion_terms=args.num_expansion_terms,
        original_query_weight=args.original_query_weight)
    # ns_test_sentenceBERT = negative_sampling.SentenceBERTNegativeSampler(
    #     list(train["response"].values) + list(test["response"].values), args.num_ns,
    #     args.data_folder + args.task + "/test_sentenceBERTembeds", args.sample_data,
    #     args.sentence_bert_model, use_cache_for_embeddings=False)

    ns_info = [
        # (ns_test_random, ["cand_random_{}".format(i) for i in range(args.num_ns)] +
        #  ["random_retrieved_relevant", "random_rank"], 'random'),
        # (ns_test_bm25, ["cand_bm25_{}".format(i) for i in range(args.num_ns)] +
        #  ["bm25_retrieved_relevant", "bm25_rank"], 'bm25'),
        (ns_test_bm25_rm3, ["cand_bm25rm3_{}".format(i) for i in range(args.num_ns)] +
         ["bm25rm3_retrieved_relevant", "bm25rm3_rank"], 'bm25rm3'),
        # (ns_test_sentenceBERT, ["cand_sentenceBERT_{}".format(i) for i in range(args.num_ns)] +
        #  ["sentenceBERT_retrieved_relevant", "sentenceBERT_rank"], 'sentenceBERT')
    ]

    examples = []
    examples_cols = ["context", "relevant_response"] + \
        reduce(lambda x, y: x + y, [t[1] for t in ns_info])

    logging.info("Retrieving candidates using different negative sampling strategies for {}.".format(args.task))
    recall_df = []
    for idx, row in enumerate(tqdm(test.itertuples(index=False), total=len(test))):
        context = row[0]
        relevant_response = row[1]
        instance = [context, relevant_response]
        for ns, _, ns_name in ns_info:
            ns_candidates, scores, had_relevant, rank_relevant, _ = ns.sample(context, [relevant_response])
            for cand in ns_candidates:
                instance.append(cand)
            instance.append(had_relevant)
            instance.append(rank_relevant)
            r10 = 1 if had_relevant else 0
            r1 = 1 if rank_relevant == 0 else 0
            recall_df.append([r10, r1, ns_name])
        examples.append(instance)

    recall_df = pd.DataFrame(recall_df, columns=["R@10", "R@1", "NS"])
    # recall_df[recall_df["NS"] == "random"][["R@10", "R@1"]].to_csv(
    #     args.output_dir + "/recall_df_random_{}.csv".format(args.task), index=False, sep="\t")
    # recall_df[recall_df["NS"] == "bm25rm3"][["R@10", "R@1"]].to_csv(
    #     args.output_dir + "/recall_df_bm25rm3_{}.csv".format(args.task), index=False, sep="\t")

    examples_df = pd.DataFrame(examples, columns=examples_cols)
    print("R@10: {}".format(
        examples_df[[c for c in examples_df.columns if 'retrieved_relevant' in c]].sum() / examples_df.shape[0]))
    rank_col = [c for c in examples_df.columns if 'rank' in c][0]
    print("R@1: {}".format(examples_df[examples_df[rank_col] == 0].shape[0] / examples_df.shape[0]))
def run_experiment(args):
    args.run_id = str(ex.current_run._id)

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    tokenizer = BertTokenizer.from_pretrained(args.transformer_model)

    # Load datasets
    train = pd.read_csv(args.data_folder + args.task + "/train_test.tsv", sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)
    valid = pd.read_csv(args.data_folder + args.task + "/valid_test.tsv", sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)
    special_tokens_dict = {'additional_special_tokens': ['[UTTERANCE_SEP]', '[TURN_SEP]']}
    tokenizer.add_special_tokens(special_tokens_dict)

    # Choose the negative candidate sampler
    document_col = train.columns[1]
    ns_train = negative_sampling.BM25NegativeSamplerPyserini(
        list(train[document_col].values), args.num_ns_train,
        args.data_folder + args.task + "/anserini_train/", args.sample_data, args.anserini_folder)
    ns_val_random = negative_sampling.RandomNegativeSampler(
        list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval)
    ns_val_bm25 = negative_sampling.BM25NegativeSamplerPyserini(
        list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval,
        args.data_folder + args.task + "/anserini_valid/", args.sample_data, args.anserini_folder)
    ns_val_bert_sentence = negative_sampling.SentenceBERTNegativeSampler(
        list(valid[document_col].values) + list(train[document_col].values), args.num_ns_eval,
        args.data_folder + args.task + "/valid_sentenceBERTembeds", args.sample_data, args.bert_sentence_model)

    # Create the loaders for the datasets, with the respective negative samplers
    cross_ns_val = {}
    cross_ns_train = {}
    for (ns_name, ns_val) in [("random", ns_val_random), ("bm25", ns_val_bm25),
                              ("sentenceBERT", ns_val_bert_sentence)]:
        dataloader = dataset.QueryDocumentDataLoader(train, valid, valid,
            tokenizer, ns_train, ns_val, 'classification',
            args.train_batch_size, args.val_batch_size, args.max_seq_len,
            args.sample_data, args.data_folder + args.task)
        train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders()
        cross_ns_val[ns_name] = val_loader
        cross_ns_train[ns_name] = train_loader

    # Instantiate transformer model to be used
    model = BertForSequenceClassification.from_pretrained(args.transformer_model)
    model.resize_token_embeddings(len(dataloader.tokenizer))

    # Instantiate trainer that handles fitting.
    trainer = transformer_trainer.TransformerTrainer(model,
        cross_ns_train["bm25"], cross_ns_val["bm25"], cross_ns_val["bm25"],
        args.num_ns_eval, "classification", tokenizer,
        args.validate_every_epochs, args.num_validation_batches,
        args.num_epochs, args.lr, args.sacred_ex)

    # Train
    model_name = model.__class__.__name__
    logging.info("Fitting {} for {}{}".format(model_name, args.data_folder, args.task))
    trainer.fit()

    def write_config(folder, test_dataset, test_negative_sampler):
        """Write a config.json describing the run stored in `folder`."""
        with open(folder + "/config.json", "w") as f:
            config_w = {'args': vars(args)}
            config_w['args']['test_dataset'] = test_dataset
            config_w['args']['train_negative_sampler'] = 'bm25'
            config_w['args']['test_negative_sampler'] = test_negative_sampler
            if 'sacred_ex' in config_w['args']:
                del config_w['args']['sacred_ex']
            json.dump(config_w, f, indent=4)

    def save_predictions(folder, preds, labels, softmax_logits, suffix=""):
        """Save per-query predictions, softmax logits and labels to csv files."""
        max_preds_column = max([len(l) for l in preds])
        pd.DataFrame(preds, columns=["prediction_" + str(i) for i in range(max_preds_column)]) \
            .to_csv(folder + "/predictions{}.csv".format(suffix), index=False)
        pd.DataFrame(softmax_logits, columns=["prediction_" + str(i) for i in range(max_preds_column)]) \
            .to_csv(folder + "/predictions{}_softmax.csv".format(suffix), index=False)
        pd.DataFrame(labels, columns=["label_" + str(i) for i in range(max_preds_column)]) \
            .to_csv(folder + "/labels.csv", index=False)
        return max_preds_column

    def save_uncertainty_predictions(folder, preds, labels, softmax_logits, foward_passes_preds, uncertainties):
        """Save MC-dropout predictions, per-pass predictions and uncertainties to csv files."""
        max_preds_column = save_predictions(folder, preds, labels, softmax_logits, suffix="_with_dropout")
        for i, f_pass_preds in enumerate(foward_passes_preds):
            pd.DataFrame(f_pass_preds, columns=["prediction_" + str(j) for j in range(max_preds_column)]) \
                .to_csv(folder + "/predictions_with_dropout_f_pass_{}.csv".format(i), index=False)
        pd.DataFrame(uncertainties, columns=["uncertainty_" + str(i) for i in range(max_preds_column)]) \
            .to_csv(folder + "/uncertainties.csv", index=False)

    # Cross-NS predictions
    for ns_index, ns_name in enumerate(["random", "bm25", "sentenceBERT"]):
        logging.info("Predicting for NS {}".format(ns_name))
        output_folder = args.output_dir + "/" + str(int(args.run_id) + ns_index)
        os.makedirs(output_folder, exist_ok=True)
        write_config(output_folder, args.task, ns_name)

        trainer.num_validation_batches = -1  # no sample
        preds, labels, softmax_logits = trainer.predict(cross_ns_val[ns_name])

        # Saving predictions and labels to a file
        save_predictions(output_folder, preds, labels, softmax_logits)

        # Saving model to a file
        if args.save_model:
            torch.save(model.state_dict(), output_folder + "/model")

        # In case we want to get uncertainty estimations at prediction time
        if args.predict_with_uncertainty_estimation:
            logging.info("Predicting with dropout.")
            trainer.num_validation_batches = -1  # no sample
            preds, labels, softmax_logits, foward_passes_preds, uncertainties = \
                trainer.predict_with_uncertainty(cross_ns_val[ns_name], args.num_foward_prediction_passes)
            save_uncertainty_predictions(output_folder, preds, labels, softmax_logits,
                                         foward_passes_preds, uncertainties)

    # Cross-dataset predictions
    cross_datasets = set(["msdialog", "ubuntu_dstc8", "mantis"]) - set([args.task])
    cross_datasets = sorted(list(cross_datasets))
    cross_data_val_dataloader = {}
    for cross_task in cross_datasets:
        # Mirrors the other scripts: Ubuntu keeps several utterances from the same user
        # in the context, so no [TURN_SEP] is added for it.
        add_turn_separator = (cross_task != "ubuntu_dstc8")
        train_cross = preprocess_crr.read_crr_tsv_as_df(
            args.data_folder + cross_task + "/train.tsv", args.sample_data, add_turn_separator)
        valid_cross = preprocess_crr.read_crr_tsv_as_df(
            args.data_folder + cross_task + "/valid.tsv", args.sample_data, add_turn_separator)
        ns_train_cross = negative_sampling.BM25NegativeSamplerPyserini(
            list(train_cross[document_col].values), args.num_ns_train,
            args.data_folder + cross_task + "/anserini_train/", args.sample_data, args.anserini_folder)
        ns_val_bm25_cross = negative_sampling.BM25NegativeSamplerPyserini(
            list(valid_cross[document_col].values) + list(train_cross[document_col].values), args.num_ns_eval,
            args.data_folder + cross_task + "/anserini_valid/", args.sample_data, args.anserini_folder)
        dataloader = dataset.QueryDocumentDataLoader(train_cross, valid_cross, valid_cross,
            tokenizer, ns_train_cross, ns_val_bm25_cross, 'classification',
            args.train_batch_size, args.val_batch_size, args.max_seq_len,
            args.sample_data, args.data_folder + cross_task)
        _, val_loader, _ = dataloader.get_pytorch_dataloaders()
        cross_data_val_dataloader[cross_task] = val_loader

    for task_index, cross_task in enumerate(cross_datasets):
        logging.info("Predicting for dataset {}".format(cross_task))
        # ns_index still holds its final value from the loop above, so these folders
        # continue the numbering after the cross-NS output folders.
        output_folder = args.output_dir + "/" + str(int(args.run_id) + ns_index + task_index + 1)
        os.makedirs(output_folder, exist_ok=True)
        write_config(output_folder, cross_task, 'bm25')

        trainer.num_validation_batches = -1  # no sample
        preds, labels, softmax_logits = trainer.predict(cross_data_val_dataloader[cross_task])

        # Saving predictions and labels to a file
        save_predictions(output_folder, preds, labels, softmax_logits)

        # Saving model to a file
        if args.save_model:
            torch.save(model.state_dict(), output_folder + "/model")

        # In case we want to get uncertainty estimations at prediction time
        if args.predict_with_uncertainty_estimation:
            logging.info("Predicting with dropout.")
            preds, labels, softmax_logits, foward_passes_preds, uncertainties = \
                trainer.predict_with_uncertainty(cross_data_val_dataloader[cross_task],
                                                 args.num_foward_prediction_passes)
            save_uncertainty_predictions(output_folder, preds, labels, softmax_logits,
                                         foward_passes_preds, uncertainties)

    return 0.0
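# Illustrative example (not part of the experiment): every run folder written above contains
# config.json plus predictions.csv / predictions_softmax.csv / labels.csv (and, optionally,
# the dropout and uncertainty files). A small sketch, with a hypothetical output layout, for
# collecting the cross-NS and cross-dataset runs into one overview table:
import glob
import json
import pandas as pd


def collect_runs(output_dir="output"):
    rows = []
    for config_path in glob.glob(output_dir + "/*/config.json"):
        run_dir = config_path.rsplit("/", 1)[0]
        with open(config_path) as f:
            cfg = json.load(f)["args"]
        preds = pd.read_csv(run_dir + "/predictions.csv")
        rows.append({"run": run_dir,
                     "test_dataset": cfg.get("test_dataset"),
                     "test_negative_sampler": cfg.get("test_negative_sampler"),
                     "num_queries": len(preds)})
    return pd.DataFrame(rows)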
def get_examples(self, filename, ns_name, anserini_folder, sent_bert_model, loss, output_dir,
                 input_pair=True, eval_data=False, denoise_negatives=False, num_ns_for_denoising=100,
                 generative_model='facebook/blenderbot-3B', remove_cand_subsets=True,
                 last_utterance_only=False, use_external_corpus=False):
    """filename specifies which data split to use (train.csv, dev.csv, test.csv)."""
    filepath = os.path.join(self.dataset_folder, filename)
    self.data = pd.read_csv(filepath, sep="\t")

    if denoise_negatives:
        num_ns = num_ns_for_denoising
    else:
        num_ns = 10

    candidates = list(self.data["response"].values)
    if use_external_corpus:
        external_datasets = [
            'movie-corpus', 'wiki-corpus', 'subreddit-Ubuntu', 'subreddit-microsoft',
            'subreddit-apple', 'subreddit-Database', 'subreddit-DIY', 'subreddit-electronics',
            'subreddit-ENGLISH', 'subreddit-gis', 'subreddit-Physics', 'subreddit-scifi',
            'subreddit-statistics', 'subreddit-travel', 'subreddit-worldbuilding'
        ]
        for ds_name in external_datasets:
            corpus = Corpus(download(ds_name))
            corpus.print_summary_stats()
            for utt in corpus.iter_utterances():
                if utt.text != "":
                    candidates.append(utt.text)

    if ns_name == "random" or eval_data:
        self.negative_sampler = negative_sampling.RandomNegativeSampler(candidates, num_ns)
    elif ns_name == "bm25":
        index_folder = "/anserini_train_-1/"
        if use_external_corpus:
            index_folder = index_folder.replace("train", "train_expanded_")
        self.negative_sampler = negative_sampling.BM25NegativeSamplerPyserini(
            candidates, num_ns, self.dataset_folder + index_folder, -1, anserini_folder)
    elif ns_name == "sentence_transformer":
        self.negative_sampler = negative_sampling.SentenceBERTNegativeSampler(
            candidates, num_ns, self.dataset_folder + "/train_sentenceBERTembeds", -1,
            sent_bert_model, large_index=use_external_corpus)
    elif ns_name == "generative":
        self.negative_sampler = negative_sampling.GenerativeNegativeSamplerForDialogue(
            num_ns, generative_model)

    if loss == 'MarginMSELoss':
        self.negative_sampler.score_relevant_docs = True
    if loss == "ContrastiveLoss" and not eval_data:
        input_pair = False
    if loss == "OnlineContrastiveLoss" and not eval_data:
        input_pair = False

    examples = []
    scores_df = []
    # Code used to annotate some samples
    # samples_to_annotate = []
    # self.data = self.data.sample(200, random_state=42)
    # self.negative_sampler.score_relevant_docs = True
    count_ns_part_of_context = 0
    for idx, row in enumerate(tqdm(self.data.itertuples(index=False), total=len(self.data))):
        context = row[0]
        if last_utterance_only:
            if 'msdialog' in self.dataset_folder:
                context = context.split("[TURN_SEP]")[-1].split("[UTTERANCE_SEP]")[0].strip()
            else:
                context = context.split("[TURN_SEP]")[-1].split("[UTTERANCE_SEP]")[-2].strip()
        relevant_response = row[1]
        if not input_pair:
            examples.append(InputExample(guid=filename + str(idx) + "_pos",
                                         texts=[context, relevant_response], label=1.0))

        if ns_name == "bm25" and not eval_data:
            ns_candidates, ns_scores, _, _, rel_scores = self.negative_sampler.sample(
                context, [relevant_response], max_query_len=512, normalize_scores=False, rel_doc_id=str(idx))
        else:
            ns_candidates, ns_scores, _, _, rel_scores = self.negative_sampler.sample(
                context, [relevant_response])
        rel_score = rel_scores[0]

        if denoise_negatives:
            zipped = zip(ns_candidates[-10:], ns_scores[-10:])
        else:
            zipped = zip(ns_candidates, ns_scores)

        for ns, score_ns in zipped:
            if remove_cand_subsets and ns.replace("<<<AGENT>>>: ", "") in context:
                count_ns_part_of_context += 1
            else:
                if input_pair:
                    examples.append(InputExample(texts=[context, relevant_response, ns],
                                                 label=float(rel_score - score_ns)))
                    scores_df.append(rel_score - score_ns)
                    # samples_to_annotate.append([self.dataset_folder.split("/")[-1], ns_name,
                    #     context, relevant_response, ns, rel_score, score_ns])
                else:
                    examples.append(InputExample(guid=filename + str(idx) + "_neg",
                                                 texts=[context, ns], label=0.0))

    logging.info("{} {} count of ns which are part of the context: {} out of {}.".format(
        self.dataset_folder.split("/")[-1], ns_name, count_ns_part_of_context, len(examples)))
    # print(pd.DataFrame(scores_df).describe())
    # pd.DataFrame(samples_to_annotate, columns=['task', 'ns', 'context', 'rel_response',
    #     'negative_sample', 'rel_score', 'score_negative']).to_csv(
    #     output_dir + "neg_samples_{}_{}.csv".format(ns_name, self.dataset_folder.split("/")[-1]), index=False)
    if loss == 'MarginMSELoss':
        pd.DataFrame(scores_df).to_csv(
            output_dir + "MarginScores_{}_{}.csv".format(ns_name, self.dataset_folder.split("/")[-1]))
    return examples
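# Illustrative usage sketch (assumptions flagged below, not the repository's training script):
# get_examples produces sentence-transformers InputExample objects -- pairs labelled 0/1 for the
# contrastive losses, or (context, relevant, negative) triplets whose label is the score margin
# for MarginMSELoss. The reader class name below is hypothetical (the class owning get_examples
# is not shown in this excerpt); the sentence-transformers calls follow its standard fit() API.
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, losses


def train_biencoder_example():
    reader = CRRBenchmarkDataReader("data/msdialog")  # hypothetical class exposing get_examples
    train_examples = reader.get_examples("train.tsv", ns_name="bm25",
                                         anserini_folder="anserini/",
                                         sent_bert_model="all-MiniLM-L6-v2",
                                         loss="MarginMSELoss", output_dir="output/")
    model = SentenceTransformer("all-MiniLM-L6-v2")
    train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
    train_loss = losses.MarginMSELoss(model)  # expects (query, positive, negative) triplets with a margin label
    model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=1)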