def __init__(self):
    """Load the OpenBook results and raw data used for analysis.

    Sets the per-question test MRR arrays for BM25, USE-QA and the two
    hybrid models (threshold and logistic-regression router), the raw
    ``test_list`` / ``kb`` entries of the retrieval pickle, and the
    router output of the logistic-regression hybrid.

    Raises:
        OSError: if either pickle file cannot be opened.
        KeyError: if an expected key is missing from the loaded data.
    """
    # Function-scope import so this method does not rely on the module
    # already importing ``os``.
    import os

    self.dataset_result = LoadRawData("openbook")
    self.bm25_test_mrrs = self.dataset_result.result_test_bm25["mrr"]
    self.useqa_test_mrrs = self.dataset_result.result_test_useqa["mrr"]

    # os.path.join yields a valid path whether or not generated_data_path
    # ends with a separator; the previous string concatenation mixed
    # "/sub/..." and "sub/..." suffixes, so one of the two paths was
    # fragile.
    hybrid_result_path = os.path.join(
        generated_data_path, "hybrid_classifier_result",
        "openbook_hybrid_result.pickle")
    # NOTE: pickle.load executes arbitrary code; only load trusted files.
    with open(hybrid_result_path, "rb") as handle:
        self.hybrid_models_result = pickle.load(handle)

    self.hybrid_threshold_test_mrrs = (
        self.hybrid_models_result["hybrid_threshold"]["mrr"])
    self.hybrid_lr_test_mrrs = self.hybrid_models_result["hybrid_lr"]["mrr"]

    # Load the OpenBook test data.
    retrieval_data_path = os.path.join(
        generated_data_path, "openbook_useqa",
        "openbook_useqa_retrieval_data.pickle")
    with open(retrieval_data_path, "rb") as handle:
        openbook_retrieval_raw = pickle.load(handle)

    self.test_list = openbook_retrieval_raw["test_list"]
    self.kb = openbook_retrieval_raw["kb"]

    # Router output of the logistic-regression hybrid, i.e. the array
    # indicating which queries use the neural method.
    self.router_pred = self.hybrid_models_result["hybrid_lr"]["router_output"]
def __init__(self):
    """Load the NQ results and raw data used for analysis.

    The hybrid result pickle holds one entry per seed. Each entry
    carries ``test_index_in_all_list`` (the indices of that seed's test
    split) plus the ``hybrid_threshold`` and ``hybrid_lr`` results. The
    per-seed arrays are concatenated in seed order, and the BM25 /
    USE-QA MRRs are gathered with the same indices so that all arrays
    line up element by element.

    Raises:
        OSError: if either pickle file cannot be opened.
        KeyError: if an expected key is missing from the loaded data.
    """
    # Function-scope import so this method does not rely on the module
    # already importing ``os``.
    import os

    self.dataset_result = LoadRawData("nq")
    self.bm25_test_mrrs_raw = self.dataset_result.result_test_bm25
    self.useqa_test_mrrs_raw = self.dataset_result.result_test_useqa

    # os.path.join yields a valid path whether or not generated_data_path
    # ends with a separator; the previous string concatenation mixed
    # "/sub/..." and "sub/..." suffixes.
    hybrid_result_path = os.path.join(
        generated_data_path, "hybrid_classifier_result",
        "nq_hybrid_result.pickle")
    # NOTE: pickle.load executes arbitrary code; only load trusted files.
    with open(hybrid_result_path, "rb") as handle:
        self.hybrid_models_result = pickle.load(handle)

    all_test_indices = [
        single_seed["test_index_in_all_list"]
        for single_seed in self.hybrid_models_result
    ]

    # Baseline MRRs restricted to each seed's test split, in seed order.
    self.bm25_test_mrrs = np.concatenate([
        self.bm25_test_mrrs_raw["mrr"][test_split]
        for test_split in all_test_indices
    ])
    self.useqa_test_mrrs = np.concatenate([
        self.useqa_test_mrrs_raw["mrr"][test_split]
        for test_split in all_test_indices
    ])

    # Hybrid MRRs, concatenated in the same seed order.
    self.hybrid_threshold_test_mrrs = np.concatenate([
        single_seed["hybrid_threshold"]["mrr"]
        for single_seed in self.hybrid_models_result
    ])
    self.hybrid_lr_test_mrrs = np.concatenate([
        single_seed["hybrid_lr"]["mrr"]
        for single_seed in self.hybrid_models_result
    ])

    # Load the NQ retrieval data. (A leftover debug print of the pickle's
    # keys was removed from here.)
    retrieval_data_path = os.path.join(
        generated_data_path, "nq_retrieval_raw", "nq_retrieval_data.pickle")
    with open(retrieval_data_path, "rb") as handle:
        nq_retrieval_raw = pickle.load(handle)

    # NOTE(review): test_list is read from "train_list"; presumably the
    # per-seed test splits index into this list - confirm.
    self.test_list = nq_retrieval_raw["train_list"]
    self.sent_list = nq_retrieval_raw["sent_list"]
    self.doc_list = nq_retrieval_raw["doc_list"]
    self.resp_list = nq_retrieval_raw["resp_list"]

    # Router output of the logistic-regression hybrid for every seed,
    # i.e. the array indicating which queries use the neural method.
    self.router_pred = np.concatenate([
        single_seed["hybrid_lr"]["router_output"]
        for single_seed in self.hybrid_models_result
    ])
def debug_experiment(dataset="openbook"):
    """Run each hybrid reranker once on ``dataset``.

    Each of the three models (score sum, threshold, logistic-regression
    router) is constructed from the dev-split BM25 / USE-QA results and
    then applied to the test-split results. The return values of the
    reranking calls are discarded.

    Args:
        dataset: dataset name passed to ``LoadRawData`` and to every
            model constructor.
    """
    raw = LoadRawData(dataset)
    dev_bm25, dev_useqa = raw.result_dev_bm25, raw.result_dev_useqa
    test_bm25, test_useqa = raw.result_test_bm25, raw.result_test_useqa

    # Unsupervised score-sum reranker.
    sum_model = UnsupervisedSum(dev_bm25, dev_useqa, dataset)
    sum_model.reranking_with_sum_score(test_bm25, test_useqa)

    # Unsupervised threshold reranker.
    threshold_model = UnsupervisedThreshold(dev_bm25, dev_useqa, dataset)
    threshold_model.reranking_by_threshold(test_bm25, test_useqa)

    # Logistic-regression router. The meaning of the trailing 7 is not
    # visible from this function.
    lr_router = LogisticRegressionRouter(dev_bm25, dev_useqa, dataset, 7)
    lr_router.reranking_by_lr_router(test_bm25, test_useqa)
def __init__(self):
    """Load the SQuAD results and raw data used for analysis.

    The hybrid result pickle holds one entry per seed, and the hybrid
    arrays are the per-seed results concatenated in seed order. The
    BM25 / USE-QA test MRRs are repeated once per seed so that all
    arrays line up element by element.

    Raises:
        OSError: if either pickle file cannot be opened.
        KeyError: if an expected key is missing from the loaded data.
    """
    # Function-scope import so this method does not rely on the module
    # already importing ``os``.
    import os

    self.dataset_result = LoadRawData("squad")

    # os.path.join yields a valid path whether or not generated_data_path
    # ends with a separator; the previous string concatenation mixed
    # "/sub/..." and "sub/..." suffixes.
    hybrid_result_path = os.path.join(
        generated_data_path, "hybrid_classifier_result",
        "squad_hybrid_result.pickle")
    # NOTE: pickle.load executes arbitrary code; only load trusted files.
    with open(hybrid_result_path, "rb") as handle:
        self.hybrid_models_result = pickle.load(handle)

    # Repeat the baselines once per seed. This count was hard-coded to 5,
    # which silently misaligns the arrays if the pickle holds a different
    # number of seeds.
    num_seeds = len(self.hybrid_models_result)
    self.bm25_test_mrrs = np.concatenate(
        [self.dataset_result.result_test_bm25["mrr"]] * num_seeds)
    self.useqa_test_mrrs = np.concatenate(
        [self.dataset_result.result_test_useqa["mrr"]] * num_seeds)

    self.hybrid_threshold_test_mrrs = np.concatenate([
        single_seed["hybrid_threshold"]["mrr"]
        for single_seed in self.hybrid_models_result
    ])
    self.hybrid_lr_test_mrrs = np.concatenate([
        single_seed["hybrid_lr"]["mrr"]
        for single_seed in self.hybrid_models_result
    ])

    # Load the SQuAD retrieval data; the test questions are read from the
    # pickle's "dev_list" entry.
    retrieval_data_path = os.path.join(
        generated_data_path, "squad_useqa", "squad_retrieval_data.pickle")
    with open(retrieval_data_path, "rb") as handle:
        squad_retrieval_raw = pickle.load(handle)

    self.test_list = squad_retrieval_raw["dev_list"]
    self.sent_list = squad_retrieval_raw["sent_list"]
    self.doc_list = squad_retrieval_raw["doc_list"]
    self.resp_list = squad_retrieval_raw["resp_list"]

    # Router output of the logistic-regression hybrid for every seed,
    # i.e. the array indicating which queries use the neural method.
    self.router_pred = np.concatenate([
        single_seed["hybrid_lr"]["router_output"]
        for single_seed in self.hybrid_models_result
    ])
def __init__(self):
    """Record the dataset name ("nq") and load its raw results."""
    dataset_name = "nq"
    self.dataset = dataset_name
    # LoadRawData receives the same dataset name stored on the instance.
    self.dataset_results = LoadRawData(dataset_name)
def __init__(self):
    """Record the dataset name ("openbook") and load its raw results."""
    dataset_name = "openbook"
    self.dataset = dataset_name
    # LoadRawData receives the same dataset name stored on the instance.
    self.dataset_results = LoadRawData(dataset_name)