def calculate_overall_ranking(self, raw_queries, settings):
    """Print the mean average precision of two ranking modes.

    Every query derived from ``raw_queries`` is ranked twice: once with the
    search text under the ``"whole-document"`` key using
    ``Mode.without_importance_to_sections``, and once with the search text
    under the query's own ``"imrad"`` key using
    ``Mode.importance_to_sections``. The average precision of each ranking
    against the query's referenced papers is collected and the mean per
    mode is printed as a ``label & count & mean`` row (mean rounded to four
    decimals).

    Args:
        raw_queries: Input handed to ``__raw_queries_to_queries``.
        settings: Ranking settings dict. It is copied, not modified; the
            ``"mode"`` entry is overridden in the copies.

    Returns:
        None. Results are written to stdout. With no queries the rows are
        printed with a count of 0 and a mean of 0.0.
    """
    api = API()
    queries = self.__raw_queries_to_queries(raw_queries)

    # Work on private copies so the caller's settings dict keeps its own
    # "mode" value after this call.
    settings_whole = copy.deepcopy(settings)
    settings_whole["mode"] = Mode.without_importance_to_sections
    settings_sec = copy.deepcopy(settings)
    settings_sec["mode"] = Mode.importance_to_sections

    ap_whole_scores = []
    ap_sec_scores = []
    total = len(queries)
    for i, query in enumerate(queries):
        progressBar(i, total)
        search_query = query["search_query"]
        ranked_papers_whole = api.get_papers({"whole-document": search_query}, settings_whole)
        ranked_papers_sec = api.get_papers({query["imrad"]: search_query}, settings_sec)
        relevant_paper = [api.get_paper(reference["paper_id"]) for reference in query["references"]]

        ap_whole_scores.append(self.average_precision(ranked_papers_whole, relevant_paper))
        ap_sec_scores.append(self.average_precision(ranked_papers_sec, relevant_paper))

    # Guard the mean against an empty query list (would divide by zero).
    result_whole = sum(ap_whole_scores) / len(ap_whole_scores) if ap_whole_scores else 0.0
    result_sec = sum(ap_sec_scores) / len(ap_sec_scores) if ap_sec_scores else 0.0

    print()
    print("{} & {} & {}".format(Mode.without_importance_to_sections.name.replace("_", " "),
                                len(ap_whole_scores), round(result_whole, 4)))
    print("{} & {} & {}".format(Mode.importance_to_sections.name.replace("_", " "),
                                len(ap_sec_scores), round(result_sec, 4)))
def calculate_ranking_sections(self, raw_queries, settings):
    """Print the mean average precision when searching one section at a time.

    For every query derived from ``raw_queries`` the search text is sent to
    ``API.get_papers`` once per section (introduction, background, methods,
    results, discussion), keyed by the section's ``IMRaDType`` name. The
    average precision of each ranking against the query's referenced papers
    is collected, and one ``label & count & mean`` row is printed per
    section, labelled with the matching ``Mode.only_*`` name.

    Args:
        raw_queries: Input handed to ``__raw_queries_to_queries``.
        settings: Ranking settings passed unchanged to ``API.get_papers``.

    Returns:
        None. Results are written to stdout. With no queries the rows are
        printed with a count of 0 and a mean of 0.0.
    """
    api = API()
    queries = self.__raw_queries_to_queries(raw_queries)

    # (query key for the section, mode whose name labels the output row),
    # listed in output order.
    sections = (
        (IMRaDType.INTRODUCTION.name, Mode.only_introduction),
        (IMRaDType.BACKGROUND.name, Mode.only_background),
        (IMRaDType.METHODS.name, Mode.only_methods),
        (IMRaDType.RESULTS.name, Mode.only_results),
        (IMRaDType.DISCUSSION.name, Mode.only_discussion),
    )
    scores = {section: [] for section, _ in sections}

    total = len(queries)
    for i, query in enumerate(queries):
        progressBar(i, total)
        relevant_paper = [api.get_paper(reference["paper_id"]) for reference in query["references"]]
        search_query = query["search_query"]
        for section, _ in sections:
            ranked_papers = api.get_papers({section: search_query}, settings)
            scores[section].append(self.average_precision(ranked_papers, relevant_paper))

    print()
    for section, mode in sections:
        section_scores = scores[section]
        # Guard the mean against an empty query list (would divide by zero).
        mean_ap = sum(section_scores) / len(section_scores) if section_scores else 0.0
        print("{} & {} & {}".format(mode.name.replace("_", " "), len(section_scores), mean_ap))
def test_simple_ranking(self):
    """Check that a mixed section/whole-document query returns at least one paper.

    The query supplies text for the introduction and methods sections and
    for the whole document; the remaining sections are empty strings.
    Ranking uses the default TF configuration with ``importance_sections``
    set to False unless the default configuration overrides that key.
    """
    queries = {
        IMRaDType.INTRODUCTION.name: "aaa",
        # Keyed by the section name like every other entry; the enum member
        # itself is not the string key the other sections use.
        IMRaDType.BACKGROUND.name: "",
        IMRaDType.METHODS.name: "aaa bbb ccc ddd eee fff",
        IMRaDType.RESULTS.name: "",
        IMRaDType.DISCUSSION.name: "",
        "whole-document": "ggg aaa ccc",
    }
    settings = {"importance_sections": False, **TF.get_default_config()}

    api = API()
    ret = api.get_papers(queries, settings)
    self.assertGreater(len(ret), 0)