def calculate_overall_ranking(self, raw_queries, settings):
        """Print mean average precision for whole-document vs. section-aware search.

        Every query is searched twice: once under the ``"whole-document"`` key
        with ``Mode.without_importance_to_sections`` and once under the query's
        own ``"imrad"`` key with ``Mode.importance_to_sections``. Both rankings
        are scored with ``self.average_precision`` against the papers listed in
        the query's ``"references"``. One ``name & count & mean`` row is printed
        per mode, with the mean rounded to four decimals.

        Note: ``settings["mode"]`` is overwritten in place, so the caller's
        dict is modified. The section-aware run uses a deep copy.

        Args:
            raw_queries: Input handed to ``self.__raw_queries_to_queries``.
            settings: Settings dict forwarded to ``api.get_papers``.
        """
        api = API()
        mean_ap_whole = []
        mean_ap_doc = []

        queries = self.__raw_queries_to_queries(raw_queries)
        settings["mode"] = Mode.without_importance_to_sections
        settings_sec = copy.deepcopy(settings)
        settings_sec["mode"] = Mode.importance_to_sections

        total = len(queries)
        for i, query in enumerate(queries):
            progressBar(i, total)
            search_query = query["search_query"]
            ranked_papers_whole = api.get_papers({"whole-document": search_query}, settings)
            ranked_papers_sec = api.get_papers({query["imrad"]: search_query}, settings_sec)

            relevant_paper = [api.get_paper(reference["paper_id"]) for reference in query["references"]]

            mean_ap_whole.append(self.average_precision(ranked_papers_whole, relevant_paper))
            mean_ap_doc.append(self.average_precision(ranked_papers_sec, relevant_paper))

        # With no queries there is nothing to average; report 0.0 instead of
        # failing with ZeroDivisionError (the count column already shows 0).
        result_whole = sum(mean_ap_whole) / len(mean_ap_whole) if mean_ap_whole else 0.0
        result_doc = sum(mean_ap_doc) / len(mean_ap_doc) if mean_ap_doc else 0.0
        print()
        print("{} & {} & {}".format(Mode.without_importance_to_sections.name.replace("_", " "), len(mean_ap_whole),
                                    round(result_whole, 4)))
        print("{} & {} & {}".format(Mode.importance_to_sections.name.replace("_", " "), len(mean_ap_doc),
                                    round(result_doc, 4)))
예제 #2
0
    def compute_ranking_with_settings(self, settings, num_of_papers=0):
        """Print the mean average precision of paper-to-paper search.

        For every paper returned by ``self.api.get_all_paper()``, the papers
        behind its references (those with a truthy ``paper_id``) form the
        relevant set. Papers without any such reference are skipped. The
        ranking comes from ``self.api.get_papers_with_paper`` and is scored
        with ``self.average_precision``. A single ``name & count & mean`` row
        is printed, with the mean rounded to four decimals.

        Args:
            settings: Settings dict forwarded to ``get_papers_with_paper``;
                ``settings["mode"].name`` labels the printed row.
            num_of_papers: Currently unused; kept so existing callers that
                pass it keep working.
        """
        papers = self.api.get_all_paper()
        total = len(papers)

        mean_aps = []

        for i, paper in enumerate(papers):
            progressBar(i, total)
            relevant_papers = [
                self.api.get_paper(ref.get_paper_id())
                for ref in paper.references if ref.paper_id
            ]
            if not relevant_papers:
                continue

            # The second element of the returned pair is not needed here.
            ranked_papers, _ = self.api.get_papers_with_paper(
                paper.filename, settings)
            mean_aps.append(self.average_precision(ranked_papers, relevant_papers))

        # Every paper may have been skipped; report 0.0 over 0 papers instead
        # of failing with ZeroDivisionError.
        mean_ap = sum(mean_aps) / len(mean_aps) if mean_aps else 0.0
        print()
        print("{} & {} & {}".format(settings["mode"].name.replace("_", " "),
                                    len(mean_aps), round(mean_ap, 4)))
예제 #3
0
def remove_duplicates_from_cited_by():
    """Strip repeated ids from every paper's ``cited_by`` list.

    The first occurrence of each id keeps its position. Every paper is passed
    to ``api.client.update_paper`` afterwards, whether or not it changed.
    """
    print("\nRemove Duplicates")
    api = API()
    all_papers = api.get_all_paper()

    for position, current in enumerate(all_papers):
        progressBar(position, len(all_papers))
        # dict keys are unique and keep insertion order.
        unique_ids = dict.fromkeys(current.cited_by)
        current.cited_by = list(unique_ids)
        api.client.update_paper(current)
예제 #4
0
def check_references():
    """Find references whose target paper does not list the citing paper.

    For each paper, every reference with a truthy ``get_paper_id()`` is
    resolved through ``api.get_paper``. If the citing paper's id is missing
    from the target's ``cited_by``, the reference's ``paper_id`` is reset to
    ``[]``, the paper is passed to ``api.client.update_paper``, and the
    reference is handed to ``repair_corrupt_reference`` together with all
    other papers.
    """
    print("\nCheck References")
    api = API()
    papers = api.get_all_paper()
    total = len(papers)

    for i, paper in enumerate(papers):
        progressBar(i, total)

        # Built lazily: scanning all papers for every paper is wasted work
        # when none of this paper's references needs repair.
        other_papers = None
        for reference in paper.references:
            ref_id = reference.get_paper_id()
            if not ref_id:
                continue

            ref_paper = api.get_paper(ref_id)
            if paper.id in ref_paper.cited_by:
                continue

            if other_papers is None:
                other_papers = [p for p in papers if p.id != paper.id]

            print()  # blank line, as in the original output
            reference.paper_id = []
            api.client.update_paper(paper)
            repair_corrupt_reference(reference, paper, other_papers, api)
예제 #5
0
def check_cited_by():
    """Drop ``cited_by`` entries that are not backed by a matching reference.

    For each paper, every id in ``cited_by`` is checked:

    * If ``api.contains_paper`` reports the id as unknown, the id is removed
      and the paper is passed to ``api.client.update_paper``.
    * Otherwise, if none of the citing paper's references resolves to this
      paper's id, the id is removed, the paper is updated, and
      ``link_references_to_paper`` is called for the pair.
    """
    print("\nCheck Cited by")
    api = API()
    papers = api.get_all_paper()
    total = len(papers)

    for i, paper in enumerate(papers):
        progressBar(i, total)
        # Iterate over a snapshot: removing from the list being iterated
        # makes the loop skip the entry that follows each removed one.
        for cited_paper_id in list(paper.cited_by):
            if not api.contains_paper(cited_paper_id):
                paper.cited_by.remove(cited_paper_id)
                api.client.update_paper(paper)
                continue

            cited_paper = api.get_paper(cited_paper_id)
            cited_paper_refs = [ref.get_paper_id() for ref in cited_paper.references if ref.get_paper_id()]

            if paper.id not in cited_paper_refs:
                print()  # blank line, as in the original output
                paper.cited_by.remove(cited_paper_id)
                api.client.update_paper(paper)
                link_references_to_paper(cited_paper, paper, api)
    def calculate_ranking_sections(self, raw_queries, settings):
        """Print the mean average precision of searching each IMRaD section alone.

        Every query's ``"search_query"`` is searched once per section
        (introduction, background, methods, results, discussion), using the
        section's ``IMRaDType`` name as the search key. Each ranking is scored
        with ``self.average_precision`` against the papers listed in the
        query's ``"references"``. One ``name & count & mean`` row is printed
        per section; the mean is not rounded.

        Args:
            raw_queries: Input handed to ``self.__raw_queries_to_queries``.
            settings: Settings dict forwarded unchanged to ``api.get_papers``.
        """
        api = API()
        # (search key, label for the printed row), in output order.
        sections = (
            (IMRaDType.INTRODUCTION, Mode.only_introduction),
            (IMRaDType.BACKGROUND, Mode.only_background),
            (IMRaDType.METHODS, Mode.only_methods),
            (IMRaDType.RESULTS, Mode.only_results),
            (IMRaDType.DISCUSSION, Mode.only_discussion),
        )
        aps_per_section = [[] for _ in sections]

        queries = self.__raw_queries_to_queries(raw_queries)
        total = len(queries)

        for i, query in enumerate(queries):
            progressBar(i, total)
            relevant_paper = [api.get_paper(reference["paper_id"]) for reference in query["references"]]

            # Run all five searches before scoring any of them, matching the
            # order of calls in the original implementation.
            rankings = [
                api.get_papers({section.name: query["search_query"]}, settings)
                for section, _ in sections
            ]
            for aps, ranked_papers in zip(aps_per_section, rankings):
                aps.append(self.average_precision(ranked_papers, relevant_paper))

        print()
        for (_, mode), aps in zip(sections, aps_per_section):
            # With no queries there is nothing to average; report 0.0 instead
            # of failing with ZeroDivisionError.
            mean_ap = sum(aps) / len(aps) if aps else 0.0
            print("{} & {} & {}".format(mode.name.replace("_", " "),
                                        len(aps), mean_ap))