Beispiel #1
0
    def return_citation_array(self):
        """Extract the citations found in the text file at ``self.path``.

        Every non-blank line is searched with ``self.regex``. When capture
        group 1 is empty the citation is built from group 4 only (empty
        author list, empty journal); otherwise group 1 is the single author
        and groups 2 and 3 are passed along with it. Lines the regex does
        not match at all are skipped.

        Returns:
            citation_list (list): A list containing the extracted citations,
            or None (after printing a notice) when the file has already
            been analyzed.
        """
        # Check first so that an already-analyzed file is never reopened.
        if self.__has_file_been_read__():
            print("File already analyzed")
            return None

        list_of_citations = []
        with open(self.path, 'r') as file:
            for string in self.__nonblank_lines__(file):
                match = re.search(self.regex, string)
                if match is None:
                    # re.search returns None for a line without a citation;
                    # calling .group() on it would raise AttributeError.
                    continue
                if match.group(1) is None:
                    list_of_citations.append(CitationObj(
                        match.group(4), [], "", self.path))
                else:
                    list_of_authors = [match.group(1)]
                    list_of_citations.append(CitationObj(
                        match.group(2), list_of_authors, match.group(3),
                        self.path))
        self.analyzed_files.append(self.path)
        return list_of_citations
Beispiel #2
0
 def test_correct_getters(self):
     """Check that the getters of an all-empty citation return ""."""
     citation = CitationObj("", "", "", "")
     # Same getters, same order: author, journal, title.
     for getter_name in ("get_author", "get_journal", "get_title"):
         self.assertEqual("", getattr(citation, getter_name)())
Beispiel #3
0
 def test_correct_convert_to_dict(self):
     """Ensure that convert_to_dict returns the expected number of fields.

     The dictionary produced for a citation must contain exactly five
     entries.
     """
     citation = CitationObj("", "", "", "")
     fields = citation.convert_to_dict()
     self.assertEqual(5, len(fields))
class TestCitationAnalyzer(unittest.TestCase):
    """Checks that CitationAnalyzer files each fixture citation under the
    expected key of its result dictionary.
    """

    # Keyword fixture handed to the analyzer as its second argument.
    dictOfKeywords = {"Test": ["a "], "Test2": ["i "]}
    # Citation fixture: one with a plain-string author, one with a list.
    arrayOfCitations = [
        CitationObj("a title", "a author", "a journal", "a id"),
        CitationObj("i title", ["i author", "ii author"], "i journal", "i id")
    ]

    def test_that_results_are_expected(self):
        """The first citation under each key must carry the matching title."""
        analyzer = CitationAnalyzer(self.arrayOfCitations, self.dictOfKeywords)
        expectations = (("Test", "a title"), ("Test2", "i title"))
        for keyword, expected_title in expectations:
            # The result dictionary is requested afresh for every check.
            assigned = (
                analyzer.return_dict_of_assigned_citations_classifications())
            self.assertEqual(assigned[keyword][0].get_title(), expected_title)
Beispiel #5
0
 def return_citation_dictionary(self):
     """Extract the citations found in the text file at ``self.path``.

     Every non-blank line is searched with ``self.regex``. When capture
     group 1 is empty the citation is built from group 4 only (empty
     author list, empty journal); otherwise group 1 is the single author
     and groups 2 and 3 are passed along with it. Lines the regex does
     not match at all are skipped.

     Returns:
         citation_dict (dict): ``{"Citations": [...]}`` holding the
         extracted citations, or None (after printing a notice) when the
         file has already been analyzed.
     """
     if self.__has_file_been_read__():
         print("File already analyzed")
         return None

     list_of_citations = []
     with open(self.path, 'r') as file:
         for string in self.__nonblank_lines__(file):
             match = re.search(self.regex, string)
             if match is None:
                 # re.search returns None for a line without a citation;
                 # calling .group() on it would raise AttributeError.
                 continue
             if match.group(1) is None:
                 list_of_citations.append(CitationObj(
                     match.group(4), [], "", self.path))
             else:
                 list_of_authors = [match.group(1)]
                 list_of_citations.append(CitationObj(
                     match.group(2), list_of_authors, match.group(3),
                     self.path))
     # Record the file once, not once per parsed line, so analyzed_files
     # does not fill up with duplicates of the same path.
     self.analyzed_files.append(self.path)
     return {"Citations": list_of_citations}
Beispiel #6
0
    def load_from_file_to_array(file_path):
        """Loads saved Json citations to array

        Each non-blank line of the file is parsed as one JSON object whose
        values are lists of citation dictionaries with the keys ``title``,
        ``journal``, ``author``, ``id`` and ``classification``. Blank lines
        are ignored.

        Args:
            file_path (String): The path to the json file to be loaded

        Returns:
            list_of_citations (list): A list of citations

        Raises:
            KeyError: If a citation dictionary lacks one of the keys above.
        """
        list_of_citations = []
        with open(file_path, 'r') as file:
            for string in file:
                if not string.strip():
                    # json.loads rejects empty input, so a blank or
                    # trailing empty line must not reach it.
                    continue
                cit_dict = json.loads(string)
                for id_list in cit_dict.values():
                    for citation in id_list:
                        title = citation["title"]
                        journal = citation["journal"]
                        author = citation["author"]
                        citation_id = citation["id"]
                        classification = citation["classification"]
                        temp_citation = CitationObj(
                            title, author, journal, citation_id)
                        temp_citation.set_classification(classification)
                        list_of_citations.append(temp_citation)
        return list_of_citations
Beispiel #7
0
    def return_citation_array(self):
        """Build one citation per line of the JSON-lines file at self.path.

        Each line is decoded as JSON; authors and journal come from the
        class's own helper methods, the title from the entry's 'title' key.

        Returns:
            citation_list (list): A list containing the extracted citations,
            or None (after printing a notice) when the file has already
            been analyzed.
        """
        if self.__has_file_been_read__():
            print("File already analyzed")
            return None

        citation_list = []
        with open(self.path, 'r') as file:
            for line in file:
                entry = json.loads(line)
                authors = self.__get_authors_names__(entry)
                journal = self.__get_journals_name__(entry)
                citation = CitationObj(
                    entry.get('title'), authors, journal, self.path)
                citation_list.append(citation)
        self.analysed_files.append(self.path)
        return citation_list
Beispiel #8
0
    def return_citation_array(self):
        """Collect every citation-regex match from the document text.

        Each match tuple is turned into a citation built from its elements
        1, 0 and 2 (in that order) plus ``self.file_path``.

        Returns:
            list_of_citation_objects (list): One citation per regex match,
            or None (after printing a notice) when the file has already
            been analysed.
        """
        if self.__has_file_been_read__():
            print("File already analysed")
            return None

        text = self.get_document_text()
        pattern = self.__get_citation_regex__()
        citations = [
            CitationObj(found[1], found[0], found[2], self.file_path)
            for found in re.findall(pattern, text)
        ]
        self.analysed_files.append(self.file_path)
        return citations
    def return_citation_array(self):
        """Collect every citation-regex match from the PDF text.

        Each match tuple is turned into a citation built from its elements
        1, 0 and 2 (in that order) plus ``self.file_path``.

        Returns:
            citation_list (list): A list containing the extracted citations,
            or None (after printing a notice) when the file has already
            been analyzed.
        """
        already_read = self.__has_file_been_read__()
        if already_read:
            print("File already analyzed")
            return None

        extracted_text = self.__get_pdf_text__()
        matches = re.findall(self.__get_citation_regex__(), extracted_text)
        found_citations = []
        for fields in matches:
            entry = CitationObj(
                fields[1], fields[0], fields[2], self.file_path)
            found_citations.append(entry)
        self.analyzed_files.append(self.file_path)
        return found_citations
    def return_citation_array(self):
        """Build one citation per entry of the BibTeX file at self.path.

        The file is parsed with bibtexparser; each entry's 'author' field is
        split by the class's own helper, and 'title' and 'journal' are taken
        from the entry as-is.

        Returns:
            citation_list (list): The results of the extracted citations,
            or None (after printing a notice) when the file has already
            been analyzed.
        """
        if self.__has_file_been_read__():
            print("File already analyzed")
            return None

        with open(self.path) as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)

        citation_list = []
        # Only the entry bodies are needed, not their citation keys.
        for entry in bib_database.get_entry_dict().values():
            authors = self.__split_authors__(entry.get('author'))
            citation_list.append(CitationObj(
                entry.get('title'), authors, entry.get('journal'),
                self.path))
        self.analyzedFiles.append(self.path)
        return citation_list
    def return_complete_dict(self):
        """Run the citation, URL and DOI regexes over the PDF text.

        Returns:
            citation_dict (dictionary): Maps "IEEE" to the citations built
            from the citation-regex matches, "URLS" to the raw URL-regex
            matches and "DOIS" to the raw DOI-regex matches.
        """
        text = self.__get_pdf_text__()
        citation_pattern = self.__get_citation_regex__()
        url_pattern = self.__get_url_regex__()
        doi_pattern = self.__get_doi_regex__()

        citation_matches = re.findall(citation_pattern, text)
        urls = re.findall(url_pattern, text)
        dois = re.findall(doi_pattern, text)

        # NOTE(review): CitationObj gets three arguments here; confirm that
        # its fourth parameter is optional.
        citations = [
            CitationObj(fields[1], fields[0], fields[2])
            for fields in citation_matches
        ]
        return {"IEEE": citations, "URLS": urls, "DOIS": dois}
Beispiel #12
0
 def test_correct_default_assignment(self):
     """Ensures that a new citation starts out as "Unassigned"."""
     initial_classification = CitationObj("", "", "", "").get_classification()
     self.assertEqual("Unassigned", initial_classification)
Beispiel #13
0
 def test_changing_assignment(self):
     """Ensures set_classification changes what get_classification returns."""
     citation = CitationObj("", "", "", "")
     new_label = "Trial"
     citation.set_classification(new_label)
     self.assertEqual(new_label, citation.get_classification())