def return_citation_array(self):
    """Goes through the parsed text and extracts citations
    returns the results in a list

    Every line yielded by ``self.__nonblank_lines__`` is searched with
    ``self.regex``. Lines on which the pattern does not match are
    skipped. When capture group 1 is empty the citation is built from
    group 4 only (empty author list, empty journal); otherwise group 1
    becomes the single author, group 2 the first constructor argument and
    group 3 the third.

    Returns:
        list_of_citations (list): A list containing the extracted
        citations, or None (after printing a notice) when
        ``self.__has_file_been_read__()`` reports the file as already
        processed
    """
    # Guard first so that an already analyzed file is never reopened.
    if self.__has_file_been_read__():
        print("File already analyzed")
        return None

    list_of_citations = []
    with open(self.path, 'r') as file:
        for string in self.__nonblank_lines__(file):
            match = re.search(self.regex, string)
            if match is None:
                # re.search returns None on a miss; such a line carries
                # no citation, so move on instead of failing on .group().
                continue
            if match.group(1) is None:
                list_of_citations.append(
                    CitationObj(match.group(4), [], "", self.path))
            else:
                list_of_authors = [match.group(1)]
                list_of_citations.append(CitationObj(
                    match.group(2), list_of_authors, match.group(3),
                    self.path))
    self.analyzed_files.append(self.path)
    return list_of_citations
def test_correct_getters(self):
    """[Ensure the getters of a citation built from empty strings
    each hand back an empty string]
    """
    blank_citation = CitationObj("", "", "", "")

    observed_author = blank_citation.get_author()
    self.assertEqual("", observed_author)

    observed_journal = blank_citation.get_journal()
    self.assertEqual("", observed_journal)

    observed_title = blank_citation.get_title()
    self.assertEqual("", observed_title)
def test_correct_convert_to_dict(self):
    """Ensure that the convert to dict function returns
    exactly five fields
    """
    blank_citation = CitationObj("", "", "", "")
    # Named so the builtin ``dict`` stays usable inside this test.
    as_mapping = blank_citation.convert_to_dict()
    field_count = len(as_mapping)
    self.assertEqual(5, field_count)
class TestCitationAnalyzer(unittest.TestCase):
    """Checks which citations CitationAnalyzer lists under each keyword
    group for a small fixed fixture.
    """

    # Fixture: group name -> list of keywords handed to the analyzer.
    dictOfKeywords = {"Test": ["a "], "Test2": ["i "]}

    # Fixture: one citation with a string author, one with an author list.
    arrayOfCitations = [
        CitationObj("a title", "a author", "a journal", "a id"),
        CitationObj("i title", ["i author", "ii author"],
                    "i journal", "i id"),
    ]

    def test_that_results_are_expected(self):
        """The first citation listed under "Test" is "a title" and the
        first one listed under "Test2" is "i title".
        """
        analyzer = CitationAnalyzer(self.arrayOfCitations,
                                    self.dictOfKeywords)

        # The classification dictionary is requested anew for each check.
        first_for_test = (
            analyzer.return_dict_of_assigned_citations_classifications()
            ["Test"][0])
        self.assertEqual(first_for_test.get_title(), "a title")

        first_for_test2 = (
            analyzer.return_dict_of_assigned_citations_classifications()
            ["Test2"][0])
        self.assertEqual(first_for_test2.get_title(), "i title")
def return_citation_dictionary(self):
    """Goes through the parsed text and extracts citations
    returns the results in a dictionary

    Every line yielded by ``self.__nonblank_lines__`` is searched with
    ``self.regex``. Lines on which the pattern does not match are
    skipped. When capture group 1 is empty the citation is built from
    group 4 only (empty author list, empty journal); otherwise group 1
    becomes the single author, group 2 the first constructor argument and
    group 3 the third.

    Returns:
        citation_dict (dictionary): The extracted citations stored as a
        list under the key "Citations", or None (after printing a notice)
        when ``self.__has_file_been_read__()`` reports the file as
        already processed
    """
    if self.__has_file_been_read__():
        print("File already analyzed")
        return None

    list_of_citations = []
    with open(self.path, 'r') as file:
        for string in self.__nonblank_lines__(file):
            match = re.search(self.regex, string)
            if match is None:
                # re.search returns None on a miss; such a line carries
                # no citation, so move on instead of failing on .group().
                continue
            if match.group(1) is None:
                list_of_citations.append(
                    CitationObj(match.group(4), [], "", self.path))
            else:
                list_of_authors = [match.group(1)]
                list_of_citations.append(CitationObj(
                    match.group(2), list_of_authors, match.group(3),
                    self.path))

    # Record the file a single time, not once per extracted citation.
    self.analyzed_files.append(self.path)
    return {"Citations": list_of_citations}
def load_from_file_to_array(file_path):
    """Rebuilds citation objects from a file of saved Json citations

    Each line of the file is decoded as one Json object whose values are
    lists of citation records. Every record must carry the keys "title",
    "journal", "author", "id" and "classification".

    Args:
        file_path (String): The path to the json file to be loaded

    Returns:
        restored (list): The citations rebuilt from the file
    """
    restored = []
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            grouped_records = json.loads(raw_line).values()
            for records in grouped_records:
                for record in records:
                    # Fields are removed from the record in this fixed
                    # order; a missing key raises KeyError.
                    title, journal, author, citation_id, label = (
                        record.pop(field) for field in
                        ("title", "journal", "author", "id",
                         "classification"))
                    entry = CitationObj(title, author, journal, citation_id)
                    entry.set_classification(label)
                    restored.append(entry)
    return restored
def return_citation_array(self):
    """Builds citation objects from the Json records stored in the file
    at ``self.path``

    Each line of the file is decoded as Json. Authors and journal come
    from ``self.__get_authors_names__`` and ``self.__get_journals_name__``
    and the title from the record's 'title' entry.

    Returns:
        extracted (list): A list containing the extracted citations, or
        None (after printing a notice) when the file was already read
    """
    if self.__has_file_been_read__():
        print("File already analyzed")
        return None

    extracted = []
    with open(self.path, 'r') as handle:
        for raw_line in handle:
            record = json.loads(raw_line)
            names = self.__get_authors_names__(record)
            periodical = self.__get_journals_name__(record)
            heading = record.get('title')
            extracted.append(
                CitationObj(heading, names, periodical, self.path))
    self.analysed_files.append(self.path)
    return extracted
def return_citation_array(self):
    """Turns every citation regex match in the document text into a
    citation object

    Returns:
        found (list): One citation per regex match, or None (after
        printing a notice) when the file was already analysed
    """
    if self.__has_file_been_read__():
        print("File already analysed")
        return None

    text = self.get_document_text()
    pattern = self.__get_citation_regex__()
    # Each match is a tuple of groups; group 1 is passed first, then
    # group 0, then group 2.
    found = [
        CitationObj(hit[1], hit[0], hit[2], self.file_path)
        for hit in re.findall(pattern, text)
    ]
    self.analysed_files.append(self.file_path)
    return found
def return_citation_array(self):
    """Scans the pdf text with the citation regex and wraps every match
    in a citation object

    Returns:
        citations (list): A list containing the extracted citations, or
        None (after printing a notice) when the file was already analyzed
    """
    already_done = self.__has_file_been_read__()
    if already_done:
        print("File already analyzed")
        return None

    citations = []
    text = self.__get_pdf_text__()
    pattern = self.__get_citation_regex__()
    matches = re.findall(pattern, text)
    # Each match is a tuple of groups; group 1 is passed first, then
    # group 0, then group 2.
    citations.extend(
        CitationObj(fields[1], fields[0], fields[2], self.file_path)
        for fields in matches)
    self.analyzed_files.append(self.file_path)
    return citations
def return_citation_array(self):
    """Loads the bibtex file at ``self.path`` and turns each of its
    entries into a citation object

    Returns:
        collected (list): One citation per bibtex entry, or None (after
        printing a notice) when the file was already analyzed
    """
    if self.__has_file_been_read__():
        print("File already analyzed")
        return None

    with open(self.path) as bibtex_file:
        database = bibtexparser.load(bibtex_file)
    entries = database.get_entry_dict()

    collected = []
    # Only the entry contents are needed, not the entry keys.
    for entry in entries.values():
        authors = self.__split_authors__(entry.get('author'))
        collected.append(CitationObj(
            entry.get('title'), authors, entry.get('journal'), self.path))
    self.analyzedFiles.append(self.path)
    return collected
def return_complete_dict(self):
    """Collects the citations, URLs and DOIs found in the pdf text

    The pdf text is searched with the citation, url and doi regexes
    supplied by the corresponding ``self.__get_*_regex__`` helpers.

    Returns:
        citation_dict (dictionary): A dictionary with three entries:
        "IEEE" (citation objects built from the citation regex matches),
        "URLS" (url regex matches) and "DOIS" (doi regex matches)
    """
    pdf_text = self.__get_pdf_text__()
    citation_regex = self.__get_citation_regex__()
    url_regex = self.__get_url_regex__()
    doi_regex = self.__get_doi_regex__()

    citation_data = re.findall(citation_regex, pdf_text)
    list_of_urls = re.findall(url_regex, pdf_text)
    list_of_dois = re.findall(doi_regex, pdf_text)

    # CitationObj is given the source file as its fourth argument, in
    # line with the other places where citations are constructed.
    list_of_citation_objects = [
        CitationObj(citation[1], citation[0], citation[2], self.file_path)
        for citation in citation_data
    ]

    return {
        "IEEE": list_of_citation_objects,
        "URLS": list_of_urls,
        "DOIS": list_of_dois,
    }
def test_correct_default_assignment(self):
    """Ensures that a new citation's classification starts
    as "Unassigned"
    """
    fresh_citation = CitationObj("", "", "", "")
    initial_label = fresh_citation.get_classification()
    self.assertEqual("Unassigned", initial_label)
def test_changing_assignment(self):
    """Ensures that a classification stored with set_classification
    is the one get_classification hands back
    """
    citation = CitationObj("", "", "", "")
    citation.set_classification("Trial")
    stored_label = citation.get_classification()
    self.assertEqual("Trial", stored_label)