class TestTccExtractor(unittest.TestCase):
    """Exercise ``TccExtractor`` against the ``doctest1.pdf`` sample document."""

    def setUp(self):
        """Build the preparator, extractor and template parser used by each test."""
        self.doc_dir = join(ROOT_PATH, 'testdocs', 'obtencaograu', 'doctest1.pdf')
        self.preparator = Preparator(self.doc_dir)
        self.extractor = TccExtractor(self.doc_dir)
        self.parse = Parser('tcc.xml')
        self.xml_template_metadata = self.parse.xml_template_metadata()

    def test_metadata_extractor_generates_metadata_dict(self):
        """``all_metadata()`` must not be empty."""
        self.extractor.all_metadata() |should_not| be_empty

    def test_tcc_document_has_one_or_more_confirmed_by_corpus_author_type_metadata(self):
        """At least one author is extracted and none of them is an empty string."""
        # Registered up front so the converted document is removed even when
        # an assertion below fails (an inline call at the end would be skipped).
        self.addCleanup(self.preparator.remove_converted_document)
        len(self.extractor._author_metadata()) |should| be_greater_than_or_equal_to(1)
        self.extractor._author_metadata() |should_not| contain('')

    def test_tcc_document_has_title_type_metadata(self):
        """The extracted title is not the empty string."""
        self.addCleanup(self.preparator.remove_converted_document)
        self.extractor._title_metadata() |should_not| equal_to('')

    def test_tcc_document_has_a_confirmed_by_corpus_institution_metadata(self):
        """The extracted institution is not exactly the literal below."""
        self.addCleanup(self.preparator.remove_converted_document)
        # The literal ends with a trailing space; it is compared verbatim.
        self.extractor._institution_metadata() |should_not| equal_to(
            'Instituto Federal de Educação Ciência e Tecnologia ')

    def test_tcc_document_has_a_confirmed_by_corpus_campus_metadata(self):
        """The extracted campus is not the empty string."""
        self.addCleanup(self.preparator.remove_converted_document)
        self.extractor._campus_metadata() |should_not| equal_to('')

    def test_tcc_document_has_an_abstract_metadata_pattern_found_by_regex(self):
        """The document text matches the abstract regex and the match starts with 'resumo'."""
        doc = self.extractor._clean_variouspages_doc
        matches = re.search(r'resumo:* (.*?) palavr(a|as)(.|\s)chav(e|es).', doc)
        # re.search returns None when nothing matches; fail with a readable
        # message instead of an AttributeError raised by matches.group().
        self.assertIsNotNone(matches, 'abstract pattern not found in document text')
        matches.group() |should| start_with('resumo')
        # NOTE(review): `_abstract_metadata` is referenced without being
        # called, unlike the other `_*_metadata()` accessors in this class.
        # If it is a method rather than a property, this compares a bound
        # method with '' and can never fail -- confirm against TccExtractor.
        self.extractor._abstract_metadata |should_not| equal_to('')
def setUp(self):
    """Create the per-test fixtures: document path, preparator, extractor, parser."""
    sample_pdf = join(ROOT_PATH, 'testdocs', 'obtencaograu', 'doctest1.pdf')
    self.doc_dir = sample_pdf

    # Both collaborators are constructed from the same document path.
    self.preparator = Preparator(sample_pdf)
    self.extractor = TccExtractor(sample_pdf)

    # Keep the parser itself and the template metadata it yields.
    template_parser = Parser('tcc.xml')
    self.parse = template_parser
    self.xml_template_metadata = template_parser.xml_template_metadata()
def setUp(self):
    """Prepare the document path and the objects every test works with."""
    path_parts = ("testdocs", "obtencaograu", "doctest1.pdf")
    document = join(ROOT_PATH, *path_parts)

    self.doc_dir = document
    self.preparator = Preparator(document)
    self.extractor = TccExtractor(document)

    # The parser is stored alongside the template metadata read from it.
    parser = Parser("tcc.xml")
    self.parse = parser
    self.xml_template_metadata = parser.xml_template_metadata()