class TestPeriodicExtractor(unittest.TestCase):
    """Tests for PeriodicExtractor run against the sample PDF ``testdocs/periodic/1_pt-br.pdf``."""

    def setUp(self):
        """Create the preparator, extractor and template parser before each test."""
        self.doc_dir = join(ROOT_PATH, "testdocs", "periodic", "1_pt-br.pdf")
        self.preparator = Preparator(self.doc_dir)
        self.extractor = PeriodicExtractor(self.doc_dir)
        self.parse = Parser("periodic.xml")
        self.xml_template_metadata = self.parse.xml_template_metadata()

    def test_periodic_document_has_author_type_metadata(self):
        """The author metadata returned by the extractor must not be empty."""
        self.extractor._author_metadata() | should_not | be_empty

    # NOTE(review): the name says "event" although this class exercises the
    # periodic extractor; the name is kept so selecting the test by name
    # keeps working.
    def test_event_document_has_an_abstract_metadata_pattern_found_by_regex(self):
        """The abstract pattern must match the cleaned document and the abstract must be non-empty."""
        doc = self.extractor._clean_onepage_doc
        matches = re.search(
            r"resumo:* (.*?) (palavr(a|as)(.|\s)chav(e|es).|unitermos|descritores)",
            doc,
        )
        # re.search returns None when nothing matches; assert explicitly so the
        # test reports a readable failure instead of an AttributeError raised
        # by calling .group() on None.
        self.assertIsNotNone(
            matches,
            "abstract pattern not found in the cleaned one-page document",
        )
        matches.group() | should | start_with("resumo")
        # NOTE(review): _abstract_metadata is read without being called, unlike
        # _author_metadata() above; if it is a method rather than a property
        # this comparison can never fail — confirm against PeriodicExtractor.
        self.extractor._abstract_metadata | should_not | equal_to("")
def setUp(self):
    """Attach the sample document path, preparator, extractor and parsed template to ``self``."""
    sample_pdf = join(ROOT_PATH, 'testdocs', 'periodic', '1_pt-br.pdf')
    self.doc_dir = sample_pdf

    # Both helpers are built from the same document path, preparator first.
    self.preparator = Preparator(sample_pdf)
    self.extractor = PeriodicExtractor(sample_pdf)

    template_parser = Parser('periodic.xml')
    self.parse = template_parser
    self.xml_template_metadata = template_parser.xml_template_metadata()
def setUp(self):
    """Build the fixtures stored on ``self``: document path, preparator, extractor, parser, template metadata."""
    path_segments = ("testdocs", "periodic", "1_pt-br.pdf")
    document = join(ROOT_PATH, *path_segments)
    self.doc_dir = document

    # Construction order matches the original: preparator, then extractor.
    self.preparator = Preparator(document)
    self.extractor = PeriodicExtractor(document)

    # Keep the parser on self, then store the metadata it reports.
    self.parse = Parser("periodic.xml")
    metadata = self.parse.xml_template_metadata()
    self.xml_template_metadata = metadata