class TestTccExtractor(unittest.TestCase):
    """Checks the metadata TccExtractor extracts from a sample TCC document.

    Every test runs against the ``doctest1.pdf`` fixture located under
    ``testdocs/obtencaograu`` inside ``ROOT_PATH``.

    NOTE(review): several tests call ``remove_converted_document()`` as their
    last statement, so that cleanup is skipped whenever an assertion fails,
    and two tests never call it at all. Moving it to ``tearDown`` looks
    desirable, but whether the call is safe after every test depends on
    ``Preparator``, which is not visible here -- confirm before changing.
    """

    def setUp(self):
        """Create the fixture path and the collaborators used by the tests."""
        # Despite its name, ``doc_dir`` is the path of the PDF file itself.
        self.doc_dir = join(ROOT_PATH, 'testdocs', 'obtencaograu', 'doctest1.pdf')
        self.preparator = Preparator(self.doc_dir)
        self.extractor = TccExtractor(self.doc_dir)
        self.parse = Parser('tcc.xml')
        self.xml_template_metadata = self.parse.xml_template_metadata()

    def test_metadata_extractor_generates_metadata_dict(self):
        """``all_metadata()`` must not return an empty collection."""
        self.extractor.all_metadata() |should_not| be_empty

    def test_tcc_document_has_one_or_more_confirmed_by_corpus_author_type_metadata(self):
        """At least one author is extracted and none of them is blank."""
        len(self.extractor._author_metadata()) |should| be_greater_than_or_equal_to(1)
        self.extractor._author_metadata() |should_not| contain('')
        self.preparator.remove_converted_document()

    def test_tcc_document_has_title_type_metadata(self):
        """The extracted title must not be an empty string."""
        self.extractor._title_metadata() |should_not| equal_to('')
        self.preparator.remove_converted_document()

    def test_tcc_document_has_a_confirmed_by_corpus_institution_metadata(self):
        """The extracted institution must differ from the literal below.

        NOTE(review): presumably the literal is the generic prefix the
        extractor starts from before appending the institution's own name --
        confirm against TccExtractor.
        """
        self.extractor._institution_metadata() |should_not| equal_to('Instituto Federal de Educação Ciência e Tecnologia ')
        self.preparator.remove_converted_document()

    def test_tcc_document_has_a_confirmed_by_corpus_campus_metadata(self):
        """The extracted campus must not be an empty string."""
        self.extractor._campus_metadata() |should_not| equal_to('')
        self.preparator.remove_converted_document()

    def test_tcc_document_has_an_abstract_metadata_pattern_found_by_regex(self):
        """The cleaned document text contains a 'resumo ... palavras chave' span."""
        # Accessed without a call and fed straight to ``re.search``, so this
        # is expected to be a string attribute/property of the extractor.
        doc = self.extractor._clean_variouspages_doc
        matches = re.search(r'resumo:* (.*?) palavr(a|as)(.|\s)chav(e|es).', doc)
        # Fail with a readable message instead of an AttributeError on
        # ``None.group()`` when the pattern is absent from the document.
        self.assertIsNotNone(matches, 'abstract pattern not found in document text')
        matches.group() |should| start_with('resumo')
        # NOTE(review): unlike the other ``_*_metadata`` accessors used in
        # this class, ``_abstract_metadata`` is not called here. If it is a
        # method, this compares a bound method to '' and always passes --
        # confirm against TccExtractor whether ``()`` is missing.
        self.extractor._abstract_metadata |should_not| equal_to('')