def test_non_ocr_pdf(self):
    """Extracting from a valid PDF that has not been OCR'd yields nothing.

    The fixture ``non_ocr_file.pdf`` is looked up in
    ``TextExtractorTest.test_directory``; the extractor is expected to
    return an empty collection for it.
    """
    file_name = 'non_ocr_file.pdf'
    text_extractor = TextExtractor(
        source_file=file_name,
        source_directory=TextExtractorTest.test_directory,
        working_directory='/tmp',
        testing=True,
    )

    actual_results = text_extractor.get_file_contents_as_array()

    # assertEqual rather than the deprecated assertEquals alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual(len(actual_results), 0)
def test_valid_pdf(self):
    """Extracting from a valid PDF returns its text as a list of lines.

    The fixture ``test_file1.pdf`` is looked up in
    ``TextExtractorTest.test_directory``; the extractor is expected to
    return exactly the two text lines followed by a lone newline entry.

    NOTE(review): the earlier docstring described this as an empty file
    with a .pdf suffix and warned it could break once the file type is
    checked properly. That does not match the assertions below -
    confirm whether the caveat still applies to this fixture.
    """
    expected_results = [
        'Test 1\n',
        'Test 2\n',
        '\n',
    ]
    file_name = 'test_file1.pdf'
    text_extractor = TextExtractor(
        source_file=file_name,
        source_directory=TextExtractorTest.test_directory,
        working_directory='/tmp',
        testing=True,
    )

    actual_results = text_extractor.get_file_contents_as_array()

    # assertEqual rather than the deprecated assertEquals alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual(expected_results, actual_results)