def test_object_datastream(self):
    # Extract text from a PDF datastream in Fedora: load the fixture
    # object back from the repository as a TestPdfObject, then run the
    # extraction on the content of its ``pdf`` datastream.
    fedora_obj = self.repo.get_object(self.pdfobj.pid, type=TestPdfObject)
    extracted = pdf_to_text(fedora_obj.pdf.content)
    # The extracted text must match the expected fixture text.
    self.assertEqual(self.pdf_text, extracted)
def test_file(self):
    # Extract text from a PDF file on the local filesystem.
    # Open the file in a context manager so the handle is closed
    # deterministically, even if the extraction raises; the original
    # passed a bare open() result that was never closed.
    with open(self.pdf_filepath, 'rb') as pdf_file:
        text = pdf_to_text(pdf_file)
    # The extracted text must match the expected fixture text.
    self.assertEqual(self.pdf_text, text)