def processToCheckStructured(self):
    """Dump the whole PDF to text and record the size of the dumped content.

    Builds a ``PdfToText`` from ``self.filePath``, ``self.totalPages`` and
    ``self.outputDir``, dumps the pages, and adds the byte size of the
    dumped text file to ``self.textContentSize``. The resulting size and
    the value of ``self.isStructured()`` are logged at info level.
    """
    textDumper = PdfToText(self.filePath, self.totalPages, self.outputDir)
    textDumper.dumpPages()

    # The size is added to the existing counter rather than replacing it.
    dumpPath = textDumper.dumpedTextFilepath
    self.textContentSize += os.path.getsize(dumpPath)

    log = self.logger
    log.info('Text content size: %d bytes', self.textContentSize)
    log.info('Structured? %s', self.isStructured())
def testStructuredPdfAllPagesDump(self):
    """Dumping all pages of tests/sample.pdf creates sample.txt in self.outdir."""
    converter = PdfToText('tests/sample.pdf', 5, self.outdir)
    converter.dumpPages()

    # The dump is expected to land in the output directory as sample.txt.
    expectedDump = os.path.join(self.outdir, "sample.txt")
    self.assertTrue(os.path.isfile(expectedDump))
def testStructuredPdfAllPagesDump(self):
    """Check that dumping tests/sample.pdf leaves a sample.txt file in self.outdir."""
    samplePdfDumper = PdfToText('tests/sample.pdf', 5, self.outdir)
    samplePdfDumper.dumpPages()

    dumpedTextPath = os.path.join(self.outdir, "sample.txt")
    dumpExists = os.path.isfile(dumpedTextPath)
    self.assertTrue(dumpExists)