Ejemplo n.º 1
0
 def processToCheckStructured(self):
     """
     Dump the PDF to text and record the size of the resulting file.

     A PdfToText is built from this object's file path, total page
     count and output directory, and its pages are dumped. The byte
     size of the dumped text file is added to ``self.textContentSize``.
     The accumulated size and the result of ``self.isStructured()`` are
     then logged at INFO level.
     """
     converter = PdfToText(self.filePath, self.totalPages, self.outputDir)
     converter.dumpPages()

     # Size on disk of the text file produced by the dump.
     dumpedBytes = os.path.getsize(converter.dumpedTextFilepath)
     self.textContentSize += dumpedBytes

     self.logger.info('Text content size: %d bytes', self.textContentSize)
     self.logger.info('Structured? %s', self.isStructured())
Ejemplo n.º 2
0
 def testStructuredPdfAllPagesDump(self):
     """Dump tests/sample.pdf and check that sample.txt exists in outdir."""
     expectedPath = os.path.join(self.outdir, "sample.txt")
     dumper = PdfToText('tests/sample.pdf', 5, self.outdir)
     dumper.dumpPages()
     # The dump is expected to leave a regular file at the joined path.
     self.assertTrue(os.path.isfile(expectedPath))
Ejemplo n.º 3
0
 def testStructuredPdfAllPagesDump(self):
     """
     Verify that dumping tests/sample.pdf writes sample.txt.

     The PdfToText is constructed with the sample path, the value 5 and
     the test's output directory; after dumpPages() a file named
     "sample.txt" must exist inside that directory.
     """
     targetDir = self.outdir
     PdfToText('tests/sample.pdf', 5, targetDir).dumpPages()
     dumpedFile = os.path.join(self.outdir, "sample.txt")
     fileExists = os.path.isfile(dumpedFile)
     self.assertTrue(fileExists)