コード例 #1
0
 def processToCheckStructured(self):
     """
     Dump the whole PDF to text and record how large the dumped text is.

     A PdfToText is built from self.filePath, self.totalPages and
     self.outputDir and asked to dump its pages. The on-disk size of the
     file at its dumpedTextFilepath is then added to self.textContentSize,
     and both the running size and the result of self.isStructured() are
     logged at INFO level.
     """
     dumper = PdfToText(self.filePath, self.totalPages, self.outputDir)
     dumper.dumpPages()

     # Size in bytes of the text file the dumper reports having written.
     dumpedBytes = os.path.getsize(dumper.dumpedTextFilepath)
     self.textContentSize += dumpedBytes

     log = self.logger
     log.info('Text content size: %d bytes', self.textContentSize)
     # isStructured() is evaluated only after the size has been updated
     # and logged, matching the order the caller-visible logs appear in.
     structured = self.isStructured()
     log.info('Structured? %s', structured)
コード例 #2
0
ファイル: PdfToTextTest.py プロジェクト: anjesh/pdf-processor
 def testStructuredPdfAllPagesDump(self):
     """Dumping tests/sample.pdf must leave a sample.txt file in self.outdir."""
     dumper = PdfToText('tests/sample.pdf', 5, self.outdir)
     dumper.dumpPages()

     # The test only checks that the output file exists, not its contents.
     expectedTextFile = os.path.join(self.outdir, "sample.txt")
     dumpExists = os.path.isfile(expectedTextFile)
     self.assertTrue(dumpExists)
コード例 #3
0
 def testStructuredPdfAllPagesDump(self):
     """
     Run a full page dump of tests/sample.pdf into self.outdir and assert
     that a regular file named sample.txt is present there afterwards.
     """
     PdfToText('tests/sample.pdf', 5, self.outdir).dumpPages()

     # Existence check only; the dumped text itself is not inspected.
     dumpedFile = os.path.join(self.outdir, "sample.txt")
     self.assertTrue(os.path.isfile(dumpedFile))