def testScannedPdfPageExtraction(self):
    """Split the scanned sample PDF and check that pages 1 through 5 exist.

    Runs PdfSeparate on ``tests/sample-scanned.pdf`` with ``self.outdir`` as
    the target and asserts that ``1.pdf`` .. ``5.pdf`` are regular files there.
    """
    splitter = PdfSeparate("tests/sample-scanned.pdf", self.outdir)
    splitter.extractPages()
    # One assertion per expected page, in ascending page order.
    for pageNumber in range(1, 6):
        pagePath = os.path.join(self.outdir, "%d.pdf" % pageNumber)
        self.assertTrue(os.path.isfile(pagePath))
def testScannedPdfPageExtraction(self):
    """Verify that extracting the scanned sample yields five page files.

    PdfSeparate is pointed at ``tests/sample-scanned.pdf`` and writes into
    ``self.outdir``; the test then asserts each expected page file is present.
    """
    expectedNames = ("1.pdf", "2.pdf", "3.pdf", "4.pdf", "5.pdf")
    separator = PdfSeparate('tests/sample-scanned.pdf', self.outdir)
    separator.extractPages()
    for fileName in expectedNames:
        extractedPath = os.path.join(self.outdir, fileName)
        pageExists = os.path.isfile(extractedPath)
        self.assertTrue(pageExists)
def separatePdfPages(self):
    """Split ``self.filePath`` into pages under ``<self.outputDir>/pages``.

    PdfTkSeparate is tried first and its status is logged. Only when that
    status is non-zero is PdfSeparate invoked on the same input and target
    directory. Nothing is returned.
    """
    pagesDir = os.path.join(self.outputDir, 'pages')

    self.logger.info('Calling PdfTkseparate: Separating pdf to pages at %s', pagesDir)
    status = PdfTkSeparate(self.filePath, pagesDir).extractPages()
    self.logger.info('PdfTkseparate Status: %s', status)

    if status == 0:
        # Primary tool reported success; no fallback required.
        return

    self.logger.info('Calling Pdfseparate: Separating pdf to pages at %s', pagesDir)
    fallback = PdfSeparate(self.filePath, pagesDir)
    fallback.extractPages()
def testScannedPdfPageForUnauthorisec(self):
    """Check that processing a page with bogus credentials raises HTTP 401.

    The one-page sample is split into ``self.indir`` first. The extractor is
    then given the credentials ``nouser`` / ``nopassword`` and asked to
    process page 1; the resulting HTTPError must carry code 401 and reason
    ``Unauthorized``.
    """
    pdfSeparate = PdfSeparate("tests/sample-scanned-1.pdf", self.indir)
    pdfSeparate.extractPages()
    self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))
    # assertRaises fails the test when no HTTPError is raised at all; a bare
    # try/except would let that case pass without checking anything.
    with self.assertRaises(HTTPError) as raised:
        abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 1, "english")
        abbyyPdf.setApplicationCredentials("nouser", "nopassword")
        abbyyPdf.processPdfPage(1)
    self.assertEqual(raised.exception.code, 401)
    self.assertEqual(raised.exception.reason, "Unauthorized")
def testScannedPdfPageForUnauthorisec(self):
    """Expect an HTTP 401 when a page is processed with invalid credentials.

    Splits ``tests/sample-scanned-1.pdf`` into ``self.indir``, then runs the
    extractor for page 1 with the credentials ``nouser`` / ``nopassword`` and
    asserts that the HTTPError raised has code 401 and reason
    ``Unauthorized``.
    """
    pdfSeparate = PdfSeparate('tests/sample-scanned-1.pdf', self.indir)
    pdfSeparate.extractPages()
    self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))
    # Using assertRaises means a call that completes without an HTTPError is
    # reported as a failure rather than passing with no checks executed.
    with self.assertRaises(HTTPError) as caught:
        abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 1, "english")
        abbyyPdf.setApplicationCredentials('nouser', 'nopassword')
        abbyyPdf.processPdfPage(1)
    error = caught.exception
    self.assertEqual(error.code, 401)
    self.assertEqual(error.reason, "Unauthorized")
def testScanned44PdfPageForNetwork(self):
    """Run text extraction over the 44-page sample and check first/last output.

    The sample is split into ``self.indir``; the extractor is configured with
    the ``appid`` / ``password`` values from the ``abbyy`` section of
    ``self.configParser`` and asked to extract all pages. If that step raises,
    the test is skipped (best-effort, as before). Otherwise ``1.txt`` and
    ``44.txt`` must exist in ``self.outdir``.
    """
    pdfSeparate = PdfSeparate('tests/sample-scanned-44pages.pdf', self.indir)
    pdfSeparate.extractPages()
    self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))
    try:
        abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 44, "english")
        abbyyPdf.setApplicationCredentials(
            self.configParser.get('abbyy', 'appid'),
            self.configParser.get('abbyy', 'password'))
        abbyyPdf.extractPages()
    except Exception as e:
        # Keep the best-effort behaviour for configuration/service errors, but
        # report it as a skip instead of a silent pass.
        self.skipTest("Abbyy extraction could not run: %s" % e)
    # These checks sit outside the try block: AssertionError is a subclass of
    # Exception, so inside it a failed assertion would be swallowed.
    self.assertTrue(os.path.isfile(os.path.join(self.outdir, "1.txt")))
    self.assertTrue(os.path.isfile(os.path.join(self.outdir, "44.txt")))
def testScannedPdfPage(self):
    """Process page 1 of the one-page sample and check that ``1.txt`` appears.

    The sample is split into ``self.indir``; the extractor is configured with
    the ``appid`` / ``password`` values from the ``abbyy`` section of
    ``self.configParser`` and asked to process page 1. If that step raises,
    the test is skipped (best-effort, as before). Otherwise ``1.txt`` must
    exist in ``self.outdir``.
    """
    pdfSeparate = PdfSeparate("tests/sample-scanned-1.pdf", self.indir)
    pdfSeparate.extractPages()
    self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))
    try:
        abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 1, "english")
        abbyyPdf.setApplicationCredentials(
            self.configParser.get("abbyy", "appid"),
            self.configParser.get("abbyy", "password"),
        )
        abbyyPdf.processPdfPage(1)
    except Exception as e:
        # Keep the best-effort behaviour for configuration/service errors, but
        # report it as a skip instead of a silent pass.
        self.skipTest("Abbyy extraction could not run: %s" % e)
    # Checked outside the try block: AssertionError is a subclass of
    # Exception, so inside it a failed assertion would be swallowed.
    self.assertTrue(os.path.isfile(os.path.join(self.outdir, "1.txt")))
def testScannedPdfPage(self):
    """Extract text for page 1 of the one-page sample and expect ``1.txt``.

    After splitting the sample into ``self.indir``, the extractor is given the
    ``appid`` / ``password`` values read from the ``abbyy`` section of
    ``self.configParser`` and processes page 1. Any exception from that step
    causes the test to be skipped; on success ``1.txt`` must be present in
    ``self.outdir``.
    """
    pdfSeparate = PdfSeparate('tests/sample-scanned-1.pdf', self.indir)
    pdfSeparate.extractPages()
    self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))
    try:
        abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 1, "english")
        abbyyPdf.setApplicationCredentials(
            self.configParser.get('abbyy', 'appid'),
            self.configParser.get('abbyy', 'password'))
        abbyyPdf.processPdfPage(1)
    except Exception as e:
        # Configuration or service errors remain non-fatal, but are surfaced
        # as a skip rather than being discarded.
        self.skipTest("Abbyy extraction could not run: %s" % e)
    # The assertion lives after the try block because ``except Exception``
    # would otherwise catch the AssertionError and hide a missing output file.
    self.assertTrue(os.path.isfile(os.path.join(self.outdir, "1.txt")))