def testScannedPdfPageExtraction(self):
    """Split the scanned sample PDF and verify pages 1 through 5 exist.

    Runs ``PdfSeparate.extractPages()`` on ``tests/sample-scanned.pdf`` with
    ``self.outdir`` as the destination, then asserts that ``1.pdf`` ..
    ``5.pdf`` are regular files in that directory.
    """
    splitter = PdfSeparate('tests/sample-scanned.pdf', self.outdir)
    splitter.extractPages()

    # One assertion per expected page file, checked in page order.
    for page_name in ("1.pdf", "2.pdf", "3.pdf", "4.pdf", "5.pdf"):
        page_path = os.path.join(self.outdir, page_name)
        self.assertTrue(os.path.isfile(page_path))
Beispiel #2
0
 def separatePdfPages(self):
     """Split the PDF at ``self.filePath`` into pages under ``<outputDir>/pages``.

     PdfTkSeparate is tried first. When its ``extractPages()`` status is
     anything other than ``0``, PdfSeparate is run on the same input file
     and output directory as a fallback.
     """
     pagesDir = os.path.join(self.outputDir, 'pages')

     self.logger.info('Calling PdfTkseparate: Separating pdf to pages at %s', pagesDir)
     tkStatus = PdfTkSeparate(self.filePath, pagesDir).extractPages()
     self.logger.info('PdfTkseparate Status: %s', tkStatus)
     if tkStatus == 0:
         # Primary splitter reported success; no fallback required.
         return

     self.logger.info('Calling Pdfseparate: Separating pdf to pages at %s', pagesDir)
     fallbackSplitter = PdfSeparate(self.filePath, pagesDir)
     fallbackSplitter.extractPages()
 def testScannedPdfPageForUnauthorisec(self):
     """Processing a page with bogus credentials must raise HTTP 401.

     Splits ``tests/sample-scanned-1.pdf`` into ``self.indir``, then runs the
     ABBYY extractor on page 1 with the credentials ``nouser`` /
     ``nopassword`` and expects an ``HTTPError`` whose ``code`` is 401 and
     whose ``reason`` is ``"Unauthorized"``.
     """
     pdfSeparate = PdfSeparate('tests/sample-scanned-1.pdf', self.indir)
     pdfSeparate.extractPages()
     self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))

     # assertRaises fails the test when no HTTPError is raised at all; a
     # plain try/except would pass vacuously if the bad credentials were
     # accepted. All three extractor calls stay inside the block, matching
     # the scope in which HTTPError was previously tolerated.
     with self.assertRaises(HTTPError) as raised:
         abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 1,
                                          "english")
         abbyyPdf.setApplicationCredentials('nouser', 'nopassword')
         abbyyPdf.processPdfPage(1)

     self.assertEqual(raised.exception.code, 401)
     self.assertEqual(raised.exception.reason, "Unauthorized")
    def testScannedPdfPage(self):
        """Extract text from page 1 of the scanned sample via ABBYY.

        Splits ``tests/sample-scanned-1.pdf`` into ``self.indir``, processes
        page 1 with the credentials read from the ``abbyy`` section of
        ``self.configParser``, and asserts that ``1.txt`` was written to
        ``self.outdir``.

        If reading the credentials or calling the extractor raises, the test
        is reported as skipped rather than silently passing.
        """
        pdfSeparate = PdfSeparate('tests/sample-scanned-1.pdf', self.indir)
        pdfSeparate.extractPages()
        self.assertTrue(os.path.isfile(os.path.join(self.indir, "1.pdf")))

        try:
            abbyyPdf = AbbyyPdfTextExtractor(self.indir, self.outdir, 1,
                                             "english")
            abbyyPdf.setApplicationCredentials(
                self.configParser.get('abbyy', 'appid'),
                self.configParser.get('abbyy', 'password'))
            abbyyPdf.processPdfPage(1)
        except Exception as e:
            # Keep the best-effort behaviour (no failure when the extractor
            # or its configuration is unavailable) but make it visible in
            # the test report instead of passing silently.
            self.skipTest("ABBYY page processing unavailable: %s" % e)
        else:
            # Outside the try block so a failed assertion is not swallowed
            # by the broad except above (AssertionError is an Exception).
            self.assertTrue(os.path.isfile(os.path.join(self.outdir, "1.txt")))