def testPdfParserPhysical(self):
     """Check the label of the first physical-structure entry.

     A PdfParser is created for ``pdf_file_name`` (addressed through a
     ``file://`` URL) and the first entry returned by
     ``get_physical_structure`` must be labelled with that file name.
     """
     source_url = "file://%s" % pdf_file_name
     parser = PdfParser(pdf_file_name, source_url, pdf_file_name)
     structure = parser.get_physical_structure()
     label = structure[0]['label']
     # Build the failure message up front so the assertion stays on one line.
     failure_msg = "Physical Structure missmatch: %s != %s" % (
         label, pdf_file_name)
     self.assertEqual(label, pdf_file_name, failure_msg)
Example #2
0
 def testPdfParserLogical(self):
     """Check the label of the first logical-structure entry.

     The first entry returned by ``get_logical_structure`` for
     ``pdf_file_name`` is expected to be labelled "Introduction".
     """
     expected_label = "Introduction"
     source_url = "file://%s" % pdf_file_name
     parser = PdfParser(pdf_file_name, source_url, pdf_file_name)
     toc = parser.get_logical_structure()
     actual_label = toc[0]["label"]
     failure_msg = "TOC is not well detected: %s != %s" % (
         actual_label, expected_label)
     self.assertEqual(actual_label, expected_label, failure_msg)
 def testPdfParserLogical(self):
     """Verify that the logical structure starts with "Introduction".

     Parses ``pdf_file_name`` with PdfParser and compares the ``label`` of
     the first item of ``get_logical_structure`` with the expected heading.
     """
     wanted = 'Introduction'
     parser = PdfParser(
         pdf_file_name,
         "file://%s" % pdf_file_name,
         pdf_file_name,
     )
     entries = parser.get_logical_structure()
     found = entries[0]['label']
     self.assertEqual(
         found,
         wanted,
         "TOC is not well detected: %s != %s" % (found, wanted),
     )
 def testPdfParserMeta(self):
     """Check the ``title`` entry of the parsed metadata.

     The metadata returned by ``get_metadata`` for ``pdf_file_name`` must
     carry the title "Multivio: Project description".
     """
     expected_title = u'Multivio: Project description'
     source_url = "file://%s" % pdf_file_name
     parser = PdfParser(pdf_file_name, source_url, pdf_file_name)
     metadata = parser.get_metadata()
     actual_title = metadata['title']
     failure_msg = "Metadata has not been correctly detected %s != %s" % (
         actual_title, expected_title)
     self.assertEqual(actual_title, expected_title, failure_msg)
Example #5
0
 def testPdfParserMetaFileSize(self):
     """Check the ``fileSize`` entry of the parsed metadata.

     The value reported by ``get_metadata`` must equal the size that
     ``os.path.getsize`` returns for ``pdf_file_name``.
     """
     source_url = "file://%s" % pdf_file_name
     parser = PdfParser(pdf_file_name, source_url, pdf_file_name)
     metadata = parser.get_metadata()
     reported = metadata["fileSize"]
     # Size on disk is the reference value for the comparison.
     on_disk = os.path.getsize(pdf_file_name)
     failure_msg = "File size has not been correctly detected %s != %s" % (
         reported, on_disk)
     self.assertEqual(reported, on_disk, failure_msg)
 def testPdfParserMetaFileSize(self):
     """Verify that the metadata file size matches the file on disk.

     Reads ``fileSize`` from ``get_metadata`` and compares it with
     ``os.path.getsize(pdf_file_name)``.
     """
     parser = PdfParser(
         pdf_file_name,
         "file://%s" % pdf_file_name,
         pdf_file_name,
     )
     parsed_size = parser.get_metadata()['fileSize']
     expected_size = os.path.getsize(pdf_file_name)
     self.assertEqual(
         parsed_size,
         expected_size,
         "File size has not been correctly detected %s != %s"
         % (parsed_size, expected_size),
     )
Example #7
0
 def testPdfParserPhysical(self):
     """Verify that the physical structure starts with the file itself.

     The ``label`` of the first item returned by ``get_physical_structure``
     is compared with ``pdf_file_name``.
     """
     parser = PdfParser(
         pdf_file_name, "file://%s" % pdf_file_name, pdf_file_name)
     first_label = parser.get_physical_structure()[0]["label"]
     details = (first_label, pdf_file_name)
     self.assertEqual(
         first_label,
         pdf_file_name,
         "Physical Structure missmatch: %s != %s" % details,
     )
Example #8
0
 def testPdfParserMeta(self):
     """Verify the title stored in the parsed metadata.

     Reads ``title`` from ``get_metadata`` for ``pdf_file_name`` and
     compares it with the expected document title.
     """
     wanted = u"Multivio: Project description"
     parser = PdfParser(
         pdf_file_name, "file://%s" % pdf_file_name, pdf_file_name)
     found = parser.get_metadata()["title"]
     self.assertEqual(
         found,
         wanted,
         "Metadata has not been correctly detected %s != %s"
         % (found, wanted),
     )
    def testPdfParserLogicalSegfault(self):
        """Get Pdf logical structure for a malformed pdf:
            http://www.tendancesit.com/pdf/19.pdf.

        The sample is read from ``examples/toc_segfault.pdf``. When that file
        cannot be opened, an Exception telling the user to run
        ``get_examples.sh`` is raised. Otherwise the label of the first
        logical-structure entry must equal the expected document name.
        """
        file_name = "examples/toc_segfault.pdf"
        # Probe that the sample is readable. ``open`` inside ``with`` replaces
        # the Python 2-only ``file`` builtin (a NameError on Python 3, which
        # ``except IOError`` would not catch) and closes the handle at once.
        try:
            with open(file_name):
                pass
        except IOError:
            raise Exception(
                "Please go the examples directory and run: get_examples.sh")

        pdf_parser = PdfParser(file_name, "file://%s" % file_name, file_name)
        logic = pdf_parser.get_logical_structure()
        first_section = logic[0]['label']
        first_section_ref = 'Tendances IT 19 WEB.pdf'
        self.assertEqual(
            first_section,
            first_section_ref,
            "TOC is not well detected: %s != %s"
            % (first_section, first_section_ref))
Example #10
0
    def testPdfParserLogicalSegfault(self):
        """Get Pdf logical structure for a malformed pdf:
            http://www.tendancesit.com/pdf/19.pdf.

        Requires ``examples/toc_segfault.pdf`` to be present; if it cannot be
        opened, an Exception directing the user to ``get_examples.sh`` is
        raised. The first logical-structure label is then compared with the
        expected document name.
        """
        file_name = "examples/toc_segfault.pdf"
        # Use ``open`` as a context manager rather than the Python 2-only
        # ``file`` builtin: it works on Python 2 and 3 and guarantees the
        # handle used for this readability probe is closed immediately.
        try:
            with open(file_name):
                pass
        except IOError:
            raise Exception("Please go the examples directory and run: get_examples.sh")

        pdf_parser = PdfParser(file_name, "file://%s" % file_name, file_name)
        logic = pdf_parser.get_logical_structure()
        first_section = logic[0]["label"]
        first_section_ref = "Tendances IT 19 WEB.pdf"
        self.assertEqual(
            first_section,
            first_section_ref,
            "TOC is not well " "detected: %s != %s" % (first_section, first_section_ref),
        )
 def testPdfParser(self):
     """Check PdfParser instance.

     Creates a PdfParser for ``pdf_file_name`` and asserts that the
     resulting object is truthy.
     """
     pdf_parser = PdfParser(pdf_file_name, "file://%s" % pdf_file_name,
                            pdf_file_name)
     # ``assertTrue`` replaces the deprecated ``assert_`` alias, which was
     # removed from unittest in Python 3.12; the check itself is unchanged.
     self.assertTrue(pdf_parser, "Can not create simple Parser Object")