def test_jbig2_image_export(self):
        """Extract images from the jbig2 sample pdf and check the first one

        The first path returned by ``extract_images`` must end with '.jb2'.

        Feature test for: https://github.com/pdfminer/pdfminer.six/pull/46
        """
        pdf_path = absolute_sample_path(
            '../samples/contrib/pdf-with-jbig2.pdf')
        exported = self.extract_images(pdf_path)
        first_image = exported[0]
        assert first_image.endswith('.jb2')
def run(filename, options=None):
    """Run ``dumppdf.main`` on a sample file, writing to a temporary file.

    The argument list is built directly rather than formatted into a single
    command string and split on spaces, so a temporary-file or sample path
    that contains a space is still passed as one argument.

    :param filename: sample name, resolved through ``absolute_sample_path``
    :param options: optional string of whitespace-separated extra
        command-line options; they are placed between the output flag and
        the input path
    """
    absolute_path = absolute_sample_path(filename)
    with NamedTemporaryFile() as output_file:
        args = ['-o', output_file.name]
        if options:
            # split() without a separator also ignores repeated spaces, so
            # no empty-string arguments are produced.
            args.extend(options.split())
        args.append(absolute_path)
        dumppdf.main(args)
    def test_nonfree_dmca(self):
        """Extract images from the dmca sample pdf (contains bmp images)

        The first path returned by ``extract_images`` must end with 'bmp'.

        Regression test for:
        https://github.com/pdfminer/pdfminer.six/issues/131
        """
        pdf_path = absolute_sample_path('../samples/nonfree/dmca.pdf')
        exported = self.extract_images(pdf_path)
        first_image = exported[0]
        assert first_image.endswith('bmp')
def run(sample_path, options=None):
    """Run ``pdf2txt.main`` on a sample file, writing to a temporary file.

    The argument list is built directly rather than formatted into a single
    command string and split on spaces, so a temporary-file or sample path
    that contains a space is still passed as one argument.

    :param sample_path: sample name, resolved through
        ``absolute_sample_path``
    :param options: optional string of whitespace-separated extra
        command-line options; they are placed between the output flag and
        the input path
    """
    absolute_path = absolute_sample_path(sample_path)
    with NamedTemporaryFile() as output_file:
        # The output path stays glued to the flag ("-o<path>"), exactly as
        # the previous string-based command produced it.
        args = ['-o{}'.format(output_file.name)]
        if options:
            # split() without a separator also ignores repeated spaces, so
            # no empty-string arguments are produced.
            args.extend(options.split())
        args.append(absolute_path)
        pdf2txt.main(args)
def test_font_size():
    """Compare each character's rounded size with the number its line spells

    Lines of 'font-size-test.pdf' whose stripped text is all digits give the
    expected size for every LTChar on that line; other lines are only
    printed.
    """
    path = absolute_sample_path('font-size-test.pdf')
    for page in extract_pages(path):
        for text_box in page:
            if not isinstance(text_box, LTTextBox):
                continue

            for line in text_box:
                label = line.get_text().strip()
                if not label.isdigit():
                    # Not a size label: show the raw text and move on.
                    print(repr(line.get_text()))
                    continue

                expected_size = int(label)
                for char in line:
                    if not isinstance(char, LTChar):
                        continue
                    actual_size = int(round(char.size))
                    print(char, actual_size, expected_size)
                    assert expected_size == actual_size
 def _get_test_file_path(self):
     """Return the "simple4.pdf" sample path from absolute_sample_path."""
     return absolute_sample_path("simple4.pdf")
def run_with_string(sample_path, laparams=None):
    """Extract text from a sample, handing its path to ``extract_text``.

    ``laparams`` is an optional dict of keyword arguments for ``LAParams``;
    ``None`` means no keyword arguments are passed.
    """
    layout_kwargs = {} if laparams is None else laparams
    absolute_path = absolute_sample_path(sample_path)
    return extract_text(absolute_path, laparams=LAParams(**layout_kwargs))
def run_with_file(sample_path):
    """Extract text from a sample, handing ``extract_text`` an open file.

    The sample is opened in binary mode and closed again before returning.
    """
    with open(absolute_sample_path(sample_path), "rb") as pdf_stream:
        return extract_text(pdf_stream)
def run(sample_path):
    """Return the result of ``extract_text`` for the given sample."""
    return extract_text(absolute_sample_path(sample_path))
 def test_nonfree_175(self):
     """Run image extraction on the 175.pdf sample (pdf with jpg images)

     Nothing is asserted about the result; the call only has to complete.
     """
     pdf_path = absolute_sample_path('../samples/nonfree/175.pdf')
     self.extract_images(pdf_path)
Exemple #11
0
 def test_string_input(self):
     """open_filename given a path string must report closing as True."""
     sample = absolute_sample_path("simple1.pdf")
     # NOTE(review): nothing here releases whatever open_filename acquired
     # -- confirm whether explicit cleanup is needed.
     wrapper = open_filename(sample)
     assert_equal(wrapper.closing, True)
Exemple #12
0
 def test_file_input(self):
     """open_filename given an open file must expose it as file_handler."""
     sample = absolute_sample_path("simple1.pdf")
     with open(sample, "rb") as pdf_stream:
         wrapper = open_filename(pdf_stream)
         assert_equal(wrapper.file_handler, pdf_stream)
Exemple #13
0
 def test_pathlib_input(self):
     """open_filename given a pathlib.Path must report closing as True."""
     sample = pathlib.Path(absolute_sample_path("simple1.pdf"))
     # NOTE(review): nothing here releases whatever open_filename acquired
     # -- confirm whether explicit cleanup is needed.
     wrapper = open_filename(sample)
     assert_equal(wrapper.closing, True)