def test_jbig2_image_export(self):
    """Check that the first image extracted from the JBIG2 sample is a .jb2 file.

    Feature test for:
    https://github.com/pdfminer/pdfminer.six/pull/46
    """
    pdf_path = absolute_sample_path('../samples/contrib/pdf-with-jbig2.pdf')
    exported = self.extract_images(pdf_path)
    first_image = exported[0]
    assert first_image.endswith('.jb2')
def run(filename, options=None):
    """Run the dumppdf entry point on a sample file, writing to a temp file.

    :param filename: sample file name, resolved with absolute_sample_path.
    :param options: optional string of extra command-line options,
        separated by whitespace; falsy values mean "no extra options".
    """
    absolute_path = absolute_sample_path(filename)
    with NamedTemporaryFile() as output_file:
        # Build the argument list directly instead of formatting one string
        # and splitting it on spaces: a temp-file or sample path containing a
        # space must stay a single argument.
        args = ['-o', output_file.name]
        if options:
            # split() without a separator also ignores repeated whitespace,
            # so no empty arguments are produced.
            args.extend(options.split())
        args.append(absolute_path)
        dumppdf.main(args)
def test_nonfree_dmca(self):
    """Check that the first image extracted from the dmca sample ends with 'bmp'.

    Regression test for:
    https://github.com/pdfminer/pdfminer.six/issues/131
    """
    pdf_path = absolute_sample_path('../samples/nonfree/dmca.pdf')
    exported = self.extract_images(pdf_path)
    first_image = exported[0]
    assert first_image.endswith('bmp')
def run(sample_path, options=None):
    """Run the pdf2txt entry point on a sample file, writing to a temp file.

    :param sample_path: sample file name, resolved with absolute_sample_path.
    :param options: optional string of extra command-line options,
        separated by whitespace; falsy values mean "no extra options".
    """
    absolute_path = absolute_sample_path(sample_path)
    with NamedTemporaryFile() as output_file:
        # Build the argument list directly instead of formatting one string
        # and splitting it on spaces: a temp-file or sample path containing a
        # space must stay a single argument. The output option keeps its
        # original glued "-o<path>" form.
        args = ['-o{}'.format(output_file.name)]
        if options:
            # split() without a separator also ignores repeated whitespace,
            # so no empty arguments are produced.
            args.extend(options.split())
        args.append(absolute_path)
        pdf2txt.main(args)
def test_font_size():
    """Compare each character's rounded size with the number its line spells.

    For every text-box line whose stripped text is all digits, each LTChar
    on that line must have a size that rounds to that number. Other lines
    are only printed.
    """
    pdf_path = absolute_sample_path('font-size-test.pdf')
    for layout in extract_pages(pdf_path):
        for element in layout:
            if not isinstance(element, LTTextBox):
                continue
            for text_line in element:
                label = text_line.get_text().strip()
                if not label.isdigit():
                    # Not a size label: just show the line for debugging.
                    print(repr(text_line.get_text()))
                    continue
                expected_size = int(label)
                for glyph in text_line:
                    if not isinstance(glyph, LTChar):
                        continue
                    actual_size = int(round(glyph.size))
                    print(glyph, actual_size, expected_size)
                    assert expected_size == actual_size
def _get_test_file_path(self):
    """Return the absolute sample path of the "simple4.pdf" test file."""
    return absolute_sample_path("simple4.pdf")
def run_with_string(sample_path, laparams=None):
    """Extract text from a sample file, passing its path to extract_text.

    :param sample_path: sample file name, resolved with absolute_sample_path.
    :param laparams: optional dict of keyword arguments for LAParams;
        None is treated as an empty dict.
    :return: whatever extract_text returns for the file.
    """
    layout_kwargs = {} if laparams is None else laparams
    pdf_path = absolute_sample_path(sample_path)
    return extract_text(pdf_path, laparams=LAParams(**layout_kwargs))
def run_with_file(sample_path):
    """Extract text from a sample file, passing an open binary file object.

    :param sample_path: sample file name, resolved with absolute_sample_path.
    :return: whatever extract_text returns for the opened file.
    """
    pdf_path = absolute_sample_path(sample_path)
    with open(pdf_path, "rb") as pdf_stream:
        return extract_text(pdf_stream)
def run(sample_path):
    """Extract text from a sample file with default settings.

    :param sample_path: sample file name, resolved with absolute_sample_path.
    :return: whatever extract_text returns for the file.
    """
    return extract_text(absolute_sample_path(sample_path))
def test_nonfree_175(self):
    """Extract images of pdf containing jpg images.

    Only checks that extraction of the 175.pdf sample runs; the result is
    not inspected.
    """
    pdf_path = absolute_sample_path('../samples/nonfree/175.pdf')
    self.extract_images(pdf_path)
def test_string_input(self):
    """Check that open_filename marks itself as closing for a string path."""
    filename = absolute_sample_path("simple1.pdf")
    opened = open_filename(filename)
    try:
        assert_equal(opened.closing, True)
    finally:
        # The wrapper is never entered as a context manager here, so nothing
        # would release the handle it holds; close it explicitly so the
        # test does not leak an open file.
        opened.file_handler.close()
def test_file_input(self):
    """Check that open_filename keeps a passed-in file object as file_handler."""
    sample = absolute_sample_path("simple1.pdf")
    with open(sample, "rb") as handle:
        wrapper = open_filename(handle)
        assert_equal(wrapper.file_handler, handle)
def test_pathlib_input(self):
    """Check that open_filename marks itself as closing for a pathlib.Path."""
    filename = pathlib.Path(absolute_sample_path("simple1.pdf"))
    opened = open_filename(filename)
    try:
        assert_equal(opened.closing, True)
    finally:
        # The wrapper is never entered as a context manager here, so nothing
        # would release the handle it holds; close it explicitly so the
        # test does not leak an open file.
        opened.file_handler.close()