def results(): results = collections.defaultdict(set) with open(expectedf, 'rb') as f: for line in f: line = convert_load(line) line = line.split('#', 1)[0].split() if not line: continue key, value = line results[key].add(value) return results
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw): with open(f, "rb") as inf: orig_imgdata = inf.read() output = img2pdf.convert(orig_imgdata, nodate=True, with_pdfrw=with_pdfrw) from io import StringIO, BytesIO from pdfrw import PdfReader, PdfName, PdfWriter from pdfrw.py23_diffs import convert_load, convert_store x = PdfReader(StringIO(convert_load(output))) self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root, PdfName.Size]) self.assertEqual(x.Size, '7') self.assertEqual(x.Info, {}) self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type]) self.assertEqual(x.Root.Type, PdfName.Catalog) self.assertEqual(sorted(x.Root.Pages.keys()), [PdfName.Count, PdfName.Kids, PdfName.Type]) self.assertEqual(x.Root.Pages.Count, '1') self.assertEqual(x.Root.Pages.Type, PdfName.Pages) self.assertEqual(len(x.Root.Pages.Kids), 1) self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()), [PdfName.Contents, PdfName.MediaBox, PdfName.Parent, PdfName.Resources, PdfName.Type]) self.assertEqual(x.Root.Pages.Kids[0].MediaBox, ['0', '0', '115', '48']) self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages) self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page) self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(), [PdfName.XObject]) self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(), [PdfName.Im0]) self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(), [PdfName.Length]) self.assertEqual(x.Root.Pages.Kids[0].Contents.Length, str(len(x.Root.Pages.Kids[0].Contents.stream))) self.assertEqual(x.Root.Pages.Kids[0].Contents.stream, "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 " "Do\nQ") imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0 # test if the filter is valid: self.assertIn( imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode], [PdfName.FlateDecode]]) # test if the colorspace is valid self.assertIn( imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB, PdfName.DeviceCMYK]) # test if the image has correct size orig_img = Image.open(f) self.assertEqual(imgprops.Width, str(orig_img.size[0])) self.assertEqual(imgprops.Height, str(orig_img.size[1])) # if the input file is a jpeg then it should've been copied # verbatim into the PDF if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]: self.assertEqual( x.Root.Pages.Kids[0].Resources.XObject.Im0.stream, convert_load(orig_imgdata)) elif imgprops.Filter == [PdfName.FlateDecode]: # otherwise, the data is flate encoded and has to be equal to # the pixel data of the input image imgdata = zlib.decompress( convert_store( x.Root.Pages.Kids[0].Resources.XObject.Im0.stream)) colorspace = imgprops.ColorSpace if colorspace == PdfName.DeviceGray: colorspace = 'L' elif colorspace == PdfName.DeviceRGB: colorspace = 'RGB' elif colorspace == PdfName.DeviceCMYK: colorspace = 'CMYK' else: raise Exception("invalid colorspace") im = Image.frombytes(colorspace, (int(imgprops.Width), int(imgprops.Height)), imgdata) if orig_img.mode == '1': orig_img = orig_img.convert("L") elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): orig_img = orig_img.convert("RGB") self.assertEqual(im.tobytes(), orig_img.tobytes()) # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have # the close() method try: im.close() except AttributeError: pass # now use pdfrw to parse and then write out both pdfs and check the # result for equality y = PdfReader(out) outx = BytesIO() outy = BytesIO() xwriter = PdfWriter() ywriter = PdfWriter() xwriter.trailer = x ywriter.trailer = y xwriter.write(outx) ywriter.write(outy) self.assertEqual(outx.getvalue(), outy.getvalue()) # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the # close() method try: orig_img.close() except AttributeError: pass
#!/usr/bin/env python from setuptools import setup from pdfrw import __version__ as version from pdfrw.py23_diffs import convert_load setup( name='pdfrw', version=version, description='PDF file reader/writer library', long_description=convert_load(open("README.rst", 'rb').read()), author='Patrick Maupin', author_email='*****@*****.**', platforms='Independent', url='https://github.com/pmaupin/pdfrw', packages=['pdfrw', 'pdfrw.objects'], license='MIT', classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Topic :: Multimedia :: Graphics :: Graphics Conversion',
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw): with open(f, "rb") as inf: orig_imgdata = inf.read() output = img2pdf.convert(orig_imgdata, nodate=True, with_pdfrw=with_pdfrw) from pdfrw import PdfReader, PdfName, PdfWriter from pdfrw.py23_diffs import convert_load, convert_store x = PdfReader(PdfReaderIO(convert_load(output))) self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root, PdfName.Size]) self.assertIn(x.Root.Pages.Count, ('1', '2')) if len(x.Root.Pages.Kids) == '1': self.assertEqual(x.Size, '7') self.assertEqual(len(x.Root.Pages.Kids), 1) elif len(x.Root.Pages.Kids) == '2': self.assertEqual(x.Size, '10') self.assertEqual(len(x.Root.Pages.Kids), 2) self.assertEqual(x.Info, {}) self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type]) self.assertEqual(x.Root.Type, PdfName.Catalog) self.assertEqual(sorted(x.Root.Pages.keys()), [PdfName.Count, PdfName.Kids, PdfName.Type]) self.assertEqual(x.Root.Pages.Type, PdfName.Pages) orig_img = Image.open(f) for pagenum in range(len(x.Root.Pages.Kids)): # retrieve the original image frame that this page was # generated from orig_img.seek(pagenum) cur_page = x.Root.Pages.Kids[pagenum] ndpi = orig_img.info.get("dpi", (96.0, 96.0)) # In python3, the returned dpi value for some tiff images will # not be an integer but a float. To make the behaviour of # img2pdf the same between python2 and python3, we convert that # float into an integer by rounding. # Search online for the 72.009 dpi problem for more info. ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) imgwidthpx, imgheightpx = orig_img.size pagewidth = 72.0*imgwidthpx/ndpi[0] pageheight = 72.0*imgheightpx/ndpi[1] def format_float(f): if int(f) == f: return str(int(f)) else: return ("%.4f" % f).rstrip("0") self.assertEqual(sorted(cur_page.keys()), [PdfName.Contents, PdfName.MediaBox, PdfName.Parent, PdfName.Resources, PdfName.Type]) self.assertEqual(cur_page.MediaBox, ['0', '0', format_float(pagewidth), format_float(pageheight)]) self.assertEqual(cur_page.Parent, x.Root.Pages) self.assertEqual(cur_page.Type, PdfName.Page) self.assertEqual(cur_page.Resources.keys(), [PdfName.XObject]) self.assertEqual(cur_page.Resources.XObject.keys(), [PdfName.Im0]) self.assertEqual(cur_page.Contents.keys(), [PdfName.Length]) self.assertEqual(cur_page.Contents.Length, str(len(cur_page.Contents.stream))) self.assertEqual(cur_page.Contents.stream, "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n" "/Im0 Do\nQ" % (pagewidth, pageheight)) imgprops = cur_page.Resources.XObject.Im0 # test if the filter is valid: self.assertIn( imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode, PdfName.FlateDecode, [PdfName.CCITTFaxDecode]]) # test if the image has correct size self.assertEqual(imgprops.Width, str(orig_img.size[0])) self.assertEqual(imgprops.Height, str(orig_img.size[1])) # if the input file is a jpeg then it should've been copied # verbatim into the PDF if imgprops.Filter in [PdfName.DCTDecode, PdfName.JPXDecode]: self.assertEqual( cur_page.Resources.XObject.Im0.stream, convert_load(orig_imgdata)) elif imgprops.Filter == [PdfName.CCITTFaxDecode]: tiff_header = tiff_header_for_ccitt( int(imgprops.Width), int(imgprops.Height), int(imgprops.Length), 4) imgio = BytesIO() imgio.write(tiff_header) imgio.write(convert_store( cur_page.Resources.XObject.Im0.stream)) imgio.seek(0) im = Image.open(imgio) self.assertEqual(im.tobytes(), orig_img.tobytes()) try: im.close() except AttributeError: pass elif imgprops.Filter == PdfName.FlateDecode: # otherwise, the data is flate encoded and has to be equal # to the pixel data of the input image imgdata = zlib.decompress( convert_store(cur_page.Resources.XObject.Im0.stream)) if imgprops.DecodeParms: if orig_img.format == 'PNG': pngidat, palette = img2pdf.parse_png(orig_imgdata) elif orig_img.format == 'TIFF' \ and orig_img.info['compression'] == "group4": offset, length = \ img2pdf.ccitt_payload_location_from_pil( orig_img) pngidat = orig_imgdata[offset:offset+length] else: pngbuffer = BytesIO() orig_img.save(pngbuffer, format="png") pngidat, palette = img2pdf.parse_png( pngbuffer.getvalue()) self.assertEqual(zlib.decompress(pngidat), imgdata) else: colorspace = imgprops.ColorSpace if colorspace == PdfName.DeviceGray: colorspace = 'L' elif colorspace == PdfName.DeviceRGB: colorspace = 'RGB' elif colorspace == PdfName.DeviceCMYK: colorspace = 'CMYK' else: raise Exception("invalid colorspace") im = Image.frombytes(colorspace, (int(imgprops.Width), int(imgprops.Height)), imgdata) if orig_img.mode == '1': self.assertEqual(im.tobytes(), orig_img.convert("L").tobytes()) elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): self.assertEqual(im.tobytes(), orig_img.convert("RGB").tobytes()) # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does # not have the close() method try: im.close() except AttributeError: pass # now use pdfrw to parse and then write out both pdfs and check the # result for equality y = PdfReader(out) outx = BytesIO() outy = BytesIO() xwriter = PdfWriter() ywriter = PdfWriter() xwriter.trailer = x ywriter.trailer = y xwriter.write(outx) ywriter.write(outy) self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()), True) # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the # close() method try: orig_img.close() except AttributeError: pass
#!/usr/bin/env python from setuptools import setup from pdfrw import __version__ as version from pdfrw.py23_diffs import convert_load setup( name='pdfrw', version=version, description='PDF file reader/writer library', long_description=convert_load(open("README.rst", 'rb').read()), author='Patrick Maupin', author_email='*****@*****.**', platforms='Independent', url='https://github.com/pmaupin/pdfrw', packages=['pdfrw', 'pdfrw.objects'], license='MIT', classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Topic :: Multimedia :: Graphics :: Graphics Conversion',
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw): with open(f, "rb") as inf: orig_imgdata = inf.read() output = img2pdf.convert(orig_imgdata, nodate=True, with_pdfrw=with_pdfrw) from pdfrw import PdfReader, PdfName, PdfWriter from pdfrw.py23_diffs import convert_load, convert_store x = PdfReader(PdfReaderIO(convert_load(output))) self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root, PdfName.Size]) self.assertIn(x.Root.Pages.Count, ('1', '2')) if len(x.Root.Pages.Kids) == '1': self.assertEqual(x.Size, '7') self.assertEqual(len(x.Root.Pages.Kids), 1) elif len(x.Root.Pages.Kids) == '2': self.assertEqual(x.Size, '10') self.assertEqual(len(x.Root.Pages.Kids), 2) self.assertEqual(x.Info, {}) self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type]) self.assertEqual(x.Root.Type, PdfName.Catalog) self.assertEqual(sorted(x.Root.Pages.keys()), [PdfName.Count, PdfName.Kids, PdfName.Type]) self.assertEqual(x.Root.Pages.Type, PdfName.Pages) orig_img = Image.open(f) for pagenum in range(len(x.Root.Pages.Kids)): # retrieve the original image frame that this page was # generated from orig_img.seek(pagenum) cur_page = x.Root.Pages.Kids[pagenum] ndpi = orig_img.info.get("dpi", (96.0, 96.0)) # In python3, the returned dpi value for some tiff images will # not be an integer but a float. To make the behaviour of # img2pdf the same between python2 and python3, we convert that # float into an integer by rounding. # Search online for the 72.009 dpi problem for more info. ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) imgwidthpx, imgheightpx = orig_img.size pagewidth = 72.0*imgwidthpx/ndpi[0] pageheight = 72.0*imgheightpx/ndpi[1] def format_float(f): if int(f) == f: return str(int(f)) else: return ("%.4f" % f).rstrip("0") self.assertEqual(sorted(cur_page.keys()), [PdfName.Contents, PdfName.MediaBox, PdfName.Parent, PdfName.Resources, PdfName.Type]) self.assertEqual(cur_page.MediaBox, ['0', '0', format_float(pagewidth), format_float(pageheight)]) self.assertEqual(cur_page.Parent, x.Root.Pages) self.assertEqual(cur_page.Type, PdfName.Page) self.assertEqual(cur_page.Resources.keys(), [PdfName.XObject]) self.assertEqual(cur_page.Resources.XObject.keys(), [PdfName.Im0]) self.assertEqual(cur_page.Contents.keys(), [PdfName.Length]) self.assertEqual(cur_page.Contents.Length, str(len(cur_page.Contents.stream))) self.assertEqual(cur_page.Contents.stream, "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n" "/Im0 Do\nQ" % (pagewidth, pageheight)) imgprops = cur_page.Resources.XObject.Im0 # test if the filter is valid: self.assertIn( imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode], [PdfName.FlateDecode], [PdfName.CCITTFaxDecode]]) # test if the colorspace is valid self.assertIn( imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB, PdfName.DeviceCMYK]) # test if the image has correct size self.assertEqual(imgprops.Width, str(orig_img.size[0])) self.assertEqual(imgprops.Height, str(orig_img.size[1])) # if the input file is a jpeg then it should've been copied # verbatim into the PDF if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]: self.assertEqual( cur_page.Resources.XObject.Im0.stream, convert_load(orig_imgdata)) elif imgprops.Filter == [PdfName.CCITTFaxDecode]: tiff_header = tiff_header_for_ccitt( int(imgprops.Width), int(imgprops.Height), int(imgprops.Length), 4) imgio = BytesIO() imgio.write(tiff_header) imgio.write(convert_store( cur_page.Resources.XObject.Im0.stream)) imgio.seek(0) im = Image.open(imgio) self.assertEqual(im.tobytes(), orig_img.tobytes()) try: im.close() except AttributeError: pass elif imgprops.Filter == [PdfName.FlateDecode]: # otherwise, the data is flate encoded and has to be equal # to the pixel data of the input image imgdata = zlib.decompress( convert_store(cur_page.Resources.XObject.Im0.stream)) colorspace = imgprops.ColorSpace if colorspace == PdfName.DeviceGray: colorspace = 'L' elif colorspace == PdfName.DeviceRGB: colorspace = 'RGB' elif colorspace == PdfName.DeviceCMYK: colorspace = 'CMYK' else: raise Exception("invalid colorspace") im = Image.frombytes(colorspace, (int(imgprops.Width), int(imgprops.Height)), imgdata) if orig_img.mode == '1': self.assertEqual(im.tobytes(), orig_img.convert("L").tobytes()) elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): self.assertEqual(im.tobytes(), orig_img.convert("RGB").tobytes()) # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not # have the close() method try: im.close() except AttributeError: pass # now use pdfrw to parse and then write out both pdfs and check the # result for equality y = PdfReader(out) outx = BytesIO() outy = BytesIO() xwriter = PdfWriter() ywriter = PdfWriter() xwriter.trailer = x ywriter.trailer = y xwriter.write(outx) ywriter.write(outy) self.assertEqual(outx.getvalue(), outy.getvalue()) # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the # close() method try: orig_img.close() except AttributeError: pass
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw): with open(f, "rb") as inf: orig_imgdata = inf.read() output = img2pdf.convert(orig_imgdata, nodate=True, with_pdfrw=with_pdfrw) from io import StringIO, BytesIO from pdfrw import PdfReader, PdfName, PdfWriter from pdfrw.py23_diffs import convert_load, convert_store x = PdfReader(StringIO(convert_load(output))) self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root, PdfName.Size]) self.assertEqual(x.Size, '7') self.assertEqual(x.Info, {}) self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type]) self.assertEqual(x.Root.Type, PdfName.Catalog) self.assertEqual(sorted(x.Root.Pages.keys()), [PdfName.Count, PdfName.Kids, PdfName.Type]) self.assertEqual(x.Root.Pages.Count, '1') self.assertEqual(x.Root.Pages.Type, PdfName.Pages) self.assertEqual(len(x.Root.Pages.Kids), 1) self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()), [PdfName.Contents, PdfName.MediaBox, PdfName.Parent, PdfName.Resources, PdfName.Type]) self.assertEqual(x.Root.Pages.Kids[0].MediaBox, ['0', '0', '115', '48']) self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages) self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page) self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(), [PdfName.XObject]) self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(), [PdfName.Im0]) self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(), [PdfName.Length]) self.assertEqual(x.Root.Pages.Kids[0].Contents.Length, str(len(x.Root.Pages.Kids[0].Contents.stream))) self.assertEqual(x.Root.Pages.Kids[0].Contents.stream, "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 " "Do\nQ") imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0 # test if the filter is valid: self.assertIn( imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode], [PdfName.FlateDecode], [PdfName.CCITTFaxDecode]]) # test if the colorspace is valid self.assertIn( imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB, PdfName.DeviceCMYK]) # test if the image has correct size orig_img = Image.open(f) self.assertEqual(imgprops.Width, str(orig_img.size[0])) self.assertEqual(imgprops.Height, str(orig_img.size[1])) # if the input file is a jpeg then it should've been copied # verbatim into the PDF if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]: self.assertEqual( x.Root.Pages.Kids[0].Resources.XObject.Im0.stream, convert_load(orig_imgdata)) elif imgprops.Filter == [PdfName.CCITTFaxDecode]: tiff_header = tiff_header_for_ccitt( int(imgprops.Width), int(imgprops.Height), int(imgprops.Length), 4) imgio = BytesIO() imgio.write(tiff_header) imgio.write(convert_store( x.Root.Pages.Kids[0].Resources.XObject.Im0.stream)) imgio.seek(0) im = Image.open(imgio) self.assertEqual(im.tobytes(), orig_img.tobytes()) try: im.close() except AttributeError: pass elif imgprops.Filter == [PdfName.FlateDecode]: # otherwise, the data is flate encoded and has to be equal to # the pixel data of the input image imgdata = zlib.decompress( convert_store( x.Root.Pages.Kids[0].Resources.XObject.Im0.stream)) colorspace = imgprops.ColorSpace if colorspace == PdfName.DeviceGray: colorspace = 'L' elif colorspace == PdfName.DeviceRGB: colorspace = 'RGB' elif colorspace == PdfName.DeviceCMYK: colorspace = 'CMYK' else: raise Exception("invalid colorspace") im = Image.frombytes(colorspace, (int(imgprops.Width), int(imgprops.Height)), imgdata) if orig_img.mode == '1': orig_img = orig_img.convert("L") elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): orig_img = orig_img.convert("RGB") self.assertEqual(im.tobytes(), orig_img.tobytes()) # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have # the close() method try: im.close() except AttributeError: pass # now use pdfrw to parse and then write out both pdfs and check the # result for equality y = PdfReader(out) outx = BytesIO() outy = BytesIO() xwriter = PdfWriter() ywriter = PdfWriter() xwriter.trailer = x ywriter.trailer = y xwriter.write(outx) ywriter.write(outy) self.assertEqual(outx.getvalue(), outy.getvalue()) # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the # close() method try: orig_img.close() except AttributeError: pass