Python convert_store Examples

Programming Language: Python

Namespace/Package Name: pdfrw.py23_diffs

Method/Function: convert_store

Examples at hotexamples.com: 8

Python convert_store - 8 examples found. These are the top rated real world Python examples of pdfrw.py23_diffs.convert_store extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: test_roundtrip.py Project: pmaupin/pdfrw

    def roundtrip(self, testname, basename, srcf, decompress=False,
                  compress=False, repaginate=False):
        dstd = os.path.join(expected.result_dir, testname)
        if not os.path.exists(dstd):
            os.makedirs(dstd)
        dstf = os.path.join(dstd, basename)
        hashfile = os.path.join(expected.result_dir, 'hashes.txt')
        hashkey = '%s/%s' % (testname, basename)
        hash = '------no-file-generated---------'
        expects = expected.results[hashkey]

        # If the test has been deliberately skipped,
        # we are done.  Otherwise, execute it even
        # if we don't know about it yet, so we have
        # results to compare.

        result = 'fail'
        size = 0
        try:
            if 'skip' in expects:
                result = 'skip requested'
                return self.skipTest(result)
            elif 'xfail' in expects:
                result = 'xfail requested'
                return self.fail(result)

            exists = os.path.exists(dstf)
            if expects or not exists:
                if exists:
                    os.remove(dstf)
                trailer = pdfrw.PdfReader(srcf, decompress=decompress,
                                          verbose=False)
                writer = pdfrw.PdfWriter(dstf, compress=compress)
                if repaginate:
                    writer.addpages(trailer.pages)
                else:
                    writer.trailer = trailer
                writer.write()
            with open(dstf, 'rb') as f:
                data = f.read()
            size = len(data)
            if data:
                hash = hashlib.md5(data).hexdigest()
            else:
                os.remove(dstf)
            if expects:
                if len(expects) == 1:
                    expects, = expects
                    self.assertEqual(hash, expects)
                else:
                    self.assertIn(hash, expects)
                result = 'pass'
            else:
                result = 'skip'
                self.skipTest('No hash available')
        finally:
            result = '%8d %-20s %s %s\n' % (size, result, hashkey, hash)
            with open(hashfile, 'ab') as f:
                f.write(convert_store(result))

Example #2

Show file

File: __init__.py Project: xErik/img2pdf

        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from io import StringIO, BytesIO
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(StringIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertEqual(x.Size, '7')
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Count, '1')
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            self.assertEqual(len(x.Root.Pages.Kids), 1)
            self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()),
                             [PdfName.Contents, PdfName.MediaBox,
                              PdfName.Parent, PdfName.Resources, PdfName.Type])
            self.assertEqual(x.Root.Pages.Kids[0].MediaBox,
                             ['0', '0', '115', '48'])
            self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages)
            self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page)
            self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(),
                             [PdfName.XObject])
            self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(),
                             [PdfName.Im0])
            self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(),
                             [PdfName.Length])
            self.assertEqual(x.Root.Pages.Kids[0].Contents.Length,
                             str(len(x.Root.Pages.Kids[0].Contents.stream)))
            self.assertEqual(x.Root.Pages.Kids[0].Contents.stream,
                             "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                             "Do\nQ")

            imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0

            # test if the filter is valid:
            self.assertIn(
                imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                                  [PdfName.FlateDecode]])
            # test if the colorspace is valid
            self.assertIn(
                imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                                      PdfName.DeviceCMYK])
            # test if the image has correct size
            orig_img = Image.open(f)
            self.assertEqual(imgprops.Width, str(orig_img.size[0]))
            self.assertEqual(imgprops.Height, str(orig_img.size[1]))
            # if the input file is a jpeg then it should've been copied
            # verbatim into the PDF
            if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]:
                self.assertEqual(
                    x.Root.Pages.Kids[0].Resources.XObject.Im0.stream,
                    convert_load(orig_imgdata))
            elif imgprops.Filter == [PdfName.FlateDecode]:
                # otherwise, the data is flate encoded and has to be equal to
                # the pixel data of the input image
                imgdata = zlib.decompress(
                    convert_store(
                        x.Root.Pages.Kids[0].Resources.XObject.Im0.stream))
                colorspace = imgprops.ColorSpace
                if colorspace == PdfName.DeviceGray:
                    colorspace = 'L'
                elif colorspace == PdfName.DeviceRGB:
                    colorspace = 'RGB'
                elif colorspace == PdfName.DeviceCMYK:
                    colorspace = 'CMYK'
                else:
                    raise Exception("invalid colorspace")
                im = Image.frombytes(colorspace, (int(imgprops.Width),
                                                  int(imgprops.Height)),
                                     imgdata)
                if orig_img.mode == '1':
                    orig_img = orig_img.convert("L")
                elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                    orig_img = orig_img.convert("RGB")
                self.assertEqual(im.tobytes(), orig_img.tobytes())
                # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
                # the close() method
                try:
                    im.close()
                except AttributeError:
                    pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(outx.getvalue(), outy.getvalue())
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass

Example #3

Show file

File: test_roundtrip.py Project: ytaler/pdfrw

    def roundtrip(self, testname, basename, srcf, decompress=False,
                  compress=False, repaginate=False):
        dstd = os.path.join(expected.result_dir, testname)
        if not os.path.exists(dstd):
            os.makedirs(dstd)
        dstf = os.path.join(dstd, basename)
        hashfile = os.path.join(expected.result_dir, 'hashes.txt')
        hashkey = '%s/%s' % (testname, basename)
        hash = '------no-file-generated---------'
        expects = expected.results[hashkey]

        # If the test has been deliberately skipped,
        # we are done.  Otherwise, execute it even
        # if we don't know about it yet, so we have
        # results to compare.

        result = 'fail'
        size = 0
        try:
            if 'skip' in expects:
                result = 'skip requested'
                return self.skipTest(result)
            elif 'xfail' in expects:
                result = 'xfail requested'
                return self.fail(result)

            exists = os.path.exists(dstf)
            if expects or not exists:
                if exists:
                    os.remove(dstf)
                trailer = pdfrw.PdfReader(srcf, decompress=decompress,
                                          verbose=False)
                if trailer.Encrypt:
                    result = 'skip -- encrypt'
                    hash = '------skip-encrypt-no-file------'
                    return self.skipTest('File encrypted')
                writer = pdfrw.PdfWriter(dstf, compress=compress)
                if repaginate:
                    writer.addpages(trailer.pages)
                else:
                    writer.trailer = trailer
                writer.write()
            with open(dstf, 'rb') as f:
                data = f.read()
            size = len(data)
            if data:
                hash = hashlib.md5(data).hexdigest()
            else:
                os.remove(dstf)
            if expects:
                if len(expects) == 1:
                    expects, = expects
                    self.assertEqual(hash, expects)
                else:
                    self.assertIn(hash, expects)
                result = 'pass'
            else:
                result = 'skip'
                self.skipTest('No hash available')
        finally:
            result = '%8d %-20s %s %s\n' % (size, result, hashkey, hash)
            with open(hashfile, 'ab') as f:
                f.write(convert_store(result))

Example #4

Show file

 def decode_bytes(self, decode_this, expected):
     """ Decode to bytes"""
     self.assertEqual(PdfString(decode_this).to_bytes(),
                      convert_store(expected))

Example #5

Show file

    def do_test(self, params, prev_results=[''], scrub=False):
        params = params.split()
        hashkey = 'examples/%s' % '_'.join(params)
        params = [lookup.get(x, x) for x in params]
        progname = params[0]
        params[0] = prog_dir % progname
        srcf = params[1]
        params.insert(0, sys.executable)
        subdir, progname = os.path.split(progname)
        subdir = os.path.join(dstdir, subdir)
        if not os.path.exists(subdir):
            os.makedirs(subdir)
        os.chdir(subdir)
        dstf = '%s.%s' % (progname, os.path.basename(srcf))
        scrub = scrub and dstf
        dstf = dstf if not scrub else 'final.%s' % dstf
        hash = '------no-file-generated---------'
        expects = expected.results[hashkey]

        # If the test has been deliberately skipped,
        # we are done.  Otherwise, execute it even
        # if we don't know about it yet, so we have
        # results to compare.

        result = 'fail'
        size = 0
        try:
            if 'skip' in expects:
                result = 'skip requested'
                return self.skipTest(result)
            elif 'xfail' in expects:
                result = 'xfail requested'
                return self.fail(result)

            exists = os.path.exists(dstf)
            if expects or not exists:
                if exists:
                    os.remove(dstf)
                if scrub and os.path.exists(scrub):
                    os.remove(scrub)
                subprocess.call(params)
                if scrub:
                    PdfWriter(dstf).addpages(PdfReader(scrub).pages).write()
            with open(dstf, 'rb') as f:
                data = f.read()
            size = len(data)
            if data:
                hash = hashlib.md5(data).hexdigest()
                lookup[hash] = dstf
                prev_results[0] = hash
            else:
                os.remove(dstf)
            if expects:
                if len(expects) == 1:
                    expects, = expects
                    self.assertEqual(hash, expects)
                else:
                    self.assertIn(hash, expects)
                result = 'pass'
            else:
                result = 'skip'
                self.skipTest('No hash available')
        finally:
            result = '%8d %-20s %s %s\n' % (size, result, hashkey, hash)
            with open(hashfile, 'ab') as f:
                f.write(convert_store(result))

Example #6

Show file

File: __init__.py Project: Shoeboxed/img2pdf

        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(PdfReaderIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertIn(x.Root.Pages.Count, ('1', '2'))
            if len(x.Root.Pages.Kids) == '1':
                self.assertEqual(x.Size, '7')
                self.assertEqual(len(x.Root.Pages.Kids), 1)
            elif len(x.Root.Pages.Kids) == '2':
                self.assertEqual(x.Size, '10')
                self.assertEqual(len(x.Root.Pages.Kids), 2)
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            orig_img = Image.open(f)
            for pagenum in range(len(x.Root.Pages.Kids)):
                # retrieve the original image frame that this page was
                # generated from
                orig_img.seek(pagenum)
                cur_page = x.Root.Pages.Kids[pagenum]

                ndpi = orig_img.info.get("dpi", (96.0, 96.0))
                # In python3, the returned dpi value for some tiff images will
                # not be an integer but a float. To make the behaviour of
                # img2pdf the same between python2 and python3, we convert that
                # float into an integer by rounding.
                # Search online for the 72.009 dpi problem for more info.
                ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
                imgwidthpx, imgheightpx = orig_img.size
                pagewidth = 72.0*imgwidthpx/ndpi[0]
                pageheight = 72.0*imgheightpx/ndpi[1]

                def format_float(f):
                    if int(f) == f:
                        return str(int(f))
                    else:
                        return ("%.4f" % f).rstrip("0")

                self.assertEqual(sorted(cur_page.keys()),
                                 [PdfName.Contents, PdfName.MediaBox,
                                  PdfName.Parent, PdfName.Resources,
                                  PdfName.Type])
                self.assertEqual(cur_page.MediaBox,
                                 ['0', '0', format_float(pagewidth),
                                  format_float(pageheight)])
                self.assertEqual(cur_page.Parent, x.Root.Pages)
                self.assertEqual(cur_page.Type, PdfName.Page)
                self.assertEqual(cur_page.Resources.keys(),
                                 [PdfName.XObject])
                self.assertEqual(cur_page.Resources.XObject.keys(),
                                 [PdfName.Im0])
                self.assertEqual(cur_page.Contents.keys(),
                                 [PdfName.Length])
                self.assertEqual(cur_page.Contents.Length,
                                 str(len(cur_page.Contents.stream)))
                self.assertEqual(cur_page.Contents.stream,
                                 "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                                 "/Im0 Do\nQ" % (pagewidth, pageheight))

                imgprops = cur_page.Resources.XObject.Im0

                # test if the filter is valid:
                self.assertIn(
                    imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode,
                                      PdfName.FlateDecode,
                                      [PdfName.CCITTFaxDecode]])

                # test if the image has correct size
                self.assertEqual(imgprops.Width, str(orig_img.size[0]))
                self.assertEqual(imgprops.Height, str(orig_img.size[1]))
                # if the input file is a jpeg then it should've been copied
                # verbatim into the PDF
                if imgprops.Filter in [PdfName.DCTDecode,
                                       PdfName.JPXDecode]:
                    self.assertEqual(
                        cur_page.Resources.XObject.Im0.stream,
                        convert_load(orig_imgdata))
                elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                    tiff_header = tiff_header_for_ccitt(
                        int(imgprops.Width), int(imgprops.Height),
                        int(imgprops.Length), 4)
                    imgio = BytesIO()
                    imgio.write(tiff_header)
                    imgio.write(convert_store(
                        cur_page.Resources.XObject.Im0.stream))
                    imgio.seek(0)
                    im = Image.open(imgio)
                    self.assertEqual(im.tobytes(), orig_img.tobytes())
                    try:
                        im.close()
                    except AttributeError:
                        pass

                elif imgprops.Filter == PdfName.FlateDecode:
                    # otherwise, the data is flate encoded and has to be equal
                    # to the pixel data of the input image
                    imgdata = zlib.decompress(
                        convert_store(cur_page.Resources.XObject.Im0.stream))
                    if imgprops.DecodeParms:
                        if orig_img.format == 'PNG':
                            pngidat, palette = img2pdf.parse_png(orig_imgdata)
                        elif orig_img.format == 'TIFF' \
                                and orig_img.info['compression'] == "group4":
                            offset, length = \
                                    img2pdf.ccitt_payload_location_from_pil(
                                            orig_img)
                            pngidat = orig_imgdata[offset:offset+length]
                        else:
                            pngbuffer = BytesIO()
                            orig_img.save(pngbuffer, format="png")
                            pngidat, palette = img2pdf.parse_png(
                                    pngbuffer.getvalue())
                        self.assertEqual(zlib.decompress(pngidat), imgdata)
                    else:
                        colorspace = imgprops.ColorSpace
                        if colorspace == PdfName.DeviceGray:
                            colorspace = 'L'
                        elif colorspace == PdfName.DeviceRGB:
                            colorspace = 'RGB'
                        elif colorspace == PdfName.DeviceCMYK:
                            colorspace = 'CMYK'
                        else:
                            raise Exception("invalid colorspace")
                        im = Image.frombytes(colorspace,
                                             (int(imgprops.Width),
                                              int(imgprops.Height)),
                                             imgdata)
                        if orig_img.mode == '1':
                            self.assertEqual(im.tobytes(),
                                             orig_img.convert("L").tobytes())
                        elif orig_img.mode not in ("RGB", "L", "CMYK",
                                                   "CMYK;I"):
                            self.assertEqual(im.tobytes(),
                                             orig_img.convert("RGB").tobytes())
                        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does
                        # not have the close() method
                        try:
                            im.close()
                        except AttributeError:
                            pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()), True)
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass

Example #7

Show file

File: __init__.py Project: johnsonhit/img2pdf

        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(PdfReaderIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertIn(x.Root.Pages.Count, ('1', '2'))
            if len(x.Root.Pages.Kids) == '1':
                self.assertEqual(x.Size, '7')
                self.assertEqual(len(x.Root.Pages.Kids), 1)
            elif len(x.Root.Pages.Kids) == '2':
                self.assertEqual(x.Size, '10')
                self.assertEqual(len(x.Root.Pages.Kids), 2)
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            orig_img = Image.open(f)
            for pagenum in range(len(x.Root.Pages.Kids)):
                # retrieve the original image frame that this page was
                # generated from
                orig_img.seek(pagenum)
                cur_page = x.Root.Pages.Kids[pagenum]

                ndpi = orig_img.info.get("dpi", (96.0, 96.0))
                # In python3, the returned dpi value for some tiff images will
                # not be an integer but a float. To make the behaviour of
                # img2pdf the same between python2 and python3, we convert that
                # float into an integer by rounding.
                # Search online for the 72.009 dpi problem for more info.
                ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
                imgwidthpx, imgheightpx = orig_img.size
                pagewidth = 72.0*imgwidthpx/ndpi[0]
                pageheight = 72.0*imgheightpx/ndpi[1]

                def format_float(f):
                    if int(f) == f:
                        return str(int(f))
                    else:
                        return ("%.4f" % f).rstrip("0")

                self.assertEqual(sorted(cur_page.keys()),
                                 [PdfName.Contents, PdfName.MediaBox,
                                  PdfName.Parent, PdfName.Resources,
                                  PdfName.Type])
                self.assertEqual(cur_page.MediaBox,
                                 ['0', '0', format_float(pagewidth),
                                  format_float(pageheight)])
                self.assertEqual(cur_page.Parent, x.Root.Pages)
                self.assertEqual(cur_page.Type, PdfName.Page)
                self.assertEqual(cur_page.Resources.keys(),
                                 [PdfName.XObject])
                self.assertEqual(cur_page.Resources.XObject.keys(),
                                 [PdfName.Im0])
                self.assertEqual(cur_page.Contents.keys(),
                                 [PdfName.Length])
                self.assertEqual(cur_page.Contents.Length,
                                 str(len(cur_page.Contents.stream)))
                self.assertEqual(cur_page.Contents.stream,
                                 "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                                 "/Im0 Do\nQ" % (pagewidth, pageheight))

                imgprops = cur_page.Resources.XObject.Im0

                # test if the filter is valid:
                self.assertIn(
                    imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                                      [PdfName.FlateDecode],
                                      [PdfName.CCITTFaxDecode]])
                # test if the colorspace is valid
                self.assertIn(
                    imgprops.ColorSpace, [PdfName.DeviceGray,
                                          PdfName.DeviceRGB,
                                          PdfName.DeviceCMYK])

                # test if the image has correct size
                self.assertEqual(imgprops.Width, str(orig_img.size[0]))
                self.assertEqual(imgprops.Height, str(orig_img.size[1]))
                # if the input file is a jpeg then it should've been copied
                # verbatim into the PDF
                if imgprops.Filter in [[PdfName.DCTDecode],
                                       [PdfName.JPXDecode]]:
                    self.assertEqual(
                        cur_page.Resources.XObject.Im0.stream,
                        convert_load(orig_imgdata))
                elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                    tiff_header = tiff_header_for_ccitt(
                        int(imgprops.Width), int(imgprops.Height),
                        int(imgprops.Length), 4)
                    imgio = BytesIO()
                    imgio.write(tiff_header)
                    imgio.write(convert_store(
                        cur_page.Resources.XObject.Im0.stream))
                    imgio.seek(0)
                    im = Image.open(imgio)
                    self.assertEqual(im.tobytes(), orig_img.tobytes())
                    try:
                        im.close()
                    except AttributeError:
                        pass

                elif imgprops.Filter == [PdfName.FlateDecode]:
                    # otherwise, the data is flate encoded and has to be equal
                    # to the pixel data of the input image
                    imgdata = zlib.decompress(
                        convert_store(cur_page.Resources.XObject.Im0.stream))
                    colorspace = imgprops.ColorSpace
                    if colorspace == PdfName.DeviceGray:
                        colorspace = 'L'
                    elif colorspace == PdfName.DeviceRGB:
                        colorspace = 'RGB'
                    elif colorspace == PdfName.DeviceCMYK:
                        colorspace = 'CMYK'
                    else:
                        raise Exception("invalid colorspace")
                    im = Image.frombytes(colorspace, (int(imgprops.Width),
                                                      int(imgprops.Height)),
                                         imgdata)
                    if orig_img.mode == '1':
                        self.assertEqual(im.tobytes(),
                                         orig_img.convert("L").tobytes())
                    elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                        self.assertEqual(im.tobytes(),
                                         orig_img.convert("RGB").tobytes())
                    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not
                    # have the close() method
                    try:
                        im.close()
                    except AttributeError:
                        pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(outx.getvalue(), outy.getvalue())
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass

Example #8

Show file

File: __init__.py Project: BigBoss21X/img2pdf-legacy-python

        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from io import StringIO, BytesIO
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(StringIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertEqual(x.Size, '7')
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Count, '1')
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            self.assertEqual(len(x.Root.Pages.Kids), 1)
            self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()),
                             [PdfName.Contents, PdfName.MediaBox,
                              PdfName.Parent, PdfName.Resources, PdfName.Type])
            self.assertEqual(x.Root.Pages.Kids[0].MediaBox,
                             ['0', '0', '115', '48'])
            self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages)
            self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page)
            self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(),
                             [PdfName.XObject])
            self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(),
                             [PdfName.Im0])
            self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(),
                             [PdfName.Length])
            self.assertEqual(x.Root.Pages.Kids[0].Contents.Length,
                             str(len(x.Root.Pages.Kids[0].Contents.stream)))
            self.assertEqual(x.Root.Pages.Kids[0].Contents.stream,
                             "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                             "Do\nQ")

            imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0

            # test if the filter is valid:
            self.assertIn(
                imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                                  [PdfName.FlateDecode],
                                  [PdfName.CCITTFaxDecode]])
            # test if the colorspace is valid
            self.assertIn(
                imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                                      PdfName.DeviceCMYK])
            # test if the image has correct size
            orig_img = Image.open(f)
            self.assertEqual(imgprops.Width, str(orig_img.size[0]))
            self.assertEqual(imgprops.Height, str(orig_img.size[1]))
            # if the input file is a jpeg then it should've been copied
            # verbatim into the PDF
            if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]:
                self.assertEqual(
                    x.Root.Pages.Kids[0].Resources.XObject.Im0.stream,
                    convert_load(orig_imgdata))
            elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                tiff_header = tiff_header_for_ccitt(
                    int(imgprops.Width), int(imgprops.Height),
                    int(imgprops.Length), 4)
                imgio = BytesIO()
                imgio.write(tiff_header)
                imgio.write(convert_store(
                    x.Root.Pages.Kids[0].Resources.XObject.Im0.stream))
                imgio.seek(0)
                im = Image.open(imgio)
                self.assertEqual(im.tobytes(), orig_img.tobytes())
                try:
                    im.close()
                except AttributeError:
                    pass

            elif imgprops.Filter == [PdfName.FlateDecode]:
                # otherwise, the data is flate encoded and has to be equal to
                # the pixel data of the input image
                imgdata = zlib.decompress(
                    convert_store(
                        x.Root.Pages.Kids[0].Resources.XObject.Im0.stream))
                colorspace = imgprops.ColorSpace
                if colorspace == PdfName.DeviceGray:
                    colorspace = 'L'
                elif colorspace == PdfName.DeviceRGB:
                    colorspace = 'RGB'
                elif colorspace == PdfName.DeviceCMYK:
                    colorspace = 'CMYK'
                else:
                    raise Exception("invalid colorspace")
                im = Image.frombytes(colorspace, (int(imgprops.Width),
                                                  int(imgprops.Height)),
                                     imgdata)
                if orig_img.mode == '1':
                    orig_img = orig_img.convert("L")
                elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                    orig_img = orig_img.convert("RGB")
                self.assertEqual(im.tobytes(), orig_img.tobytes())
                # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
                # the close() method
                try:
                    im.close()
                except AttributeError:
                    pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(outx.getvalue(), outy.getvalue())
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass