コード例 #1
0
def test_pdf_open(tmp_path):
    """Check each supported way of constructing a ``PdfParser``.

    Covers a buffer of null bytes (rejected), an empty parser, a filename,
    an in-memory buffer with a non-zero start offset, and an already open
    file object, asserting the page count and ``should_close_*`` flags.
    """
    # A buffer made up entirely of null bytes must raise a format error.
    null_bytes = bytearray(65536)
    with pytest.raises(PdfParser.PdfFormatError):
        PdfParser.PdfParser(buf=null_bytes)

    # A parser constructed without arguments is empty; no close flag is set.
    with PdfParser.PdfParser() as blank:
        assert len(blank.pages) == 0
        assert len(blank.info) == 0
        assert not blank.should_close_buf
        assert not blank.should_close_file

    # Write a PDF to disk for the remaining cases.
    pdf_path = helper_save_as_pdf(tmp_path, "RGB")

    # Opened by filename: both close flags are set.
    with PdfParser.PdfParser(filename=pdf_path) as by_name:
        assert len(by_name.pages) == 1
        assert by_name.should_close_buf
        assert by_name.should_close_file

    # Parsed from a buffer in which five extra bytes precede the PDF data.
    with open(pdf_path, "rb") as fh:
        file_bytes = fh.read()
    padded = b"xyzzy" + file_bytes
    with PdfParser.PdfParser(buf=padded, start_offset=5) as by_buf:
        assert len(by_buf.pages) == 1
        assert not by_buf.should_close_buf
        assert not by_buf.should_close_file

    # Parsed from a file object opened here: only the buffer flag is set.
    with open(pdf_path, "rb") as fh, PdfParser.PdfParser(f=fh) as by_file:
        assert len(by_file.pages) == 1
        assert by_file.should_close_buf
        assert not by_file.should_close_file
コード例 #2
0
ファイル: test_file_pdf.py プロジェクト: nelsonc1975/Jallow
    def test_pdf_append(self):
        """Append info entries, then extra pages, to a PDF and re-check it.

        The file is reopened after each modification to confirm the page
        count, the number of info entries, and the individual info values.
        """
        date_keys = (b"CreationDate", b"ModDate")

        # Save the starting PDF; its title is compared to the file's stem.
        path = self.helper_save_as_pdf("RGB", producer="PdfParser")
        default_title = os.path.splitext(os.path.basename(path))[0]

        # First pass: verify the fresh file, then write additional info.
        with PdfParser.PdfParser(path, mode="r+b") as parser:
            self.assertEqual(len(parser.pages), 1)
            self.assertEqual(len(parser.info), 4)
            self.assertEqual(parser.info.Title, default_title)
            self.assertEqual(parser.info.Producer, "PdfParser")
            for key in date_keys:
                self.assertIn(key, parser.info)
            self.check_pdf_pages_consistency(parser)

            # Values include non-ASCII text and characters such as
            # parentheses and a backslash.
            new_entries = (
                ("Title", "abc"),
                ("Author", "def"),
                ("Subject", u"ghi\uABCD"),
                ("Keywords", "qw)e\\r(ty"),
                ("Creator", "hopper()"),
            )
            for field, text in new_entries:
                setattr(parser.info, field, text)
            parser.start_writing()
            parser.write_xref_and_trailer()

        # Second pass: the info entries written above must be present.
        with PdfParser.PdfParser(path) as parser:
            self.assertEqual(len(parser.pages), 1)
            self.assertEqual(len(parser.info), 8)
            self.assertEqual(parser.info.Title, "abc")
            for key in date_keys:
                self.assertIn(key, parser.info)
            self.check_pdf_pages_consistency(parser)

        # Append two more images to the existing file.
        cmyk_im = hopper("CMYK")
        palette_im = hopper("P")
        cmyk_im.save(
            path, append=True, save_all=True, append_images=[palette_im])

        # Third pass: three pages now, with the info entries unchanged.
        with PdfParser.PdfParser(path) as parser:
            self.assertEqual(len(parser.pages), 3)
            self.assertEqual(len(parser.info), 8)
            raw_title = parser.info[b"Title"]
            self.assertEqual(PdfParser.decode_text(raw_title), "abc")
            self.assertEqual(parser.info.Title, "abc")
            self.assertEqual(parser.info.Producer, "PdfParser")
            self.assertEqual(parser.info.Keywords, "qw)e\\r(ty")
            self.assertEqual(parser.info.Subject, u"ghi\uABCD")
            for key in date_keys:
                self.assertIn(key, parser.info)
            self.check_pdf_pages_consistency(parser)
コード例 #3
0
def test_pdf_append(tmp_path):
    """Append info entries, then extra pages, to a PDF and re-check it.

    The file is reopened after each modification to confirm the page count,
    the number of info entries, and the individual info values.
    """
    # Save the starting PDF; its title is compared to the file's stem.
    pdf_path = helper_save_as_pdf(tmp_path, "RGB", producer="PdfParser")
    file_stem, _extension = os.path.splitext(os.path.basename(pdf_path))

    # First pass: verify the fresh file, then write additional info.
    with PdfParser.PdfParser(pdf_path, mode="r+b") as doc:
        assert len(doc.pages) == 1
        assert len(doc.info) == 4
        assert doc.info.Title == file_stem
        assert doc.info.Producer == "PdfParser"
        assert b"CreationDate" in doc.info
        assert b"ModDate" in doc.info
        check_pdf_pages_consistency(doc)

        # Values include non-ASCII text and characters such as parentheses
        # and a backslash.
        doc.info.Title = "abc"
        doc.info.Author = "def"
        doc.info.Subject = "ghi\uABCD"
        doc.info.Keywords = "qw)e\\r(ty"
        doc.info.Creator = "hopper()"
        doc.start_writing()
        doc.write_xref_and_trailer()

    # Second pass: the info entries written above must be present.
    with PdfParser.PdfParser(pdf_path) as doc:
        assert len(doc.pages) == 1
        assert len(doc.info) == 8
        assert doc.info.Title == "abc"
        assert b"CreationDate" in doc.info
        assert b"ModDate" in doc.info
        check_pdf_pages_consistency(doc)

    # Append two more images to the existing file.
    cmyk_im = hopper("CMYK")
    palette_im = hopper("P")
    extra_pages = [palette_im]
    cmyk_im.save(
        pdf_path, append=True, save_all=True, append_images=extra_pages)

    # Third pass: three pages now, with the info entries unchanged.
    with PdfParser.PdfParser(pdf_path) as doc:
        assert len(doc.pages) == 3
        assert len(doc.info) == 8
        raw_title = doc.info[b"Title"]
        assert PdfParser.decode_text(raw_title) == "abc"
        assert doc.info.Title == "abc"
        assert doc.info.Producer == "PdfParser"
        assert doc.info.Keywords == "qw)e\\r(ty"
        assert doc.info.Subject == "ghi\uABCD"
        assert b"CreationDate" in doc.info
        assert b"ModDate" in doc.info
        check_pdf_pages_consistency(doc)
コード例 #4
0
ファイル: test_file_pdf.py プロジェクト: nelsonc1975/Jallow
    def test_pdf_info(self):
        """Save a PDF with all eight info fields set and read each one back."""
        created = time.strptime("2000", "%Y")
        modified = time.strptime("2001", "%Y")

        # Write the PDF, passing every info field explicitly.
        path = self.helper_save_as_pdf(
            "RGB",
            title="title",
            author="author",
            subject="subject",
            keywords="keywords",
            creator="creator",
            producer="producer",
            creationDate=created,
            modDate=modified)

        # Attribute name on ``info`` paired with the text expected there.
        expected_text = (
            ("Title", "title"),
            ("Author", "author"),
            ("Subject", "subject"),
            ("Keywords", "keywords"),
            ("Creator", "creator"),
            ("Producer", "producer"),
        )

        # Reopen the file and compare every entry with what was passed in.
        with PdfParser.PdfParser(path) as parser:
            self.assertEqual(len(parser.info), 8)
            for attr, text in expected_text:
                self.assertEqual(getattr(parser.info, attr), text)
            self.assertEqual(parser.info.CreationDate, created)
            self.assertEqual(parser.info.ModDate, modified)
            self.check_pdf_pages_consistency(parser)
コード例 #5
0
ファイル: test_file_pdf.py プロジェクト: nelsonc1975/Jallow
    def helper_save_as_pdf(self, mode, **kwargs):
        """Save ``hopper(mode)`` as a PDF, sanity-check it, return its path.

        :param mode: mode string passed to ``hopper`` and used in the
            temporary file name.
        :param kwargs: forwarded unchanged to the image's ``save`` call.
        :returns: the path of the written PDF file.
        """
        # Arrange
        source = hopper(mode)
        target = self.tempfile("temp_" + mode + ".pdf")

        # Act
        source.save(target, **kwargs)

        # Assert: the file exists and is not empty.
        self.assertTrue(os.path.isfile(target))
        self.assertGreater(os.path.getsize(target), 0)

        # When appending, more than one page is required; otherwise at
        # least one page.
        appending = (kwargs.get("append_images", False)
                     or kwargs.get("append", False))
        page_floor = 1 if appending else 0
        with PdfParser.PdfParser(target) as parsed:
            self.assertGreater(len(parsed.pages), page_floor)

        # The integers following the first "/MediaBox [ 0 0 " marker, up to
        # the closing bracket, must equal the image size.
        with open(target, 'rb') as stream:
            raw = stream.read()
        after_marker = raw.split(b'/MediaBox [ 0 0 ')[1]
        box_text = after_marker.split(b']')[0]
        page_size = tuple(int(token) for token in box_text.split())
        self.assertEqual(source.size, page_size)

        return target
コード例 #6
0
def test_redos(newline):
    """Parse a trailer followed by thousands of newlines without hanging.

    Regression test for the ReDoS reported as CVE-2021-25292: the point is
    that parsing finishes rather than timing out. Which exception is raised
    is incidental.
    """
    padding = newline * 3456
    malicious = b" trailer<<>>" + padding

    with pytest.raises(PdfParser.PdfFormatError):
        PdfParser.PdfParser(buf=malicious)
コード例 #7
0
def test_pdf_info(tmp_path):
    """Save a PDF with all eight info fields set and read each one back."""
    created = time.strptime("2000", "%Y")
    modified = time.strptime("2001", "%Y")

    # Keyword arguments forwarded to the save helper, one per info field.
    metadata = {
        "title": "title",
        "author": "author",
        "subject": "subject",
        "keywords": "keywords",
        "creator": "creator",
        "producer": "producer",
        "creationDate": created,
        "modDate": modified,
    }
    pdf_path = helper_save_as_pdf(tmp_path, "RGB", **metadata)

    # Reopen the file and compare every entry with what was passed in.
    with PdfParser.PdfParser(pdf_path) as doc:
        assert len(doc.info) == 8
        assert doc.info.Title == "title"
        assert doc.info.Author == "author"
        assert doc.info.Subject == "subject"
        assert doc.info.Keywords == "keywords"
        assert doc.info.Creator == "creator"
        assert doc.info.Producer == "producer"
        assert doc.info.CreationDate == created
        assert doc.info.ModDate == modified
        check_pdf_pages_consistency(doc)
コード例 #8
0
    def helper_save_as_pdf(self, mode, **kwargs):
        """Save ``hopper(mode)`` as a PDF, sanity-check it, return its path.

        :param mode: mode string passed to ``hopper`` and used in the
            temporary file name.
        :param kwargs: forwarded unchanged to the image's ``save`` call.
        :returns: the path of the written PDF file.
        """
        # Arrange
        source = hopper(mode)
        target = self.tempfile("temp_" + mode + ".pdf")

        # Act
        source.save(target, **kwargs)

        # Assert: the file exists and is not empty.
        assert os.path.isfile(target)
        assert os.path.getsize(target) > 0

        # When appending, more than one page is required; otherwise at
        # least one page.
        appending = (kwargs.get("append_images", False)
                     or kwargs.get("append", False))
        page_floor = 1 if appending else 0
        with PdfParser.PdfParser(target) as parsed:
            assert len(parsed.pages) > page_floor

        # The integers following the first "/MediaBox [ 0 0 " marker, up to
        # the closing bracket, must equal the image size.
        with open(target, "rb") as stream:
            raw = stream.read()
        after_marker = raw.split(b"/MediaBox [ 0 0 ")[1]
        box_text = after_marker.split(b"]")[0]
        page_size = tuple(int(token) for token in box_text.split())
        assert source.size == page_size

        return target
コード例 #9
0
ファイル: test_file_pdf.py プロジェクト: hugovk/Pillow
    def test_pdf_append(self):
        """Append info entries, then extra pages, to a PDF and re-check it.

        The file is reopened after each modification to confirm the page
        count, the number of info entries, and the individual info values.
        """
        def assert_dates_present(parser):
            # Both date entries must be present in the info dictionary.
            self.assertIn(b"CreationDate", parser.info)
            self.assertIn(b"ModDate", parser.info)

        # Save the starting PDF; its title is compared to the file's stem.
        path = self.helper_save_as_pdf("RGB", producer="PdfParser")
        file_stem = os.path.splitext(os.path.basename(path))[0]

        # First pass: verify the fresh file, then write additional info.
        with PdfParser.PdfParser(path, mode="r+b") as parser:
            self.assertEqual(len(parser.pages), 1)
            self.assertEqual(len(parser.info), 4)
            self.assertEqual(parser.info.Title, file_stem)
            self.assertEqual(parser.info.Producer, "PdfParser")
            assert_dates_present(parser)
            self.check_pdf_pages_consistency(parser)

            # Values include non-ASCII text and characters such as
            # parentheses and a backslash.
            parser.info.Title = "abc"
            parser.info.Author = "def"
            parser.info.Subject = u"ghi\uABCD"
            parser.info.Keywords = "qw)e\\r(ty"
            parser.info.Creator = "hopper()"
            parser.start_writing()
            parser.write_xref_and_trailer()

        # Second pass: the info entries written above must be present.
        with PdfParser.PdfParser(path) as parser:
            self.assertEqual(len(parser.pages), 1)
            self.assertEqual(len(parser.info), 8)
            self.assertEqual(parser.info.Title, "abc")
            assert_dates_present(parser)
            self.check_pdf_pages_consistency(parser)

        # Append two more images to the existing file.
        cmyk_im = hopper("CMYK")
        palette_im = hopper("P")
        cmyk_im.save(
            path, append=True, save_all=True, append_images=[palette_im])

        # Third pass: three pages now, with the info entries unchanged.
        with PdfParser.PdfParser(path) as parser:
            self.assertEqual(len(parser.pages), 3)
            self.assertEqual(len(parser.info), 8)
            raw_title = parser.info[b"Title"]
            self.assertEqual(PdfParser.decode_text(raw_title), "abc")
            self.assertEqual(parser.info.Title, "abc")
            self.assertEqual(parser.info.Producer, "PdfParser")
            self.assertEqual(parser.info.Keywords, "qw)e\\r(ty")
            self.assertEqual(parser.info.Subject, u"ghi\uABCD")
            assert_dates_present(parser)
            self.check_pdf_pages_consistency(parser)
コード例 #10
0
ファイル: test_file_pdf.py プロジェクト: nelsonc1975/Jallow
    def test_pdf_open(self):
        """Check each supported way of constructing a ``PdfParser``.

        Covers a buffer of null bytes (rejected), an empty parser, a
        filename, an in-memory buffer with a non-zero start offset, and an
        already open file object, asserting the page count and
        ``should_close_*`` flags.
        """
        # A buffer made up entirely of null bytes must raise a format error.
        with self.assertRaises(PdfParser.PdfFormatError):
            PdfParser.PdfParser(buf=bytearray(65536))

        # A parser constructed without arguments is empty; no close flag
        # is set.
        with PdfParser.PdfParser() as blank:
            self.assertEqual(len(blank.pages), 0)
            self.assertEqual(len(blank.info), 0)
            self.assertFalse(blank.should_close_buf)
            self.assertFalse(blank.should_close_file)

        # Write a PDF to disk for the remaining cases.
        path = self.helper_save_as_pdf("RGB")

        # Opened by filename: both close flags are set.
        with PdfParser.PdfParser(filename=path) as by_name:
            self.assertEqual(len(by_name.pages), 1)
            self.assertTrue(by_name.should_close_buf)
            self.assertTrue(by_name.should_close_file)

        # Parsed from a buffer in which five extra bytes precede the PDF
        # data.
        with open(path, "rb") as fh:
            file_bytes = fh.read()
        padded = b"xyzzy" + file_bytes
        with PdfParser.PdfParser(buf=padded, start_offset=5) as by_buf:
            self.assertEqual(len(by_buf.pages), 1)
            self.assertFalse(by_buf.should_close_buf)
            self.assertFalse(by_buf.should_close_file)

        # Parsed from a file object opened here: only the buffer flag is set.
        with open(path, "rb") as fh, PdfParser.PdfParser(f=fh) as by_file:
            self.assertEqual(len(by_file.pages), 1)
            self.assertTrue(by_file.should_close_buf)
            self.assertFalse(by_file.should_close_file)
コード例 #11
0
    def helper_save_as_pdf(self, mode, **kwargs):
        """Save ``hopper(mode)`` as a PDF, sanity-check it, return its path.

        :param mode: mode string passed to ``hopper`` and used in the
            temporary file name.
        :param kwargs: forwarded unchanged to the image's ``save`` call.
        :returns: the path of the written PDF file.
        """
        # Arrange
        source = hopper(mode)
        target = self.tempfile("temp_" + mode + ".pdf")

        # Act
        source.save(target, **kwargs)

        # Assert: the file exists and is not empty.
        self.assertTrue(os.path.isfile(target))
        self.assertGreater(os.path.getsize(target), 0)

        # When appending, more than one page is required; otherwise at
        # least one page.
        appending = (kwargs.get("append_images", False)
                     or kwargs.get("append", False))
        page_floor = 1 if appending else 0
        with PdfParser.PdfParser(target) as parsed:
            self.assertGreater(len(parsed.pages), page_floor)

        return target