Example #1
0
def test_PDFWriter(test_output_dir):
    """Assemble a one-page "Hello World" PDF by hand and write it to disk.

    Nothing about the produced bytes is asserted; the test passes as long
    as building the document and serialising it with ``bytes(pw)`` does not
    raise.  The result is written to ``text.pdf`` inside ``test_output_dir``.
    """
    pw = pdf_write.PDFWriter()

    # Font dictionary for the built-in Helvetica Type 1 font.
    font = pdf.PDFSimpleDict()
    font["Type"] = "Font"
    font["Subtype"] = "Type1"
    font["Name"] = "F1"
    font["BaseFont"] = "Helvetica"
    # Was misspelt "MaxRomanEncoding", which is not one of the predefined
    # PDF encoding names; the intended name is "MacRomanEncoding".
    font["Encoding"] = "MacRomanEncoding"
    font = pw.add_pdf_object(font.to_dict())

    # Resource dictionary: procedure set plus the font registered as /F1.
    resources = pdf.PDFSimpleDict()
    proc_set = pw.add_pdf_object(pdf_write.ProcedureSet().object())
    resources["ProcSet"] = proc_set
    resources["Font"] = pdf.PDFDictionary([(pdf.PDFName("F1"), font)])
    resources = resources.to_dict()

    # Content stream: select /F1 at size 24, move to (100, 100), show text.
    data = b"BT\n/F1 24 Tf\n100 100 Td\n(Hello World) Tj\nET"
    contents = pdf.PDFStream(
        [(pdf.PDFName("Length"), pdf.PDFNumeric(len(data)))], data)
    contents = pw.add_pdf_object(contents)
    page = pdf_write.Page(pdf_write.Rectangle(0, 0, 612, 792), resources,
                          contents)

    pw.add_page(page)

    with open(os.path.join(test_output_dir, "text.pdf"), "wb") as f:
        f.write(bytes(pw))
Example #2
0
def test_ParseName():
    """Check ``ParseName.parse`` on non-names, plain names and ``#`` escapes.

    For a successful parse the result is compared against a
    ``(PDFName, count)`` pair; input not starting with ``/`` yields ``None``.
    """
    parser = pdf_parser.ParseName()
    assert parser.parse(b"asa") is None

    cases = (
        (b"/Matt asa", "Matt", 5),
        (b"/Matt", "Matt", 5),
        (b"/Matt#20 asa", "Matt ", 8),
        (b"/Matt/Bob", "Matt", 5),
    )
    for raw, name, count in cases:
        assert parser.parse(raw) == (pdf.PDFName(name), count)
Example #3
0
def test_DocumentCatalog():
    """Check the dictionary produced by ``DocumentCatalog.object()``.

    It must contain exactly ``/Type`` and ``/Pages``, with the type set to
    ``Catalog`` and the pages entry serialising as the reference ``5 2 R``.
    """
    catalog = pdf_write.DocumentCatalog(pdf.PDFObjectId(5, 2))
    entries = catalog.object().data

    assert {bytes(key) for key in entries} == {b"/Type", b"/Pages"}
    assert entries[pdf.PDFName("Type")] == pdf.PDFName("Catalog")
    assert bytes(entries[pdf.PDFName("Pages")]) == b"5 2 R"
Example #4
0
def test_ArrayConsumer():
    """Exercise the consumer handed out by ``ParseArray``.

    Covers the consumer's type, the return value of ``end`` for input that
    does and does not begin with ``]``, and the array built from the single
    consumed item.
    """
    consumer = pdf_parser.ParseArray().consumer()
    assert isinstance(consumer, pdf_parser.ArrayConsumer)

    item = pdf.PDFName("Matt")
    consumer.consume(item)
    assert consumer.end(b"123.2") is None
    assert consumer.end(b"]ahsgas") == 1
    assert consumer.build() == pdf.PDFArray([pdf.PDFName("Matt")])
Example #5
0
def test_PDFArray():
    """Check indexing, ``repr``, ``bytes`` and equality of ``PDFArray``."""
    # Empty array.
    empty = pdf.PDFArray()
    with pytest.raises(IndexError):
        empty[0]
    assert repr(empty) == "PDFArray([])"
    assert bytes(empty) == b"[]"
    assert empty == pdf.PDFArray()

    # Array holding a name and a number.
    filled = pdf.PDFArray([pdf.PDFName("Matt"), pdf.PDFNumeric(12.2)])
    assert repr(filled) == "PDFArray([PDFName(b'Matt'), PDFNumeric(12.2)])"
    assert bytes(filled) == b"[/Matt 12.2]"
    same = pdf.PDFArray([pdf.PDFName("Matt"), pdf.PDFNumeric(12.2)])
    assert filled == same
Example #6
0
def test_DictionaryConsumer():
    """Exercise the consumer handed out by ``ParseDictionary``.

    Covers the consumer's type, the return value of ``end`` for input that
    does and does not begin with ``>>``, and the dictionary built from one
    consumed key followed by one consumed value.
    """
    consumer = pdf_parser.ParseDictionary().consumer()
    assert isinstance(consumer, pdf_parser.DictionaryConsumer)

    assert consumer.end(b"ajsdga") is None
    assert consumer.end(b">>ahsga") == 2

    # Feed a key and then its value.
    consumer.consume(pdf.PDFName("Mat"))
    consumer.consume(pdf.PDFObjectId(12, 0))
    built = consumer.build()
    assert bytes(built[pdf.PDFName("Mat")]) == b"12 0 R"
Example #7
0
def test_PDFSimpleDict():
    """Check how ``PDFSimpleDict.to_dict`` converts plain Python values.

    String keys are looked up as ``PDFName``; a ``str`` value is expected as
    a ``PDFName``, ``int``/``float`` as ``PDFNumeric`` and ``bool`` as
    ``PDFBoolean``.
    """
    simple = pdf.PDFSimpleDict()
    for key, value in (
        ("Filter", "FlateDecode"),
        ("BitsPerComponent", 8),
        ("Interpolate", True),
        ("matt", 1.23),
    ):
        simple[key] = value

    converted = simple.to_dict()
    expected = (
        ("Filter", pdf.PDFName("FlateDecode")),
        ("BitsPerComponent", pdf.PDFNumeric(8)),
        ("Interpolate", pdf.PDFBoolean(True)),
        ("matt", pdf.PDFNumeric(1.23)),
    )
    for key, want in expected:
        assert converted[pdf.PDFName(key)] == want
Example #8
0
def test_PDFStream():
    """Check contents, ``repr``, ``bytes`` and equality of ``PDFStream``."""
    payload = b"abcdfgiqsp"

    def make_stream():
        # A stream whose dictionary holds only a /Length of 10.
        return pdf.PDFStream(
            [(pdf.PDFName("Length"), pdf.PDFNumeric(10))], payload)

    first = make_stream()
    assert first.stream_contents == b"abcdfgiqsp"
    expected_repr = (
        "PDFDictionary({PDFName(b'Length'): PDFNumeric(10)}, stream length=10)"
    )
    assert repr(first) == expected_repr
    assert bytes(first) == b"<</Length 10>>\nstream\nabcdfgiqsp\nendstream"

    # Two streams built from the same entries and data compare equal.
    second = make_stream()
    assert first == second
Example #9
0
def test_PDFDictionary():
    """Check lookup, ``repr``, ``bytes`` and item assignment of ``PDFDictionary``."""
    # Empty dictionary: missing keys raise KeyError.
    empty = pdf.PDFDictionary()
    with pytest.raises(KeyError):
        empty[pdf.PDFName("Mat")]
    assert repr(empty) == "PDFDictionary({})"
    assert bytes(empty) == b"<<>>"

    # Single-entry dictionary.
    key = pdf.PDFName("Bob")
    single = pdf.PDFDictionary([(key, pdf.PDFNumeric(12))])
    assert single[pdf.PDFName("Bob")] == pdf.PDFNumeric(12)
    assert repr(single) == "PDFDictionary({PDFName(b'Bob'): PDFNumeric(12)})"
    assert bytes(single) == b"<</Bob 12>>"

    # Assigning to an existing key replaces its value.
    single[pdf.PDFName("Bob")] = pdf.PDFNumeric(1)
    assert bytes(single) == b"<</Bob 1>>"
Example #10
0
def test_PageTree(page1):
    """Check the dictionary built by ``PageTree`` and its link to its page.

    Args:
        page1: a ``pdf_write.Page`` to add to the tree.
    """
    pt = pdf_write.PageTree()
    pt.add_page(page1)

    pt_obj = pt.object()
    dic = pt_obj.data
    assert set(bytes(x) for x in dic) == {b"/Type", b"/Kids", b"/Count"}
    assert dic[pdf.PDFName("Type")] == pdf.PDFName("Pages")
    # The kid entry has no object number yet.  Identity comparison is the
    # correct check for ``None`` (``== None`` could be satisfied by a custom
    # ``__eq__``).
    assert dic[pdf.PDFName("Kids")][0].number is None
    assert dic[pdf.PDFName("Count")] == pdf.PDFNumeric(1)

    # Adding the page sets its parent to the tree's own object.
    assert page1.parent is pt_obj
    # Numbering the page's object must show up in the tree's /Kids entry.
    page1.object().number = 5
    assert dic[pdf.PDFName("Kids")][0].number == 5
Example #11
0
def test_read_full_object(pdf1):
    """Read objects 5 and 4 of the sample PDF via ``full_object_at``.

    Object ``5 0`` is expected to be the number 33; object ``4 0`` is
    expected to be a stream whose ``/Length`` is 33 and whose contents start
    with ``q\\n1637``.
    """
    document = pdf_read.PDF(pdf1)

    length_loc = document.object_lookup[pdf_read.PDFObjectId(5, 0)]
    length_obj = document.full_object_at(length_loc)
    assert repr(length_obj) == "PDFNumeric(33)"

    stream_loc = document.object_lookup[pdf_read.PDFObjectId(4, 0)]
    stream_obj = document.full_object_at(stream_loc)
    assert stream_obj[pdf.PDFName("Length")] == pdf.PDFNumeric(33)
    assert stream_obj.stream_contents.startswith(b"q\n1637")
Example #12
0
def test_PDFName():
    """Check construction, equality, serialisation and hashing of ``PDFName``."""
    x = pdf.PDFName(b"Bob")
    assert x.name == b"Bob"
    assert repr(x) == "PDFName(b'Bob')"
    # Names built from ``bytes`` and from ``str`` compare equal.
    assert x == pdf.PDFName(b"Bob")
    assert x == pdf.PDFName("Bob")
    assert bytes(x) == b"/Bob"

    # A NUL byte is rejected.  Both statements stay inside the block because
    # either the construction or the serialisation may be what raises.
    with pytest.raises(ValueError):
        x = pdf.PDFName("ahdga\x00")
        bytes(x)

    # Newline, space and non-ASCII bytes are written as #XX escapes.
    x = pdf.PDFName(b"Bob\n T\xee")
    assert bytes(x) == b"/Bob#0A#20T#EE"

    # Equal names must hash equally and so collapse to one set element.
    # The previous ``assert len({...})`` passed for any non-empty set and
    # therefore checked nothing.
    assert len({pdf.PDFName("Masa"), pdf.PDFName("Masa")}) == 1
Example #13
0
def test_Page(page1):
    """Check the dictionary produced by ``Page.object()`` once a parent is set.

    Args:
        page1: a ``pdf_write.Page``; the expected values below correspond to
            the media box, resources and contents that page was built with.
    """
    page1.parent = pdf.PDFObjectId(2, 5)
    entries = page1.object().data

    expected_keys = {
        b"/Type", b"/Parent", b"/Resources", b"/MediaBox", b"/Contents"
    }
    assert {bytes(key) for key in entries} == expected_keys
    assert entries[pdf.PDFName("Type")] == pdf.PDFName("Page")

    # The remaining entries are compared in serialised form.
    serialised = (
        ("Parent", b"2 5 R"),
        ("Resources", b"<</Matt 7>>"),
        ("MediaBox", b"[0 1 20 50]"),
        ("Contents", b"3 2 R"),
    )
    for key, want in serialised:
        assert bytes(entries[pdf.PDFName(key)]) == want
Example #14
0
def test_PDFParser_1():
    """Parse a single linearization dictionary and check every entry.

    The parser is iterated to exhaustion and must yield exactly one object:
    a dictionary with six numeric entries and one two-element array, whose
    keys are all ``PDFName`` instances.
    """
    raw = (
        b"\n<</Linearized 1/L 10171355/O 1489/E 14578/N 480/T 10169091/H [ 470 1215]>>\n"
    )
    parser = pdf_parser.PDFParser(io.BytesIO(raw))
    parsed = list(parser)
    assert len(parsed) == 1
    linearized = parsed[0]

    numeric_entries = (
        ("Linearized", 1),
        ("L", 10171355),
        ("O", 1489),
        ("E", 14578),
        ("N", 480),
        ("T", 10169091),
    )
    for key, value in numeric_entries:
        assert linearized[pdf.PDFName(key)] == pdf.PDFNumeric(value)
    hint = pdf.PDFArray([pdf.PDFNumeric(470), pdf.PDFNumeric(1215)])
    assert linearized[pdf.PDFName("H")] == hint

    # Iterating the dictionary yields its keys as PDFName objects.
    seen = set()
    for key in linearized:
        assert isinstance(key, pdf.PDFName)
        seen.add(key.name)
    assert seen == {b"Linearized", b"L", b"O", b"E", b"N", b"T", b"H"}
Example #15
0
def test_trailer(pdf1):
    """Check that the sample PDF's trailer has ``/Size`` 18, ``/Info`` and ``/Root``."""
    trailer = pdf_read.PDF(pdf1).trailer
    assert trailer[pdf.PDFName("Size")] == pdf.PDFNumeric(18)
    for required in ("Info", "Root"):
        assert pdf.PDFName(required) in trailer
Example #16
0
def test_read_object_1(pdf1):
    """Read the object at location 10 of the sample PDF: the document catalog.

    ``object_at`` is expected to return a one-element sequence whose only
    item is a dictionary of type ``Catalog`` pointing at pages object ``2 0``.
    """
    document = pdf_read.PDF(pdf1)
    found = document.object_at(10)
    assert len(found) == 1

    catalog = found[0]
    assert catalog[pdf.PDFName("Pages")] == pdf.PDFObjectId(2, 0)
    assert catalog[pdf.PDFName("Type")] == pdf.PDFName("Catalog")
Example #17
0
def test_ImageDictionary():
    """Check the stream object produced by ``ImageDictionary``.

    An 8-bit 1024x768 RGB image is given FlateDecode-filtered data with a
    ``Predictor`` decode parameter plus one extra dictionary entry; the
    resulting dictionary keys, values and stream contents are then verified.
    """
    image = pdf_write.ImageDictionary(
        width=1024,
        height=768,
        colour_space=pdf_write.ColourSpaceRGB(),
        bits=8,
    )
    image.add_filtered_data("FlateDecode", b"1234", {"Predictor": 5})
    image.add_dictionary_entry("matt", 7)

    obj = image.object()
    entries = obj.data
    expected_keys = {
        b"/Subtype", b"/Filter", b"/Width", b"/Height", b"/ColorSpace",
        b"/BitsPerComponent", b"/Length", b"/Interpolate", b"/DecodeParms",
        b"/matt"
    }
    assert {bytes(key) for key in entries} == expected_keys

    expected_values = (
        ("Subtype", pdf.PDFName("Image")),
        ("Filter", pdf.PDFName("FlateDecode")),
        ("Width", pdf.PDFNumeric(1024)),
        ("Height", pdf.PDFNumeric(768)),
        ("ColorSpace", pdf.PDFName("DeviceRGB")),
        ("BitsPerComponent", pdf.PDFNumeric(8)),
        ("Length", pdf.PDFNumeric(4)),
        ("Interpolate", pdf.PDFBoolean(True)),
        ("matt", pdf.PDFNumeric(7)),
    )
    for key, want in expected_values:
        assert entries[pdf.PDFName(key)] == want
    # The decode parameters are compared in serialised form.
    assert bytes(entries[pdf.PDFName("DecodeParms")]) == b"<</Predictor 5>>"

    assert obj.data.stream_contents == b"1234"
Example #18
0
def test_InfoObject():
    """Check that ``InfoObject`` emits exactly ``/Title`` and ``/CreationDate``.

    Only the title's value is verified; the creation date is checked for
    presence alone.
    """
    # Named ``info`` rather than ``io`` so the local does not hide the
    # standard ``io`` module name inside this function.
    info = pdf_write.InfoObject("My Title")
    entries = info.object().data
    assert {bytes(key) for key in entries} == {b"/Title", b"/CreationDate"}
    assert entries[pdf.PDFName("Title")] == pdf.PDFString("My Title")
Example #19
0
def page1():
    """Return a small ``pdf_write.Page`` used by the page tests.

    The page has media box ``(0, 1, 20, 50)``, a resources dictionary
    ``{/Matt: 7}`` and contents given by object id ``3 2``.

    NOTE(review): ``test_PageTree`` and ``test_Page`` take ``page1`` as an
    argument, but no ``@pytest.fixture`` decorator is visible on this
    function here -- confirm it is registered as a fixture.
    """
    return pdf_write.Page(
        pdf_write.Rectangle(0, 1, 20, 50),
        pdf.PDFDictionary([(pdf.PDFName("Matt"), pdf.PDFNumeric(7))]),
        pdf.PDFObjectId(3, 2),
    )