Ejemplo n.º 1
0
def test_document():
    '''Open the sample PDF and check its page count, title and str() form.'''
    pdf = Document(res('sample.pdf'))

    page_count = len(pdf)
    assert page_count == 15

    title = pdf.meta['Title']
    assert title == 'Red Stork'

    # Building the string representation must not raise.
    str(pdf)
Ejemplo n.º 2
0
def test_page_smoke():
    '''Check the media box and crop box of the first page of the sample PDF.'''
    expected_box = (0., 0., 612., 792.)

    pdf = Document(res('sample.pdf'))
    first_page = pdf[0]

    # Both boxes are expected to be the same rectangle.
    assert first_page.media_box == expected_box
    assert first_page.crop_box == expected_box
Ejemplo n.º 3
0
def test_flatiter():
    '''Regression test: reading text while walking flat_iter() used to crash.'''
    pdf = Document(res('arxiv1901.10092.pdf'))

    # Reading ``.text`` on each text object is the operation that must not crash.
    pieces = [
        item.text
        for item in pdf[0].flat_iter()
        if item.type == PageObject.OBJ_TYPE_TEXT
    ]

    joined = ''.join(pieces)
    assert joined[-20:] == 're.com/naturephysics'
Ejemplo n.º 4
0
def test_document_save():
    '''Save the sample PDF to a temporary directory and reload the copy.

    The reloaded document must have the same number of pages as the
    original and still carry the 'Red Stork' title.
    '''
    with tempfile.TemporaryDirectory() as tmp_dir:
        original = Document(res('sample.pdf'))
        out_path = f'{tmp_dir}/temp.pdf'
        original.save(out_path)

        reloaded = Document(out_path)

        original_pages = len(original)
        reloaded_pages = len(reloaded)
        assert original_pages == reloaded_pages
        assert reloaded.meta['Title'] == 'Red Stork'
Ejemplo n.º 5
0
def test_document_edit_meta():
    '''Change the title metadata, save, reload, and check the new title.

    The page count of the reloaded copy must also match the original.
    '''
    new_title = 'Best PDF Parsing tool in this world!'

    with tempfile.TemporaryDirectory() as tmp_dir:
        original = Document(res('sample.pdf'))

        # Edit the metadata in memory before writing the file out.
        original.meta['Title'] = new_title
        out_path = f'{tmp_dir}/temp.pdf'
        original.save(out_path)

        reloaded = Document(out_path)

        original_pages = len(original)
        reloaded_pages = len(reloaded)
        assert original_pages == reloaded_pages
        assert reloaded.meta['Title'] == new_title
Ejemplo n.º 6
0
def test_page_label():
    '''Check page labels of the sample PDF at a positive and a negative index.'''
    pdf = Document(res('sample.pdf'))

    # (page index, expected label) pairs, checked in this order.
    expectations = ((2, 'i'), (-3, '9'))
    for index, expected_label in expectations:
        assert pdf[index].label == expected_label
Ejemplo n.º 7
0
from redstork import Document, PageObject, Glyph
from redstork.test import res

# Demo script: open the bundled sample PDF and print some of its properties.
doc = Document(res('sample.pdf'))

# len() of a document is reported here as its number of pages.
print('Number of pages:', len(doc))

# Indexing the document with 0 yields the first page.
print('MediaBox of the first page is:', doc[0].media_box)

print('Rotation of the first page is:', doc[0].rotation)

# Document metadata is read by key from doc.meta.
print('Document title:', doc.meta['Title'])

# len() of a page is reported here as its number of objects.
print('First page has', len(doc[0]), 'objects')

doc[0].render('page-0.ppm', scale=2)  # render page #1 as image

# Print the text of the first page, one text object per output line.
page = doc[0]
for o in page:
    if o.type == PageObject.OBJ_TYPE_TEXT:
        # Each item of a text object unpacks into three values; only the
        # first (named `code`) is used, as a key into the object's font.
        # NOTE(review): presumably o.font[code] is the string for that
        # character code -- confirm against the library documentation.
        for code, _, _ in o:
            print(o.font[code], end='')
        print()  # end the line after each text object

# List the document's fonts: simple name followed by the key from doc.fonts.
for fid, font in doc.fonts.items():
    print(font.simple_name, fid)

# let's generate an SVG file of the first letter on page 1
text_object = [o for o in page
               if o.type == PageObject.OBJ_TYPE_TEXT][0]  # first text object
charcode, _, _ = text_object[0]  # first character of the first text object