def test_document():
    """Smoke-test the sample PDF: page count, title metadata and str()."""
    sample = Document(res('sample.pdf'))

    page_count = len(sample)
    assert page_count == 15

    title = sample.meta['Title']
    assert title == 'Red Stork'

    # The result is discarded on purpose: the only requirement is that
    # building the string representation does not crash.
    str(sample)
def test_page_smoke():
    """Check that the first page of the sample PDF reports the expected boxes."""
    # Both boxes are expected to hold the same four values.
    expected_box = (0.0, 0.0, 612.0, 792.0)

    sample = Document(res('sample.pdf'))
    first_page = sample[0]

    assert first_page.media_box == expected_box
    assert first_page.crop_box == expected_box
def test_flatiter():
    """Regression test for flat iteration over a page (this used to crash)."""
    paper = Document(res('arxiv1901.10092.pdf'))

    # Reading ``.text`` from every text object is the step that should not
    # crash; non-text objects are skipped.
    fragments = [
        item.text
        for item in paper[0].flat_iter()
        if item.type == PageObject.OBJ_TYPE_TEXT
    ]

    tail = ''.join(fragments)[-20:]
    assert tail == 're.com/naturephysics'
def test_document_save():
    """Save the sample PDF to a temporary file and verify the reloaded copy."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        original = Document(res('sample.pdf'))

        out_path = f'{tmp_dir}/temp.pdf'
        original.save(out_path)

        # Re-open the file just written and compare it with the source.
        reloaded = Document(out_path)
        assert len(original) == len(reloaded)
        assert reloaded.meta['Title'] == 'Red Stork'
def test_document_edit_meta():
    """Change the title metadata, save, and verify the change survives a reload."""
    new_title = 'Best PDF Parsing tool in this world!'

    with tempfile.TemporaryDirectory() as tmp_dir:
        original = Document(res('sample.pdf'))
        original.meta['Title'] = new_title

        out_path = f'{tmp_dir}/temp.pdf'
        original.save(out_path)

        # The reloaded document must have the same page count and carry
        # the edited title.
        reloaded = Document(out_path)
        assert len(original) == len(reloaded)
        assert reloaded.meta['Title'] == new_title
def test_page_label():
    """Check page labels in the sample PDF, using a positive and a negative index."""
    sample = Document(res('sample.pdf'))

    third_page = sample[2]
    assert third_page.label == 'i'

    third_from_end = sample[-3]
    assert third_from_end.label == '9'
# Example script: open the sample PDF, print some document and page
# information, render the first page to an image file, dump the text
# objects of the first page, and list the document's fonts.
# NOTE(review): Glyph is imported but not used in the visible part of this
# script -- presumably it is needed further down; confirm before removing.
from redstork import Document, PageObject, Glyph
from redstork.test import res

doc = Document(res('sample.pdf'))

# Basic document / first-page facts.
print('Number of pages:', len(doc))
print('MediaBox of the first page is:', doc[0].media_box)
print('Rotation of the first page is:', doc[0].rotation)
print('Document title:', doc.meta['Title'])
print('First page has', len(doc[0]), 'objects')

doc[0].render('page-0.ppm', scale=2)  # render page #1 (index 0) as an image

# Print the text objects of the first page, one output line per text object.
page = doc[0]
for o in page:
    if o.type == PageObject.OBJ_TYPE_TEXT:
        # Each item of a text object unpacks to three values; only the
        # first (the character code) is used, as a key into the object's font.
        for code, _, _ in o:
            print(o.font[code], end='')
        print()  # end the current output line

# List every font of the document together with its id.
for fid, font in doc.fonts.items():
    print(font.simple_name, fid)

# Let's generate an SVG file of the first letter on page 1.
# NOTE(review): only the lookup of that letter's character code is visible
# here; the SVG generation itself presumably follows -- confirm.
text_object = [o for o in page if o.type == PageObject.OBJ_TYPE_TEXT][0]  # first text object on the page
charcode, _, _ = text_object[0]  # character code of the first character of that text object