def render_pdf(pdf_filename):
    """Process every page of the PDF at *pdf_filename* through render_page.

    For each page the tables are extracted with ``page_to_tables``, turned
    into annotations with ``make_annotations`` and passed to ``render_page``
    together with two output paths:

    * ``svgs/<basename>_<NN>.svg``
    * ``pngs/<basename>_<NN>.png``

    where ``<basename>`` is ``basename(pdf_filename)`` and ``<NN>`` is the
    zero-based page index padded to two digits.  A "Rendered ..." line is
    printed after each page.

    NOTE(review): presumably render_page writes the SVG/PNG files and the
    ``svgs/`` and ``pngs/`` directories must already exist -- confirm
    against render_page.
    """
    with open(pdf_filename, "rb") as fd:
        doc = PDFDocument.from_fileobj(fd)
        # Loop-invariant: the base name is the same for every page.
        stem = basename(pdf_filename)

        for page_number, page in enumerate(doc.get_pages()):
            svg_file = "svgs/{0}_{1:02d}.svg".format(stem, page_number)
            png_file = "pngs/{0}_{1:02d}.png".format(stem, page_number)

            table_container = page_to_tables(page)
            annotations = make_annotations(table_container)

            render_page(
                pdf_filename, page_number, annotations, svg_file, png_file)

            # Single-argument print() emits the same text under Python 2
            # and Python 3 (the old print statement is a SyntaxError on 3).
            print("Rendered {0} {1}".format(svg_file, png_file))
def render_pdf(pdf_filename):
    """Run table detection and rendering for each page of a PDF file.

    The file is opened in binary mode and loaded with
    ``PDFDocument.from_fileobj``.  Pages are numbered from 0 in the order
    returned by ``doc.get_pages()``.  For every page:

    1. ``page_to_tables(page)`` produces a table container,
    2. ``make_annotations`` converts it to annotations,
    3. ``render_page`` receives the source filename, the page number, the
       annotations and the output paths
       ``svgs/<basename>_<NN>.svg`` / ``pngs/<basename>_<NN>.png``
       (``<NN>`` = two-digit, zero-padded page number),
    4. a "Rendered <svg> <png>" line is printed.

    NOTE(review): another ``render_pdf`` definition appears earlier in this
    file; if both live in the same module, this later one is the one that
    takes effect.
    """
    with open(pdf_filename, "rb") as fd:
        doc = PDFDocument.from_fileobj(fd)
        # Computed once; it does not change from page to page.
        pdf_basename = basename(pdf_filename)

        for page_number, page in enumerate(doc.get_pages()):
            svg_file = 'svgs/{0}_{1:02d}.svg'.format(pdf_basename, page_number)
            png_file = 'pngs/{0}_{1:02d}.png'.format(pdf_basename, page_number)

            annotations = make_annotations(page_to_tables(page))
            render_page(
                pdf_filename, page_number, annotations, svg_file, png_file)

            # Joined into one string so print() behaves identically on
            # Python 2 and Python 3 (the print statement only parses on 2).
            print(" ".join(("Rendered", svg_file, png_file)))
# Example script: print every table found on the first page (index 0) of a
# PDF, using pdftables' to_string() on each table's ``data``.
from pdftables.pdf_document import PDFDocument as pdfdoc
from pdftables.pdftables import page_to_tables
from pdftables.display import to_string

filepath = 'irregular-verbs-de.pdf'

# Use a context manager so the file handle is closed even if parsing fails;
# the original left it open for the lifetime of the process.
with open(filepath, 'rb') as fileobj:
    doc = pdfdoc.from_fileobj(fileobj)
    page = doc.get_page(0)
    tables = page_to_tables(page)

    # Kept inside the ``with`` block in case the document reads from
    # fileobj lazily -- NOTE(review): not verified against pdftables.
    for table in tables:
        # Parenthesised single argument prints the same on Python 2 and 3.
        print(to_string(table.data))