Example #1
0
def _test_sample_pdf(short_filename):
    """Extract the tables of one sample PDF and diff each against its expected file.

    ``short_filename`` is passed to ``fixture`` and the result is handed to
    ``get_tables_from_document``. The number of tables returned must equal
    ``get_expected_number_of_tables(short_filename)``. Each table is then
    rendered with ``to_string``, written UTF-8 encoded to
    ``<short_filename>_<index>.txt`` under ``ACTUAL_DIR`` and compared with
    the file of the same name under ``EXPECTED_DIR`` via ``diff_table_files``.

    :param short_filename: name of the sample PDF to check.
    """
    tables = get_tables_from_document(fixture(short_filename))

    assert_equal(get_expected_number_of_tables(short_filename), len(tables))
    for table_num, table in enumerate(tables):
        table_filename = "{}_{}.txt".format(short_filename, table_num)
        expected_filename = join(EXPECTED_DIR, table_filename)
        actual_filename = join(ACTUAL_DIR, table_filename)

        # The payload is already encoded to UTF-8 bytes, so the file has to
        # be opened in binary mode: a text-mode handle rejects bytes on
        # Python 3 and rewrites newlines on Windows under Python 2.
        with open(actual_filename, 'wb') as f:
            f.write(to_string(table).encode('utf-8'))

        diff_table_files(expected_filename, actual_filename)
def _test_sample_pdf(short_filename):
    """Extract the tables of one sample PDF and diff each against its expected file.

    The PDF is opened in binary mode from ``SAMPLE_DIR`` and passed to
    ``get_tables``. The number of tables returned must equal
    ``get_expected_number_of_tables(short_filename)``. Each table is then
    rendered with ``to_string``, written UTF-8 encoded to
    ``<short_filename>_<index>.txt`` under ``ACTUAL_DIR`` and compared with
    the file of the same name under ``EXPECTED_DIR`` via ``diff_table_files``.

    :param short_filename: file name of the sample PDF inside ``SAMPLE_DIR``.
    """
    with open(join(SAMPLE_DIR, short_filename), "rb") as pdf_file:
        tables = get_tables(pdf_file)

    assert_equal(get_expected_number_of_tables(short_filename), len(tables))
    for table_num, table in enumerate(tables):
        table_filename = "{}_{}.txt".format(short_filename, table_num)
        expected_filename = join(EXPECTED_DIR, table_filename)
        actual_filename = join(ACTUAL_DIR, table_filename)

        # The payload is already encoded to UTF-8 bytes, so the file has to
        # be opened in binary mode: a text-mode handle rejects bytes on
        # Python 3 and rewrites newlines on Windows under Python 2.
        with open(actual_filename, "wb") as out_file:
            out_file.write(to_string(table).encode("utf-8"))

        diff_table_files(expected_filename, actual_filename)
Example #3
0
from pdftables.pdf_document import PDFDocument as pdfdoc
from pdftables.pdftables import page_to_tables
from pdftables.display import to_string

# Print every table found on the page at index 0 of the sample PDF.
filepath = 'irregular-verbs-de.pdf'

# The whole extraction runs inside the ``with`` block so the file is still
# open while the document and page are being read, and is closed afterwards
# even if extraction raises.
with open(filepath, 'rb') as fileobj:
    doc = pdfdoc.from_fileobj(fileobj)

    page = doc.get_page(0)
    tables = page_to_tables(page)
    for table in tables:
        # Single-argument call form: prints the same text under the Python 2
        # print statement and is also valid Python 3.
        print(to_string(table.data))
Example #4
0
from pdftables.pdf_document import PDFDocument as pdfdoc
from pdftables.pdftables import page_to_tables
from pdftables.display import to_string


# Print every table found on the page at index 0 of the sample PDF.
filepath = 'irregular-verbs-de.pdf'

# The whole extraction runs inside the ``with`` block so the file is still
# open while the document and page are being read, and is closed afterwards
# even if extraction raises.
with open(filepath, 'rb') as fileobj:
    doc = pdfdoc.from_fileobj(fileobj)

    page = doc.get_page(0)
    tables = page_to_tables(page)
    for table in tables:
        # Single-argument call form: prints the same text under the Python 2
        # print statement and is also valid Python 3.
        print(to_string(table.data))