def test_it_exits_gracefully_when_no_tables_found():
    # Page 5 of this fixture must produce an empty table list, and the
    # second return value must still be a TableDiagnosticData instance.
    document = fixture("13_06_12_10_36_58_boletim_ingles_junho_2013.pdf")
    tables, diagnostics = page_to_tables(document.get_page(5))

    assert_equals([], tables)
    assert isinstance(diagnostics, TableDiagnosticData)
def test_it_can_use_one_hint_argentina_by_size():
    # Only a top hint ('Apellido') is supplied, with atomise disabled.
    # The result is checked by size alone: 32 rows, 4 cells in the first row.
    page = fixture("argentina_diputados_voting_record.pdf").get_page(1)
    config = ConfigParameters(atomise=False, table_top_hint='Apellido')
    extracted, _ = page_to_tables(page, config)

    assert_equals(32, len(extracted))
    first_row = extracted[0]
    assert_equals(4, len(first_row))
def test_the_atomise_option_works_on_coceral_p1_by_size():
    # With atomise enabled, page 1 is checked by size alone:
    # 43 rows, 31 cells in the first row.
    document = fixture(
        "1359397366Final_Coceral grain estimate_2012_December.pdf")
    page = document.get_page(1)
    config = ConfigParameters(atomise=True)
    extracted, _ = page_to_tables(page, config)

    assert_equals(43, len(extracted))
    first_row = extracted[0]
    assert_equals(31, len(first_row))
def test_it_can_use_hints_AlmondBoard_p1():
    # Both a top and a bottom hint are supplied (atomise disabled); the
    # extracted table must match the expected rows below cell for cell.
    page = fixture("2012.01.PosRpt.pdf").get_page(1)
    config = ConfigParameters(
        atomise=False,
        table_top_hint=u"% Change",
        table_bottom_hint=u"Uncommited")
    extracted, _ = page_to_tables(page, config)

    # Each row: label first, then the four value columns.
    expected_rows = [
        [u'Salable',
         u'Million Lbs.', u'Kernel Wt.', u'Kernel Wt.', u'% Change'],
        [u'1.  Carryin August 1, 2011',
         u'254.0', u'253,959,411', u'321,255,129', u'-20.95%'],
        [u'2.  Crop Receipts to Date',
         u'1,950.0', u'1,914,471,575', u'1,548,685,417', u'23.62%'],
        [u'3.  [3% Loss and Exempt]',
         u'58.5', u'57,434,147)(', u'46,460,563(', u')'],
        [u'4.  New Crop Marketable (2-3)',
         u'1,891.5', u'1,857,037,428', u'1,502,224,854', u'23.62%'],
        [u'5.  [Reserve]',
         u'n/a', u'0', u'0', u''],
        [u'6.  Total Supply (1+4-5)Shipments by Handlers',
         u'2,145.5', u'2,110,996,839', u'1,823,479,983', u'15.77%'],
        [u'7.  Domestic',
         u'555.0', u'265,796,698', u'255,785,794', u'3.91%'],
        [u'8.  Export',
         u'1,295.0', u'755,447,255', u'664,175,807', u'13.74%'],
        [u'9.  Total Shipments',
         u'1,850.0', u'1,021,243,953', u'919,961,601', u'11.01%'],
        [u'10.  Forecasted Carryout',
         u'295.5', u'', u'', u''],
        [u'11.  Computed Inventory (6-9)Commitments (sold, not delivered)**',
         u'', u'1,089,752,886', u'903,518,382', u'20.61%'],
        [u'12.  Domestic',
         u'', u'214,522,238', u'187,492,263', u'14.42%'],
        [u'13.  Export',
         u'', u'226,349,446', u'155,042,764', u'45.99%'],
        [u'14.  Total Commited Shipments',
         u'', u'440,871,684', u'342,535,027', u'28.71%'],
        [u'15.  Uncommited Inventory (11-14)',
         u'', u'648,881,202', u'560,983,355', u'15.67%'],
    ]
    assert_equals(expected_rows, extracted)
def test_the_atomise_option_works_on_coceral_p1_by_size():
    # With atomise enabled, page 1 is checked by size alone:
    # 43 rows, 31 cells in the first row.
    document = fixture(
        "1359397366Final_Coceral grain estimate_2012_December.pdf")
    page = document.get_page(1)
    config = ConfigParameters(atomise=True)
    extracted, _ = page_to_tables(page, config)

    assert_equals(43, len(extracted))
    first_row = extracted[0]
    assert_equals(31, len(first_row))
Example #6
0
def test_atomise_does_not_disrupt_table_finding():
    # Extract the same page twice, changing only the `atomise` flag
    # (extend_y is off both times); the two tables must compare equal.
    document = fixture("13_06_12_10_36_58_boletim_ingles_junho_2013.pdf")
    page = document.get_page(4)

    atomised_config = ConfigParameters(atomise=True, extend_y=False)
    atomised_table, _ = pdftables.page_to_tables(page, atomised_config)

    plain_config = ConfigParameters(atomise=False, extend_y=False)
    plain_table, _ = pdftables.page_to_tables(page, plain_config)

    assert_equals(atomised_table, plain_table)
def test_it_can_use_one_hint_argentina_by_size():
    # Only a top hint ('Apellido') is supplied, with atomise disabled.
    # The result is checked by size alone: 32 rows, 4 cells in the first row.
    page = fixture("argentina_diputados_voting_record.pdf").get_page(1)
    config = ConfigParameters(atomise=False, table_top_hint='Apellido')
    extracted, _ = page_to_tables(page, config)

    assert_equals(32, len(extracted))
    first_row = extracted[0]
    assert_equals(4, len(first_row))
Example #8
0
def test_it_finds_tables_on_some_pages_CONAB():
    # contains_tables() is compared against a 32-entry list of booleans;
    # the zero-based positions listed below are the ones expected to be True.
    pdf = fixture('13_06_12_10_36_58_boletim_ingles_junho_2013.pdf')
    positions_with_tables = frozenset((
        5, 6, 7,
        9, 10,
        12,
        14,
        16, 17,
        19, 20, 21, 22, 23,
        25, 26, 27, 28, 29,
    ))
    expected_flags = [
        position in positions_with_tables for position in range(32)]

    assert_equals(contains_tables(pdf), expected_flags)
def test_it_finds_tables_on_some_pages_CONAB():
    # contains_tables() is compared against a 32-entry list of booleans;
    # the zero-based positions listed below are the ones expected to be True.
    pdf = fixture('13_06_12_10_36_58_boletim_ingles_junho_2013.pdf')
    positions_with_tables = frozenset((
        5, 6, 7,
        9, 10,
        12,
        14,
        16, 17,
        19, 20, 21, 22, 23,
        25, 26, 27, 28, 29,
    ))
    expected_flags = [
        position in positions_with_tables for position in range(32)]

    assert_equals(contains_tables(pdf), expected_flags)
def test_it_can_use_hints_AlmondBoard_p1():
    # Both a top and a bottom hint are supplied (atomise disabled); the
    # extracted table must match the expected rows below cell for cell.
    page = fixture("2012.01.PosRpt.pdf").get_page(1)
    config = ConfigParameters(
        atomise=False,
        table_top_hint=u"% Change",
        table_bottom_hint=u"Uncommited")
    extracted, _ = page_to_tables(page, config)

    # Each row: label first, then the four value columns.
    expected_rows = [
        [u'Salable',
         u'Million Lbs.', u'Kernel Wt.', u'Kernel Wt.', u'% Change'],
        [u'1.  Carryin August 1, 2011',
         u'254.0', u'253,959,411', u'321,255,129', u'-20.95%'],
        [u'2.  Crop Receipts to Date',
         u'1,950.0', u'1,914,471,575', u'1,548,685,417', u'23.62%'],
        [u'3.  [3% Loss and Exempt]',
         u'58.5', u'57,434,147)(', u'46,460,563(', u')'],
        [u'4.  New Crop Marketable (2-3)',
         u'1,891.5', u'1,857,037,428', u'1,502,224,854', u'23.62%'],
        [u'5.  [Reserve]',
         u'n/a', u'0', u'0', u''],
        [u'6.  Total Supply (1+4-5)Shipments by Handlers',
         u'2,145.5', u'2,110,996,839', u'1,823,479,983', u'15.77%'],
        [u'7.  Domestic',
         u'555.0', u'265,796,698', u'255,785,794', u'3.91%'],
        [u'8.  Export',
         u'1,295.0', u'755,447,255', u'664,175,807', u'13.74%'],
        [u'9.  Total Shipments',
         u'1,850.0', u'1,021,243,953', u'919,961,601', u'11.01%'],
        [u'10.  Forecasted Carryout',
         u'295.5', u'', u'', u''],
        [u'11.  Computed Inventory (6-9)Commitments (sold, not delivered)**',
         u'', u'1,089,752,886', u'903,518,382', u'20.61%'],
        [u'12.  Domestic',
         u'', u'214,522,238', u'187,492,263', u'14.42%'],
        [u'13.  Export',
         u'', u'226,349,446', u'155,042,764', u'45.99%'],
        [u'14.  Total Commited Shipments',
         u'', u'440,871,684', u'342,535,027', u'28.71%'],
        [u'15.  Uncommited Inventory (11-14)',
         u'', u'648,881,202', u'560,983,355', u'15.67%'],
    ]
    assert_equals(expected_rows, extracted)
Example #11
0
def _test_sample_pdf(short_filename):
    # Shared driver for a sample PDF:
    #   1. extract every table from the fixture named `short_filename`;
    #   2. check the table count against get_expected_number_of_tables();
    #   3. for each table, write its to_string() rendering (UTF-8 encoded)
    #      under ACTUAL_DIR and hand both paths to diff_table_files().
    extracted = get_tables_from_document(fixture(short_filename))
    expected_count = get_expected_number_of_tables(short_filename)
    assert_equal(expected_count, len(extracted))

    for index, table in enumerate(extracted):
        # The same "<pdf name>_<index>.txt" name is used in both directories.
        dump_name = "{}_{}.txt".format(short_filename, index)
        expected_path = join(EXPECTED_DIR, dump_name)
        actual_path = join(ACTUAL_DIR, dump_name)

        # NOTE(review): encoded bytes are written to a text-mode handle,
        # which only works on Python 2 -- confirm the target interpreter.
        with open(actual_path, 'w') as handle:
            handle.write(to_string(table).encode('utf-8'))

        diff_table_files(expected_path, actual_path)
def test_it_includes_page_numbers():
    """
    page_number is 1-indexed, as defined in the PDF format
    table_number is 1-indexed
    """
    tables = get_tables_from_document(fixture('AnimalExampleTables.pdf'))

    # The first three tables are expected on pages 2, 3 and 4 of a
    # 4-page document.
    for index, expected_page in enumerate((2, 3, 4)):
        assert_equals(tables[index].total_pages, 4)
        assert_equals(tables[index].page_number, expected_page)
def test_atomise_does_not_disrupt_table_finding():
    # Extract the same page twice, changing only the `atomise` flag
    # (extend_y is off both times); the two tables must compare equal.
    document = fixture("13_06_12_10_36_58_boletim_ingles_junho_2013.pdf")
    page = document.get_page(4)

    atomised_config = ConfigParameters(atomise=True, extend_y=False)
    atomised_table, _ = pdftables.page_to_tables(page, atomised_config)

    plain_config = ConfigParameters(atomise=False, extend_y=False)
    plain_table, _ = pdftables.page_to_tables(page, plain_config)

    assert_equals(atomised_table, plain_table)
def test_it_returns_the_AlmondBoard_p4_table():
    # Page 4 is extracted with atomise and extend_y both disabled; the
    # result must match the expected rows below cell for cell.
    page = fixture("2012.01.PosRpt.pdf").get_page(4)
    config = ConfigParameters(atomise=False, extend_y=False)
    extracted, _ = page_to_tables(page, config)

    # Each row: label first, then the five value columns.
    expected_rows = [
        [u'Variety Name',
         u'Total Receipts', u'Total Receipts', u'Total Inedibles',
         u'Receipts', u'% Rejects'],
        [u'Aldrich',
         u'48,455,454', u'49,181,261', u'405,555', u'2.53%', u'0.82%'],
        [u'Avalon',
         u'7,920,179', u'8,032,382', u'91,733', u'0.41%', u'1.14%'],
        [u'Butte',
         u'151,830,761', u'150,799,510', u'1,054,567', u'7.93%', u'0.70%'],
        [u'Butte/Padre',
         u'215,114,812', u'218,784,885', u'1,145,000', u'11.24%', u'0.52%'],
        [u'Carmel',
         u'179,525,234', u'178,912,935', u'1,213,790', u'9.38%', u'0.68%'],
        [u'Carrion',
         u'507,833', u'358,580', u'2,693', u'0.03%', u'0.75%'],
        [u'Fritz',
         u'105,479,433', u'106,650,571', u'1,209,192', u'5.51%', u'1.13%'],
        [u'Harvey',
         u'58,755', u'58,755', u'1,416', u'0.00%', u'2.41%'],
        [u'Hashem',
         u'430,319', u'430,014', u'1,887', u'0.02%', u'0.44%'],
        [u'Le Grand',
         u'0', u'0', u'0', u'0.00%', u'0.00%'],
        [u'Livingston',
         u'7,985,535', u'7,926,910', u'186,238', u'0.42%', u'2.35%'],
        [u'Marchini',
         u'363,887', u'391,965', u'3,675', u'0.02%', u'0.94%'],
        [u'Merced',
         u'65,422', u'66,882', u'1,167', u'0.00%', u'1.74%'],
        [u'Mission',
         u'19,097,034', u'18,851,071', u'110,323', u'1.00%', u'0.59%'],
        [u'Mixed',
         u'36,358,011', u'36,926,337', u'952,264', u'1.90%', u'2.58%'],
        [u'Mono',
         u'757,637', u'689,552', u'6,785', u'0.04%', u'0.98%'],
        [u'Monterey',
         u'220,713,436', u'212,746,409', u'2,293,892', u'11.53%', u'1.08%'],
        [u'Morley',
         u'822,529', u'825,738', u'6,264', u'0.04%', u'0.76%'],
        [u'N43',
         u'156,488', u'85,832', u'340', u'0.01%', u'0.40%'],
        [u'Neplus',
         u'1,279,599', u'1,237,532', u'17,388', u'0.07%', u'1.41%'],
        [u'Nonpareil',
         u'741,809,844', u'727,286,104', u'5,121,465', u'38.75%', u'0.70%'],
        [u'Padre',
         u'62,905,358', u'62,417,565', u'193,168', u'3.29%', u'0.31%'],
        [u'Peerless',
         u'5,113,472', u'5,101,245', u'20,792', u'0.27%', u'0.41%'],
        [u'Price',
         u'25,312,529', u'25,124,463', u'143,983', u'1.32%', u'0.57%'],
        [u'Ruby',
         u'4,163,237', u'4,057,470', u'35,718', u'0.22%', u'0.88%'],
        [u'Sauret',
         u'55,864', u'55,864', u'517', u'0.00%', u'0.93%'],
        [u'Savana',
         u'389,317', u'390,585', u'2,049', u'0.02%', u'0.52%'],
        [u'Sonora',
         u'31,832,025', u'33,184,703', u'387,848', u'1.66%', u'1.17%'],
        [u'Thompson',
         u'491,026', u'487,926', u'8,382', u'0.03%', u'1.72%'],
        [u'Tokyo',
         u'783,494', u'794,699', u'4,511', u'0.04%', u'0.57%'],
        [u'Winters',
         u'5,780,183', u'5,756,167', u'46,211', u'0.30%', u'0.80%'],
        [u'Wood Colony',
         u'37,458,735', u'36,331,907', u'189,967', u'1.96%', u'0.52%'],
        [u'Major Varieties Sub Total:',
         u'1,913,017,442', u'1,893,945,819', u'14,858,780', u'99.92%',
         u'0.78%'],
        [u'Minor Varieties Total:',
         u'1,454,133', u'1,480,800', u'34,997', u'0.08%', u'2.36%'],
        [u'Grand Total All Varieties',
         u'1,914,471,575', u'1,895,426,619', u'14,893,777', u'100.00%',
         u'0.79%'],
    ]
    assert_equals(expected_rows, extracted)
Example #15
0
def test_it_finds_tables_on_all_pages_AlmondBoard():
    # contains_tables() must report True for all 7 entries of this PDF.
    pdf = fixture('2012.01.PosRpt.pdf')
    all_true = [True] * 7
    assert_equals(all_true, contains_tables(pdf))
Example #16
0
def test_it_finds_no_tables_in_a_pdf_with_no_tables():
    # contains_tables() must report False for all 8 entries of this PDF.
    pdf = fixture('m27-dexpeg2-polymer.pdf')
    all_false = [False] * 8
    assert_equals(all_false, contains_tables(pdf))
def test_it_includes_table_numbers():
    # The first extracted table must be numbered 1 on its page and be
    # the only table counted on that page.
    tables = get_tables_from_document(fixture('AnimalExampleTables.pdf'))
    first_table = tables[0]
    assert_equals(first_table.table_number_on_page, 1)
    assert_equals(first_table.total_tables_on_page, 1)
def test_it_does_not_crash_on_m30_p5():
    # Smoke test: the call only has to complete and return a 2-tuple.
    # Original author's note: "Put this in for more aggressive test".
    document = fixture("m30-JDent36s15-20.pdf")
    _table, _diagnostics = page_to_tables(document.get_page(5))
def test_it_finds_tables_on_all_pages_AlmondBoard():
    # contains_tables() must report True for all 7 entries of this PDF.
    pdf = fixture('2012.01.PosRpt.pdf')
    all_true = [True] * 7
    assert_equals(all_true, contains_tables(pdf))
def test_it_finds_no_tables_in_a_pdf_with_no_tables():
    # contains_tables() must report False for all 8 entries of this PDF.
    pdf = fixture('m27-dexpeg2-polymer.pdf')
    all_false = [False] * 8
    assert_equals(all_false, contains_tables(pdf))
def test_it_does_not_crash_on_m30_p5():
    # Smoke test: the call only has to complete and return a 2-tuple.
    # Original author's note: "Put this in for more aggressive test".
    document = fixture("m30-JDent36s15-20.pdf")
    _table, _diagnostics = page_to_tables(document.get_page(5))
def test_it_returns_the_AlmondBoard_p4_table():
    # Page 4 is extracted with atomise and extend_y both disabled; the
    # result must match the expected rows below cell for cell.
    page = fixture("2012.01.PosRpt.pdf").get_page(4)
    config = ConfigParameters(atomise=False, extend_y=False)
    extracted, _ = page_to_tables(page, config)

    # Each row: label first, then the five value columns.
    expected_rows = [
        [u'Variety Name',
         u'Total Receipts', u'Total Receipts', u'Total Inedibles',
         u'Receipts', u'% Rejects'],
        [u'Aldrich',
         u'48,455,454', u'49,181,261', u'405,555', u'2.53%', u'0.82%'],
        [u'Avalon',
         u'7,920,179', u'8,032,382', u'91,733', u'0.41%', u'1.14%'],
        [u'Butte',
         u'151,830,761', u'150,799,510', u'1,054,567', u'7.93%', u'0.70%'],
        [u'Butte/Padre',
         u'215,114,812', u'218,784,885', u'1,145,000', u'11.24%', u'0.52%'],
        [u'Carmel',
         u'179,525,234', u'178,912,935', u'1,213,790', u'9.38%', u'0.68%'],
        [u'Carrion',
         u'507,833', u'358,580', u'2,693', u'0.03%', u'0.75%'],
        [u'Fritz',
         u'105,479,433', u'106,650,571', u'1,209,192', u'5.51%', u'1.13%'],
        [u'Harvey',
         u'58,755', u'58,755', u'1,416', u'0.00%', u'2.41%'],
        [u'Hashem',
         u'430,319', u'430,014', u'1,887', u'0.02%', u'0.44%'],
        [u'Le Grand',
         u'0', u'0', u'0', u'0.00%', u'0.00%'],
        [u'Livingston',
         u'7,985,535', u'7,926,910', u'186,238', u'0.42%', u'2.35%'],
        [u'Marchini',
         u'363,887', u'391,965', u'3,675', u'0.02%', u'0.94%'],
        [u'Merced',
         u'65,422', u'66,882', u'1,167', u'0.00%', u'1.74%'],
        [u'Mission',
         u'19,097,034', u'18,851,071', u'110,323', u'1.00%', u'0.59%'],
        [u'Mixed',
         u'36,358,011', u'36,926,337', u'952,264', u'1.90%', u'2.58%'],
        [u'Mono',
         u'757,637', u'689,552', u'6,785', u'0.04%', u'0.98%'],
        [u'Monterey',
         u'220,713,436', u'212,746,409', u'2,293,892', u'11.53%', u'1.08%'],
        [u'Morley',
         u'822,529', u'825,738', u'6,264', u'0.04%', u'0.76%'],
        [u'N43',
         u'156,488', u'85,832', u'340', u'0.01%', u'0.40%'],
        [u'Neplus',
         u'1,279,599', u'1,237,532', u'17,388', u'0.07%', u'1.41%'],
        [u'Nonpareil',
         u'741,809,844', u'727,286,104', u'5,121,465', u'38.75%', u'0.70%'],
        [u'Padre',
         u'62,905,358', u'62,417,565', u'193,168', u'3.29%', u'0.31%'],
        [u'Peerless',
         u'5,113,472', u'5,101,245', u'20,792', u'0.27%', u'0.41%'],
        [u'Price',
         u'25,312,529', u'25,124,463', u'143,983', u'1.32%', u'0.57%'],
        [u'Ruby',
         u'4,163,237', u'4,057,470', u'35,718', u'0.22%', u'0.88%'],
        [u'Sauret',
         u'55,864', u'55,864', u'517', u'0.00%', u'0.93%'],
        [u'Savana',
         u'389,317', u'390,585', u'2,049', u'0.02%', u'0.52%'],
        [u'Sonora',
         u'31,832,025', u'33,184,703', u'387,848', u'1.66%', u'1.17%'],
        [u'Thompson',
         u'491,026', u'487,926', u'8,382', u'0.03%', u'1.72%'],
        [u'Tokyo',
         u'783,494', u'794,699', u'4,511', u'0.04%', u'0.57%'],
        [u'Winters',
         u'5,780,183', u'5,756,167', u'46,211', u'0.30%', u'0.80%'],
        [u'Wood Colony',
         u'37,458,735', u'36,331,907', u'189,967', u'1.96%', u'0.52%'],
        [u'Major Varieties Sub Total:',
         u'1,913,017,442', u'1,893,945,819', u'14,858,780', u'99.92%',
         u'0.78%'],
        [u'Minor Varieties Total:',
         u'1,454,133', u'1,480,800', u'34,997', u'0.08%', u'2.36%'],
        [u'Grand Total All Varieties',
         u'1,914,471,575', u'1,895,426,619', u'14,893,777', u'100.00%',
         u'0.79%'],
    ]
    assert_equals(expected_rows, extracted)
def test_it_copes_with_CONAB_p8():
    # Smoke test: extracting page 8 with atomise enabled only has to
    # complete and return a 2-tuple; nothing else is asserted.
    document = fixture("13_06_12_10_36_58_boletim_ingles_junho_2013.pdf")
    page = document.get_page(8)
    _table, _diagnostics = page_to_tables(page, ConfigParameters(atomise=True))
def test_it_returns_the_AlmondBoard_p2_table_by_size():
    # With atomise disabled, page 2 is checked by size alone:
    # 78 rows, 9 cells in the first row.
    page = fixture("2012.01.PosRpt.pdf").get_page(2)
    config = ConfigParameters(atomise=False)
    extracted, _ = page_to_tables(page, config)

    assert_equals(78, len(extracted))
    first_row = extracted[0]
    assert_equals(9, len(first_row))
def test_it_exits_gracefully_when_no_tables_found():
    # Page 5 of this fixture must produce an empty table list, and the
    # second return value must still be a TableDiagnosticData instance.
    document = fixture("13_06_12_10_36_58_boletim_ingles_junho_2013.pdf")
    tables, diagnostics = page_to_tables(document.get_page(5))

    assert_equals([], tables)
    assert isinstance(diagnostics, TableDiagnosticData)
def test_it_copes_with_CONAB_p8():
    # Smoke test: extracting page 8 with atomise enabled only has to
    # complete and return a 2-tuple; nothing else is asserted.
    document = fixture("13_06_12_10_36_58_boletim_ingles_junho_2013.pdf")
    page = document.get_page(8)
    _table, _diagnostics = page_to_tables(page, ConfigParameters(atomise=True))
def test_it_returns_the_AlmondBoard_p2_table_by_size():
    # With atomise disabled, page 2 is checked by size alone:
    # 78 rows, 9 cells in the first row.
    page = fixture("2012.01.PosRpt.pdf").get_page(2)
    config = ConfigParameters(atomise=False)
    extracted, _ = page_to_tables(page, config)

    assert_equals(78, len(extracted))
    first_row = extracted[0]
    assert_equals(9, len(first_row))
Example #28
0
 def setUp(self):
     """Create one fixture object and cache its lns_* results on self."""
     # A single fixtures.fixture() instance supplies all four values.
     provider = fixtures.fixture()
     self.lns1 = provider.lns_1()
     self.lns2 = provider.lns_2()
     self.lns3 = provider.lns_3()
     self.lns19358 = provider.setup_19358_lns()