Ejemplo n.º 1
0
def test_it_includes_page_numbers():
    """
    Each extracted table reports the page it sits on and the page total.

    page_number is 1-indexed, as defined in the PDF format; the first three
    tables of the fixture are expected on pages 2, 3 and 4 of a 4-page
    document.
    """
    tables = get_tables_from_document(fixture('AnimalExampleTables.pdf'))
    # Expected page of the table at each position, in extraction order.
    for index, expected_page in enumerate((2, 3, 4)):
        assert_equals(tables[index].total_pages, 4)
        assert_equals(tables[index].page_number, expected_page)
Ejemplo n.º 2
0
            flights.append(FileFlights(url, date))

    # Start from an empty downloads directory, creating it on first run.
    if os.path.exists('downloads/'):
        # close() blocks until `rm` has exited; without it the downloads
        # below could start while stale files are still being deleted.
        os.popen('rm -f downloads/*').close()
    else:
        os.mkdir('downloads')

    # Download every flight file and print its fully populated table rows.
    for flight in flights:
        # Only the download side effect is needed here; the local path is
        # rebuilt from the URL on the next line.
        wget.download(flight.url, 'downloads')
        filepath = 'downloads/' + wget.filename_from_url(flight.url)
        # Keep the PDF open only while it is parsed and printed, so the
        # handle is released even if parsing raises.
        with open(filepath, 'rb') as fileobj:
            doc = PDFDocument(fileobj)
            for table in get_tables_from_document(doc):
                for row in table:
                    # A row with any empty cell is dropped as a whole.
                    if '' in row:
                        continue
                    cells = list(row)
                    if cells:
                        # Same output as `print len(cells), cells`, but
                        # valid syntax on both Python 2 and Python 3.
                        print('%d %s' % (len(cells), cells))
Ejemplo n.º 3
0
def test_it_includes_table_numbers():
    """The first extracted table is table 1 of 1 on its page."""
    tables = get_tables_from_document(fixture('AnimalExampleTables.pdf'))
    # Both the table's position on the page and the per-page total are 1.
    for attribute in ('table_number_on_page', 'total_tables_on_page'):
        assert_equals(getattr(tables[0], attribute), 1)