def test_it_includes_page_numbers():
    """
    Check the page metadata attached to each table extracted from the fixture.

    page_number is 1-indexed, as defined in the PDF format
    table_number is 1-indexed
    """
    tables = get_tables_from_document(fixture('AnimalExampleTables.pdf'))

    # The first three tables are expected on pages 2, 3 and 4 respectively,
    # and every one of them must report a total of 4 pages.
    for index, expected_page in enumerate((2, 3, 4)):
        table = tables[index]
        assert_equals(table.total_pages, 4)
        assert_equals(table.page_number, expected_page)
# NOTE(review): this statement looks like the tail of the flight-collection
# logic that precedes this excerpt; confirm the indentation it needs in the
# full file.
flights.append(FileFlights(url, date))

# Start from a clean downloads directory: create it if missing, otherwise
# remove the files it contains.  The cleanup is done synchronously with `os`
# calls; the previous `os.popen('rm -f downloads/*')` did not wait for the
# shell to finish, so it could race with the downloads below.
download_dir = 'downloads'
if os.path.isdir(download_dir):
    for stale_name in os.listdir(download_dir):
        stale_path = os.path.join(download_dir, stale_name)
        if os.path.isdir(stale_path) and not os.path.islink(stale_path):
            # Like `rm` without -r, leave sub-directories alone.
            continue
        os.remove(stale_path)
else:
    os.mkdir(download_dir)

# Download every flight PDF, extract its tables and print the complete rows.
for flight in flights:
    wget.download(flight.url, download_dir)
    # The on-disk location is rebuilt from the URL rather than taken from the
    # value returned by wget.download.
    filename = wget.filename_from_url(flight.url)
    filepath = os.path.join(download_dir, filename)

    # Keep the file open for the whole extraction (the document may read from
    # it lazily) and guarantee it is closed afterwards.
    with open(filepath, 'rb') as fileobj:
        doc = PDFDocument(fileobj)
        result = get_tables_from_document(doc)
        for table in result:
            for row in table:
                # Only rows without any empty-string cell are printed.
                # NOTE(review): the original filter tested the whole row
                # (`'' not in i`) rather than each cell; that behaviour is
                # preserved here -- confirm that dropping the entire row is
                # what is intended.
                if '' in row:
                    continue
                cells = list(row)
                if cells:
                    # Single-argument form prints identically under the
                    # Python 2 print statement and the Python 3 function.
                    print('%d %s' % (len(cells), cells))
def test_it_includes_table_numbers():
    """
    Check the per-page table numbering reported for the first extracted table.

    The first table must be table 1 on its page, and that page must report
    exactly one table in total.
    """
    tables = get_tables_from_document(fixture('AnimalExampleTables.pdf'))
    first_table = tables[0]
    assert_equals(first_table.table_number_on_page, 1)
    assert_equals(first_table.total_tables_on_page, 1)