def plotAllPages(fh):
    """Build one diagnostic plot per page of the PDF opened from `fh`.

    `fh` is handed straight to `PDFDocument`. For every page the leaf
    boxes of the layout types listed below are collected, wrapped in a
    `pdftables.TableDiagnosticData` and drawn with `plotpage`. Each figure
    gets the super-title "page N", counting from 1.

    The PDF creator, and for every page the box count and the modal
    character height, are printed to stdout.

    Returns a pair of parallel lists `(figures, axes)` holding, per page,
    the two values returned by `plotpage`.
    """
    # NOTE(review): a second, equivalent definition of plotAllPages follows
    # this one in the file and shadows it at import time.
    wanted_types = [
        "LTPage",
        "LTFigure",
        "LTLine",
        "LTRect",
        "LTImage",
        "LTTextLineHorizontal",
        "LTCurve",
    ]

    figures = []
    axes = []

    document = PDFDocument(fh)
    print("Created by: %s" % document.get_creator())

    for page_no, page in enumerate(document.get_pages(), 1):
        boxes = LeafList().populate(page, interested=wanted_types)
        modal_height = pdftables.calculate_modal_height(boxes)

        # Only the box list is supplied; the remaining diagnostic fields
        # are passed as empty containers.
        diagnostics = pdftables.TableDiagnosticData(boxes, {}, {}, [], [])
        figure, axis = plotpage(diagnostics)
        figures.append(figure)
        axes.append(axis)

        figure.suptitle("page %d" % page_no)

        print(boxes.count())
        print("Modal character height: %d" % modal_height)

    return figures, axes
def plotAllPages(fh):
    """Plot every page of a PDF and return the figures and their axes.

    A `PDFDocument` is created from `fh`; its creator string is printed.
    Each page is reduced to a `LeafList` restricted to the layout types in
    `layout_filter`, turned into a `pdftables.TableDiagnosticData` and
    passed to `plotpage`. The resulting figure is titled "page N"
    (1-based), and the page's box count and modal character height are
    printed.

    Returns `(fig_list, ax_list)`: two lists with one entry per page, in
    page order, containing the figure and axes returned by `plotpage`.
    """
    # NOTE(review): this repeats an equivalent plotAllPages defined just
    # above in the file; being later, this is the definition that wins.
    layout_filter = [
        "LTPage",
        "LTFigure",
        "LTLine",
        "LTRect",
        "LTImage",
        "LTTextLineHorizontal",
        "LTCurve",
    ]
    fig_list, ax_list = [], []

    pdf_doc = PDFDocument(fh)
    creator_line = "Created by: %s" % pdf_doc.get_creator()
    print(creator_line)

    for index, pdf_page in enumerate(pdf_doc.get_pages()):
        leaves = LeafList().populate(pdf_page, interested=layout_filter)
        height = pdftables.calculate_modal_height(leaves)

        # The diagnostic record carries just the boxes; every other slot
        # is filled with an empty dict or list.
        page_data = pdftables.TableDiagnosticData(leaves, {}, {}, [], [])
        page_fig, page_ax = plotpage(page_data)
        fig_list.append(page_fig)
        ax_list.append(page_ax)

        # Pages are labelled starting from 1.
        page_fig.suptitle("page %d" % (index + 1))

        print(leaves.count())
        print("Modal character height: %d" % height)

    return fig_list, ax_list
def get_pdf_page(fh, pagenumber):
    """Return a single page of the PDF opened from `fh`.

    `pagenumber` is 1-based: it is reduced by one before being used to
    index the result of `PDFDocument.get_pages()`, so page 1 maps to
    index 0.
    """
    # NOTE(review): pagenumber=0 indexes with -1; if get_pages() returns a
    # list that silently selects the last page -- confirm this is intended.
    pages = PDFDocument(fh).get_pages()
    zero_based_index = pagenumber - 1
    return pages[zero_based_index]