Ejemplo n.º 1
0
def plotAllPages(fh):
    """Plot every page of a PDF and collect the resulting figures and axes.

    For each page yielded by ``pdf.get_pages()`` the layout elements are
    gathered with ``LeafList().populate``, wrapped in a
    ``pdftables.TableDiagnosticData`` and handed to ``plotpage``. The figure
    is titled ``"page N"`` (1-based).

    As a side effect this prints the document creator once, and for every
    page the value of ``box_list.count()`` and the modal character height
    reported by ``pdftables.calculate_modal_height``.

    Args:
        fh: Object passed straight to ``PDFDocument``; presumably an open
            PDF file handle -- confirm against callers.

    Returns:
        A ``(fig_list, ax1_list)`` tuple of two lists holding, in page order,
        the figure and the axes returned by ``plotpage`` for each page.
    """
    # Element type names handed to LeafList.populate via `interested`.
    interested_types = [
        'LTPage', 'LTFigure', 'LTLine', 'LTRect', 'LTImage',
        'LTTextLineHorizontal', 'LTCurve',
    ]

    fig_list = []
    ax1_list = []

    pdf = PDFDocument(fh)
    # print(...) with one parenthesized expression produces the same output
    # under Python 2 and is also valid syntax under Python 3.
    print("Created by: %s" % pdf.get_creator())

    for page_number, page in enumerate(pdf.get_pages(), 1):
        box_list = LeafList().populate(page, interested=interested_types)

        modal_height = pdftables.calculate_modal_height(box_list)

        # Only the boxes are supplied; the remaining four arguments are
        # passed as empty containers.
        diagnostic_data = pdftables.TableDiagnosticData(
            box_list, {}, {}, [], [])

        fig, ax1 = plotpage(diagnostic_data)
        fig_list.append(fig)
        ax1_list.append(ax1)

        fig.suptitle("page %d" % page_number)

        print(box_list.count())
        print("Modal character height: %d" % modal_height)

    return fig_list, ax1_list