Ejemplo n.º 1
0
def plotAllPages(fh):
    """
    Plot every page of the PDF read from the file handle `fh`.

    For each page, the elements selected by the `interested` list below are
    collected with `LeafList().populate`, wrapped in a
    `pdftables.TableDiagnosticData` (its other arguments are passed as empty
    dicts/lists) and handed to `plotpage`. Each resulting figure is given the
    title "page N", where N starts at 1.

    As a side effect the PDF creator is printed once, and for every page the
    result of `box_list.count()` and the modal character height are printed.

    Returns a `(fig_list, ax1_list)` tuple holding, in page order, the
    figures and axes returned by `plotpage`.
    """
    fig_list = []
    ax1_list = []

    pdf = PDFDocument(fh)
    # Single-argument, parenthesised print produces the same output on
    # Python 2 and Python 3; the bare print statement only parses on 2.
    print("Created by: %s" % pdf.get_creator())

    # Element type names passed to `populate` as its `interested` argument.
    flt = [
        "LTPage",
        "LTFigure",
        "LTLine",
        "LTRect",
        "LTImage",
        "LTTextLineHorizontal",
        "LTCurve",
    ]

    for page_number, page in enumerate(pdf.get_pages(), 1):
        box_list = LeafList().populate(page, interested=flt)
        modal_height = pdftables.calculate_modal_height(box_list)

        diagnostic_data = pdftables.TableDiagnosticData(box_list, {}, {}, [], [])

        fig, ax1 = plotpage(diagnostic_data)
        fig_list.append(fig)
        ax1_list.append(ax1)

        fig.suptitle("page %d" % page_number)

        print(box_list.count())
        print("Modal character height: %d" % modal_height)

    return fig_list, ax1_list
Ejemplo n.º 2
0
def get_tables(fh):
    """
    Extract the 'tables' contained in the PDF read from the file handle `fh`.

    The handle is wrapped in a `PDFDocument`, which is passed straight to
    `get_tables_from_document`. The result is a list of tables; each table is
    a list of rows and each row is a list of strings.
    """
    return get_tables_from_document(PDFDocument(fh))
Ejemplo n.º 3
0
def plotAllPages(fh):
    """
    Produce one diagnostic plot per page of the PDF read from `fh`.

    Every page is reduced to the elements named in the `interested` list
    (via `LeafList().populate`), packed into a
    `pdftables.TableDiagnosticData` whose remaining arguments are empty
    dicts/lists, and drawn with `plotpage`. The figure title is set to
    "page N", counting from 1.

    Printed as a side effect: the PDF creator (once), then for each page the
    value of `box_list.count()` and the modal character height.

    Returns the pair `(fig_list, ax1_list)`: the figures and the axes
    returned by `plotpage`, both in page order.
    """
    pdf = PDFDocument(fh)
    # Parenthesised single-argument print is valid, and prints the same
    # text, on both Python 2 and Python 3 (the statement form is 2-only).
    print("Created by: %s" % pdf.get_creator())

    # Element type names handed to `populate` through `interested`.
    flt = [
        'LTPage', 'LTFigure', 'LTLine', 'LTRect', 'LTImage',
        'LTTextLineHorizontal', 'LTCurve',
    ]

    fig_list = []
    ax1_list = []
    for index, page in enumerate(pdf.get_pages()):
        box_list = LeafList().populate(page, interested=flt)
        modal_height = pdftables.calculate_modal_height(box_list)

        diagnostic_data = pdftables.TableDiagnosticData(
            box_list, {}, {}, [], [])

        fig, ax1 = plotpage(diagnostic_data)
        fig_list.append(fig)
        ax1_list.append(ax1)

        # Pages are numbered from 1 in the title.
        fig.suptitle("page %d" % (index + 1))

        print(box_list.count())
        print("Modal character height: %d" % modal_height)

    return fig_list, ax1_list
Ejemplo n.º 4
0
def get_pdf_page(fh, pagenumber):
    """
    Return page `pagenumber` of the PDF read from the file handle `fh`.

    `pagenumber` is 1-based: 1 selects the first entry of
    `PDFDocument(fh).get_pages()`.

    Raises IndexError when `pagenumber` is smaller than 1. A number past the
    last page is left to the indexing of `get_pages()`'s result (an
    IndexError if that is a plain sequence -- not visible from here).
    """
    if pagenumber < 1:
        # Without this guard, 0 and negative numbers would become negative
        # indexes and silently return a page counted from the END of the
        # document instead of signalling an invalid request.
        raise IndexError("page number must be >= 1, got %r" % (pagenumber,))

    pdf = PDFDocument(fh)
    return pdf.get_pages()[pagenumber - 1]
Ejemplo n.º 5
0
def get_pdf_page(fh, pagenumber):
    """
    Fetch a single page, by 1-based number, from the PDF read from `fh`.

    The handle is wrapped in a `PDFDocument` and entry `pagenumber - 1` of
    its `get_pages()` result is returned.

    Raises IndexError if `pagenumber` is less than 1. Numbers beyond the end
    of the document are rejected by the lookup on `get_pages()`'s result
    itself (presumably an IndexError; the return type is not shown here).
    """
    if pagenumber < 1:
        # 0 or a negative page number would otherwise turn into a negative
        # index and quietly pick a page relative to the end of the document.
        raise IndexError("page number must be >= 1, got %r" % (pagenumber,))

    pages = PDFDocument(fh).get_pages()
    return pages[pagenumber - 1]