Example #1
0
def parse_pdf(path, pages=134):
    """Walk the pages of a PDF and collect one manifest row per BOL.

    Each page is laid out with pdfminer and its BOL is read via
    ``get_BOL_from_elements`` (BOL is presumably "bill of lading" -- confirm).
    A page whose BOL differs from the module-level ``LastBOL`` starts a new
    ``Manifest``; a page repeating ``LastBOL`` only updates the TEUs and
    weight of the manifest in progress.  Whenever the current manifest has
    ``TEUs > 0`` its row (``get_list()``) is appended to the module-level
    ``Manifests`` list.

    Args:
        path: Filesystem path of the PDF, opened in binary mode.
        pages: Passed to ``PDFPage.get_pages`` as ``maxpages``.

    Returns:
        None.  Results are reported through ``Manifests`` and ``LastBOL``.

    Any exception is caught, its traceback printed, and parsing of this file
    stops; the file handle and the aggregator are released either way.
    """
    global LastBOL

    try:
        # ``with`` guarantees the handle is closed even when a page fails.
        with open(path, 'rb') as pdf_file:
            # Create PDF parser
            rsrcmanager = PDFResourceManager()
            PDFPageAgg = PDFPageAggregator(rsrcmanager, laparams=LAParams())
            try:
                interpreter = PDFPageInterpreter(rsrcmanager, PDFPageAgg)
                page_iter = PDFPage.get_pages(pdf_file,
                                              maxpages=pages,
                                              password="",
                                              caching=True)
                for n, page in enumerate(page_iter, start=1):
                    print('###### page ' + str(n))
                    interpreter.process_page(page)
                    # receive the LTPage object for the page.
                    LTPage_layout = PDFPageAgg.get_result()
                    myBOL = get_BOL_from_elements(LTPage_layout)
                    if not myBOL:
                        print("Empty Page " + "\n")
                        continue

                    if LastBOL == myBOL:
                        myTEUs, myWeight = get_TEUs_from_elements(LTPage_layout)
                        if myTEUs == 0:
                            print("Continue same BOL: " + myBOL + "\n")
                            continue
                        # NOTE(review): myManifest is only bound in the
                        # new-BOL branch below.  Because LastBOL is global, a
                        # first page matching a stale LastBOL reaches this
                        # line with myManifest unbound and raises
                        # UnboundLocalError -- confirm the intended handling.
                        myManifest.TEUs = myTEUs
                        myManifest.WeightORG_Tonne = myWeight
                    else:
                        myManifest = Manifest()
                        myManifest.BOL = myBOL
                        get_text_from_elements(LTPage_layout, myManifest)

                    if myManifest.TEUs > 0:
                        myManifest.SetCalculatedValues()
                        myManifest.myPrint()
                        Manifests.append(myManifest.get_list())
                        print("-------------------------------------")
                    else:
                        print("Continue same BOL: " + myBOL + "\n")
                    LastBOL = myBOL
            finally:
                # Release the aggregator on failure as well as on success.
                PDFPageAgg.close()
    except Exception:
        # Best-effort boundary: report the failure and give up on this file.
        traceback.print_exc()