def parse_pdf(path, pages=134):
    """Parse the PDF at *path* page by page and collect manifest rows.

    For every page the layout produced by the page aggregator is handed to
    ``get_BOL_from_elements``:

    * A page with a falsy BOL is reported as empty and skipped.
    * A page whose BOL equals the module-level ``LastBOL`` is treated as a
      continuation of the current manifest: its TEUs and weight are read
      with ``get_TEUs_from_elements`` and written onto that manifest.
    * Any other page starts a new ``Manifest`` that is filled in by
      ``get_text_from_elements``.

    A manifest whose ``TEUs`` is greater than zero has
    ``SetCalculatedValues()`` and ``myPrint()`` called on it, and the result
    of its ``get_list()`` is appended to the module-level ``Manifests``.

    Args:
        path: Location of the PDF; passed straight to ``open(path, 'rb')``.
        pages: Forwarded to ``PDFPage.get_pages`` as ``maxpages``.

    Returns:
        None. Results are delivered through ``Manifests`` and ``LastBOL``.

    Raises:
        Nothing derived from ``Exception``: any such error is printed with
        ``traceback.print_exc()`` and parsing of this file stops.
    """
    global LastBOL
    try:
        # The context manager guarantees the file handle is released even
        # when a page fails to parse part-way through the document.
        with open(path, 'rb') as pdf_file:
            # Create PDF parser
            rsrcmanager = PDFResourceManager()
            device = PDFPageAggregator(rsrcmanager, laparams=LAParams())
            try:
                interpreter = PDFPageInterpreter(rsrcmanager, device)
                # No manifest exists yet in this call; LastBOL may still
                # hold a value left behind by an earlier call.
                manifest = None
                page_iter = PDFPage.get_pages(
                    pdf_file, maxpages=pages, password="", caching=True)

                for page_no, page in enumerate(page_iter, start=1):
                    print(f'###### page {page_no}')
                    interpreter.process_page(page)
                    # Receive the LTPage object for the page.
                    layout = device.get_result()

                    bol = get_BOL_from_elements(layout)
                    if not bol:
                        print("Empty Page " + "\n")
                        continue

                    # Only continue a manifest that was started in this
                    # call; otherwise a stale LastBOL matching the first
                    # page would dereference a manifest that was never
                    # created.
                    if manifest is not None and LastBOL == bol:
                        teus, weight = get_TEUs_from_elements(layout)
                        if teus == 0:
                            print("Continue same BOL: " + bol + "\n")
                            continue
                        manifest.TEUs = teus
                        manifest.WeightORG_Tonne = weight
                    else:
                        manifest = Manifest()
                        manifest.BOL = bol
                        get_text_from_elements(layout, manifest)

                    if manifest.TEUs > 0:
                        manifest.SetCalculatedValues()
                        manifest.myPrint()
                        Manifests.append(manifest.get_list())
                        print("-------------------------------------")
                    else:
                        print("Continue same BOL: " + bol + "\n")

                    # Skipped pages (the ``continue`` paths above) leave
                    # LastBOL untouched, as before.
                    LastBOL = bol
            finally:
                # Release the aggregator device on success and on failure.
                device.close()
    except Exception:
        # Best-effort boundary: report the problem and give up on this
        # file rather than propagating to the caller.
        traceback.print_exc()