def parse_pdf(path): fp = open('ICCV.pdf', 'rb') parser = PDFParser(fp) doc = PDFDocument() parser.set_document(doc) doc.set_parser(parser) doc.initialize() if not doc.is_extractable(): raise PDFTextExtractionNotAllowed else: rsrcmgr = PDFResourceManager() laparams = LAParams() device = PDFPageInterpreter(rsrcmgr, laparams=laparams) interpreter = PDFPageInterpreter(rsrcmgr, device) for page in doc.get_pages(): interpreter.process_page(page) layout = device.get_result() for x in layout: if (isinstance(x, LTTextBoxHorizontal)): with open(r'test.txt', 'a') as f: results = x.get_text() print(results) f.write(results + '\n')