def _convert_pdf_to_text(self, password=None):
    """Convert the PDF at ``self.cvFile`` to a text file in ``self.scratchDir``.

    The output file is named ``<input base name><unix timestamp>.txt`` and its
    full path is stored in ``self.cvTextFile`` before conversion starts.

    Args:
        password: Optional password for the PDF. When not ``None`` it is
            stored in ``self.cvFilePasswd``; otherwise the value already held
            in ``self.cvFilePasswd`` is passed to the PDF processor.

    Returns:
        0 once conversion has finished. Errors raised while opening the
        files or processing the PDF propagate to the caller.
    """
    input_pdf = self.cvFile
    if password is not None:
        self.cvFilePasswd = password

    # Only the first 30 page numbers (0-29) are requested from the processor.
    pagenos = range(0, 30)
    maxpages = len(pagenos)
    codec = 'utf-8'

    laparams = LAParams()
    laparams.all_texts = True
    laparams.showpageno = True

    # splitext keeps the name of extension-less inputs ("resume" stays
    # "resume"), where split(".")/pop() would have discarded it entirely.
    base_name = os.path.splitext(os.path.basename(input_pdf))[0]
    # Whole-second timestamp suffix to make repeated conversions of the same
    # input land in different files.
    timestamp = int(time.time())
    output_filename = os.path.join(
        self.scratchDir, base_name + str(timestamp) + r".txt")
    self.cvTextFile = output_filename

    rsrcmgr = PDFResourceManager()
    # Nested `with`/`try` blocks guarantee the input file, the converter and
    # the output file are released (in that order) even if processing fails.
    with open(output_filename, 'w') as outfp:
        device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams)
        try:
            with open(input_pdf, 'rb') as fp:
                process_pdf(rsrcmgr, device, fp, pagenos,
                            maxpages=maxpages,
                            password=self.cvFilePasswd,
                            check_extractable=True)
        finally:
            device.close()
    return 0