Example #1
0
    def _convert_pdf_to_text(self, password=None):
        """Extract the text of ``self.cvFile`` (a PDF) into a ``.txt`` file.

        The output file is created inside ``self.scratchDir``. Its name is
        the PDF's base name (extension removed) followed by the current Unix
        timestamp in whole seconds and ``.txt``. The resulting path is stored
        in ``self.cvTextFile`` before the conversion starts.

        Only the first 30 pages (page indices 0-29) are requested from
        pdfminer.

        Args:
            password: Optional PDF password. When not ``None`` it overwrites
                ``self.cvFilePasswd``; otherwise the value already stored in
                ``self.cvFilePasswd`` is used as-is.

        Returns:
            Always ``0``; failures are reported through exceptions.

        Raises:
            IOError/OSError: if the input PDF or the output file cannot be
                opened.
            Any exception raised by pdfminer's ``process_pdf`` propagates
            unchanged; all opened handles are closed before it does.
        """
        if password is not None:
            self.cvFilePasswd = password

        input_pdf = self.cvFile
        page_numbers = range(0, 30)
        codec = 'utf-8'

        laparams = LAParams()
        laparams.all_texts = True
        # NOTE(review): showpageno is set as an attribute on the LAParams
        # object, exactly as before; whether pdfminer reads it from there
        # should be confirmed against the installed pdfminer version.
        laparams.showpageno = True

        # splitext keeps the full name for extension-less files, whereas
        # split(".") + pop() would have discarded it entirely.
        base_name = os.path.splitext(os.path.basename(input_pdf))[0]
        # Whole-second timestamp used as a uniqueness suffix; two conversions
        # of the same file within one second map to the same output path.
        timestamp = str(int(time.time()))
        output_filename = os.path.join(
            self.scratchDir, base_name + timestamp + ".txt")
        self.cvTextFile = output_filename

        rsrcmgr = PDFResourceManager()
        # Open the input first so a missing/unreadable PDF fails before an
        # empty output file is created. The context managers guarantee both
        # files are closed even if the conversion raises.
        with open(input_pdf, 'rb') as fp:
            with open(output_filename, 'w') as outfp:
                device = TextConverter(
                    rsrcmgr, outfp, codec=codec, laparams=laparams)
                try:
                    process_pdf(
                        rsrcmgr, device, fp, page_numbers,
                        maxpages=len(page_numbers),
                        password=self.cvFilePasswd,
                        check_extractable=True)
                finally:
                    # Close the device before the file it writes to.
                    device.close()
        return 0