Python readOCR Examples

Programming Language: Python

Namespace/Package Name: hocrReader

Method/Function: readOCR

Examples at hotexamples.com: 3

Python readOCR - 3 examples found. These are the top-rated real-world Python examples of hocrReader.readOCR extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: writePDF.py Project: zhenjiezhang/online-ocr

def ocr2PDF(ocrFile, pdfPath='static/test.pdf', zoomRatio=10):
    """Draw every word found in an OCR result file onto a one-page PDF.

    The OCR file is parsed with ``readOCR``; each word is drawn at its
    left edge and at the bottom edge of its line, with all coordinates
    divided by ``zoomRatio``.  The font size of each word is chosen so that
    the rendered Helvetica text is roughly as wide as the word's OCR box.

    Args:
        ocrFile: path handed unchanged to ``readOCR``.
        pdfPath: where the PDF is written.  Defaults to the location that
            was previously hard-coded.
        zoomRatio: divisor applied to all OCR coordinates to obtain PDF
            coordinates.  Defaults to the previously hard-coded value.

    Returns:
        None.  The PDF is written to ``pdfPath`` as a side effect.
    """
    # readOCR returns (page, lines, words); the flat word list is not needed
    # because words are reached through each line.
    page, lines, _ = readOCR(ocrFile)

    # bottomup=0 is passed so that y grows downwards like the OCR
    # coordinates (presumably ReportLab's canvas -- confirm against the
    # file's imports).
    pdf = canvas.Canvas(
        pdfPath,
        bottomup=0,
        pagesize=(page.right / zoomRatio, page.bottom / zoomRatio),
    )

    for line in lines:
        for word in line.words:
            # Width of the text when set in 10pt Helvetica; used as the
            # reference for scaling the font to the OCR box width.
            textWidth = stringWidth(word.text, 'Helvetica', 10)
            # Parenthesised form prints the same on Python 2 and, unlike the
            # bare print statement, also parses on Python 3.
            print(word.text)
            if textWidth == 0:
                # Nothing measurable to draw; also avoids dividing by zero.
                continue
            boxWidth = (word.right - word.left) / zoomRatio
            fontSize = round(10.0 * (word.right - word.left) / zoomRatio / textWidth) \
                if not isinstance(boxWidth, float) else round(10.0 * boxWidth / textWidth)
            pdf.setFont('Helvetica', fontSize)
            pdf.drawString(word.left / zoomRatio, line.bottom / zoomRatio, word.text)

    pdf.save()

Example #2

Show file

File: ocr.py Project: zhenjiezhang/online-ocr

def refine_process():
    """Re-run tesseract on every detected line separately and return the text.

    Reads the line boxes from ``outputFile + ".html"`` via ``readOCR``, crops
    each line out of the image stored at ``workFile``, pastes it onto a
    larger white canvas, runs the ``tesseract`` command-line tool on that
    canvas and collects the recognised words.

    Temporary files are written under ``static/tmp/`` as
    ``lineImage<n>.png`` (tesseract input) and ``lineImage<n>.html``
    (tesseract hOCR output, read back with ``readOCR``).

    Returns:
        The recognised text: every word is followed by a single space and
        every recognised line is terminated by a newline.
    """
    lang = "eng"
    scale = 1      # resize factor applied to each cropped line
    margin = 500   # total white padding added to each dimension

    image = Image.open(workFile)

    pages, lines, words = readOCR(outputFile + ".html")

    parts = []
    for n, line in enumerate(lines):
        tmpXML = "static/tmp/lineImage" + str(n)
        tmpImage = tmpXML + ".png"

        lineImage = image.crop((line.left, line.top, line.right, line.bottom)).convert("RGB")
        width = line.right - line.left
        height = line.bottom - line.top
        lineImage = lineImage.resize((int(width * scale), int(height * scale)), Image.ANTIALIAS)

        # NOTE(review): the previous version called
        # lineImage.filter(SMOOTH).filter(BLUR) and ImageEnhance.Contrast(lineImage)
        # here but discarded both results, so neither was ever applied to the
        # image.  They are dropped to keep the OCR input unchanged; assign the
        # results if smoothing or contrast enhancement is actually wanted.

        # Centre the line on a white canvas.  Floor division keeps the paste
        # offset an integer on Python 3 as well as Python 2.
        boxImage = Image.new("L", (lineImage.size[0] + margin, lineImage.size[1] + margin), "white")
        boxImage.paste(lineImage, (margin // 2, margin // 2))

        boxImage.save(tmpImage)
        command = ["tesseract", tmpImage, tmpXML, "-l", lang, "hocr"]

        proc = sp.Popen(command, stderr=sp.PIPE)
        # communicate() drains stderr while waiting; wait() alone can block
        # forever once the child fills the pipe buffer.
        proc.communicate()

        linePage, lineLines, lineWords = readOCR(tmpXML + ".html")
        for recognised in lineLines:
            for word in recognised.words:
                parts.append(word.text + " ")
            parts.append("\n")
    return "".join(parts)

Example #3

Show file

File: ocr.py Project: zhenjiezhang/online-ocr

def process_image(image, lang="eng"):
    """Run the OCR pipeline on ``image`` and return the recognised text.

    The image is saved to ``inputFile``, passed through ``pre_process``,
    converted to RGB and saved to ``workFile``.  ``crude_process`` is then
    invoked with ``lang`` and the result is read back from
    ``outputFile + ".html"`` with ``readOCR``.

    Args:
        image: image object providing ``save``; it is also handed to
            ``pre_process``.
        lang: language code forwarded to ``crude_process``.

    Returns:
        The recognised text: every word is followed by a single space and
        every line is terminated by a newline.
    """
    image.save(inputFile)

    image = pre_process(image).convert("RGB")
    # NOTE(review): the previous version called image.filter(ImageFilter.SMOOTH)
    # here but discarded the returned image, so no smoothing was ever applied.
    # The call is dropped to keep the saved work file unchanged; assign the
    # result if smoothing is actually wanted.

    image.save(workFile)

    crude_process(lang=lang)

    pages, lines, words = readOCR(outputFile + ".html")

    parts = []
    for line in lines:
        for word in line.words:
            parts.append(word.text + " ")
        parts.append("\n")
    return "".join(parts)