def getText(image):
    chatfont = FontData("w:/app/poker/ps-chat.fontd")
    w, h = image.size  # image width drives the horizontal scan in getstring
    for top, base, bottom in scrape.guess_lines(image):
        def hasInk(a, b):
            return image.getpixel((a, b + top + 1)) == 0
        yield scrape.getstring(w, bottom - top, chatfont, hasInk)  # train=True
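# Hedged usage sketch (not part of the original module): getText() is assumed
# to take a PIL image of the chat window and yield one decoded string per text
# line.  "chat.png" below is a hypothetical filename.
#
#   from PIL import Image
#   for line in getText(Image.open("chat.png").convert("1")):
#       print line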
def glyphs(img, font, cutoff=1, train=False):
    """
    This is slightly more "advanced" OCR that I used when trying to
    re-assemble joe's book.  It attempts to deal with anti-aliased text.
    However, the method it uses is rather silly.

    Generates (x, y, width, height, char) tuples for each glyph in an image.
    A glyph may contain several characters because this system cannot
    compensate for ligatures/kerning.

    This doesn't really work too well yet.
    """
    for top, baseLine, bottom in guess_lines(img.convert("1")):
        scanH = bottom - top  # 16
        fakeTop = top  # baseLine - 14
        fakeBot = bottom  # baseLine + 3

        def hasInk(a, b):
            # any pixel at or below the cutoff counts as ink (handles anti-aliasing)
            return img.getpixel((a, b + fakeTop)) <= cutoff

        w, h = img.size  # re-alias for clarity
        x = 0
        for charW, bmp in scan_line(w, scanH, hasInk):
            if bmp > 0:
                icon = iconify(img.copy().crop((x, top, x + charW, top + scanH)))
                code = icon2num(icon)
                if code in font:
                    char = font[code]
                elif train:
                    # unknown glyph while training: show it and ask the user
                    print bmp
                    print convert.glint_to_strings(bmp, scanH)
                    char = raw_input("what is it?")
                    font[code] = char
                else:
                    char = code
                yield x, top, charW, scanH, char
            elif bmp == 0 and not train:
                # a run of blank columns is treated as a space
                yield x, top, charW, scanH, " "
            x += charW
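# Hedged usage sketch (illustrative, not from the original module): interactive
# training of a fresh font dict on a scanned page.  "page.png" is a hypothetical
# filename; the font dict is filled in as raw_input() prompts are answered.
#
#   from PIL import Image
#
#   font = {}
#   page = Image.open("page.png").convert("L")
#   for x, y, cw, ch, char in glyphs(page, font, cutoff=1, train=True):
#       print x, y, cw, ch, repr(char)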