Exemple #1
0
    def get(self):
        """Fetch the CURP captcha image, clean it up and write the OCR result.

        The image is downloaded, the segments found by a probabilistic Hough
        transform are painted over in white, and the result is blurred,
        thresholded and morphologically opened before being read with
        tesseract. The recognized text, stripped of surrounding whitespace,
        is passed to ``self.write``.
        """
        # Download the captcha; close the connection even if reading fails.
        url = "http://consultas.curp.gob.mx/CurpSP/imagenCatcha"
        response = urllib.urlopen(url)
        try:
            image_data = StringIO(response.read())
        finally:
            response.close()
        original = Image.open(image_data)

        # Turn the PIL image into a writable numpy array for OpenCV.
        # NOTE(review): the `::` slice leaves the channel order untouched,
        # while the conversions below use the BGR constant -- confirm whether
        # `::-1` (RGB -> BGR) was intended.
        cv_img = np.asarray(original)[:, :, ::].copy()

        # Grayscale copy used only for edge detection.
        gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)

        # Canny edge map, used as input for the line detection.
        edges = cv2.Canny(gray, 60, 200, apertureSize=3)

        # Detect line segments on the edge map.
        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 1, None, 0, 0)

        # Paint every detected segment in white to erase it from the image.
        # HoughLinesP returns None when nothing is found, and the result is
        # laid out as (1, N, 4) or (N, 1, 4) depending on the OpenCV version;
        # flattening to (N, 4) visits every segment in both cases.
        if lines is not None:
            for x1, y1, x2, y2 in lines.reshape(-1, 4):
                start = (int(x1), int(y1))
                end = (int(x2), int(y2))
                cv2.line(cv_img, start, end, (255, 255, 255), 2)

        # Grayscale version of the image with the lines removed.
        processed = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)

        # Light Gaussian blur before binarizing.
        blur = cv2.GaussianBlur(processed, (3, 3), 0)

        # Binarize; index 1 of the returned pair is the thresholded image.
        threshold = cv2.threshold(blur, 128, 255, cv2.THRESH_BINARY)[1]

        # Morphological opening with a 6x6 elliptical kernel.
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6, 6))
        morph = cv2.morphologyEx(threshold, cv2.MORPH_OPEN, kernel)

        # Convert the processed array back into a PIL image for tesseract.
        pil_img = Image.fromarray(morph)

        # Run tesseract restricted to digits and lowercase letters, using
        # page segmentation mode 8 (single word in Tesseract's PSM list).
        tesseract = tesserwrap.tesseract()
        tesseract.set_variable("tessedit_char_whitelist",
                               "0123456789abcdefghijklmnopqrstuvwxyz")
        tesseract.set_page_seg_mode(8)
        text = tesseract.ocr_image(pil_img)
        self.write(text.strip())
Exemple #2
0
#!/usr/bin/env python
#-*- coding:utf-8 -*-

import sys
import StringIO

import requests
import PIL.Image
import tesserwrap

#: OCR engine created once at import time and used by distinguish_captcha().
#: Library: https://github.com/gregjurman/tesserwrap
tesseract = tesserwrap.tesseract()


def distinguish_captcha(image_url, show_origin_image=True):
    """Download a captcha image and return the text tesseract reads from it.

    The image at ``image_url`` is brightened, binarized and converted to
    1-bit mode before being handed to the module-level ``tesseract`` engine.
    When ``show_origin_image`` is true, the unprocessed image is also opened
    with ``show()``. The recognized text is returned with surrounding
    whitespace stripped.
    """
    # Fetch the raw bytes and decode them as an image.
    raw = requests.get(image_url).content
    origin_image = PIL.Image.open(StringIO.StringIO(raw))

    # Scale every pixel value by 1.5, then force each one to 255 or 0.
    brightened = origin_image.point(lambda p: p * 1.5)
    binarized = brightened.point(lambda p: 255 if p > 200 else 0)
    image = binarized.convert("1")

    # Run OCR on the cleaned-up image.
    recognized = tesseract.ocr_image(image)

    # Optionally display the image as it was downloaded.
    if show_origin_image:
        origin_image.show()

    return recognized.strip()


def main():
    # Ask for the captcha URL on the console and drop surrounding whitespace.
    url = raw_input("Please input the url of captcha:\n > ").strip()
 def test_deprecator(self):
     # Instantiate a tesserwrap engine without passing any arguments.
     tr = tesserwrap.tesseract()
 def test_deprecator(self):
     # Instantiate a tesserwrap engine without passing any arguments.
     tr = tesserwrap.tesseract()
Exemple #5
0
 def __init__(self):
     # Create the engine once and keep it on the instance as `self.tw`.
     # NOTE(review): './' and 'dod' are presumably the data path and the
     # language name taken by the tesseract constructor -- confirm.
     self.tw = tesseract('./', 'dod')
 def __init__(self):
     # Create the engine once and keep it on the instance as `self.tw`.
     # NOTE(review): './' and 'dod' are presumably the data path and the
     # language name taken by the tesseract constructor -- confirm.
     self.tw = tesseract('./', 'dod')