Python SetCvImageの例、tesseract.SetCvImage Pythonの例

コード例 #1

0

ファイルを表示

    def leerPlaca(self, binarizado):
        """Run OCR on a binarized image and return its alphanumeric text.

        Args:
            binarizado: numpy array holding the binarized image that
                contains the text.

        Returns:
            The recognized string with every run of non-word characters
            removed.
        """
        # numpy array -> CvMat -> IplImage
        # NOTE: GetImage was flagged as a source of an error; needs review.
        ipl_bin = cv.GetImage(cv.fromarray(binarizado))

        # Tesseract configuration: only digits and capital letters are
        # accepted, and the image is read character by character.
        ocr_engine = tesseract.TessBaseAPI()
        ocr_engine.Init(".", "eng", tesseract.OEM_DEFAULT)
        ocr_engine.SetVariable('tessedit_char_whitelist',
                               '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        ocr_engine.SetPageSegMode(tesseract.PSM_SINGLE_CHAR)

        # Hand the binarized image to tesseract and clean up its output.
        tesseract.SetCvImage(ipl_bin, ocr_engine)
        raw_text = ocr_engine.GetUTF8Text()
        return re.sub(r'\W+', '', raw_text)

コード例 #2

0

ファイルを表示

def ocr():
    """OCR ``eurotext.jpg`` and print the text with word-confidence figures.

    The word confidences are fetched twice: once through the API object
    ("Method 1") and once through the module-level helper ("Method 2").
    """
    import cv2.cv as cv

    engine = tesseract.TessBaseAPI()
    engine.Init(".", "eng", tesseract.OEM_DEFAULT)
    engine.SetPageSegMode(tesseract.PSM_AUTO)

    gray_page = cv.LoadImage("eurotext.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
    tesseract.SetCvImage(gray_page, engine)
    text = engine.GetUTF8Text()
    mean_conf = engine.MeanTextConf()
    print(text, len(text))
    print("Cofidence Level: %d %%" % mean_conf)
    print("Confidences of All words")

    # Method 1: ask the API instance.
    header("Method 1", "*" * 10)
    word_confs = engine.AllWordConfidences()

    print(word_confs)
    print("Number of Words:")
    print("counted by tesseract: %d" % len(word_confs))
    print("counted by me: %d[%d]" % (countWords(text), countWords2(text)))
    tesseract_count = len(word_confs)
    if tesseract_count != countWords(text):
        print("Why the words counted by tesseract are different from mine!!!!")

    # Method 2: use the module-level helper.
    header("Method 2", "*" * 10)
    helper_confs = tesseract.AllWordConfidences(engine)
    print(helper_confs, len(helper_confs))

コード例 #3

0

ファイルを表示

ファイル: ocr_server.py プロジェクト: ericyao2013/ROBOT-SubjuGator

 def ocr(self):
     """Run OCR on ``self.last_image`` and return the first word found.

     The image is padded with a white border before recognition so that
     characters do not touch the edges, and ``self.white_list`` is used as
     Tesseract's character whitelist.

     Returns:
         The UTF-8 text of the word at the result iterator's initial
         position, or ``None`` when there is no image or Tesseract returns
         no iterator.
     """
     if self.last_image is None:
         return None

     # Add border to keep the characters off the edges
     offset = 20
     image = cv2.copyMakeBorder(self.last_image,
                                offset,
                                offset,
                                offset,
                                offset,
                                cv2.BORDER_CONSTANT,
                                value=(255, 255, 255))

     # Init and configure tesseract api
     api = tesseract.TessBaseAPI()
     api.Init(".", "eng", tesseract.OEM_DEFAULT)
     api.SetPageSegMode(tesseract.PSM_AUTO)
     api.SetVariable("tessedit_char_whitelist", self.white_list)

     # Wrap the numpy buffer in an IplImage header to pass it to the tess api
     # Derived from example code here: http://blog.mimvp.com/2015/11/python-ocr-recognition/
     # The shape is read after padding so it describes the bordered image.
     height, width, channel = image.shape
     iplimage = cv.CreateImageHeader((width, height), cv.IPL_DEPTH_8U,
                                     channel)
     cv.SetData(iplimage, image.tostring(),
                image.dtype.itemsize * channel * width)
     tesseract.SetCvImage(iplimage, api)
     api.Recognize(None)

     ri = api.GetIterator()
     if not ri:
         return None
     return ri.GetUTF8Text(tesseract.RIL_WORD)

コード例 #4

0

ファイルを表示

 def do_ocr(self, cv2_img):
     """Return the text ``self.ocr_api`` recognizes in a 3-D numpy image.

     Args:
         cv2_img: numpy array whose shape unpacks as (rows, cols, channels).

     Returns:
         The UTF-8 text reported by Tesseract.
     """
     rows, cols, channels = cv2_img.shape
     ipl_header = cv.CreateImageHeader((cols, rows), cv.IPL_DEPTH_8U,
                                       channels)
     # Bytes per image row, needed by SetData as the step.
     row_bytes = cv2_img.dtype.itemsize * channels * cols
     cv.SetData(ipl_header, cv2_img.tostring(), row_bytes)

     tesseract.SetCvImage(ipl_header, self.ocr_api)
     return self.ocr_api.GetUTF8Text()

コード例 #5

0

ファイルを表示

ファイル: ocr.py プロジェクト: HeLiangHIT/OCR

def tesseract_cv_ocr(imgname):
    """Return the text Tesseract recognizes in the image file *imgname*.

    The image is loaded with ``cv.LoadImage`` and recognized with the
    English language data using automatic page segmentation.
    """
    image = cv.LoadImage(imgname)
    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)
    tesseract.SetCvImage(image, api)
    # The mean confidence was computed but never used, so it is no longer
    # requested.
    return api.GetUTF8Text()

コード例 #6

0

ファイルを表示

def ocr_number(img):
    """Return the first line of signed-number text recognized in *img*.

    Recognition is limited to digits, ``+`` and ``-`` and treats the image
    as a single text line. *img* is converted with the project helper
    ``convert_to_IplImage`` before being passed to Tesseract.
    """
    api = tesseract.TessBaseAPI()
    # Raw string: the backslashes are path separators, not escape sequences.
    api.Init(r"C:\Program Files (x86)\Tesseract-OCR", "eng",
             tesseract.OEM_DEFAULT)
    api.SetVariable("tessedit_char_whitelist", "0123456789+-")
    api.SetPageSegMode(tesseract.PSM_SINGLE_LINE)

    cv_im = convert_to_IplImage(img)
    tesseract.SetCvImage(cv_im, api)
    # Keep only the first line to drop the extra newlines in the OCR result.
    return api.GetUTF8Text().split("\n")[0]

コード例 #7

0

ファイルを表示

def _strip_leading_glyph(tokens):
    """Drop the first character of every token longer than three characters."""
    return [tok[1:] if len(tok) > 3 else tok for tok in tokens]


def auto_search():
    """Click search, OCR the loot figures from a screenshot and judge them.

    Sends three mouse events at (660, 290), waits ten seconds, pulls a
    screenshot through ``adb``, crops and inverts the loot region and reads
    it with Tesseract (digits only). The first two recognized lines are
    taken as the gold and elixir amounts.

    Returns:
        ``True`` (after playing a sound with ``mplayer``) when, for both
        lines, the first space is at index 3 and the first token is at
        least 200; ``False`` otherwise, including when fewer than two lines
        were recognized.
    """
    # click search button
    genymotion_session.console.mouse.put_mouse_event_absolute(660, 290, 0, 0, 0)
    genymotion_session.console.mouse.put_mouse_event_absolute(660, 290, 0, 0, 1)
    genymotion_session.console.mouse.put_mouse_event_absolute(660, 290, 0, 0, 0)
    sleep(10)

    # processing: the commands are constant, so no shell is needed
    subprocess.call(["adb", "shell", "screencap", "-p", "/sdcard/screen.png"])
    subprocess.call(["adb", "pull", "/sdcard/screen.png", "/tmp/screen.png"])
    im = Image.open("/tmp/screen.png")
    box = (57, 75, 140, 138)
    loot = ImageOps.invert(im.crop(box).convert('L'))
    loot.save("/tmp/loot.png", "png")

    api = tesseract.TessBaseAPI()
    api.Init("/home/mrtux/app/bin/", "coc", tesseract.OEM_DEFAULT)
    try:
        api.SetVariable("tessedit_char_whitelist", "0123456789")
        api.SetPageSegMode(tesseract.PSM_AUTO)

        image = cv.LoadImage("/tmp/loot.png", cv.CV_LOAD_IMAGE_UNCHANGED)
        tesseract.SetCvImage(image, api)
        text = api.GetUTF8Text()
    finally:
        # Release the engine on every path, not only on success.
        api.End()

    total_loot = text.splitlines()
    if len(total_loot) < 2:
        # OCR did not produce both lines; treat it as "no match".
        return False

    gold_loot, elixir_loot = total_loot[0:2]
    gold_loot_text_element = _strip_leading_glyph(gold_loot.split(" "))
    elixir_loot_text_element = _strip_leading_glyph(elixir_loot.split(" "))

    gold_expr = gold_loot.find(" ") == 3 and int(gold_loot_text_element[0]) >= 200
    elixir_expr = elixir_loot.find(" ") == 3 and int(elixir_loot_text_element[0]) >= 200

    print(gold_loot)
    print(gold_loot_text_element)
    print(elixir_loot)
    print(elixir_loot_text_element)

    if gold_expr and elixir_expr:
        subprocess.call(["mplayer", "/home/mrtux/app/bin/gun.mp3"])
        return True

    return False

コード例 #8

0

ファイルを表示

ファイル: WebNewBUIDScanner.py プロジェクト: privatenumber/WebNew-BU-ID-Scanner

    def __init__(self):

        #Inquire Credentials
        # NOTE(review): the next line is mangled in this listing (text between
        # the credential prompts and the camera display call was replaced by
        # "******"); restore it from the original project before using this.
        self.buUn = urllib.quote_plus(raw_input("BU TA Username: "******"BU TA Password: "******"Camera", img)

            # OCR: hand the current image to the shared Tesseract instance
            tesseract.SetCvImage(img, self.api)

            # Look for an ID made of "U" followed by eight digits
            match = re.search(r"(U\d{8})", self.api.GetUTF8Text())
            if match:

                BUID = match.groups()[0]

                print

                # Lookup BUID
                profile = self.lookupBUID(BUID)

                # Prompt for approval
                if profile != False and raw_input("Approve (y/n): ") == "y":
                    self.approveBUID(BUID)

            # If Esc (key code 27) is pressed, quit; the 100 ms wait also
            # paces the loop (the fps)
            if cv.WaitKey(100) == 27: break

コード例 #9

0

ファイルを表示

def recognize(image):
    """OCR *image* as a single text block and report the result.

    Prints the recognized text and its mean confidence.

    Returns:
        A ``(text, confidence)`` tuple.
    """
    engine = tesseract.TessBaseAPI()
    engine.Init(".", "eng", tesseract.OEM_DEFAULT)
    engine.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)
    tesseract.SetCvImage(image, engine)
    recognized = engine.GetUTF8Text()
    confidence = engine.MeanTextConf()
    # Drop the local reference to the image once recognition is done.
    image = None
    print(recognized)
    print("Confidence: " + str(confidence))
    return recognized, confidence

コード例 #10

0

ファイルを表示

ファイル: SenderSudoku.py プロジェクト: Jaganmohan029/Conceal-In-a-Puzzle

def ocr_singledigit(image):
    """Recognize the digits 1-9 in *image* with the module-level ``API``.

    Returns:
        The recognized text converted to ``int``, or the string ``"x"``
        when nothing was recognized.
    """
    API.Init(".", "eng", tesseract.OEM_DEFAULT)
    API.SetVariable("tessedit_char_whitelist", "123456789")
    API.SetPageSegMode(6)
    tesseract.SetCvImage(image, API)

    # Remove inner spaces and surrounding whitespace before converting.
    digits = API.GetUTF8Text().replace(" ", "").strip()
    return int(digits) if digits else "x"

コード例 #11

0

ファイルを表示

def getCaptcha():
    """Load the captcha image, binarize it and print the recognized word.

    The image is thresholded in place at 231 and recognized as a single
    word restricted to digits and lowercase letters.
    """
    # The image was previously bound to ``grayScale`` while the rest of the
    # function used ``gray``, which raised NameError.
    # NOTE(review): cv.LoadImage is given a URL here; confirm it can open
    # remote resources, otherwise download the image to a local file first.
    gray = cv.LoadImage(
        'https://my.ebharatgas.com/bharatgas/MyCaptcha.png',
        cv.CV_LOAD_IMAGE_GRAYSCALE)
    cv.Threshold(gray, gray, 231, 255, cv.CV_THRESH_BINARY)
    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetVariable("tessedit_char_whitelist",
                    "0123456789abcdefghijklmnopqrstuvwxyz")
    api.SetPageSegMode(tesseract.PSM_SINGLE_WORD)
    tesseract.SetCvImage(gray, api)
    print(api.GetUTF8Text())

コード例 #12

0

ファイルを表示

 def ocr(self, image):
     """Return the hOCR text Tesseract produces for a 2-D numpy image."""
     engine = tesseract.TessBaseAPI()
     engine.Init(".", "big", tesseract.OEM_DEFAULT)
     engine.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)

     rows, cols = image.shape
     # Bytes per row of the single-channel image.
     row_bytes = cols * image.dtype.itemsize
     ipl_header = cv.CreateImageHeader((cols, rows), cv.IPL_DEPTH_8U, 1)
     cv.SetData(ipl_header, image.tostring(), row_bytes)

     tesseract.SetCvImage(ipl_header, engine)
     return engine.GetHOCRText(0)

コード例 #13

0

ファイルを表示

def OCR2(croplist):
    """OCR every image path in *croplist* with the module-level ``api``.

    For each entry the list length, the path and the recognized text are
    printed.

    Returns:
        A list with the recognized text of each image, in input order.
    """
    recognized_texts = []
    for crop_path in croplist:
        print(len(croplist))
        print(crop_path)
        tesseract.SetCvImage(cv.LoadImage(crop_path), api)
        crop_text = api.GetUTF8Text()
        print(crop_text)
        recognized_texts.append(crop_text)
    return recognized_texts

コード例 #14

0

ファイルを表示

ファイル: img2txt_scratch.py プロジェクト: mikpim01/PDF2EXCEL

def testy(image_path):
    """Return the whitespace-separated words recognized in an image file.

    The image is loaded in grayscale and recognized with the module-level
    ``api``. A marker line is printed before returning.
    """
    gray_page = cv.LoadImage(image_path, cv.CV_LOAD_IMAGE_GRAYSCALE)
    tesseract.SetCvImage(gray_page, api)
    words = api.GetUTF8Text().split()

    print("Program gets here")
    return words

コード例 #15

0

ファイルを表示

ファイル: main.py プロジェクト: pjgranahan/livetranslate

def updateOCR(frame):
    """OCR a video frame and return its text, confidence and character boxes.

    Args:
        frame: numpy image; it is wrapped as a 3-channel 8-bit IplImage
            (assumes shape ``(rows, cols, 3)`` — TODO confirm with caller).

    Returns:
        A tuple ``(text, confidence, charMatrix)`` where ``charMatrix`` is
        the box text for page 0 split into lines.
    """
    api = tesseract.TessBaseAPI()
    # Raw string: the backslashes are path separators, not escape sequences.
    api.Init(r"C:\Program Files (x86)\Tesseract-OCR", "eng", tesseract.OEM_DEFAULT)
    # Variables are set after Init, as the Tesseract API notes require for
    # non-init parameters such as the whitelist.
    api.SetVariable("tessedit_char_whitelist", WHITELIST)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    bitmap = cv2.cv.CreateImageHeader((frame.shape[1], frame.shape[0]), cv2.IPL_DEPTH_8U, 3)
    cv2.cv.SetData(bitmap, frame.tostring(), frame.dtype.itemsize * 3 * frame.shape[1])
    tesseract.SetCvImage(bitmap, api)
    text = api.GetUTF8Text()
    confidence = api.MeanTextConf()
    charMatrix = api.GetBoxText(0).split('\n')
    return text, confidence, charMatrix

コード例 #16

0

ファイルを表示

 def ocr(self, image, area, factor):
     """OCR a scaled rectangle of a 2-D image and return the parsed result.

     Args:
         image: 2-D numpy image.
         area: pair of points ``((x1, y1), (x2, y2))``; each coordinate is
             multiplied by *factor* and truncated to an int.
         factor: scale applied to the coordinates in *area*.

     Returns:
         Whatever ``self.hocrToObject`` builds from the hOCR text.
     """
     engine = tesseract.TessBaseAPI()
     engine.Init(".", "big", tesseract.OEM_DEFAULT)
     engine.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)

     rows, cols = image.shape
     row_bytes = image.dtype.itemsize * cols
     ipl_header = cv.CreateImageHeader((cols, rows), cv.IPL_DEPTH_8U, 1)
     cv.SetData(ipl_header, image.tostring(), row_bytes)
     tesseract.SetCvImage(ipl_header, engine)

     # Scale the corners, then pass left/top/width/height to Tesseract.
     left = int(area[0][0] * factor)
     top = int(area[0][1] * factor)
     right = int(area[1][0] * factor)
     bottom = int(area[1][1] * factor)
     engine.SetRectangle(left, top, right - left, bottom - top)

     return self.hocrToObject(engine.GetHOCRText(0), area, factor)

コード例 #17

0

ファイルを表示

def convert_file(filename):
    """OCR an uploaded image and return its text as a JSON response.

    Args:
        filename: name of the file inside the uploads directory; the full
            path is lower-cased before loading.

    Returns:
        ``jsonify({'output': text})`` on success, or the plain string
        ``"Error <message>"`` when the image cannot be loaded.
    """
    # NOTE(review): ``filename`` is concatenated into the path unchecked; if
    # it comes from a request, confirm the caller sanitizes it (e.g. strips
    # directory components) to prevent path traversal.
    path = str('/home/engineer/htdocs/stop/webapi/uploads/' + filename).lower()
    try:
        image = cv.LoadImage(path, cv.CV_LOAD_IMAGE_GRAYSCALE)
    except Exception as e:
        return "Error " + str(e)
    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)
    tesseract.SetCvImage(image, api)
    # The mean confidence was computed but never used, so it is no longer
    # requested.
    text = api.GetUTF8Text()
    return jsonify({'output': str(text)})

コード例 #18

0

ファイルを表示

def img2text_method2(image_path):
    """Return the whitespace-separated words recognized in an image file.

    A progress message and the resulting word list are printed.
    """
    print("Using method 2 to generate text from image...")

    engine = tesseract.TessBaseAPI()
    engine.SetOutputName("outputName")
    engine.Init(".", "eng", tesseract.OEM_DEFAULT)
    engine.SetPageSegMode(tesseract.PSM_AUTO)

    gray_page = cv.LoadImage(image_path, cv.CV_LOAD_IMAGE_GRAYSCALE)
    tesseract.SetCvImage(gray_page, engine)
    words = engine.GetUTF8Text().split()
    print(words)
    return words

コード例 #19

0

ファイルを表示

def ocr_udemae(img):
    """Return the first line of rank text recognized in *img*.

    The image is binarized and eroded with the project helpers, then read
    as a single line restricted to the characters ``SABC+-``.
    """
    img = binarize(img)
    img = erode(img)

    api = tesseract.TessBaseAPI()
    # Raw string: the backslashes are path separators, not escape sequences.
    api.Init(r"C:\Program Files (x86)\Tesseract-OCR", "eng",
             tesseract.OEM_DEFAULT)
    api.SetVariable("tessedit_char_whitelist", "SABC+-")
    api.SetPageSegMode(tesseract.PSM_SINGLE_LINE)

    cv_im = convert_to_IplImage(img)
    tesseract.SetCvImage(cv_im, api)
    # Keep only the first line to drop the extra newlines in the OCR result.
    return api.GetUTF8Text().split("\n")[0]

コード例 #20

0

ファイルを表示

def img2txt(image_path):
    """Return the OCR text of an image file as a single line.

    The image is loaded in grayscale and recognized with the module-level
    ``api``; the line breaks in the result are replaced by single spaces.
    """
    image = cv.LoadImage(image_path, cv.CV_LOAD_IMAGE_GRAYSCALE)
    tesseract.SetCvImage(image, api)
    text = api.GetUTF8Text()
    # Remove every '\n' put in by Tesseract and join the lines with a space.
    # (The mean confidence was computed but never used, so it is no longer
    # requested.)
    return " ".join(text.splitlines())

コード例 #21

0

ファイルを表示

ファイル: addimg.py プロジェクト: EricSchles/veyepar

    def ocr_img(self, imgname):
        """Return the text Tesseract recognizes in the image file *imgname*.

        The image is loaded in grayscale and recognized with the English
        language data using automatic page segmentation.

        To use a non-standard language pack named foo.traineddata, set the
        TESSDATA_PREFIX environment variable so the file can be found at
        TESSDATA_PREFIX/tessdata/foo.traineddata and give Tesseract the
        argument -l foo.
        """
        image = cv.LoadImage(imgname, cv.CV_LOAD_IMAGE_GRAYSCALE)

        api = tesseract.TessBaseAPI()
        api.Init(".", "eng", tesseract.OEM_DEFAULT)
        api.SetPageSegMode(tesseract.PSM_AUTO)
        tesseract.SetCvImage(image, api)
        # The mean confidence was computed but never used, so it is no
        # longer requested.
        return api.GetUTF8Text()

コード例 #22

0

ファイルを表示

def Recognize(iplimage):
    """OCR *iplimage* and print the most frequent recent numeric reading.

    The image is passed to the module-level ``api``. Each recognized line is
    stripped of spaces and of the characters listed in the ``strip`` entry of
    the ``POSPROCESS`` config section, then parsed as a float. Lines that
    parse are appended (as text) to the global ``meas_stack``; once the stack
    is longer than the 'Filter' trackbar position it is trimmed and its most
    common entry is printed with a timestamp and the line index.

    Returns:
        ``0`` as soon as a parsed value falls outside the window around
        ``expected_value`` (when that is not the empty string); otherwise
        ``None`` implicitly.
    """
    global meas_stack
    tesseract.SetCvImage(iplimage, api)

    # Fall back to the UNLV output (with "^" removed) when the API object
    # has no GetUTF8Text attribute
    try:
        full_text = api.GetUTF8Text()
    except AttributeError:
        full_text = api.GetUNLVText().replace("^", "")

    # NOTE(review): conf is not used anywhere below
    conf = api.MeanTextConf()
    # Go through every line found by tesseract
    for index, text in enumerate(full_text.split('\n')):
        # Some char filter
        text = text.replace(" ", "")
        for char in ConfigSectionMap("POSPROCESS")['strip']:
            text = text.replace(char, "")
        try:
            text_val = float(text)
            # handle the OCRed value if an expected value was provided by
            # the user: accept it only inside expected_value +/-
            # expected_value_desv percent
            if expected_value != "":
                up_limit = (float(expected_value)) * (1 + (float(expected_value_desv) / 100))
                dn_limit = (float(expected_value)) * (1 - (float(expected_value_desv) / 100))
                if (
                    len(text) > 0 and
                    text_val > dn_limit and
                    text_val < up_limit
                ):
                    pass
                else:
                    return 0
            # most common value filter

            most_common_filter_pos = cv2.getTrackbarPos('Filter', 'frame')
            # add last text
            meas_stack.append(text)
            if len(meas_stack) > most_common_filter_pos:
                # remove old entries, keeping the newest
                # (most_common_filter_pos + 1) readings
                meas_stack = meas_stack[-(most_common_filter_pos + 1):]
                # count most frequent value
                count = Counter(meas_stack)
                out = count.most_common()[0][0]
                ## show if the last is the most common
                # if out == meas_stack[-1]:
                print "Timestamp: " + datetime.datetime.now().strftime('%y%m%d%H%M%S_%f')
                print "Line " + str(index)
                print out
        # NOTE(review): bare except silently skips lines that do not parse
        # as a float, but it also hides every other error raised in the
        # block above
        except:
            pass

コード例 #23

0

ファイルを表示

ファイル: zzy_Solution.py プロジェクト: ziyeZzz/NAO-robot-character-recognition

def ocr_sxx(filename):
    """Preprocess an image file, OCR it and print the recognized text.

    The image is converted to grayscale, thresholded at 70, median-blurred
    and written to ``result.png``; that file is then recognized with the
    English language data. The text length, the text, and the text with
    newlines replaced by spaces are printed. Nothing is returned.
    """
    ##### Image preprocessing #####
    im = cv2.imread(filename)

    # convert to grayscale
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    # threshold the image
    ret, im_th = cv2.threshold(im_gray, 70, 255, cv2.THRESH_BINARY)
    # remove noise
    im_th = cv2.medianBlur(im_th, 3)
    # save the preprocessed image locally; the OCR step reloads it
    cv2.imwrite('result.png', im_th)
    cv2.waitKey(0)  # Waits for the user to press any key
    cv2.destroyAllWindows()

    ##### OCR part #####
    image = cv.LoadImage("result.png", cv.CV_LOAD_IMAGE_GRAYSCALE)
    # language used for recognition
    InputLan = "eng"
    api = tesseract.TessBaseAPI()
    api.Init(".", InputLan, tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)
    tesseract.SetCvImage(image, api)
    text = api.GetUTF8Text()

    print(len(text))
    print(text.decode('utf-8'))
    # replace the newlines with spaces
    line = text.replace('\n', ' ')
    print(line.decode('utf-8'))

コード例 #24

0

ファイルを表示

ファイル: ocrmethods.py プロジェクト: beldougie/EliteOCR

 def ocr(self, image, area, factor):
     """OCR a scaled rectangle of a 2-D image and return the parsed result.

     The rectangle's right edge is pulled in by 17% of the area's width
     (truncated to an int) before scaling. The "big" language data is used
     when ``self.lang`` is "big" or "eng"; otherwise ``self.lang`` itself.

     Args:
         image: 2-D numpy image.
         area: pair of points ``((x1, y1), (x2, y2))``.
         factor: scale applied to the coordinates in *area*.

     Returns:
         Whatever ``self.hocrToObject`` builds from the decoded hOCR text.
     """
     engine = tesseract.TessBaseAPI()
     lang = "big" if self.lang in ("big", "eng") else str(self.lang)
     engine.Init(".", lang, tesseract.OEM_DEFAULT)
     engine.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)

     rows, cols = image.shape
     row_bytes = image.dtype.itemsize * cols
     ipl_header = cv.CreateImageHeader((cols, rows), cv.IPL_DEPTH_8U, 1)
     cv.SetData(ipl_header, image.tostring(), row_bytes)
     tesseract.SetCvImage(ipl_header, engine)

     # Scaled corners; the right edge is trimmed before scaling.
     trimmed_right = area[1][0] - int((area[1][0] - area[0][0]) * 0.17)
     left = int(area[0][0] * factor)
     top = int(area[0][1] * factor)
     right = int(trimmed_right * factor)
     bottom = int(area[1][1] * factor)
     engine.SetRectangle(left, top, right - left, bottom - top)

     hocr_text = engine.GetHOCRText(0).decode('utf-8')
     return self.hocrToObject(hocr_text, area, factor)

コード例 #25

0

ファイルを表示

ファイル: ocr.py プロジェクト: sorki/pllm

def tesseract_native(fpath, lang, block=True):
    """Process the pre-segmented image at `fpath` with the tesseract Python API.

    Args:
        fpath: path of the image file, loaded without color.
        lang: language name passed to ``Init``.
        block: when true the image is treated as a single block of text,
            otherwise automatic page segmentation is used.

    Returns:
        The recognized UTF-8 text.
    """
    engine = tesseract.TessBaseAPI()
    engine.Init(".", lang, tesseract.OEM_DEFAULT)

    seg_mode = tesseract.PSM_SINGLE_BLOCK if block else tesseract.PSM_AUTO
    engine.SetPageSegMode(seg_mode)

    # Apply the module-level option overrides.
    for name, value in tesseract_opts.items():
        engine.SetVariable(name, value)

    gray_img = cv.LoadImage(fpath, iscolor=False)
    tesseract.SetCvImage(gray_img, engine)
    return engine.GetUTF8Text()

コード例 #26

0

ファイルを表示

ファイル: tesseractOCR.py プロジェクト: gcca/urb-vi

    def leerPlaca(self, binarizado):
        """Run OCR on a binarized image and return its alphanumeric text.

        Args:
            binarizado: numpy array holding the binarized image that
                contains the text.

        Returns:
            The recognized string with every run of non-word characters
            removed.
        """
        # numpy array -> CvMat -> IplImage
        # NOTE: GetImage was flagged as a source of an error; needs review.
        ipl_bin = cv.GetImage(cv.fromarray(binarizado))

        # Tesseract configuration
        ocr_engine = tesseract.TessBaseAPI()
        ocr_engine.Init(".", "eng", tesseract.OEM_DEFAULT)
        ocr_engine.SetPageSegMode(tesseract.PSM_AUTO)

        # Hand the binarized image to tesseract and clean up its output.
        tesseract.SetCvImage(ipl_bin, ocr_engine)
        raw_text = ocr_engine.GetUTF8Text()
        return re.sub(r'\W+', '', raw_text)

コード例 #27

0

ファイルを表示

ファイル: OCR.py プロジェクト: piaoger/NavPalFloorplans

def getText(image0):
    """OCR a numpy image and return ``(text, confidence)``.

    The image is first surrounded by a border of ``textOffset`` pixels in
    the ``rgbWhite`` color (both module-level values), because Tesseract
    works better when the text does not touch the image edge.

    Args:
        image0: numpy image as produced by cv2.

    Returns:
        A tuple of the recognized UTF-8 text and the mean text confidence.
    """
    # Thicken the border to keep Tesseract happy. The shape unpacks that
    # used to surround this call were never used and are gone.
    offset = textOffset
    image1 = cv2.copyMakeBorder(image0, offset, offset, offset, offset,
                                cv2.BORDER_CONSTANT, value=rgbWhite)

    # set up tesseract API
    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    # numpy array -> CvMat -> IplImage
    cvmat_image = cv.fromarray(image1)
    iplimage = cv.GetImage(cvmat_image)

    # extract text
    tesseract.SetCvImage(iplimage, api)
    text = api.GetUTF8Text()
    conf = api.MeanTextConf()
    return text, conf

コード例 #28

0

ファイルを表示

def __get_text_from_image(saved_image_path, coord):
    """Return the text Tesseract recognizes inside a region of an image file.

    Args:
        saved_image_path: path of the image, read in grayscale.
        coord: sequence ``(x1, y1, x2, y2)`` delimiting the region to OCR.

    Returns:
        The recognized UTF-8 text.

    Raises:
        ImportError: if cv2, cv2.cv or tesseract cannot be imported.
        cv2.error: if the image cannot be read.
    """
    try:
        import cv2
        import cv2.cv as cv
        import tesseract
    except ImportError:
        # Only import failures are translated; other errors propagate as-is.
        raise ImportError("tesseract library for python required")

    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    image0 = cv2.imread(saved_image_path, cv.CV_LOAD_IMAGE_GRAYSCALE)

    if image0 is None:
        raise cv2.error("Image for text matching was NoneType")
    # x1 = coord[0], y1 = coord[1], x2 = coord[2], y2 = coord[3]
    image1 = image0[coord[1]:coord[3], coord[0]:coord[2]]
    height1, width1 = image1.shape
    iplimage = cv.CreateImageHeader((width1, height1), cv.IPL_DEPTH_8U, 1)
    cv.SetData(iplimage, image1.tostring(), image1.dtype.itemsize * (width1))
    tesseract.SetCvImage(iplimage, api)
    return api.GetUTF8Text()

コード例 #29

0

ファイルを表示

}

# Download the captcha image and read it with Tesseract.
captcha = 'http://158.69.76.135/captcha.php'
c = requests.get(captcha, headers=headers)
# delete=False: the file must outlive its handle so OpenCV can open it by
# name after the with-block has closed it.
with tempfile.NamedTemporaryFile(delete=False) as temp:
    # Use the downloaded body; c.raw is only readable when the request was
    # made with stream=True, so reading it here produced an empty file.
    temp.write(c.content)

gray = cv.LoadImage(temp.name, cv.CV_LOAD_IMAGE_GRAYSCALE)
cv.Threshold(gray, gray, 231, 255, cv.CV_THRESH_BINARY)
api = tesseract.TessBaseAPI()
api.Init(".", "eng", tesseract.OEM_DEFAULT)
api.SetVariable("tessedit_char_whitelist",
                "0123456789abcdefghijklmnopqrstuvwxyz")
api.SetPageSegMode(tesseract.PSM_SINGLE_WORD)
tesseract.SetCvImage(gray, api)
# From here on ``captcha`` holds the recognized text, not the URL.
captcha = api.GetUTF8Text()

posturl = "http://158.69.76.135/level3.php"

headers = {
    'Referer': 'http://158.69.76.135/level3.php',
    'User-Agent': 'Mozilla/4.01 [en] (Win95; I)'
}

get = requests.get(posturl, headers=headers)

for i in range(1024):
    get = requests.post(posturl,
                        data={
                            'id': '923',

コード例 #30

0

ファイルを表示

#!/usr/bin/env python

import numpy as np
import cv2
import cv2.cv as cv
import tesseract

# Load the input and reduce it to a single gray channel.
image = cv2.cvtColor(cv2.imread("temp.png"), cv2.COLOR_BGR2GRAY)

# Smooth, then binarize at 155.
blurred = cv2.GaussianBlur(image, (5, 5), 0)
T, tresh = cv2.threshold(blurred, 155, 255, cv2.THRESH_BINARY)

# Show the blurred and thresholded images side by side until a key is pressed.
cv2.imshow("Avg GaussianBlur", np.hstack((blurred, tresh)))
cv2.waitKey(0)

api = tesseract.TessBaseAPI()
api.Init(".", "eng", tesseract.OEM_DEFAULT)
api.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)

# Wrap the thresholded numpy buffer in a single-channel IplImage header.
height1, width1 = tresh.shape
channel1 = 1
row_bytes = tresh.dtype.itemsize * channel1 * width1
image = cv.CreateImageHeader((width1, height1), cv.IPL_DEPTH_8U, channel1)
cv.SetData(image, tresh.tostring(), row_bytes)
tesseract.SetCvImage(image, api)

text = api.GetUTF8Text()
conf = api.MeanTextConf()
image = None  # drop the reference to the image header
print("...............")
print("Ocred Text: %s" % text)
print("Cofidence Level: %d %%" % conf)