Exemple #1
0
	def getCheckCodeString(self, filename):
		"""OCR the captcha image at *filename*, delete the file, and return the text.

		The recognised text is printed before stripping and returned with
		surrounding whitespace removed. The image file is removed even when
		the OCR call raises, so failed attempts do not leave captcha files behind.
		"""
		try:
			text = str(ocr.image_file_to_string(filename))
			print("验证码: " + text)
		finally:
			# Clean up the captcha image whether or not OCR succeeded; the
			# existence check keeps a missing file from masking the OCR error.
			if os.path.exists(filename):
				os.remove(filename)
		# Strip the whitespace surrounding the recognised text.
		return text.strip()
Exemple #2
0
 def executar(self, diretorio):
     """Run OCR on the image at *diretorio* and store the text on the instance.

     Returns True when the OCR call succeeded, False when it raised. The
     error is printed instead of being silently discarded.
     """
     try:
         self.__texto = image_file_to_string(diretorio)
     except Exception as erro:
         # Catch Exception rather than using a bare except, so that
         # KeyboardInterrupt / SystemExit still propagate.
         print(erro)
         return False
     return True
Exemple #3
0
 def image_file_to_string(file):
     """Run pytesser OCR on *file* from inside the hard-coded library directory.

     The working directory is switched for the duration of the OCR call and
     always restored afterwards, even when the call raises.

     NOTE(review): a relative *file* path is resolved after the chdir, i.e.
     against the library directory - callers presumably pass absolute paths.
     """
     cwd = os.getcwd()
     try:
         # Raw string: the unescaped backslash-U in a normal literal is a
         # syntax error on Python 3 and fragile everywhere else.
         os.chdir(r"C:\Users\MrLevo\Anaconda2\Lib")
         return pytesser.image_file_to_string(file)
     finally:
         os.chdir(cwd)
def getAuthCode(fn):
	"""Binarise the captcha image *fn*, OCR it, and return the recognised code.

	The image is enlarged 7x, every pixel darker than RGB (90, 136, 100) on
	all three channels is turned black and everything else white, the result
	is saved as "bw.bmp", and that file is passed to pytesser.

	Returns the OCR text upper-cased with all whitespace removed.
	"""
	# Convert to RGB first: palette, grayscale or RGBA images do not yield
	# (r, g, b) triples from pixel access, which broke the unpacking below.
	im = Image.open(fn).convert("RGB")

	# Enlarge the image before thresholding.
	nx, ny = im.size
	width = int(nx*7)
	height = int(ny*7)
	im2 = im.resize((width, height), Image.BICUBIC)

	# Threshold: dark pixels become black, all others white.
	pix = im2.load()
	for x in range(width):
		for y in range(height):
			r, g, b = pix[x, y]
			if r < 90 and g < 136 and b < 100:
				pix[x, y] = (0, 0, 0)
			else:
				pix[x, y] = (255, 255, 255)

	# The OCR call below reads the binarised image back from this file.
	im2.save("bw.bmp")

	# Removing every whitespace run also covers leading/trailing whitespace.
	text = pytesser.image_file_to_string('bw.bmp')
	return re.sub(r'\s+', '', text).upper()
Exemple #5
0
 def read_imagetext(self):
     """OCR the hard-coded sample image with pytesser and print the result.

     The previous version also opened the image with PIL without ever using
     or closing it; that open leaked a file handle and raised before the
     graceful OCR call when the file was missing, so it has been removed.
     """
     # Raw string keeps the Windows backslashes literal.
     image_path = r'E:\study\oooo.png'
     text = pytesser.image_file_to_string(image_path, graceful_errors=True)
     print("Using image_file_to_string():")
     print(text)
Exemple #6
0
 def getRand(self):
     """OCR 'ttt0.jpg' and return the text if it contains four consecutive digits.

     The stripped OCR text is stored on self.rand in every successful OCR
     run. Returns self.rand when a run of four digits is found in it,
     otherwise None. Also returns None (after printing a message) when the
     OCR call raises IOError.
     """
     try:
         fname = 'ttt0.jpg'
         self.rand = pytesser.image_file_to_string(fname).strip()
     except IOError:
         print('>>getRand failed')
         return None
     if re.search('[0-9]{4}', self.rand):
         return self.rand
     return None
Exemple #7
0
def   textdet(shot,q,mode):

       sent=[]
       d=enchant.Dict("en_US")
       prev=""
       while True:
          if shot.empty()==False:
             imgarray=shot.get()
             shot.queue.clear()
             cv2.imshow('retrieved',imgarray)
             cv2.imwrite("image.jpeg" , imgarray)
             text = pytesser.image_file_to_string("image.jpeg")
             text=text.replace("\n","")
##             text=text.replace(" ","")
             for x in text:
                  if not re.match("[a-zA-Z]",x):
                     text=text.replace(x,"")
             valid=checkvalid(text)
             if text!="" and valid==True:
                   if d.check(text)==True:
                          if text.lower()!=prev:
                               print (text),
                               if mode==1:
                                   q.put(text)
                               elif mode==2:
                                   sent.append(text)
     ##                              if text[0].isupper():
                                   print sent
                                   translater(sent)
                                   del sent[:]
                          prev=text.lower()
##                   else:
##                          rep=SpellingReplacer()
##                          text_corrected=rep.replace(text)
##                          if d.check(text_corrected)==True:
##                                 print "corrected text = ",text_corrected
##                                 if mode==1:
##                                      q.put(text)
##                                 if mode==2:
##                                      sent.append(text)
##                                      print sent
##                                      translater(sent)
##                                      del sent[:]       
                          
             k = cv2.waitKey(5) & 0xFF
             if k == 27:
                    break
       cv2.destroyAllWindows() 
Exemple #8
0
def tocodes(fname):
    """OCR the image *fname*, print the recognised code, and return it stripped.

    The message names the file that was actually processed; it previously
    relied on a global counter ``i`` that is not defined in this function.
    """
    codes = pytesser.image_file_to_string(fname).strip()
    print('The ' + str(fname) + '\'s code is ' + codes)
    return codes
Exemple #9
0
def process_image(url):
    # image = _get_image(url)
    # image.filter(ImageFilter.SHARPEN)
    # return pytesseract.image_to_string(image)
    print "URL", url
    return pytesser.image_file_to_string(url)
            else:
                buf = list();
        elif(len(buf) <= 1 and firsth != 0):
            buf = list()
"""

# Noise removal pass: for every pixel, count the dark pixels among the
# neighbours visited below and turn the pixel white when at most two are dark.
# whitePix is modified in place while it is being scanned.
# NOTE(review): whitePix is presumably the pixel access object of imgList[2]
# (saved below as 'white_' + fname) - confirm against the code above this chunk.
for h in xrange(height):
    firstw = 0;

    for w in xrange(width):
        r,g,b =  whitePix[w,h];
        tcounter = 0;

        # Offsets -1..1 in both directions (the centre pixel itself is included).
        for pw in xrange(-1,2):
            for ph in xrange(-1,2):

                # Leave the inner (ph) loop when the bounds test fails; the
                # outer pw loop continues with the next column offset.
                # NOTE(review): the third test uses (w + pw) although the
                # pixel read below is at (w - pw) - confirm this is intended.
                if((w - pw) < 0 or (h - ph) < 0 or (w + pw) >= (width-2) or (h - ph) >= (height-2)):
                    break;
                tr,tg,tb = whitePix[w - pw,h - ph];
                # A neighbour counts as dark when all channels are <= limiar.
                if(tr <= limiar and tg <= limiar and tb <= limiar):
                    #whitePix[w - pw,h - ph] = (0,0,0);
                    tcounter+=1;
        # Too few dark pixels around: treat the pixel as noise and whiten it.
        if(tcounter <= 2):
            whitePix[w,h] = (255,255,255);

# Save the three images under images/ with their respective prefixes.
imgList[0].save('images/' + fname);
imgList[1].save('images/negative_' + fname);
imgList[2].save('images/white_' + fname);

# OCR the 'white_' image that was just saved and print the text.
text = pytesser.image_file_to_string('images/white_' + fname,graceful_errors=True)
print(text)
Exemple #11
0
    valimg = None
    valimgs = tree.xpath('//img[@id="yanzheng"]/@src')
    if len(valimgs) > 0:
        valimg = valimgs[0]

    validateCode = None
    if valimg:
        fname = 'img/' + str(idx) + '_' + str(config['gid']) + '.jpg'
        config['gid'] = config['gid'] + 1
        ri = s.get("https://passport.csdn.net" + valimg)
        with open(fname, 'wb') as f:
            for chk in ri:
                f.write(chk)
            f.close()
        validateCode = pytesser.image_file_to_string(fname)
        validateCode = validateCode.strip()
        validateCode = validateCode.replace(' ', '')
        validateCode = validateCode.replace('\n', '')
        result['validateCode'] = validateCode

    return result


def login(usr, pwd, idx):
    s = requests.Session()

    r = s.get(
        'https://passport.csdn.net/account/login',
        headers={
            'User-Agent':
Exemple #12
0
import pytesser
import model

# Try Tesseract on two simple images containing Chinese characters.
ocr_options = {'lang': 'chi_sim', 'graceful_errors': True}

# Tesseract returns the characters UTF-8 encoded, apparently followed by two
# newlines, hence the decode + strip.
text = pytesser.image_file_to_string('shangwu.png', **ocr_options)
shangwu = text.decode('utf-8').strip()
print(shangwu)

# For the second image the raw (undecoded) OCR output is printed.
text = pytesser.image_file_to_string('taihaole.png', **ocr_options)
print(text)
taihaole = text.decode('utf-8').strip()

# Look both strings up in the CEDICT database; both queries run before
# anything is printed, and each must match exactly one entry.
session = model.connect()
entry_query = session.query(model.Entry)
sw_entry = entry_query.filter_by(simplified=shangwu).one()
thl_entry = session.query(model.Entry).filter_by(simplified=taihaole).one()

for entry in (sw_entry, thl_entry):
    print(entry.definition)
Exemple #13
0
        cwd = os.getcwd()
        try :
            os.chdir("C:\Users\MrLevo\Anaconda2\Lib")
            return pytesser.image_file_to_string(file)
        finally:
            os.chdir(cwd)
    im=Image.open("E:\\image_code.jpg")
    imgry = im.convert('L')#图像加强,二值化
    sharpness =ImageEnhance.Contrast(imgry)#对比度增强
    sharp_img = sharpness.enhance(2.0)
    sharp_img.save("E:\\image_code.jpg")
    #http://www.cnblogs.com/txw1958/archive/2012/02/21/2361330.html
    #imgry.show()#这是分布测试时候用的,整个程序使用需要注释掉
    #imgry.save("E:\\image_code.jpg")

    code= pytesser.image_file_to_string("E:\\image_code.jpg")#code即为识别出的图片数字str类型
    print code
    #打印code观察是否识别正确


    #----------------------------------------------------------------------
    if i <= 2: # 根据自己登录特性,我这里是验证码失败一次,重填所有,失败两次,重填验证码
        elem_user.send_keys('S315080092')
        elem_psw.send_keys('xxxxxxxxxx')

    elem_code.send_keys(code)
    click_login = driver.find_element_by_xpath("//img[@src='main_images/images/loginbutton.gif']")
    click_login.click()


#time.sleep(5)#搜索结果页面停留片刻
Exemple #14
0
def run(filename, debug=False):
    """Extract keyword-delimited fields from an OCR'd alarm message image.

    The image is OCR'd with pytesser. The text between the fuzzy-matched
    'ALARMDEPESCHE' header and the 'Einsatzmittelliste' marker is scanned
    for the keywords configured in ocr/keywords.json, and the text that
    follows each matched keyword (up to the next matched keyword) becomes
    that keyword's content.

    Args:
        filename: path of the image to OCR.
        debug: when true, print the cropped OCR text and the matched tokens.

    Returns:
        A dict mapping keyword name to its extracted content, or an empty
        list when the 'ALARMDEPESCHE' header is not found.
    """
    from collections import OrderedDict

    def find_match(text, pattern, minimum=None):
        """Fuzzy-search *pattern* in *text*; all-None unless the score exceeds *minimum* (default 0.7)."""
        minimum = minimum if minimum else 0.7
        score, match, start, end = fuzzy.bitap(text, pattern)

        if not match or score <= minimum:
            return None, None, None, None

        return score, match, start, end

    content = unicode(pytesser.image_file_to_string(filename), 'utf-8')

    _, alarm, _, end = find_match(content, 'ALARMDEPESCHE')

    with open('ocr/keywords.json') as handle:
        keys = json.load(handle)
    # Longest keyword first, so a short keyword cannot claim text belonging
    # to a longer one. An OrderedDict is required to actually keep that
    # order; a plain dict discards it.
    keys = OrderedDict(sorted(keys.items(), key=lambda item: len(item[0]), reverse=True))

    if alarm is None:
        return []

    content = content[end:]

    # When the marker is not found start is None and the slice keeps everything.
    _, _, start, _ = find_match(content, 'Einsatzmittelliste')
    content = content[:start]

    if debug:
        print(content)

    original = content

    for name, key in keys.items():
        score, match, start, end = find_match(content, name, key.get('threshold'))
        if not match:
            continue

        # Skip this keyword when one of its excluded keywords fits the
        # matched text better than the keyword itself.
        skip = False
        for exclude in key.get('exclude', []):
            alt, _, _, _ = find_match(match, exclude, keys[exclude].get('threshold'))
            if alt is not None and alt > score:
                skip = True
                break
        if skip:
            continue

        # Accept only matches that start within 10 characters of a line start.
        newline = content[:start].rfind('\n')
        if newline == -1 or (start - newline) <= 10:
            key['score'] = score
            key['match'] = match
            key['start'] = start
            key['end'] = end

            # Blank the match out (keeping newlines) so that later keywords
            # cannot match the same text while offsets stay valid.
            content = content[:start] + re.sub(r'[^\n]', ' ', match) + content[end:]

    tokens = sorted((item for item in keys.items() if item[1].get('match') is not None),
                    key=lambda item: item[1]['start'])

    # Each token's content runs from the end of its keyword to the start of
    # the next matched keyword (or to the end of the text for the last one).
    for current, following in peek(tokens):
        _, token = current
        if following:
            _, next_token = following
            stop = next_token['start']
        else:
            stop = len(original)
        token['content'] = re.sub(r'^[ .:‘]+', '', original[token['end']:stop].strip()).strip()

    tokens = [item for item in tokens if not item[1].get('ignore', False)]

    if debug:
        import pprint

        pprint.PrettyPrinter(indent=4).pprint(tokens)

    return {name: token['content'] for name, token in tokens}
)

# i = 0
# while 1:
#     i = i+1
#     try:
# Locate the login form fields, then screenshot the page so that the captcha
# can be cut out of it and OCR'd.
elem_user = driver.find_element_by_name("username")
elem_pwd = driver.find_element_by_name("password")
elem_captcha = driver.find_element_by_name("j_captcha_response")
driver.get_screenshot_as_file('captcha.jpg')

rangle = (1669, 494, 1807, 541)  # coordinates of the region to cut out (the captcha)
i = Image.open("/Users/wangmian/PycharmProjects/selenium/captcha.jpg")  # open the screenshot
realcaptcha = i.crop(rangle)  # crop the captcha region out of the screenshot
realcaptcha.save("/Users/wangmian/PycharmProjects/selenium/realcaptcha.png")
# Strip the OCR result: the raw text carries trailing whitespace/newlines
# that must not be typed into the captcha field.
code = pytesser.image_file_to_string(
    '/Users/wangmian/PycharmProjects/selenium/realcaptcha.png').strip()

# Fill in the form and submit it with RETURN.
elem_user.send_keys("20141001146")
elem_pwd.send_keys("******")
time.sleep(0.5)
elem_captcha.send_keys(code)

elem_pwd.send_keys(Keys.RETURN)
time.sleep(5)
driver.close()
driver.quit()
Exemple #16
0
def run(filename, debug=False):
    """Extract keyword-delimited fields from an OCR'd alarm message image.

    The image is OCR'd with pytesser. The text after the fuzzy-matched
    'ALARMDEPESCHE' header is scanned for the keywords configured in
    ocr/keywords.json, and the text that follows each matched keyword (up
    to the next matched keyword) becomes that keyword's content. Per-keyword
    options read from the configuration: 'threshold', 'exclude',
    'previousLine', 'singleLine', 'table' and 'ignore'.

    Args:
        filename: path of the image to OCR.
        debug: when true, print the cropped OCR text and the matched tokens.

    Returns:
        A dict mapping keyword name to its extracted content (a string, or
        a list of strings for 'table' keywords), or an empty list when the
        'ALARMDEPESCHE' header is not found.
    """
    from collections import OrderedDict

    def find_match(text, pattern, minimum=None):
        """Fuzzy-search *pattern* in *text*; all-None unless the score exceeds *minimum* (default 0.7)."""
        minimum = minimum if minimum else 0.7
        score, match, start, end = fuzzy.bitap(text, pattern)

        if not match or score <= minimum:
            return None, None, None, None

        return score, match, start, end

    def trim(s):
        """Strip whitespace and any leading run of spaces, dots, colons or quote marks."""
        return re.sub(r'^[ .:‘]+', '', s.strip()).strip()

    content = unicode(pytesser.image_file_to_string(filename), 'utf-8')

    _, alarm, _, end = find_match(content, 'ALARMDEPESCHE')

    with open('ocr/keywords.json') as handle:
        keys = json.load(handle)
    # Longest keyword first, so a short keyword cannot claim text belonging
    # to a longer one. An OrderedDict is required to actually keep that
    # order; a plain dict discards it.
    keys = OrderedDict(sorted(keys.items(), key=lambda item: len(item[0]), reverse=True))

    if alarm is None:
        return []

    content = content[end:]

    if debug:
        print(content)

    original = content

    for name, key in keys.items():
        score, match, start, end = find_match(content, name, key.get('threshold'))
        if not match:
            continue

        # Skip this keyword when one of its excluded keywords fits the
        # matched text better than the keyword itself.
        skip = False
        for exclude in key.get('exclude', []):
            alt, _, _, _ = find_match(match, exclude, keys[exclude].get('threshold'))
            if alt is not None and alt > score:
                skip = True
                break
        if skip:
            continue

        # Accept only matches that start within 10 characters of a line start.
        newline = content[:start].rfind('\n')
        if newline == -1 or (start - newline) <= 10:
            key['score'] = score
            key['match'] = match
            key['start'] = start
            key['end'] = end

            # Blank the match out (keeping newlines) so that later keywords
            # cannot match the same text while offsets stay valid.
            content = content[:start] + re.sub(r'[^\n]', ' ', match) + content[end:]

    tokens = sorted((item for item in keys.items() if item[1].get('match') is not None),
                    key=lambda item: item[1]['start'])

    previous = None
    for current, following in peek(tokens):
        _, token = current

        # Content runs from the end of this keyword to the start of the next
        # matched keyword (or to the end of the text for the last one).
        start = token['end']
        if following:
            _, next_token = following
            end = next_token['start']
        else:
            end = len(original)

        s = trim(original[start:end])

        if previous and token.get('previousLine', False):
            # Prepend the text found between the end of the previous
            # token's content and this keyword.
            _, previous_token = previous
            previous_content = previous_token['content']
            original_find = original.find(previous_content) + len(previous_content)
            s = trim(original[original_find:token['start']]) + s

        if token.get('singleLine', False):
            # Keep the first line only; empty content has no lines at all.
            rows = s.splitlines()
            token['content'] = rows[0] if rows else ''
        elif token.get('table', False):
            # Skip the first non-empty row and keep the non-empty
            # group() results of the remaining rows.
            rows = [row for row in s.splitlines() if row]
            cells = [group(re.search(r'^(.{10,}?) ?', row)) for row in rows[1:]]
            token['content'] = [cell for cell in cells if cell]
        else:
            token['content'] = s

        previous = current

    tokens = [item for item in tokens if not item[1].get('ignore', False)]

    if debug:
        import pprint

        pprint.PrettyPrinter(indent=4).pprint(tokens)

    return {name: token['content'] for name, token in tokens}
Exemple #17
0
def parse(s, html, idx):
    """Extract the login form fields (and an OCR'd captcha, if any) from *html*.

    Args:
        s: session object used to download the captcha image (``s.get``).
        html: HTML source of the login page.
        idx: index used as the first part of the captcha file name.

    Returns:
        A dict with the keys 'lt', 'execution' and 'path', plus
        'validateCode' when the page contains a captcha image; None when
        any of the three form fields is missing from the page.
    """
    result = {}
    tree = etree.HTML(html)
    try:
        result['lt'] = tree.xpath('//input[@name="lt"]/@value')[0]
        result['execution'] = tree.xpath('//input[@name="execution"]/@value')[0]
        result['path'] = tree.xpath('//form[@id="fm1"]/@action')[0]
    except IndexError:
        return None

    valimgs = tree.xpath('//img[@id="yanzheng"]/@src')
    valimg = valimgs[0] if valimgs else None

    if valimg:
        # Each downloaded captcha gets its own file name; config['gid'] is a
        # running counter that is incremented for every download.
        fname = 'img/' + str(idx) + '_' + str(config['gid']) + '.jpg'
        config['gid'] = config['gid'] + 1
        ri = s.get("https://passport.csdn.net" + valimg)
        with open(fname, 'wb') as f:
            for chk in ri:
                f.write(chk)

        # OCR the captcha and drop all spaces and newlines from the text.
        validateCode = pytesser.image_file_to_string(fname)
        validateCode = validateCode.strip().replace(' ', '').replace('\n', '')
        result['validateCode'] = validateCode

    return result
def login(usr, pwd, idx):
    """Log in to passport.csdn.net, retrying while the captcha is rejected.

    Args:
        usr: account name.
        pwd: password.
        idx: index passed on to parse() for naming captcha files.

    Returns:
        True when the response shows no error message (or an unrecognised
        one); False when the login page cannot be parsed, the credentials
        are rejected, or the account is temporarily locked.

    NOTE(review): there is no retry limit - the loop repeats for as long as
    the server keeps answering with the captcha error.
    """
    s = requests.Session()
    r = s.get(
        'https://passport.csdn.net/account/login',
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0',
            'Host': 'passport.csdn.net',
        })

    while True:
        res = parse(s, r.text, idx)
        if res is None:
            return False

        url = 'https://passport.csdn.net' + res['path']
        form = {
            'username': usr,
            'password': pwd,
            '_eventId': 'submit',
            'execution': res['execution'],
            'lt': res['lt'],
        }
        if 'validateCode' in res:
            form['validateCode'] = res['validateCode']

        s.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Host': 'passport.csdn.net',
            'Origin': 'https://passport.csdn.net',
            'Referer': 'https://passport.csdn.net/account/login',
            # Header values must be strings, not integers.
            'Upgrade-Insecure-Requests': '1',
        })
        r = s.post(url, data=form)

        tree = etree.HTML(r.text)
        err_strs = tree.xpath('//span[@id="error-message"]/text()')
        if len(err_strs) == 0:
            return True

        err_str = err_strs[0]
        print(err_str)
        err = err_str.encode('utf8')
        validate_code_err = '驗證碼錯誤'
        usr_pass_err = '帳戶名或登入密碼不正確,請重新輸入'
        try_later_err = '登入失敗連續超過5次,請10分鐘後再試'

        # Only the first five units of each message are compared.
        if err[:5] == validate_code_err[:5]:
            # Wrong captcha: loop again; the response page is parsed for a
            # fresh form and captcha.
            pass
        elif err[:5] == usr_pass_err[:5]:
            return False
        elif err[:5] == try_later_err[:5]:
            return False
        else:
            return True
if __name__ == '__main__':
    # Script entry point: the first two command-line arguments are passed on
    # together with index 0.
    # NOTE(review): `main` is not defined in the visible code - confirm it
    # exists elsewhere in the file.
    main(sys.argv[1], sys.argv[2], 0)
Exemple #18
0
def OCRreadline(img, *args):
    """Read one image line column by column using OCR.

    img: image that contains a single line
    args: widths of the consecutive columns, from left to right

    Every column is cropped, colour-filtered, written to "tmp.png", shown
    for 500 ms and OCR'd. The ally-colour mask of the columns decides the
    hit class: a hit in column 1 or 2 selects HIT_CLASS1, a hit in column 3
    selects HIT_CLASS2 (a later hit overrides an earlier one).

    Returns a list holding the OCR text of each column followed by the hit
    class, or "N/A" when no column produced a hit. The list is also printed.
    """
    left_edge = 0  # start from the left edge
    top_edge = 0  # static!
    hit_class = ""
    line_str = []

    # HSV bounds of the ally colour, clamped to the valid channel ranges.
    # They do not depend on the column, so compute them once.
    lb_h = max(COLOR_ALLIES[0] - RANGE_HUE, 0)
    lb_s = max(COLOR_ALLIES[1] - RANGE_SAT, 0)
    lb_v = max(COLOR_ALLIES[2] - RANGE_VAL, 0)
    ub_h = min(COLOR_ALLIES[0] + RANGE_HUE, 179)
    ub_s = min(COLOR_ALLIES[1] + RANGE_SAT, 255)
    ub_v = min(COLOR_ALLIES[2] + RANGE_VAL, 255)

    for i, width in enumerate(args, 1):
        sub_cropped_img = crop(img, left_edge, top_edge, width, img.shape[1])

        # Extract the relevant pixels based on colour.
        masked_img = filter_color(sub_cropped_img, LBOUND_H, LBOUND_S, LBOUND_V, UBOUND_H, UBOUND_S, UBOUND_V)
        # Extract only one colour group, e.g. allies.
        masked_img_single = filter_color(sub_cropped_img, lb_h, lb_s, lb_v, ub_h, ub_s, ub_v)

        # Any pixel that is not 255 in the single-colour mask counts as a hit.
        if np.any(masked_img_single != 255):
            if i < 3:
                hit_class = HIT_CLASS1
            elif i == 3:
                hit_class = HIT_CLASS2

        # pytesser reads from a file, so the mask is written out first
        # (once per column is enough).
        cv2.imwrite("tmp.png", masked_img)
        cv2.imshow("current mask", masked_img)
        cv2.waitKey(500)

        # Remove the newline at the end of the OCR string.
        line_str.append(pytesser.image_file_to_string("tmp.png").rstrip("\n\r"))

        # TODO: if line_str empty, try with different setting; limit characters to single font type?

        # Move on to the next column.
        left_edge += width

    line_str.append(hit_class if hit_class else "N/A")

    print("{}".format(line_str))

    return line_str
Exemple #19
0
from PIL import Image
import pytesser

# Run the same picture through both pytesser entry points: first by file
# name, then as an already opened PIL image.
image_path = 'test.jpg'
image = Image.open(image_path)

print(pytesser.image_file_to_string(image_path))
print(pytesser.image_to_string(image))