def get_text_from_image(image_data, app_id, app_key, app_secret, api_version=0, timeout=3):
    """
    Recognise the text in an image through the Baidu OCR service.

    :param image_data: image content passed unchanged to the OCR client
    :param app_id: Baidu application id
    :param app_key: Baidu API key
    :param app_secret: Baidu secret key
    :param api_version: 1 selects ``basicAccurate``; any other value selects
        ``basicGeneral``
    :param timeout: connection timeout in seconds (converted to milliseconds)
    :return: all recognised ``words`` entries concatenated, or ``""`` when the
        response carries an ``error_code``
    """
    ocr = AipOcr(appId=app_id, apiKey=app_key, secretKey=app_secret)
    ocr.setConnectionTimeoutInMillis(timeout * 1000)

    request_options = {"language_type": "CHN_ENG"}
    recognise = ocr.basicAccurate if api_version == 1 else ocr.basicGeneral
    response = recognise(image_data, request_options)

    if "error_code" in response:
        print("baidu api error: ", response["error_msg"])
        return ""

    pieces = []
    for entry in response["words_result"]:
        pieces.append(entry["words"])
    return "".join(pieces)
def imageRecognition(image):
    """Run Baidu general OCR on *image* and return the recognised text.

    :param image: image content passed unchanged to ``AipOcr.basicGeneral``
    :return: the ``words`` of every entry in ``words_result`` concatenated
    :raises KeyError: if the response has no ``words_result`` key
    """
    # NOTE(review): the credentials below look like placeholders; confirm.
    APP_ID = '1'
    API_KEY = '1'
    SECRET_KEY = '1'
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    text = client.basicGeneral(image)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print('read image...')
    # Join the recognised lines into one string.
    return ''.join(entry['words'] for entry in text['words_result'])
def get_distinguish_img_str(name):
    """Return the text recognised in the image file *name*, one entry per line.

    Any exception raised while reading or recognising is caught and its
    message is returned instead of text.  An empty result is reported as
    ``'No Img Data !'``.

    :param name: path of the image file to read
    :return: recognised text, an error message, or ``'No Img Data !'``
    """
    s = ''
    try:
        client = AipOcr(AppID, API_Key, Secret_Key)
        # Context manager so the file is closed even if reading fails.
        with open(name, 'rb') as image_file:
            img = image_file.read()
        msg = client.basicGeneral(img)
        s = ''.join(m.get('words') + '\n' for m in msg.get('words_result'))
    except Exception as ex:
        s = str(ex)
    if not s:
        s = 'No Img Data !'
    return s
def ocr_img_baidu(image, config):
    """Crop the question area out of *image*, OCR it and split the result.

    Uses the Baidu OCR API (register an application at
    https://cloud.baidu.com/product/ocr to obtain credentials).

    :param image: image object providing ``crop``; the cropped region must
        provide ``save``
    :param config: object whose ``get(section, option)`` returns the
        ``baidu_api`` credentials and the ``region/combine_region`` string
    :return: ``(question, choices)`` where *question* is a string and
        *choices* a list of strings with spaces removed
    :raises KeyError: if the OCR response has no ``words_result`` key

    Side effects: rebinds the module-level ``combine_region`` and terminates
    the process when no more than two text lines are recognised.
    """
    import sys  # local import: ``exit`` is only an interactive helper

    APP_ID = config.get('baidu_api', 'APP_ID')
    API_KEY = config.get('baidu_api', 'API_KEY')
    SECRET_KEY = config.get('baidu_api', 'SECRET_KEY')
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    global combine_region
    # Question + choices area as "left,top,right,bottom"; depends on the
    # screen resolution and must be tuned by the user.
    combine_region = config.get("region", "combine_region").replace(' ', '').split(',')
    combine_region = list(map(int, combine_region))
    region_im = image.crop(tuple(combine_region[:4]))

    # Serialise the cropped region as PNG bytes for the API.
    img_byte_arr = io.BytesIO()
    region_im.save(img_byte_arr, format='PNG')
    image_data = img_byte_arr.getvalue()

    response = client.basicGeneral(image_data)
    texts = [x['words'] for x in response['words_result']]

    if len(texts) <= 2:
        print(Fore.RED + '截图区域设置错误,请重新设置' + Fore.RESET)
        # NOTE(review): exit status 0 is kept from the original although
        # this is an error path.
        sys.exit(0)

    question = texts[0]
    choices = [x.replace(' ', '') for x in texts[1:]]

    # The question may wrap onto a second or third line.  Accept both the
    # ASCII and the full-width question mark as the terminator.
    question_marks = ('?', '？')
    if choices[0].endswith(question_marks):
        question += choices.pop(0)
    elif choices[1].endswith(question_marks):
        question += choices.pop(0)
        question += choices.pop(0)
    return question, choices
def __init__(self):
    # Set up endpoint URLs, a cookie-aware urllib opener, request headers,
    # the room-code tables and the Baidu OCR client.

    # Endpoints of the bkjw.sxu.edu.cn site used by this object.
    self.index_url = "http://bkjw.sxu.edu.cn/"
    self.class_url = "http://bkjw.sxu.edu.cn/ZNPK/KBFB_RoomSel.aspx"
    self.score_url = "http://bkjw.sxu.edu.cn/_data/login.aspx"

    # Opener that records cookies in self.cookie across requests.
    self.cookie = cookiejar.CookieJar()
    self.handler = rq.HTTPCookieProcessor(self.cookie)
    self.opener = rq.build_opener(self.handler)

    # Header set whose Referer is the login page.
    self.header = {
        "Host":"bkjw.sxu.edu.cn",
        "Origin":"http://bkjw.sxu.edu.cn",
        "Content-Type":"application/x-www-form-urlencoded",
        "Referer":"http://bkjw.sxu.edu.cn/_data/login.aspx",
        "Upgrade-Insecure-Requests":"1",
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"
    }
    # Header set whose Referer is the score page; "Cookie" starts empty.
    # NOTE(review): "Content-Length" is hard-coded to 76 -- confirm it
    # matches the body actually sent with these headers.
    self.header2 = {
        "Host":"bkjw.sxu.edu.cn",
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
        "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language":"zh-CN,zh;q=0.8",
        "Accept-Encoding":"gzip, deflate",
        "Referer":"http://bkjw.sxu.edu.cn/xscj/Stu_MyScore.aspx",
        "Content-Type":"application/x-www-form-urlencoded",
        "Content-Length":"76",
        "Cookie":"",
        "Connection":"keep-alive",
        "Upgrade-Insecure-Requests":"1"
    }

    # Code tables.  NOTE(review): presumably building codes and the room
    # codes inside buildings 101 and 105 -- confirm against the site.
    self.jxl_list = ["101","105"]
    self.js_list_101 = [
        "1010101","1010102","1010103","1010104","1010105","1010106","1010107","1010108",
        "1010109","1010110","1010111","1010112","1010113","1010114","1010115",
        "1010201","1010202","1010203","1010204","1010205","1010206","1010207","1010208",
        "1010301","1010302","1010303","1010304","1010305","1010306","1010307","1010308",
        "1010401","1010402",
        "1010501","1010502","1010503","1010504","1010505","1010506","1010507","1010508",
        "1010509","1010510","1010511"]
    self.jxl_list_105 = [
        '1050101', '1050102', '1050103', '1050104', '1050105', '1050106', '1050107', '1050108',
        '1050109', '1050110', '1050111', '1050112', '1050113', '1050114', '1050115', '1050116',
        '1050201', '1050202', '1050203', '1050204', '1050205', '1050206', '1050207', '1050208',
        '1050209','1050211', '1050212', '1050213', '1050214', '1050215', '1050216', '1050217',
        '1050218',
        '1050301', '1050302', '1050303', '1050304', '1050305', '1050306', '1050307', '1050308',
        '1050309','1050310','1050311', '1050312', '1050313', '1050314', '1050315', '1050316',
        '1050317',
        '1050401', '1050402', '1050403', '1050404', '1050405', '1050406', '1050407', '1050408',
        '1050409',
        '1050501','1050502','1050503','1050504','1050505']

    # OCR client; the three credentials are read from attributes that this
    # method does not assign.
    self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
class PicName(object):
    """Rename pictures after the text Baidu OCR reads in their top-left area."""

    # Lower-case file-name suffixes that are treated as pictures.
    PICTURE_SUFFIXES = ('.jpg', '.jpeg', '.png')

    def __init__(self):
        """Prompt for the picture folder and style-code length; build the client."""
        # NOTE(review): credentials are hard-coded; consider loading them
        # from configuration or the environment.
        APP_ID = '15584553'
        API_KEY = 'MGGfM6EGySBdKOM6605nbhDg'
        SECRET_KEY = 'zxpcnv2pd0VD5zlRwwySFx55BOU0sXhl'
        self.file_path = input('请输入图片文件夹: ')
        self.stylecode = int(input('请输入款号长度: '))
        self.client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    def get_file_path(self):
        """Return the paths of the picture files directly inside ``file_path``.

        Names containing ``~$`` are skipped.  A missing folder is reported on
        stdout and yields an empty list.
        """
        result_file_path = []
        try:
            names = os.listdir(self.file_path)
        except FileNotFoundError as e:
            print('路径输入错误', e)
            return result_file_path
        for name in names:
            # Test the file name only, so a '~$' in the folder path does not
            # hide every picture.
            if '~$' in name:
                continue
            # Compare the real extension: '.jpeg' is accepted, and a name
            # that merely ends in the letters 'jpg' is not.
            if os.path.splitext(name)[1].lower() in self.PICTURE_SUFFIXES:
                result_file_path.append(os.path.join(self.file_path, name))
        return result_file_path

    def get_file_content(self, picture_file):
        """Return JPEG bytes of the top quarter / left two thirds of the picture.

        The crop is encoded in memory, so no ``temp.jpg`` is left behind.
        """
        import io  # local import: only needed here

        # Keep the source open until the crop has been encoded.
        with Image.open(picture_file) as img:
            width, height = img.size
            roi = img.crop((0, 0, width / 3 * 2, height / 4))
            # JPEG cannot store alpha or palette modes (common for PNG).
            if roi.mode not in ('RGB', 'L'):
                roi = roi.convert('RGB')
            buffer = io.BytesIO()
            roi.save(buffer, format='JPEG')
        return buffer.getvalue()

    def get_picture_name(self, picture_file):
        """OCR the picture and rename it to ``<first recognised line>.jpg``.

        :return: a status message for the caller to print
        """
        image = self.get_file_content(picture_file)
        relative_name = os.path.basename(picture_file)
        dir_name = os.path.dirname(picture_file)
        # High-accuracy text recognition on the local image bytes.
        result_dict = self.client.basicAccurate(image)
        words_result = result_dict.get('words_result')
        if not words_result:
            # Nothing recognised (or an API error): leave the file alone
            # instead of aborting the whole batch.
            print('\t{}名称识别不成功'.format(relative_name))
            return '\t文件未重命名'
        stylecode = words_result[0]['words']
        if len(stylecode) == self.stylecode:
            print('\t{}名称识别成功'.format(relative_name))
        else:
            print('\t{}名称识别不成功'.format(relative_name))
        # NOTE(review): as in the original, the file is renamed even when the
        # length check fails, and always gets a '.jpg' suffix -- confirm.
        new_name = os.path.join(dir_name, stylecode + '.jpg')
        os.renames(picture_file, new_name)
        return '\t文件重命名成功'

    def main(self):
        """Rename every picture found by ``get_file_path``, printing progress."""
        for picture_file in self.get_file_path():
            print(picture_file)
            pic_rename = self.get_picture_name(picture_file)
            print(pic_rename)
# -*- coding: UTF-8 -*- from aip import AipOcr import json # 定义常量 APP_ID = '9851066' API_KEY = 'LUGBatgyRGoerR9FZbV4SQYk' SECRET_KEY = 'fB2MNz1c2UHLTximFlC4laXPg7CVfyjV' # 初始化AipFace对象 aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY) # 读取图片 filePath = "d:/pic/20180716sz.jpg" def get_file_content(filePath): with open(filePath, 'rb') as fp: return fp.read() # 定义参数变量 options = { 'detect_direction': 'true', 'language_type': 'CHN_ENG', } # 调用通用文字识别接口 result = aipOcr.basicGeneral(get_file_content(filePath), options) #print(json.dumps(result)) print(type(result)) print(result) d = dict(result)
def getClient():
    """Return an ``AipOcr`` client built from the credentials embedded here."""
    # Order expected by AipOcr: app id, API key, secret key.
    credentials = (
        '15765872',
        'Bywy2lZjSttR1xPy48cwc5QQ',
        'Ca0GLFtO4Ebc4qV7hVHkzScxN0Si0r7Q',
    )
    return AipOcr(*credentials)
def __init__(self, app_id, api_key, secret_key):
    """Remember the three Baidu credentials and create the OCR client."""
    self._app_id, self._api_key, self._secret_key = app_id, api_key, secret_key
    self._client = AipOcr(app_id, api_key, secret_key)
def __init__(self, appid, api_key, secrrt_key, redis_url):
    """Keep the credentials, then create the OCR client and the Redis client."""
    self.appid, self.api_key, self.secrrt_key = appid, api_key, secrrt_key
    self.client = AipOcr(appid, api_key, secrrt_key)
    self.redis = RedisClient(redis_url)
import cv2
from aip import AipOcr
import serial
import tkinter
#from os import system
import os
import win32com.client
from pygame import mixer  # Load the required library

APP_ID = '20329897'
API_KEY = 'z8D0PlnOuGxSMg9LgA3wNNBN'
SECRET_KEY = 'SyLQ39pw7c7ngqw8qhStQXo4h1n8ZrRv'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
speaker = win32com.client.Dispatch("SAPI.SpVoice")
x = y = 0


def play():
    """Initialise the pygame mixer and start playing ``8813.wav``."""
    mixer.init()
    player = mixer.music
    player.load('8813.wav')
    player.play()


def feedback():
    """Open a 500x495 window showing the developer contact labels."""
    window = tkinter.Toplevel(top)
    window.geometry("500x495")
    # (text, font, x, y) for each label, in creation order.
    label_specs = (
        ('如有产品问题或建议,请联系开发者', ('Times', 16, 'bold'), 25, 40),
        ('email: [email protected]', ('Times', 14), 30, 125),
    )
    for caption, font, left, upper in label_specs:
        tkinter.Label(window, text=caption, font=font).place(x=left, y=upper)
import os import random import subprocess import requests from io import BytesIO from pathlib import Path from PIL import Image from aip import AipOcr import config ocr_client = AipOcr(config.APP_ID, config.API_KEY, config.SECRET_KEY) def screenshot(): process = subprocess.Popen('adb shell screencap -p', stdout=subprocess.PIPE) binary_screenshot = process.stdout.read().replace(b'\r\n', b'\n') # 写入文件 # Path('test.png').write_bytes(binary_screenshot) # 写入内存 fb = BytesIO() fb.write(binary_screenshot) print('[*] 截图成功!') return Image.open(fb) def ocr(img, join=True):
from aip import AipOcr

# Your APPID / API key / secret key.
# NOTE(review): APP_ID is empty and the keys are hard-coded.
""" 你的 APPID AK SK """
APP_ID = ''
API_KEY = 'Vfoayf6ZuupEesUXDEygLbPQ'
SECRET_KEY = '50kwhTnPZok7KPu2yF8HXVub6fzIqOXK'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Return the raw bytes of the file at *filePath*."""
    with open(filePath, 'rb') as fp:
        return fp.read()


image = get_file_content('/Users/tigerzhang/Downloads/huafei.JPG')

# Call the high-accuracy text-recognition endpoint.
# NOTE(review): the result of this first call is discarded.
""" 调用通用文字识别(高精度版) """
client.basicAccurate(image);

# Optional request parameters.
""" 如果有可选参数 """
options = {}
options["detect_direction"] = "true"
options["probability"] = "true"

# Call the high-accuracy endpoint again, this time with the options, and
# print every recognised line that starts with the given prefix.
""" 带参数调用通用文字识别(高精度版) """
resp = client.basicAccurate(image, options)
for word in resp['words_result']:
    if word['words'].startswith('充值卡密码'):
        print(word['words'])
# 引入文字识别OCR SDK from aip import AipOcr # 定义常量 APP_ID = '9838807' API_KEY = 'ZyNwfGnvQQnYPIuGt25iTWhw' SECRET_KEY = 'r8RZWXQPMBnS4TyUorzdO6fpFO4h1Ggs' # 读取图片 def get_file_content(filePath): with open(filePath, 'rb') as fp: return fp.read() # 初始化ApiOcr对象 aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY) # 定义参数变量 options = { 'detect_direction': 'true', 'language_type': 'CHN_ENG', } # 调用通用文字识别接口 result = aipOcr.basicGeneral(get_file_content('general.png'), options) print(result)
class AipClient(object):
    '''
    Baidu OCR API wrapper that stores recognised text in Redis.
    '''

    def __init__(self, appid, api_key, secrrt_key, redis_url):
        self.appid, self.api_key, self.secrrt_key = appid, api_key, secrrt_key
        self.client = AipOcr(appid, api_key, secrrt_key)
        self.redis = RedisClient(redis_url)

    def __new__(cls, *args, **kw):
        '''
        Singleton: create the instance once and hand back the same object.
        '''
        if hasattr(cls, '_instance'):
            return cls._instance
        cls._instance = super().__new__(cls)
        return cls._instance

    @property
    def options(self):
        # Request options sent with every recognition call.
        return dict(
            language_type="CHN_ENG",
            detect_direction="false",
            detect_language="false",
            probability="false",
        )

    def General(self, image, **kwargs):
        '''Recognise *image* with the general endpoint (kwargs are not used).'''
        print('调取General_api 识别')
        return self.client.basicGeneral(image, self.options)

    def Accurate(self, image):
        '''Recognise *image* with the high-accuracy endpoint.'''
        print('调取Accurate_api 识别')
        return self.client.basicAccurate(image, self.options)

    def orc(self, image, font_key, word, **kwargs):
        '''
        Try the general endpoint, then the accurate one. The first non-empty
        result is stored in Redis under the image hash and under
        font_key/word, and returned. Returns '*' when neither yields text.
        '''
        hash_value = MD5.md5(image)
        attempts = (
            lambda: self.General(image, **kwargs),
            lambda: self.Accurate(image),
        )
        for attempt in attempts:
            lines = attempt().get('words_result')
            if not lines or lines == '*':
                continue
            result = lines[0]['words']
            self.redis.add(hash_value, result)
            self.redis.hadd(font_key, word, result)
            return result
        return '*'

    def run(self, image, font_key, word, **kwargs):
        '''
        Return the stored text for *image* when its hash is already in
        Redis (also recording it under font_key/word); otherwise call orc.
        '''
        hash_value = MD5.md5(image)
        if not self.redis.exists(hash_value):
            return self.orc(image, font_key, word, **kwargs)
        result = self.redis.get(hash_value)
        self.redis.hadd(font_key, word, result)
        return result
from aip import AipOcr
import re

# Baidu application credentials.
# NOTE(review): credentials are hard-coded in source.
APP_ID = '21834800'
API_KEY = 'gzsi6XkPupAE4xbhUcfq4TPO'
SECRET_KEY = '5O74roabd3cz2MFbreyQAaDGQkUc6NR8'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Binary mode is required: reading the image as text fails with
# "UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 ...".
# The context manager closes the handle, which the original never did.
with open('C:\\Users\\wk\\Desktop\\2.png', 'rb') as image_file:
    img = image_file.read()

# Response dictionary returned by the service.
message = client.basicGeneral(img)

# 'words_result' is a list of dictionaries, one per recognised line.
for item in message.get('words_result'):
    print(item.get('words'))
# Test script based on Baidu OCR.
from aip import AipOcr
import configparser

# Credentials passed to AipOcr as keyword arguments (left blank here).
config = dict(appId='', apiKey='', secretKey='')
client = AipOcr(**config)


def get_file_content(file):
    """Return the raw bytes of *file*."""
    with open(file, 'rb') as handle:
        payload = handle.read()
    return payload


def img_to_str(image_path):
    """Return the general-OCR response for the image at *image_path*."""
    # General text recognition; swap the endpoint here if needed.
    return client.basicGeneral(get_file_content(image_path))


if __name__ == '__main__':
    text = img_to_str('sourceImages/testOcr.png')
    print(text)
from aip import AipOcr
import json

APP_ID = '10706210'
API_KEY = 'kh6kczBGNeE6zFDDFS6U6zC4'
SECRET_KEY = 'L8D6xMO5BkVlISKGaG2TP90vhM0yd1eV'
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Options sent with the recognition request.
options = dict(detect_direction='true', language_type='CHN_ENG')


def get_file_content(c_filePath):
    """Return the raw bytes of the file at *c_filePath*."""
    with open(c_filePath, 'rb') as handle:
        payload = handle.read()
    return payload


if __name__ == '__main__':
    q_filePath = "test.jpg"
    result = aipOcr.basicGeneral(get_file_content(q_filePath), options)
    # Concatenate every recognised line, in order, without a separator.
    c_Result_s = ''.join(entry['words'] for entry in result['words_result'])
    print(c_Result_s)
from common.common_func import DRG_func
from common.common_func_merchant import Drg_merchant
import requests
from aip import AipOcr
import datetime
import time

# Your APPID / API key / secret key.
# NOTE(review): credentials are hard-coded in source.
""" 你的 APPID AK SK """
APP_ID = '19611635'
API_KEY = '5kn9XvSsF19BPNuTtYeuPghP'
SECRET_KEY = 'oRAxD3tm0ducNoF3dbzaVfBh9EQHiP6W'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# One HTTP session is shared by the login and the captcha request.
s = requests.session()
#DF = DRG_func(s)
DM = Drg_merchant(s)
DM.merchant_login(username='******', password='******')

# Fetch the image captcha; the query string is a millisecond timestamp.
t = int(time.time() * 1000)
smscode = 'https://spman.shb02.net/common/jcaptcha/create?%s' % t
respnose = s.get(smscode)

# Where the captcha image is stored.
path = 'F:\\dx.jpg'
with open(path, 'wb') as f:
    f.write(respnose.content)
    # NOTE(review): redundant -- the with-block already closes the file.
    f.close()

# Read an image file.
""" 读取图片 """
def get_file_content(filePath):
# !/usr/bin/env python
# encoding=utf-8
# Date: 2018-05-28
# Author: pangjian
from aip import AipOcr

# NOTE(review): credentials are hard-coded in source.
APP_ID = '11312548'
API_KEY = 'F8VHIMmovwN8oaINsLHTYNXk'
SECRET_KEY = 'DVvrhcOjTAhRLphpgXjV9AuGRGinp1HQ'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Return the bytes of *filePath*, or None if the file cannot be read.

    A read failure is reported on stdout with 'read file error'.
    """
    try:
        # Binary mode: image data must not be decoded as text.  The context
        # manager closes the handle and is safe when open() itself fails.
        with open(filePath, 'rb') as image_file:
            return image_file.read()
    except IOError:
        # Parenthesised single-argument print works on Python 2 and 3.
        print('read file error')
        return None


# Optional request parameters.
options = {}
options["language_type"] = "ENG"
options["detect_direction"] = "true"
options["detect_language"] = "true"
options["probability"] = "true"

image = get_file_content('image2.jpg')
client.basicGeneral(image, options)
class sxufreesite:
    """Query bkjw.sxu.edu.cn for classroom timetables and list free periods.

    The captcha is downloaded to ``imgCode.jpg``, cleaned by the
    ``imgcodeidentify`` helpers and read with Baidu OCR.
    """

    # Class-level defaults, kept for backward compatibility.  The mutable
    # containers that methods modify are re-created per instance in __init__.
    index_url = ""
    class_url = ""
    score_url = ""
    usrname = ""
    password = ""
    header = ""
    table = ""
    cookie = ""
    handler = ""
    values = {}
    tr_list = []
    td_list = []
    class_list = []
    js_list = []
    jxl_list_101 = []
    jxl_list_102 = []
    final_list = []
    # NOTE(review): credentials are hard-coded in source.
    APP_ID = '11519354'
    API_KEY = 'tLlZhgC4kwx8ArqEhBXzCvRw'
    SECRET_KEY = 'GnpZ0XXBFgZXz8v0aYTGIMhHRMmlRKSd'

    def __init__(self):
        """Set up URLs, the cookie-aware opener, headers, code tables and OCR."""
        self.index_url = "http://bkjw.sxu.edu.cn/"
        self.class_url = "http://bkjw.sxu.edu.cn/ZNPK/KBFB_RoomSel.aspx"
        self.score_url = "http://bkjw.sxu.edu.cn/_data/login.aspx"
        # Opener that records cookies in self.cookie across requests.
        self.cookie = cookiejar.CookieJar()
        self.handler = rq.HTTPCookieProcessor(self.cookie)
        self.opener = rq.build_opener(self.handler)
        self.header = {
            "Host": "bkjw.sxu.edu.cn",
            "Origin": "http://bkjw.sxu.edu.cn",
            "Content-Type": "application/x-www-form-urlencoded",
            "Referer": "http://bkjw.sxu.edu.cn/_data/login.aspx",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"
        }
        self.header2 = {
            "Host": "bkjw.sxu.edu.cn",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Referer": "http://bkjw.sxu.edu.cn/xscj/Stu_MyScore.aspx",
            "Content-Type": "application/x-www-form-urlencoded",
            "Content-Length": "76",
            "Cookie": "",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1"
        }
        self.jxl_list = ["101", "105"]
        self.js_list_101 = [
            "1010101", "1010102", "1010103", "1010104", "1010105", "1010106", "1010107", "1010108",
            "1010109", "1010110", "1010111", "1010112", "1010113", "1010114", "1010115",
            "1010201", "1010202", "1010203", "1010204", "1010205", "1010206", "1010207", "1010208",
            "1010301", "1010302", "1010303", "1010304", "1010305", "1010306", "1010307", "1010308",
            "1010401", "1010402",
            "1010501", "1010502", "1010503", "1010504", "1010505", "1010506", "1010507", "1010508",
            "1010509", "1010510", "1010511"]
        self.jxl_list_105 = [
            '1050101', '1050102', '1050103', '1050104', '1050105', '1050106', '1050107', '1050108',
            '1050109', '1050110', '1050111', '1050112', '1050113', '1050114', '1050115', '1050116',
            '1050201', '1050202', '1050203', '1050204', '1050205', '1050206', '1050207', '1050208',
            '1050209', '1050211', '1050212', '1050213', '1050214', '1050215', '1050216', '1050217',
            '1050218',
            '1050301', '1050302', '1050303', '1050304', '1050305', '1050306', '1050307', '1050308',
            '1050309', '1050310', '1050311', '1050312', '1050313', '1050314', '1050315', '1050316',
            '1050317',
            '1050401', '1050402', '1050403', '1050404', '1050405', '1050406', '1050407', '1050408',
            '1050409',
            '1050501', '1050502', '1050503', '1050504', '1050505']
        # Per-instance containers, so instances do not share (and keep
        # growing) the class-level objects.
        self.values = {}
        self.tr_list = []
        self.final_list = []
        self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    def get_img_code(self):
        """Download the captcha to ``imgCode.jpg`` and print the cookie jar.

        Returns None; the session cookie is kept by ``self.opener``.
        """
        req = rq.Request("http://bkjw.sxu.edu.cn/sys/ValidateCode.aspx", headers=self.header)
        with self.opener.open(req) as gec:
            name = "imgCode.jpg"
            img_res = gec.read()
            with open(name, "wb") as ic:
                ic.write(img_res)
            print(self.cookie)

    def get_score(self, opener, username, password, yzm):
        """Post the login form with hashed password and captcha; print the reply.

        :param opener: unused; requests go through ``self.opener``
        :param username: account name, also mixed into the password hash
        :param password: plain-text password
        :param yzm: captcha text
        """
        h1 = hashlib.md5()
        h1.update(password.encode(encoding='utf-8'))
        hex_password = h1.hexdigest()
        temp_pwd = username + hex_password[:30].upper() + "10108"
        h2 = hashlib.md5()
        h2.update(temp_pwd.encode(encoding='utf-8'))
        hex_temp = h2.hexdigest()
        dsdsdsdsdxcxdfgfg = hex_temp[:30].upper()  # password field
        txt_asmcdefsddsd = username  # user-name field
        h3 = hashlib.md5()
        h3.update(yzm.upper().encode(encoding='utf-8'))
        hex_temp_yzm = h3.hexdigest()[:30].upper() + '10108'
        h4 = hashlib.md5()
        h4.update(hex_temp_yzm.encode(encoding='utf-8'))
        fgfggfdgtyuuyyuuckjg = h4.hexdigest()[:30].upper()  # captcha field
        __VIEWSTATE = "dDwyMTIyOTQxMzM0Ozs+AI2AQlMGeOYvPjA1fJfST57PPCk="
        pcInfo = "Mozilla/5.0+(Windows+NT+10.0;+Win64;+x64;+rv:61.0)+Gecko/20100101+Firefox/61.0Windows+NT+10.0;+Win64;+x645.0+(Windows)+SN:NULL"
        Sel_Type = "STU"
        typeName = "学生"
        values = {}
        values["__VIEWSTATE"] = __VIEWSTATE
        values["dsdsdsdsdxcxdfgfg"] = dsdsdsdsdxcxdfgfg
        values["fgfggfdgtyuuyyuuckjg"] = fgfggfdgtyuuyyuuckjg
        values["pcInfo"] = pcInfo
        values["Sel_Type"] = Sel_Type
        values["txt_asmcdefsddsd"] = txt_asmcdefsddsd
        values["txt_pewerwedsdfsdff"] = ""
        values["txt_sdertfgsadscxcadsads"] = ""
        values["typeName"] = typeName
        data = urllib.parse.urlencode(values).encode('gb2312')  # GB18030
        req = rq.Request(self.score_url, data, headers=self.header)
        html = self.opener.open(req).read().decode('gb2312')
        print(data)
        print(html)

    # http://bkjw.sxu.edu.cn/xscj/Stu_MyScore_rpt.aspx
    # http://bkjw.sxu.edu.cn/xscj/Stu_MyScore_Drawimg.aspx?x=1&h=2&w=782&xnxq=20171&xn=2017&xq=1&rpt=1&rad=2&zfx=0&xh=201700004159
    def post_score(self, opener):
        """Request the score image, save it to ``score.jpg`` and print it.

        :param opener: unused; requests go through ``self.opener``
        """
        # sel_xn=2017&sel_xq=1&SJ=1&btn_search=%BC%EC%CB%F7&SelXNXQ=2&zfx_flag=0&zxf=0
        data = "sel_xn=2017&sel_xq=1&SJ=1&btn_search=%BC%EC%CB%F7&SelXNXQ=2&zfx_flag=0&zxf=0".encode('GB18030')
        # Each cookie overwrites the previous one, so the last cookie wins.
        for item in self.cookie:
            self.header2["Cookie"] = item.name + '=' + item.value
        print(self.header2)
        head2 = urllib.parse.urlencode(self.header2).encode('utf-8')
        # NOTE(review): the url-encoded headers are sent as the request
        # body and `data` is unused -- kept as in the original; confirm.
        request = rq.Request("http://bkjw.sxu.edu.cn/xscj/Stu_MyScore_Drawimg.aspx?x=1&h=2&w=782&xnxq=20171&xn=2017&xq=1&rpt=1&rad=2&zfx=0&xh=201700004159", head2)
        html = self.opener.open(request).read()
        with open("score.jpg", "wb") as jpg:
            jpg.write(html)
        print(html)

    def get_file_content(self, filePath):
        """Return the raw bytes of the file at *filePath*."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def post_data(self, opener, Sel_XNXQ, rad_gs, imgcode, Sel_XQ, Sel_JXL, Sel_ROOM):
        """Post the room-timetable query and return the response HTML.

        The first ``<tr ...>...</tr>`` match is stored in ``self.table``.

        :param opener: unused; requests go through ``self.opener``
        :raises IndexError: when the response contains no table row;
            ``main`` relies on this to retry
        """
        self.values["Sel_XNXQ"] = Sel_XNXQ
        self.values["rad_gs"] = rad_gs
        self.values["txt_yzm"] = imgcode
        self.values["Sel_XQ"] = Sel_XQ
        self.values["Sel_JXL"] = Sel_JXL
        self.values["Sel_ROOM"] = Sel_ROOM
        data = urllib.parse.urlencode(self.values).encode('GB18030')
        request = rq.Request("http://bkjw.sxu.edu.cn/ZNPK/KBFB_RoomSel_rpt.aspx", data, self.header)
        html = self.opener.open(request).read().decode('GB18030')
        reg = re.compile("<tr.*>.*</tr>")
        self.table = reg.findall(html)[0]
        return html

    def recommend_class(self):
        """Return ``(weekday, period)`` for every empty cell of ``self.tr_list``.

        Only the first 5 rows and first 7 cells per row are inspected; both
        numbers are 1-based.  Rows shorter than 7 cells are tolerated.
        """
        EmptyClassList = []
        for i, row in enumerate(self.tr_list[:5]):
            for j, cell in enumerate(row[:7]):
                if cell == "":
                    EmptyClassList.append((j + 1, i + 1))
                    # Message restored from mojibake (GBK bytes that had
                    # been decoded as Latin-1).
                    print("该教室星期" + str(j + 1) + "第" + str(i + 1) + "节课为空教室")
        return EmptyClassList

    def deal_table(self, html):
        """Parse the timetable cells of *html* into ``self.tr_list``.

        The first 35 elements with ``valign="top"`` are split into 5 rows of
        7 cell texts.  The rows replace any earlier table, so
        ``recommend_class`` always inspects the table parsed last.
        """
        soup = BeautifulSoup(html, "html5lib")
        td_list = soup.findAll(valign="top")
        cells = [td.text for td in td_list[:35]]
        self.tr_list = [cells[start:start + 7] for start in range(0, 35, 7)]

    def main(self):  # ,xq,time
        """Collect the free periods of every room in ``js_list_101``.

        One ``{room_code: free_period_list}`` dict is appended to
        ``self.final_list`` per room.
        """
        count = 0
        opener = None  # get_img_code() returns None; post_data ignores it
        for i in self.js_list_101:
            # Retry until the captcha is read and the query is accepted.
            # NOTE(review): a new captcha is only fetched when count is a
            # multiple of 10; a persistent failure in between loops forever.
            while True:
                if count % 10 == 0:
                    opener = self.get_img_code()
                for j in range(30):
                    imgcodeidentify.deal_img("imgCode.jpg")
                    imgcodeidentify.interference_line(imgcodeidentify.deal_img("imgCode.jpg"), "imgCode.jpg")
                    imgcodeidentify.interference_point(imgcodeidentify.interference_line(imgcodeidentify.deal_img("imgCode.jpg"), "imgCode.jpg"), "imgCode.jpg")
                try:
                    code = self.client.basicGeneral(self.get_file_content("imgCode.jpg"))["words_result"][0]["words"]
                except IndexError:
                    # OCR recognised nothing.
                    continue
                code = code.replace(" ", "")
                print(code)
                try:
                    html = self.post_data(opener, "20171", "1", code, "1", "101", i)
                except IndexError:
                    # No table in the response.
                    continue
                else:
                    self.deal_table(html)
                    temp_list = self.recommend_class()
                    temp_dict = {}
                    temp_dict[str(i)] = temp_list
                    self.final_list.append(temp_dict)
                    count = count + 1
                    break
from aip import AipOcr
import base64

# Your APPID / API key / secret key.
# NOTE(review): credentials are hard-coded in source.
""" 你的 APPID AK SK """
APP_ID = '10682639'
API_KEY = 'yYkzlkhdkO4CsOo7fGHZmgXx'
SECRET_KEY = 'DWIxGQsDGbuTY9v7qCC5t5VqOkDZC8c1'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Read an image file.
""" 读取图片 """
def get_file_content(filePath):
    """Return the raw bytes of the file at *filePath*."""
    with open(filePath, 'rb') as fp:
        return fp.read()


image = get_file_content('11.jpg')
# Print the length of the base64 encoding of the image.
imagebase64 = base64.b64encode(image)
print(len(imagebase64))

# Disabled: general text recognition on the local image.
# client.basicGeneral(image);

# Optional request parameters (all currently disabled).
""" 如果有可选参数 """
options = {}
# options["language_type"] = "CHN_ENG"
# options["detect_direction"] = "true"
# options["detect_language"] = "true"
# options["probability"] = "true"

# Disabled: general text recognition with options on the local image.
""" 带参数调用通用文字识别, 图片参数为本地图片 """
# client.basicGeneral(image, options)
#retbig = client.accurate(image, options)
class unlockScrapy(object):
    """Solve a slider plus click-the-characters captcha with selenium and OCR.

    The small image holds the characters to click and the large image holds
    their positions.  Both are downloaded, brightened and read with Baidu OCR.
    """

    def __init__(self, driver):
        super(unlockScrapy, self).__init__()
        # selenium driver
        self.driver = driver
        # Baidu OCR credentials (placeholders to be filled in).
        self.WAPPID = '百度文字识别appid'
        self.WAPPKEY = '百度文字识别appkey'
        self.WSECRETKEY = '百度文字识别secretkey'
        # Baidu OCR SDK client
        self.WCLIENT = AipOcr(self.WAPPID, self.WAPPKEY, self.WSECRETKEY)

    def clickWords(self, wordsPosInfo):
        """Click the characters of the large image in the given order.

        :param wordsPosInfo: items with ``['location']['left']`` and
            ``['location']['top']`` offsets inside the large image
        """
        # Element of the large image.
        imgElement = self.driver.find_element_by_xpath(
            "/html/body/div[3]/div[3]/img")
        for info in wordsPosInfo:
            ActionChains(self.driver).move_to_element_with_offset(
                to_element=imgElement,
                xoffset=info['location']['left'] + 20,
                yoffset=info['location']['top'] + 20).click().perform()
            time.sleep(1)

    def downloadImg(self):
        """Save the small image as ``code.jpeg`` and the large as ``checkCode.jpeg``."""
        codeSrc = self.driver.find_element_by_xpath(
            "/html/body/div[3]/div[1]/img").get_attribute("src")
        checkSrc = self.driver.find_element_by_xpath(
            "/html/body/div[3]/div[3]/img").get_attribute("src")
        # Each src is a data URI; the part after the comma is base64.
        for file_name, src in (("code.jpeg", codeSrc), ("checkCode.jpeg", checkSrc)):
            with open(file_name, "wb") as fh:
                fh.write(base64.b64decode(src.split(',')[1]))

    def chageImgLight(self):
        """Multiply every pixel value of both saved images by 4, in place."""
        for file_name in ("code.jpeg", "checkCode.jpeg"):
            im = Image.open(file_name)
            im1 = im.point(lambda p: p * 4)
            im1.save(file_name)

    def unlockScroll(self):
        """Press the slider and drag it through three offsets."""
        # Slider element.
        scrollElement = self.driver.find_elements_by_class_name(
            'cpt-img-double-right-outer')[0]
        ActionChains(
            self.driver).click_and_hold(on_element=scrollElement).perform()
        for xoffset, yoffset in ((30, 10), (100, 20), (200, 50)):
            ActionChains(self.driver).move_to_element_with_offset(
                to_element=scrollElement, xoffset=xoffset,
                yoffset=yoffset).perform()

    def getFile(self, filePath):
        """Return the raw bytes of the file at *filePath*."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def iTow(self):
        """Return the text recognised in ``code.jpeg``, or ``'error'`` on failure.

        Lines ending with '。' get a trailing ``'\\r\\n'``.
        """
        try:
            op = {'language_type': 'CHN_ENG', 'detect_direction': 'true'}
            res = self.WCLIENT.basicAccurate(self.getFile('code.jpeg'), options=op)
            words = ''
            for item in res['words_result']:
                if item['words'].endswith('。'):
                    words = words + item['words'] + '\r\n'
                else:
                    words = words + item['words']
            return words
        except Exception:
            # Exception, not a bare except, so Ctrl-C still interrupts.
            return 'error'

    def getPos(self, words):
        """Return the position entries of *words* found in ``checkCode.jpeg``.

        :param words: iterable of single characters to look for
        :return: matching ``chars`` entries in the order of *words*; an empty
            list (after printing the error) when recognition fails
        """
        try:
            op = {'language_type': 'CHN_ENG', 'recognize_granularity': 'small'}
            res = self.WCLIENT.accurate(self.getFile('checkCode.jpeg'), options=op)
            # Position info of every recognised character.
            allPosInfo = []
            for item in res['words_result']:
                allPosInfo.extend(item['chars'])
            # Keep only the characters that were asked for.
            return [item for word in words for item in allPosInfo
                    if word == item['char']]
        except Exception as e:
            print(e)
            # An empty list keeps len(posInfo) in main() valid; returning
            # None crashed the retry loop.
            return []

    def main(self):
        """Run the whole flow, retrying until the page title changes."""
        # Solve the slider.
        self.unlockScroll()
        time.sleep(2)
        # Download the images.
        self.downloadImg()
        time.sleep(2)
        # Brighten them to help recognition.
        self.chageImgLight()
        # Characters of the small image.
        text = self.iTow()
        # Positions of those characters in the large image.
        posInfo = self.getPos(list(text))
        # Recognition may be inaccurate: refresh until exactly 4 characters
        # and 4 positions are found.
        # NOTE(review): retries do not call chageImgLight() -- confirm
        # whether that is intended.
        while len(posInfo) != 4 or len(text) != 4:
            # Ask for a new pair of images and recognise again.
            self.driver.find_elements_by_xpath(
                '/html/body/div[3]/div[4]/div/a')[0].click()
            time.sleep(2)
            self.downloadImg()
            time.sleep(2)
            text = self.iTow()
            posInfo = self.getPos(list(text))
        time.sleep(3)
        print('匹配成功,开始点击')
        # Click the characters in the large image.
        self.clickWords(posInfo)
        # Click the submit button.
        self.driver.find_elements_by_xpath(
            '/html/body/div[3]/div[4]/a')[0].click()
        time.sleep(2)
        # On success the page title changes.
        if self.driver.title != '携程在手,说走就走':
            print('破解成功')
        else:
            # Try again.
            print('破解失败,再次破解')
            self.main()
from aip import AipOcr

# NOTE(review): credentials are hard-coded in source.
APP_ID = '11730410'
API_KEY = 'Teuyu8PygKTn8KEdUqLTTvh1'
# The accidental trailing space inside the original literal was removed;
# it made the key differ from the 32-character value.
SECRET_KEY = 'rIqEyFe6TkFTKrt7Isa9rvsG9vzTELCT'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Return the raw bytes of the file at *filePath*."""
    with open(filePath, 'rb') as fp:
        return fp.read()


image = get_file_content('example.jpg')

# High-accuracy text recognition without options.
client.basicAccurate(image)

# Optional request parameters.
options = {}
options["detect_direction"] = "true"
options["probability"] = "true"

# High-accuracy text recognition with options.
client.basicAccurate(image, options)
import base64
from aip import AipOcr
import re
import requests  # used below for the token request; was never imported

# client_id is the API key and client_secret the secret key from the console.
# NOTE(review): credentials are embedded in the URL and in the constants.
host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=dvhSujnaY9qaWlWNenyCo2ZK&client_secret=82G08tOb0wYntAHD4oYdEMPGsvQEHinA'
response = requests.get(host)
content = response.json()
access_token = content["access_token"]

# Constants.
APP_ID = '11650666'
API_KEY = 'qQqkIOEYaLdMwA42op63gaLc'
SECRET_KEY = 'Cnw2YGHs2n58CyVmLvyKAc5zwaQHMyVl'

# Initialise the text-recognition client.
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)


# Read an image file.
def get_file_content(filePath):
    """Return the raw bytes of the file at *filePath*."""
    with open(filePath, 'rb') as fp:
        return fp.read()


# 'paris.txt' lists one image path per line; results go to
# 'pictureToWords.txt'.  Both handles stay open for the rest of the script.
imageAddr = open('paris.txt', 'r')
fo = open('pictureToWords.txt', 'w+')
for filepath in imageAddr.readlines():
    filepath = filepath.replace('\n', '')
    # Close each image handle as soon as its bytes are read.
    with open(filepath, 'rb') as image_file:
        image = image_file.read()
    data = {'image': base64.b64encode(image).decode()}
from aip import AipOcr
import re

# NOTE(review): credentials are hard-coded in source.
APP_ID = "18137435"
API_KEY = "Rtast1dOns0Mf4ckdQyM8LKC"
SECRET_KEY = "gpEkNxGpWkeqCbnTl3DsR1ni6h4fSCYV"
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Read the image into memory.
with open(r"E:\image\aa.png", "rb") as f:
    image = f.read()

result = client.basicGeneral(image)
# Space-free textual form of the response, printed for inspection.
data = str(result).replace(" ", "")
print(data)

# Take the first recognised line straight from the response.  The original
# matched r"{'words': '(.*?)'}" against `data`, but every space had already
# been stripped from `data`, so the pattern never matched and [0] raised
# IndexError.  Spaces are removed to keep the value the regex aimed for.
res = result['words_result'][0]['words'].replace(" ", "")
print(res)
import uuid
from os import remove
import requests
import enchant
from PIL import Image, ImageFilter
from aip import AipOcr

# NOTE(review): credentials are hard-coded in source.
app_id = '11565085'
api_key = 'dh9pPBqw1H4hQQyPrk4HHVv6'
secret_key = '6mjlcxPsT2NRs7wETIqs3xYBjz0pdyH5'
client = AipOcr(app_id, api_key, secret_key)
d = enchant.Dict('en_US')


def process_image(filename):
    """Binarise and median-filter the image at *filename*, in place.

    The image is converted to greyscale; pixels brighter than 25 become 255
    and all others 0.  The result is median-filtered and saved over the file.

    :param filename: path of the image to process and overwrite
    :return: the processed image object
    """
    threshold_grey = 25
    image = Image.open(filename).convert('L')
    # point() applies the threshold through a lookup table, replacing the
    # much slower per-pixel getpixel/putpixel loop with identical output.
    im2 = image.point(lambda pix: 255 if pix > threshold_grey else 0)
    im2 = im2.filter(ImageFilter.MedianFilter())
    im2.save(filename)
    return im2
def main(videoname):
    """OCR every ``jpg`` frame of a video and group recognised text by row.

    Credentials and the image/output directories come from
    ``config.getConfig(videoname)``. Each frame is sent to Baidu's ``general``
    OCR endpoint; recognised lines are grouped by their vertical position and
    the words of the largest group are written, comma-joined, to a log file
    named after the start timestamp in ``outputDir``.

    :param videoname: key passed to ``config.getConfig``.
    """
    conf = config.getConfig(videoname)
    APP_ID = conf['APP_ID']
    API_KEY = conf['API_KEY']
    SECRET_KEY = conf['SECRET_KEY']
    imgDir = conf['imgDir']
    outputDir = conf['outputDir']
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    def get_file_content(filePath):
        """Return the raw bytes of ``filePath``."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def get_OCR(imgName):
        """Return the ``words_result`` list for one image, or False on failure."""
        image = get_file_content(imgDir + '/' + imgName)
        options = {
            "recognize_granularity": "big",
            "language_type": "CHN_ENG",
            "detect_direction": "true",
        }
        res = client.general(image, options)
        try:
            w = res['words_result']
        except (KeyError, TypeError):
            # Error responses carry no 'words_result'; signal the caller to retry.
            return False
        print(str(res))
        return w

    def is_img(f):
        return re.match(r'.+jpg', f)

    start = time.time()
    # `with` guarantees the log is flushed and closed even if OCR raises.
    with open(outputDir + str(start) + '.txt', 'a') as output:
        pathDir = sorted(filter(is_img, os.listdir(imgDir)))
        positionData = []
        for imgName in pathDir:
            output.write('Start: ' + imgName + '\n')
            ocrRes = get_OCR(imgName)
            # Retry until the request succeeds.
            # NOTE(review): unbounded; a permanent API error loops forever.
            while ocrRes is False:
                print('Fail: ' + imgName)
                ocrRes = get_OCR(imgName)
            for word in ocrRes:
                top = int(word['location']['top'])
                height = int(word['location']['height'])
                w = word['words']
                has = False
                for group in positionData:
                    # The word belongs to this group when its top is within
                    # half the group's average height of the group's top.
                    if abs(group['top'] - top) < (group['height'] / 2):
                        # Mark the word as handled before the duplicate check:
                        # previously a duplicate left `has` False and was
                        # appended below as a brand-new group.
                        has = True
                        lastWord = group['words'][-1]
                        # Skip a word that nearly repeats the group's last one.
                        if difflib.SequenceMatcher(None, lastWord, w).quick_ratio() > 0.8:
                            break
                        group['words'].append(w)
                        # Update the running averages used for matching.
                        group['totalTop'] += top
                        group['totalHeight'] += height
                        group['totalNum'] += 1
                        group['top'] = group['totalTop'] / group['totalNum']
                        group['height'] = group['totalHeight'] / group['totalNum']
                        break
                if not has:
                    positionData.append({
                        'top': top,  # group reference: average of member tops
                        'totalTop': top,
                        'height': height,
                        'totalHeight': height,
                        'totalNum': 1,  # number of words added to this group
                        'words': [w]
                    })
                output.write('Words: ' + w + '\n')
                output.write('Top: ' + str(word['location']['top']) + '\n')
                output.write('Height: ' + str(word['location']['height']) + '\n')
            output.write('Finished: ' + imgName + '\n')
            print('Finished: ' + imgName)
        output.write(str(positionData) + '\n')
        # Keep the words of the group that collected the most entries.
        max_group = []
        for group in positionData:
            if group['totalNum'] > len(max_group):
                max_group = group['words']
        allWords = ','.join(max_group)
        output.write('-----------------------' + '\n')
        output.write(allWords + '\n')
        output.write('-----------------------' + '\n')
        end = time.time()
        output.write('Running time: ' + str(end - start) + '\n')
    print('Finished All')
class IdentifyCaptchaPicture(object):
    """Captcha solver: OCR the prompt text and classify the eight tiles."""

    def __init__(self):
        self.output_graph = 'output_graph.pb'
        self.output_labels = 'output_labels.txt'
        self.captcha_picture = 'captcha_picture.jpg'
        self.english_to_chinese = english_to_chinese
        self.id_to_something = self._id_to_something()
        self.client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    def _id_to_something(self):
        """Load ``output_labels`` into a ``{line_index: stripped_line}`` dict."""
        with open(self.output_labels, 'r') as f:
            return {index: line.strip() for index, line in enumerate(f)}

    def _find_string_by_id(self, id_test):
        """Return the label for ``id_test``, or None when it is unknown."""
        return self.id_to_something.get(id_test)

    def _cut_captcha_picture(self):
        """Crop the captcha into its eight 66x66 tiles, row by row."""
        image = Image.open(self.captcha_picture)
        # Two rows (y = 41, 113) of four tiles (x = 5, 77, 149, 221).
        return [
            image.crop((left, upper, left + 66, upper + 66))
            for upper in (41, 113)
            for left in (5, 77, 149, 221)
        ]

    def _identify_captcha_picture_text(self):
        """OCR the prompt strip of the captcha; return '' when nothing is read."""
        image = Image.open(self.captcha_picture)
        text = image.crop((124, 0, 287, 26))
        image_byte_array = io.BytesIO()
        text.save(image_byte_array, format='PNG')
        response = self.client.basicGeneral(image_byte_array.getvalue())
        try:
            # The 'words_result' lookup is now inside the try: an error
            # response without that key yields '' instead of a KeyError.
            return response['words_result'][0]['words']
        except (KeyError, IndexError, TypeError):
            return ''

    def identify_captcha_picture(self):
        """Return the 1-based tile numbers (as strings) matching the prompt.

        A tile is selected when any character of its predicted label's
        Chinese translation occurs in the recognised prompt text.
        """
        all_picture_after_cut = self._cut_captcha_picture()
        with tf.gfile.FastGFile(self.output_graph, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name='')
        with tf.Session() as sess:
            text = self._identify_captcha_picture_text()
            print('识别出的文字是:', text)
            correct_choose = []
            softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
            # Classify each tile.
            for index, one_picture in enumerate(all_picture_after_cut):
                image_byte_array = io.BytesIO()
                one_picture.save(image_byte_array, format='PNG')
                one_picture = image_byte_array.getvalue()
                prediction = sess.run(
                    softmax_tensor,
                    feed_dict={'DecodeJpeg/contents:0': one_picture})
                prediction = np.squeeze(prediction)
                # Most probable class.
                top_1 = np.argmax(prediction, axis=0)
                something = self._find_string_by_id(top_1)
                something_chinese = self.english_to_chinese[something]
                if any(one_word in text for one_word in something_chinese):
                    correct_choose.append(str(index + 1))
                print('%s %0.5f%%' % (something_chinese, prediction[top_1] * 100))
        return correct_choose
def __init__(self):
    """Copy the Baidu credentials from ``THEKEY2BD`` and build the OCR client."""
    keys = THEKEY2BD
    self.api_id, self.api_key, self.secret_key = (
        keys.THEKE2BD_api_id,
        keys.THEKE2BD_api_key,
        keys.THEKE2BD_secret_key,
    )
    self.client = AipOcr(self.api_id, self.api_key, self.secret_key)
class MyApp(QMainWindow, Ui_MainWindow):
    '''
    PyQt5 GUI that sends an image to the Baidu text-recognition API and
    shows the recognised text.
    '''

    def __init__(self):
        # NOTE(review): the bases are initialised explicitly and again via
        # super(); left unchanged, confirm whether the duplication is needed.
        QMainWindow.__init__(self)
        Ui_MainWindow.__init__(self)
        super().__init__()
        self.initUI()  # custom UI initialisation
        # True when the image to convert was chosen with the file browser,
        # False when it comes from a screenshot.
        self.status = False
        self.APP_ID = ''
        self.API_KEY = ''
        self.SECRET_KEY = ''
        self.fileName1 = None  # path chosen through the file browser
        self.fileName2 = None  # path of the saved screenshot

    def initUI(self):
        '''
        Initialize the window's UI
        '''
        self.setupUi(self)
        self.setWindowTitle("图片转文字GUI程序")
        # Window icon (on Linux only the task bar shows it).
        self.setWindowIcon(QIcon("../icons/eye.png"))
        self.initMenuBar()
        self.initToolBar()
        self.initButton()
        self.show()

    def initMenuBar(self):
        '''
        Build the menu bar: a File menu with an Exit action and a Help menu.
        '''
        menubar = self.menuBar()
        exitAct = QAction(QIcon('../icons/exit.png'), 'Exit', self)
        exitAct.setShortcut('Ctrl+Q')
        exitAct.triggered.connect(qApp.quit)
        fileMenu = menubar.addMenu('&File')
        fileMenu.addAction(exitAct)
        menubar.addMenu('&Help')

    def initToolBar(self):
        '''
        Add an Exit action to the default tool bar created in Designer.
        '''
        exitAct = QAction(QIcon('../icons/exit.png'), 'Exit', self)
        exitAct.setShortcut('Ctrl+Q')
        exitAct.triggered.connect(qApp.quit)
        self.toolBar.addAction(exitAct)

    def initButton(self):
        '''
        Connect the three buttons to their callbacks and set their tooltips.
        '''
        self.btnBrowse.clicked.connect(self.browserButton_callback)
        self.btnBrowse.setToolTip("浏览需要转换的文件")
        self.btnScreen.clicked.connect(self.screenButton_callback)
        self.btnScreen.setToolTip("截取屏幕文字")
        self.btnConvert.clicked.connect(self.convertButton_callback)
        self.btnConvert.setToolTip("转换图片中的文字")

    def _show_error(self, message):
        '''Show a modal critical message box with ``message``.'''
        QMessageBox.critical(self, "标题", message,
                             QMessageBox.Yes | QMessageBox.No,
                             QMessageBox.Yes)

    def browserButton_callback(self):
        '''
        Let the user pick an image with QFileDialog and remember its path.
        '''
        self.status = True
        # Filters are separated by a double semicolon.
        self.fileName1, filetype = QFileDialog.getOpenFileName(
            self, "选取图片文件", "/home/kindy/图片",
            "All Files (*);;Music Files (*.png)")
        self.filePath.setText(self.fileName1)

    def screenButton_callback(self):
        '''
        Open the screenshot tool; confirming saves to "../temp/temp.png".
        '''
        # Switch the source back to the screenshot; previously `status` stayed
        # True after any browse, so the browsed file was converted instead.
        self.status = False
        self.fileName2 = r'../temp/temp.png'
        self.cap = ScreenShot()
        self.cap.show()

    def convertButton_callback(self, filename):
        '''
        Run Baidu OCR on the selected image and display the recognised text.

        ``filename`` receives the argument emitted by the ``clicked`` signal
        and is not used.
        '''
        self.APP_ID = self.appid.text()
        self.API_KEY = self.apikey.text()
        self.SECRET_KEY = self.screetkey.text()
        self.plainTextEdit.setPlainText('')
        self.plainTextEdit.setStatusTip('')

        # One message box per missing credential, as before.
        missing = [
            message
            for value, message in (
                (self.APP_ID, "请输入appid"),
                (self.API_KEY, "请输入apikey"),
                (self.SECRET_KEY, "请输入screetkey"),
            )
            if value == ''
        ]
        if missing:
            for message in missing:
                self._show_error(message)
            return

        self.aipOcr = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
        start = time.time()
        image_path = self.fileName1 if self.status else self.fileName2
        # Also covers the empty string returned by a cancelled file dialog.
        if not image_path:
            self._show_error("请选择一张图片,或进行截图")
            return

        res = self.aipOcr.webImage(getImageBytes(image_path))
        print(res)
        # Inspect the whole response: the old loop kept only the verdict for
        # the last key it iterated over.
        failed = ('error_code' in res or 'error_msg' in res
                  or 'words_result' not in res)
        if failed:
            self._show_error("输入信息有误,请重新输入")
        else:
            text = ''.join(
                str(item['words']) + '\n' for item in res['words_result'])
            self.plainTextEdit.setPlainText(text)
            print(text)
        end = time.time()
        self.plainTextEdit.setStatusTip("图片文字转换时间:%.2fs" % (end - start))
from aip import AipOcr

APP_ID = '16600152'
API_KEY = 'oNnWciQupqWPH871GU0T77dy'
SECRET_KEY = 'xMNjsEhc1RbIxGlhWdX9ACDe5LjktDHi'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Open ``filePath`` in binary mode and return everything it contains."""
    with open(filePath, 'rb') as handle:
        payload = handle.read()
    return payload


image = get_file_content('C:\\Users\\povti\\Desktop\\test\\test2.jpg')

# Optional request parameters; only "probability" is enabled here
# ("language_type", "detect_direction" and "detect_language" are left off).
options = {"probability": "true"}

# General text recognition on the local image, with the options above.
print(client.basicGeneral(image, options))
class BaseCls(object):
    """Session helper for app.singlewindow.cn: captcha OCR login and cookie cache."""

    STATUS_MAP = {
        'B': '海关终审通过',
        'P': '预审批通过',
        '': '',
    }

    # Headers sent with the login form and the login-state check.
    _LOGIN_HEADERS = {
        'Host': 'app.singlewindow.cn',
        'Origin': 'http://app.singlewindow.cn',
        'Referer': 'http://app.singlewindow.cn/cas/login?_local_login_flag=1&service=http://app.singlewindow.cn/cas/jump.jsp%3FtoUrl%3DaHR0cDovL2FwcC5zaW5nbGV3aW5kb3cuY24vY2FzL29hdXRoMi4wL2F1dGhvcml6ZT9jbGllbnRfaWQ9MTM2NyZyZXNwb25zZV90eXBlPWNvZGUmcmVkaXJlY3RfdXJpPWh0dHAlM0ElMkYlMkZzei5zaW5nbGV3aW5kb3cuY24lMkZkeWNrJTJGT0F1dGhMb2dpbkNvbnRyb2xsZXI=&localServerUrl=http://sz.singlewindow.cn/dyck&localDeliverParaUrl=/deliver_para.jsp&colorA1=d1e4fb&colorA2=66,%20124,%20193,%200.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Upgrade-Insecure-Requests': '1',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    }

    # Default headers for fetching the login page and the captcha image.
    _BROWSER_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
        "Connection": "keep-alive",
    }

    def __init__(self, *args, **kwargs):
        self.UserAgent = settings.USER_AGENT
        self.CheckIndex = settings.CHECK_INDEX
        self.IndexUrl = settings.INDEX_URL
        self.ImageUrl = settings.IMAGE_URL
        self.ImageDir = settings.IMAGE_DIR    # raw captcha image path
        self.ImageDir2 = settings.IMAGE_DIR2  # binarised captcha image path
        self.CookieDir = settings.COOKIE_DIR  # cookie cache file path
        self.CookieUrl = None
        self.aipOcr = AipOcr(settings.APP_ID, settings.API_KEY, settings.SECRET_KEY)
        self.session = MySession()

    def check_login(self):
        """Load cookies into the session and log in again if they have expired."""
        self.session.headers.update(self._LOGIN_HEADERS)
        self.session.cookies.update(self.get_cookie())
        res = self.session.post(self.IndexUrl, timeout=30)
        # Membership test: `find(...) > 0` missed a match at index 0.
        if "登录成功" in res.text:
            log.info('已登录!!!')
        else:
            log.info('cookie失效,重新登录!!!')
            self.get_cookie(LOCAL_COOKIE_FLG=False)

    def get_file_content(self):
        """Binarise the saved captcha image and return the result's bytes.

        Reads ``ImageDir``, converts it to greyscale, maps values below the
        threshold to 0 and the rest to 1, writes it to ``ImageDir2`` and
        returns that file's content.
        """
        img_grey = Image.open(self.ImageDir).convert('L')
        threshold = 55
        table = [0 if i < threshold else 1 for i in range(256)]
        img_out = img_grey.point(table, '1')
        img_out.save(self.ImageDir2)
        with open(self.ImageDir2, 'rb') as f:
            return f.read()

    def get_login_info(self, headers=None):
        """Fetch the login page and return its ``(lt, execution)`` form values."""
        if headers is None:
            headers = dict(self._BROWSER_HEADERS)
        # A plain request is used on purpose: going through self.session would
        # send the session state along and make the login fail.
        res = requests.get(self.IndexUrl, headers=headers, timeout=30)
        content = etree.HTML(res.text)
        lt = content.xpath(r'//*[@id="fm1"]/p[1]/input[1]/@value')[0]
        execution = content.xpath(r'//*[@id="fm1"]/p[1]/input[2]/@value')[0]
        return lt, execution

    def know_Image(self, headers=None, timeout=30):
        """Download and OCR captchas until one looks valid or time runs out.

        :param headers: request headers for the captcha download.
        :param timeout: overall budget in seconds for the retry loop.
        :return: ``{'value': code or None, 'error': message or None}``; a
            value is accepted only when it is exactly four ASCII
            alphanumeric characters.
        """
        start_time = time.time()
        ret = {'value': None, 'error': None}
        if headers is None:
            headers = dict(self._BROWSER_HEADERS)
        code_count = 0
        # Elapsed time is evaluated in the loop condition. Previously it was
        # updated only at the bottom of the body, which `continue` skipped, so
        # repeated OCR failures never hit the timeout.
        while int(time.time() - start_time) < timeout:
            code_count += 1
            res = self.session.get(self.ImageUrl, headers=headers, timeout=30)
            with open(self.ImageDir, "wb") as f:
                f.write(res.content)
            value = self.aipOcr.basicGeneral(self.get_file_content(), settings.OPTIONS)
            try:
                value = value['words_result'][0]['words'].replace(' ', '')
            except (KeyError, IndexError, TypeError, AttributeError):
                # Nothing recognised or an error response: fetch a new captcha.
                continue
            if 4 == len(value) and re.match('^[0-9a-zA-Z]{4}$', value):
                log.info("已识别验证码%d张,验证码识别成功..." % code_count)
                log.info("验证码是:%r" % value)
                ret['value'] = value
                return ret
        ret['error'] = '图片识别程序超时退出...'
        log.warning('图片识别程序超时退出...')
        return ret

    def get_login_cookie(self):
        """Submit the login form once; return True when the login succeeded."""
        lt, execution = self.get_login_info()
        data = {
            'swy': settings.USERNAME,
            'swm': hashlib.md5(settings.PASSWD.encode('utf8')).hexdigest(),
            'swm2': '',
            'verifyCode': self.know_Image().get('value'),
            'lt': lt,
            '_eventId': 'submit',
            'execution': execution,
            'swLoginFlag': 'swUp',
            'lpid': 'P1',
        }
        self.session.headers.update(self._LOGIN_HEADERS)
        res = self.session.post(self.IndexUrl, data=data, timeout=30)
        if "登录成功" in res.text:
            log.info('模拟登陆成功!!!')
            return True
        return False

    def save_cookie(self):
        """Write the session cookies to ``CookieDir`` as JSON."""
        with open(self.CookieDir, "w") as output:
            cookies = self.session.cookies.get_dict()
            json.dump(cookies, output)
        log.info("已在目录下生成cookie文件")

    def get_cookie(self, LOCAL_COOKIE_FLG=True):
        """Return cookies from the local cache, or from a fresh web login.

        Open question from the original author: how should different
        processes share the login state?

        :param LOCAL_COOKIE_FLG: when True and the cache file exists, load
            it; otherwise delete any cache file and log in again.
        :return: dict of cookies.
        """
        if LOCAL_COOKIE_FLG and os.path.exists(self.CookieDir):
            print('cookie已存在...')
            with open(self.CookieDir, "r") as f:
                return json.load(f)
        print('cookie不存在...')
        if os.path.exists(self.CookieDir):
            os.remove(self.CookieDir)
        return self.get_web_cookie()

    def get_web_cookie(self):
        """Log in (up to 10 attempts, 1 s apart), cache and return the cookies.

        :raises Exception: when every attempt fails.
        """
        retry = 10
        while not self.get_login_cookie():
            log.info('登陆失败,1S后重新登陆..')
            retry -= 1
            print('retry = ', retry)
            if retry < 1:
                raise Exception('登录重试次数超过9次,程序退出')
            time.sleep(1)
        headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'Content-Type': 'application/json',
            'Referer': None,
        }
        self.session.headers.update(headers)
        self.session.get(self.CookieUrl, timeout=30)
        self.save_cookie()
        return self.session.cookies.get_dict()
# -*- coding: utf-8 -*-
from aip import AipOcr

# Your APPID / AK / SK
APP_ID = '14836038'
API_KEY = 'grTm0iuaEnyjNn0XZriXGkKU'
SECRET_KEY = 'vUVtHedaEKmG8ecjWarhxSjmxWTBeIu7'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Read the file at ``filePath`` in binary mode and return its bytes."""
    with open(filePath, 'rb') as fp:
        return fp.read()


image = get_file_content(r'C:\Users\Administrator\Desktop\WebCrawler\day16\baiduAi\realbaiduAi\test_pic\sina.jpg')

# Recognise the text of the local image with the high-accuracy endpoint
# (client.basicGeneral(image) is the standard-accuracy alternative).
list_w = client.basicAccurate(image)['words_result']
for i in list_w:
    # Print each recognised line once. The old inner loop over i.items()
    # repeated the line once per field of the entry.
    print(i['words'])

# Optional request parameters
options = {}
options["language_type"] = "CHN_ENG"
options["detect_direction"] = "true"
options["detect_language"] = "true"
options["probability"] = "true"
# -*- encoding:utf-8 -*-
"""
@author: leel
@file: character_recognize.py
@time: 2020/7/2 12:27
@description:
"""
import re
from aip import AipOcr

# Your APPID / AK / SK
APP_ID = '20711349'
# The original literal started with a stray space; removed so the key is the
# same 24-character form as the other API keys in this file.
API_KEY = 'ErkKG3QtmuMWvrG1o9q3MVQz'
SECRET_KEY = 'YxG1xekcAC2xD0FnV1GWEo2FuLnydGWI'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Read the file at ``filePath`` in binary mode and return its bytes."""
    with open(filePath, 'rb') as fp:
        return fp.read()


image = get_file_content('example.jpg')

# Call general text recognition (version with position information) on the
# local image.
result = client.general(image)
print(result)
data = str(result)

# Take the recognised lines from the response itself. The former regex over
# str(dict) only matched when 'words' was the last key of an entry and broke
# on text containing quotes.
res = [entry['words'] for entry in result.get('words_result', [])]
def client():
    '''Return a Baidu OCR client chosen at random from the credential pool.'''
    candidates = [AipOcr('', '', '')]
    return random.choice(candidates)
def __init__(self):
    """Build the OCR client from the credentials exposed by ``pd``."""
    credentials = (pd.APP_ID, pd.API_KEY, pd.SECRET_KEY)
    self.client = AipOcr(*credentials)
#coding=utf-8
# Baidu OCR API: read a local image and recognise its text
from aip import AipOcr

# Baidu AI credentials
APP_ID = '15529244'
API_KEY = 'TgXpSb1tWliUDeqrYLh722i7'
SECRET_KEY = 'K19qjQ2An9LSEDwd143vCxpXU3whwOsz'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Read the file at ``filePath`` in binary mode and return its bytes."""
    with open(filePath, 'rb') as fp:
        return fp.read()


image = get_file_content('111.png')
results = client.general(image)["words_result"]
outputtxt = [r["words"] for r in results]

# `with` closes the output file even if a write fails.
with open('111.md', 'w', encoding='utf-8') as f:
    f.writelines(line + '\n' for line in outputtxt)
self.textRect = None self.textInput.hide() self.textInput.clearText() self.redraw() def changeFont(self, font): self.fontNow = font ## 百度API参数 APP_ID = '9851066' API_KEY = 'LUGBatgyRGoerR9FZbV4SQYk' SECRET_KEY = 'fB2MNz1c2UHLTximFlC4laXPg7CVfyjV' # 初始化文字识别 aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY) options = {'detect_direction': 'true', 'language_type': 'CHN_ENG'} # 使用QtCreator建立的ui文件路径 qtCreatorFile = "baidu-api.ui" # 使用uic加载 Ui_MainWindow, QtBaseClass = uic.loadUiType(qtCreatorFile) # 读取图片 def getImageBytes(filename): with open(filename, 'rb') as fp: return fp.read() class MyApp(QMainWindow, Ui_MainWindow):
from aip import AipOcr

if __name__ == "__main__":
    # Fill in the actual appId, apiKey and secretKey obtained from the Baidu
    # Cloud console.
    appId = '23077616'
    apiKey = '6FlEkzTZhREpWzPiWWtGGdjt'
    secretKey = 'wqoMOGQEMGOlc8DVCyLfgjnKhinsSYH6'

    # Create the OCR client.
    ocr = AipOcr(appId, apiKey, secretKey)

    fin = open('img/dizhi.png', 'rb')
    try:
        img = fin.read()
    finally:
        fin.close()

    res = ocr.basicGeneral(img)
    print(res)
#coding:utf-8
from aip import AipOcr
import json

# Your APPID / AK / SK
# SDK docs: https://cloud.baidu.com/doc/OCR/OCR-Python-SDK.html#.E9.80.9A.E7.94.A8.E6.96.87.E5.AD.97.E8.AF.86.E5.88.AB
APP_ID = '***'
API_KEY = '***'
SECRET_KEY = '***'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Read the file at ``filePath`` in binary mode and return its bytes."""
    with open(filePath, 'rb') as fp:
        return fp.read()


image = get_file_content('example.jpg')
text = client.basicGeneral(image)
# Call-style print with a single argument works on both Python 2 and 3; the
# statement form was a syntax error on Python 3.
print(text['log_id'])
print(type(text['words_result']))
# with open('result.txt',r'a+') as my:
#     my.write(str(text['words_result']))
def __init__(self,app_id,api_key,secret_key):
    """Create the Baidu OCR client and store it on ``self.aipOcr``."""
    ## Initialise AipOcr with the app id, API key and secret key you applied for
    self.aipOcr = AipOcr(app_id,api_key,secret_key)
# The bare string below is the original's label ("read image") for whatever
# definition follows this one.
""" 读取图片 """
# 2. Use a third-party SDK to do the recognition
import time
import keyboard
from PIL import ImageGrab
from aip import AipOcr  # the Baidu SDK

# Your APPID / AK / SK
APP_ID = '17134093'
API_KEY = 'gDGaUOGMRX5cxqFOxgp5SGbm'
SECRET_KEY = '2YOAu5p6MEpq9iWKR3yKRERfxkduWFWN'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Open ``filePath`` in binary mode and return everything it contains."""
    with open(filePath, 'rb') as handle:
        payload = handle.read()
    return payload


while True:
    # Wait for the F1 hotkey, then for Ctrl+C, then pause briefly.
    keyboard.wait(hotkey='f1')
    keyboard.wait(hotkey='ctrl+c')
    time.sleep(0.1)
# -*- coding: UTF-8 -*-
from aip import AipOcr

# Credentials
APP_ID = '10764564'
API_KEY = 'TxvXG9liAGvVVpSuYGZ0iwXW'
SECRET_KEY = 'QG8SWcX4l98Q5HGxRhAnBKqhK9VmYGZL'

# Text-recognition client
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Image to recognise
filePath = "wenzi.png"


def get_file_content(filePath):
    """Open ``filePath`` in binary mode and return everything it contains."""
    with open(filePath, 'rb') as handle:
        payload = handle.read()
    return payload


# Request parameters
options = dict(detect_direction='true', language_type='CHN_ENG')

# Web-image text recognition on the local file's bytes.
image_bytes = get_file_content(filePath)
result = aipOcr.webImage(image_bytes, options)
# For an image given by URL the original author's example is:
# result = aipOcr.webImage('http://www.xxxxxx.com/img.jpg')