def get_info(self, html=None):
    """Assemble the hit card form: the previous submission with fresh values.

    The form starts as a copy of the page's ``def`` object, gains the two
    key/value pairs captured by the magic-code regex, and then has selected
    fields overwritten from the page's ``oldInfo`` object, from constants,
    and from the OCR reading of a freshly downloaded captcha image.

    Args:
        html: Page source to parse. When falsy, the page is downloaded from
            ``self.base_url`` after a one-second pause.

    Returns:
        The assembled form dict. The same object is stored on ``self.info``.

    Raises:
        RegexMatchError: If ``oldInfo`` is absent from the page, or if the
            ``def`` object or the magic-code pairs cannot be matched.
        DecodeError: If a matched JSON fragment fails to parse.
        KeyError: If ``oldInfo`` lacks one of the carried-over fields.
    """
    if not html:
        time.sleep(1)
        html = self.sess.get(self.base_url).content.decode()

    try:
        cached_forms = re.findall(r'oldInfo: ({[^\n]+})', html)
        if not cached_forms:
            # No cached submission on the page; the message asks the user to
            # submit manually at least once before running the script.
            raise RegexMatchError("未发现缓存信息,请先至少手动成功打卡一次再运行脚本")
        old_info = json.loads(cached_forms[0])

        def_info = json.loads(re.findall(r'def = ({[^\n]+})', html)[0])

        first_key, first_value, second_key, second_value = re.findall(
            r'"([0-9a-z]{32})": "([0-9]{10})","([0-9a-z]{32})":"([0-9a-z]{32})"',
            html)[0]
        magic_code_group = {first_key: first_value, second_key: second_value}
    except IndexError as err:
        # A ``[0]`` on an empty findall result: the page layout did not match.
        raise RegexMatchError(
            'Relative info not found in html with regex: ' + str(err))
    except json.decoder.JSONDecodeError as err:
        raise DecodeError('JSON decode error: ' + str(err))

    new_info = def_info.copy()
    new_info.update(magic_code_group)

    ocr = ddddocr.DdddOcr()
    resp = self.sess.get(self.captcha_url)

    # Form changes, applied in this order on top of the defaults.
    new_info.update({
        'szgjcs': "",
        'zgfx14rfhsj': "",
        'geo_api_info': old_info['geo_api_info'],  # location
        'address': old_info['address'],
        'area': old_info['area'],
        'city': old_info['city'],
        'ismoved': 0,
        'sfzx': old_info['sfzx'],  # on campus
        'sfymqjczrj': old_info['sfymqjczrj'],  # entry into the country
        'sfqrxxss': 1,  # information confirmed as true
        'campus': '紫金港校区',  # campus
        'verifyCode': ocr.classification(resp.content),  # captcha
    })

    self.info = new_info
    return new_info
def yzm():
    """Read the captcha with OCR and submit it, retrying while it is rejected.

    Each pass waits up to 10 seconds for the ``imgObjjgRegist`` image,
    screenshots it to ``./save.png``, runs OCR on that file, types the result
    into the ``yzm`` field and clicks ``pass-dialog``. The loop stops when:

    * no ``weui-toptips_warn`` element is present one second after a
      submission,
    * the captcha image cannot be located (returns immediately), or
    * the attempt counter has exceeded 5 (the pass in which that is detected
      still runs to completion).

    Works on the module-level ``driver``. This is best effort: any exception
    is printed and swallowed rather than propagated.

    Elements are located through ``find_element(s)(By.…)`` because the
    ``find_element_by_*`` helpers no longer exist in Selenium 4.3+.
    """
    try:
        ocr = None  # created on first use and reused across retries
        operation = True
        counter = 0
        while operation:
            if counter > 5:
                # Attempt budget used up: let this pass run, then stop.
                operation = False

            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//*[@id='imgObjjgRegist']")))
            # Locate the captcha image.
            imgelement = driver.find_elements(
                By.XPATH, '//*[@id="imgObjjgRegist"]')
            if not imgelement:
                return

            try:
                imgelement[0].screenshot('./save.png')
            except Exception as e:
                print("截图失败")
                print(e)
                counter += 1
                continue

            # Captcha recognition.
            if ocr is None:
                ocr = ddddocr.DdddOcr()
            with open('./save.png', 'rb') as f:
                img_bytes = f.read()
            res = ocr.classification(img_bytes)
            print(res)

            driver.find_element(By.ID, 'yzm').send_keys(res)
            driver.find_element(By.ID, 'pass-dialog').click()
            counter += 1
            sleep(1)
            # No warning toast after submitting means the code was accepted.
            if not driver.find_elements(By.CLASS_NAME, "weui-toptips_warn"):
                operation = False
    except Exception as e:
        print("验证码处理失败")
        print(e)
def do_auto_work(browser):
    """Open the login page and fill in the login form, captcha included.

    Steps, in order: load the login URL, click the button group of the
    ``winLoginNotice`` dialog, pick the second certificate type, type the ID
    number and password, wait up to 30 seconds for the captcha image, OCR a
    screenshot of it, and type the recognised text into the captcha field.

    Args:
        browser: A Selenium WebDriver instance.
    """
    browser.get("https://hk.sz.gov.cn:8118/userPage/login")

    # Click through the login notice dialog.
    actions = ActionChains(browser)
    notice_buttons = browser.find_element(
        By.XPATH, '//*[@id="winLoginNotice"]/div[@class="flexbox btngroup"]')
    actions.move_to_element(notice_buttons).click()
    actions.perform()

    # Certificate type: the option at index 1.
    Select(
        browser.find_element(By.XPATH, '//select[@id="select_certificate"]')
    ).select_by_index(1)

    browser.find_element(
        By.XPATH, '//input[@id="input_idCardNo"]').send_keys("111111")
    browser.find_element(
        By.XPATH, '//input[@id="input_pwd"]').send_keys("111111")

    # Explicit wait: the captcha image may appear after the rest of the form.
    captcha_node = WebDriverWait(browser, 30).until(
        expected_conditions.presence_of_element_located(
            (By.XPATH, '//*[@id="img_verify"]')))

    captcha_bytes = base64.b64decode(captcha_node.screenshot_as_base64)
    captcha_text = ddddocr.DdddOcr().classification(captcha_bytes)

    browser.find_element(
        By.XPATH, '//input[@id="input_verifyCode"]').send_keys(captcha_text)
import ddddocr

# Shared OCR engine, created once at import time with the banner disabled.
ocr = ddddocr.DdddOcr(show_ad=False)


def get_num(img_bytes) -> int:
    """Return the integer that OCR reads from an image.

    Args:
        img_bytes: Image data handed to ``ocr.classification``.

    Returns:
        ``0`` when OCR yields nothing, ``1`` when the recognised text
        contains a lowercase ``l``, otherwise the recognised text converted
        with ``int``.

    Raises:
        ValueError: If the recognised text is not a valid integer literal.
    """
    text = ocr.classification(img_bytes)
    if not text:
        return 0
    if 'l' in text:
        # Any lowercase "l" in the result is taken to mean the digit 1.
        return 1
    return int(text)
import random import time import traceback from tempfile import NamedTemporaryFile from typing import List, Callable, Dict import ddddocr from loguru import logger from selenium.common.exceptions import NoSuchElementException, TimeoutException from selenium.webdriver.common.by import By from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support import expected_conditions from selenium.webdriver.support.select import Select from selenium.webdriver.support.wait import WebDriverWait ocr = ddddocr.DdddOcr() class PageOperations: operations = {} # type: Dict[str, Callable] operation_children_dict = {} # type: Dict[str, List[str]] def __init__(self): raise NotImplementedError() @staticmethod def run(name: str, kwargs=None): if kwargs is None: kwargs = {} try: logger.info(f"run operation {name}")
if ocr_mode == "本地1": # 这一行代码用于关闭tensorflow的gpu模式(如果使用,内存占用翻几倍) os.environ["CUDA_VISIBLE_DEVICES"] = "-1" import muggle_ocr # 初始化;model_type 包含了 ModelType.OCR/ModelType.Captcha 两种 sdk = muggle_ocr.SDK(model_type=muggle_ocr.ModelType.OCR) if ocr_mode == "本地2": import tr if ocr_mode == "本地3": import ddddocr ocr = ddddocr.DdddOcr(show_ad=False, old=True) if ocr_mode == "本地4": import easyocr # 'ch_tra' 中文繁体 easyocr_reader = easyocr.Reader(['ch_sim', 'en'], gpu=False, verbose=False) # this needs to run only once to load the model into memory else: if ocr_mode == "网络1": import queue # 这一行创建了发包队列 baidu_queue = queue.Queue(baidu_QPS) config = { 'appId': 'PCR', 'apiKey': baidu_apiKey,
def __init__(self):
    """Create the three ddddocr engines held by this object.

    All are built with the banner disabled (``show_ad=False``):

    * ``self.oldocr`` is constructed with ``old=True``.
    * ``self.ocr`` is constructed with the default settings.
    * ``self.det`` is constructed with ``det=True``.
    """
    quiet = {"show_ad": False}
    self.oldocr = ddddocr.DdddOcr(old=True, **quiet)
    self.ocr = ddddocr.DdddOcr(**quiet)
    self.det = ddddocr.DdddOcr(det=True, **quiet)
def detect(input):
    """Run ddddocr classification on the contents of a readable resource.

    Args:
        input: A context manager whose entered value provides ``read()``,
            for example a file opened in binary mode. It is exited before
            this function returns.

    Returns:
        The result of ``DdddOcr.classification`` for the data read.
    """
    engine = ddddocr.DdddOcr(show_ad=0)
    with input as stream:
        return engine.classification(stream.read())