class Tester(object):
    """Test stored proxies in concurrent batches and report each result to the store."""

    def __init__(self):
        self.db = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test a single proxy with the asynchronous HTTP library aiohttp.

        Requests ``TEST_URL`` through the proxy. On a 200 response
        ``self.db.max(proxy)`` is called; on any exception
        ``self.db.decrease(proxy)`` is called.

        :param proxy: proxy address without scheme; ``bytes`` values are
            decoded as UTF-8 before use
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as r:
                    if r.status == 200:
                        self.db.max(proxy)
                        print('代理可用:', proxy)
                    else:
                        # NOTE(review): the result of exists() is discarded, so a
                        # non-200 status currently leaves the proxy untouched.
                        # Confirm whether decrease() was intended here.
                        self.db.exists(proxy)
                        print('状态码不合法:', proxy)
            except Exception:
                # Best effort: any failure while testing counts against the proxy.
                self.db.decrease(proxy)
                print('代理请求异常:', proxy)

    def run(self):
        """Test every stored proxy, ``BATCH_TEST_SIZE`` proxies at a time.

        Any exception raised while running is caught and printed, so this
        method does not raise to its caller.

        :return: None
        """
        try:
            count = self.db.count()
            print('当前剩余{}个代理'.format(count))
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                test_proxies = self.db.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutine objects
                # (deprecated in 3.8, TypeError from 3.11), so wrap them in tasks.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty collection,
                    # which would abort the remaining batches.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()  # flush buffered output
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class AipClient(object):
    """Baidu OCR API client that stores recognised text in Redis by image hash."""

    def __new__(cls, *args, **kw):
        """Return the shared instance, creating it on first use (singleton)."""
        try:
            return cls._instance
        except AttributeError:
            cls._instance = super().__new__(cls)
            return cls._instance

    def __init__(self, appid, api_key, secrrt_key, redis_url):
        self.appid, self.api_key, self.secrrt_key = appid, api_key, secrrt_key
        self.client = AipOcr(appid, api_key, secrrt_key)
        self.redis = RedisClient(redis_url)

    @property
    def options(self):
        """Options dictionary passed with every recognition request."""
        return dict(
            language_type="CHN_ENG",
            detect_direction="false",
            detect_language="true",
            probability="true",
        )

    def General(self, image, **kwargs):
        """Call ``basicGeneral`` on ``image``; extra keyword arguments are accepted but unused."""
        print('调取General_api 识别')
        request_options = self.options
        return self.client.basicGeneral(image, request_options)

    def Accurate(self, image):
        """Call ``basicAccurate`` on ``image``."""
        print('调取Accurate_api 识别')
        request_options = self.options
        return self.client.basicAccurate(image, request_options)

    def orc(self, image, **kwargs):
        """Recognise ``image``, trying ``General`` first and ``Accurate`` second.

        The first non-empty ``words_result`` wins: its first entry's
        ``words`` value is stored in Redis under the image hash and returned.

        :return: the recognised text, or ``'*'`` when both calls yield nothing
        """
        hash_value = MD5.md5(image)
        attempts = (
            lambda: self.General(image, **kwargs),
            lambda: self.Accurate(image),
        )
        for recognise in attempts:
            words = recognise().get('words_result')
            if words:
                text = words[0]['words']
                self.redis.add(hash_value, text)
                return text
        return '*'

    def run(self, image, **kwargs):
        """Return the stored text for ``image`` if Redis has it, else recognise it."""
        hash_value = MD5.md5(image)
        if not self.redis.exists(hash_value):
            return self.orc(image, **kwargs)
        return self.redis.get(hash_value)
class AipClient(object):
    """Baidu OCR API client that records recognised text in Redis."""

    def __new__(cls, *args, **kw):
        """Return the shared instance, creating it on first use (singleton)."""
        try:
            return cls._instance
        except AttributeError:
            cls._instance = super().__new__(cls)
            return cls._instance

    def __init__(self, appid, api_key, secrrt_key, redis_url):
        self.appid, self.api_key, self.secrrt_key = appid, api_key, secrrt_key
        self.client = AipOcr(appid, api_key, secrrt_key)
        self.redis = RedisClient(redis_url)

    @property
    def options(self):
        """Options dictionary passed with every recognition request."""
        return dict(
            language_type="CHN_ENG",
            detect_direction="false",
            detect_language="false",
            probability="true",
        )

    def General(self, image, **kwargs):
        """Call ``basicGeneral`` on ``image``; extra keyword arguments are accepted but unused."""
        print('调取General_api 识别')
        request_options = self.options
        return self.client.basicGeneral(image, request_options)

    def Accurate(self, image):
        """Call ``basicAccurate`` on ``image``."""
        print('调取Accurate_api 识别')
        request_options = self.options
        return self.client.basicAccurate(image, request_options)

    def orc(self, image, font_key, word, **kwargs):
        """Recognise ``image``, trying ``General`` first and ``Accurate`` second.

        On the first usable ``words_result`` the first entry's ``words``
        value is stored under the image hash (``redis.add``) and under
        ``font_key``/``word`` (``redis.hadd``), then returned. When both
        calls fail and ``FIXED`` is truthy, the image bytes are written to
        ``BASE_DIR/<hash>.jpg`` (unless that file exists) for manual
        correction.

        :return: the recognised text, or ``'*'`` when nothing was recognised
        """
        hash_value = MD5.md5(image)
        attempts = (
            lambda: self.General(image, **kwargs),
            lambda: self.Accurate(image),
        )
        for recognise in attempts:
            words = recognise().get('words_result')
            if words and words != '*':
                text = words[0]['words']
                self.redis.add(hash_value, text)
                self.redis.hadd(font_key, word, text)
                return text
        if FIXED:
            # Manual correction: keep the unrecognised image on disk.
            target = os.path.join(BASE_DIR, hash_value + '.jpg')
            if not os.path.exists(target):
                with open(target, 'wb') as f:
                    f.write(image)
        return '*'

    def run(self, image, font_key, word, **kwargs):
        """Return the stored text for ``image`` if Redis has it, else recognise it.

        A stored hit is also written under ``font_key``/``word`` via ``redis.hadd``.
        """
        hash_value = MD5.md5(image)
        if not self.redis.exists(hash_value):
            return self.orc(image, font_key, word, **kwargs)
        result = self.redis.get(hash_value)
        self.redis.hadd(font_key, word, result)
        return result
class Sender:
    """Dial ADSL in a loop and publish the resulting proxy address through RedisClient."""

    def __init__(self):
        # On the VPS, creating the RedisClient here made later calls block
        # (cause unknown), so each method creates its own RedisClient instead.
        self.timer = time.time()

    def get_ip(self, ifname=ADSL_IFNAME):
        """Get the IP address of this machine from ``ifconfig`` output.

        :param ifname: network interface name
        :return: the IPv4 address as a string, or None when ``ifconfig``
            fails or no address is found for ``ifname``
        """
        status, output = subprocess.getstatusoutput('ifconfig')
        if status != 0:
            return None
        pattern = re.compile(ifname + r'.*?inet.*?(\d+\.\d+\.\d+\.\d+).*?netmask', re.S)
        result = re.search(pattern, output)
        if result:
            return result.group(1)
        return None

    def test_proxy(self, proxy):
        """Test a proxy by requesting ``TEST_URL`` through it.

        :param proxy: proxy address as ``host:port``
        :return: True when the response status is 200, otherwise False
        """
        proxies = {
            'http': f'http://{PROXY_USER}:{PROXY_PASSWORD}@{proxy}',
            'https': f'https://{PROXY_USER}:{PROXY_PASSWORD}@{proxy}'
        }
        try:
            response = requests.get(TEST_URL, proxies=proxies, timeout=TEST_TIMEOUT)
        except (ConnectionError, ReadTimeout):
            return False
        # Always return a bool; a non-200 status used to fall through to None.
        return response.status_code == 200

    def remove_proxy(self):
        """Remove this client's proxy entry.

        :return: None
        """
        self.redis = RedisClient()
        self.redis.remove(CLIENT_NAME)
        print('Successfully Removed Proxy')

    def set_proxy(self, proxy):
        """Store the proxy under ``CLIENT_NAME``.

        :param proxy: proxy address
        :return: None
        """
        self.redis = RedisClient()
        if self.redis.set(CLIENT_NAME, proxy):
            print('Successfully Set Proxy', proxy)

    def exists_proxy(self):
        """Check whether a proxy entry exists for ``CLIENT_NAME``.

        :return: the result of ``RedisClient.exists``
        """
        self.redis = RedisClient()
        return self.redis.exists(CLIENT_NAME)

    def count_time_interval(self):
        """Return the seconds elapsed since this instance was created."""
        return time.time() - self.timer

    def adsl(self):
        """Main dialing loop; runs forever.

        Each round runs ``ADSL_BASH``, reads the new IP, tests the proxy,
        publishes it for ``ADSL_CYCLE`` seconds, then removes it and redials.
        A failed dial or IP lookup waits ``ADSL_ERROR_CYCLE`` seconds before
        the next round; an invalid proxy redials immediately.

        :return: None
        """
        while True:
            print('ADSL Start, Remove Proxy, Please wait')
            status, _ = subprocess.getstatusoutput(ADSL_BASH)
            if status != 0:
                print('ADSL Failed, Please Check')
                time.sleep(ADSL_ERROR_CYCLE)
                continue
            print('ADSL Successfully')
            ip = self.get_ip()
            if not ip:
                print('Get IP Failed, Re Dialing')
                time.sleep(ADSL_ERROR_CYCLE)
                continue
            print('Now IP', ip)
            print('Testing Proxy, Please Wait')
            proxy = '{ip}:{port}'.format(ip=ip, port=PROXY_PORT)
            if not self.test_proxy(proxy):
                print('Invalid Proxy')
                continue
            print('Valid Proxy')
            self.set_proxy(proxy)
            print('Sleeping')
            time.sleep(ADSL_CYCLE)
            try:
                self.remove_proxy()
            except ConnectionError:
                # NOTE(review): which ConnectionError is in scope depends on the
                # file's imports - confirm it matches what RedisClient raises.
                print('redis链接错误')
class AipClient(object):
    """Baidu OCR API client that records recognised text in Redis."""

    def __new__(cls, *args, **kw):
        """Return the shared instance, creating it on first use (singleton)."""
        try:
            return cls._instance
        except AttributeError:
            cls._instance = super().__new__(cls)
            return cls._instance

    def __init__(self, appid, api_key, secrrt_key, redis_url):
        self.appid, self.api_key, self.secrrt_key = appid, api_key, secrrt_key
        self.client = AipOcr(appid, api_key, secrrt_key)
        self.redis = RedisClient(redis_url)

    @property
    def options(self):
        """Options dictionary passed with every recognition request."""
        return dict(
            language_type="CHN_ENG",
            detect_direction="false",
            detect_language="false",
            probability="false",
        )

    def General(self, image, **kwargs):
        """Call ``basicGeneral`` on ``image``; extra keyword arguments are accepted but unused."""
        print('调取General_api 识别')
        request_options = self.options
        return self.client.basicGeneral(image, request_options)

    def Accurate(self, image):
        """Call ``basicAccurate`` on ``image``."""
        print('调取Accurate_api 识别')
        request_options = self.options
        return self.client.basicAccurate(image, request_options)

    def _remember(self, hash_value, font_key, word, text):
        """Store ``text`` under the image hash and under ``font_key``/``word``."""
        self.redis.add(hash_value, text)
        self.redis.hadd(font_key, word, text)

    def orc(self, image, font_key, word, **kwargs):
        """Recognise ``image``, trying ``General`` first and ``Accurate`` second.

        On the first usable ``words_result`` the first entry's ``words``
        value is stored in Redis and returned.

        :return: the recognised text, or ``'*'`` when nothing was recognised
        """
        hash_value = MD5.md5(image)

        general = self.General(image, **kwargs).get('words_result')
        if general and general != '*':
            text = general[0]['words']
            self._remember(hash_value, font_key, word, text)
            return text

        accurate = self.Accurate(image).get('words_result')
        if accurate and accurate != '*':
            text = accurate[0]['words']
            self._remember(hash_value, font_key, word, text)
            return text

        return '*'

    def run(self, image, font_key, word, **kwargs):
        """Return the stored text for ``image`` if Redis has it, else recognise it.

        A stored hit is also written under ``font_key``/``word`` via ``redis.hadd``.
        """
        hash_value = MD5.md5(image)
        if not self.redis.exists(hash_value):
            return self.orc(image, font_key, word, **kwargs)
        result = self.redis.get(hash_value)
        self.redis.hadd(font_key, word, result)
        return result