def mark_fail(self, data):
    """
    Apply the failed-validation penalty to one previously scored proxy record.

    Increments ``combo_fail``, ``test_count`` and ``fail_count``, resets
    ``combo_success`` to 0, lowers ``score`` and recomputes ``success_rate``
    and ``stability``. The record is then either deleted from ``self.db``
    or written back with the new values.

    Deletion is judged on the values the record had *before* this failure:
    at least 100 tests with a success rate at or below ``MIN_SUCCESS_RATE``,
    or a score whose integer part is negative.

    :param data: a single proxy record (dict), mutated in place; a falsy
        value makes the call a no-op
    """
    if not data:
        return

    ip = data['ip']
    port = data['port']
    proxy = ':'.join([ip, port])
    _score = data['score']
    _count = data['test_count']
    _f_count = data['fail_count']
    _success_rate = data['success_rate']
    _combo_fail = data['combo_fail']
    valid_time = time_to_date(int(time.time()))

    # Share of failed tests once the current failure is counted.
    fail_ratio = (_f_count + 1) / (_count + 1)
    # The penalty grows with both the failure ratio and the failure streak.
    data['score'] = round(
        _score - FAIL_BASIC * fail_ratio * (_combo_fail + 1), 2)
    data['combo_fail'] = _combo_fail + 1
    data['combo_success'] = 0
    data['test_count'] = _count + 1
    data['fail_count'] = _f_count + 1
    data['valid_time'] = valid_time
    success_rate = round(1 - fail_ratio, 3)
    # Round after scaling so float noise (e.g. 57.699999999999996) never
    # ends up in the stored percentage string.
    data['success_rate'] = str(round(success_rate * 100, 1)) + '%'
    data['stability'] = round(
        data['score'] * data['test_count'] * success_rate / PRECISION, 4)

    # Compare the previous success rate numerically. Comparing the
    # percentage strings would be lexicographic ('9.0%' > '20.0%').
    low_rate = False
    if _count >= 100:
        try:
            prior_percent = float(_success_rate.replace('%', ''))
        except ValueError:
            # An empty/garbled rate is treated as the lowest possible one.
            prior_percent = 0.0
        low_rate = prior_percent <= MIN_SUCCESS_RATE * 100

    if low_rate or int(_score) < 0:
        logger.warning('Deleting unstable proxy: %s ' % proxy)
        self.db.delete({'ip': ip, 'port': port})
    else:
        self.db.update({'ip': ip, 'port': port}, data)
def mark_success(self, data):
    """
    Score and store a proxy that has just passed validation.

    When a record with the same ip/port already exists in ``self.db`` the
    work is handed to ``mark_update``; otherwise the initial score fields
    are filled in and the record is saved.

    :param data: the single proxy record to store (dict), mutated in place
    """
    host, port_no = data['ip'], data['port']

    # Already stored: this is a repeat success, not a first-time insert.
    if self.db.select({'ip': host, 'port': port_no}):
        self.mark_update(data)
        return

    location = get_ip_addr_03(host)
    # 'resp_time' looks like '<milliseconds>ms'; convert it to seconds.
    latency_s = round(int(data['resp_time'].replace('ms', '')) / 1000, 3)
    # 100 points at one second, 10 points lost per extra second.
    first_score = round(100 - 10 * (latency_s - 1), 2)
    first_stability = round(first_score / PRECISION, 4)
    now = time_to_date(int(time.time()))

    data.update({
        'createdTime': now,
        'valid_time': now,
        'address': location,
        'score': first_score,
        'test_count': 1,
        'stability': first_stability,
        'success_rate': '100%',
    })
    self.db.save(data)
def mark_update(self, data, collected=True):
    """
    Apply the successful-validation score update to one proxy record.

    Increments ``combo_success`` and ``test_count``, resets ``combo_fail``
    to 0, raises the score and recomputes ``success_rate`` and
    ``stability``, then writes the record back through ``self.db.update``.
    A record whose previous score has a negative integer part is deleted
    instead of being updated.

    :param data: a single proxy record (dict), mutated in place
    :param collected: when True the previous statistics are loaded from
        ``self.db``; when False ``data`` itself is used as the previous
        record
    """
    ip = data['ip']
    port = data['port']
    proxy = ':'.join([ip, port])
    data['valid_time'] = time_to_date(int(time.time()))
    # 'resp_time' looks like '<milliseconds>ms'; convert it to seconds.
    elapsed = round(int(data['resp_time'].replace('ms', '')) / 1000, 3)
    # Score of this single probe: 100 at one second, -10 per extra second.
    score = round(100 - 10 * (elapsed - 1), 2)

    if collected:
        try:
            _one_data = self.db.select({'ip': ip, 'port': port})[0]
        except Exception as e:
            # Best effort: without a stored record there is nothing to
            # update, but leave a trace instead of failing silently.
            logger.warning('Skip score update for proxy %s: %s,msg: %s '
                           % (proxy, e.__class__, e))
            return
    else:
        _one_data = data

    if not _one_data:
        return

    _score = _one_data['score']
    if int(_score) < 0:
        logger.warning('Deleting unstable proxy: %s ' % proxy)
        self.db.delete({'ip': ip, 'port': port})
        return

    _count = _one_data['test_count']
    _f_count = _one_data['fail_count']
    _address = _one_data['address']
    _combo_success = _one_data['combo_success']
    _created_time = _one_data['createdTime']
    _success_rate = round(
        float(_one_data['success_rate'].replace('%', '')) / 100, 4)

    # Running average of all probe scores plus a bonus that grows with
    # the success streak and the previous success rate.
    score = round((score + _score * _count) / (_count + 1) +
                  SUCCESS_BASIC * (_combo_success + 1) * _success_rate, 2)
    address = get_ip_addr_03(ip)
    # Keep the stored address when the fresh lookup yields nothing useful.
    address = _address if address == 'unknown' else address
    success_rate = round(1 - (_f_count / (_count + 1)), 3)
    stability = round(score * (_count + 1) * success_rate / PRECISION, 4)

    data['fail_count'] = _f_count
    data['createdTime'] = _created_time
    data['combo_fail'] = 0
    data['address'] = address
    data['score'] = score
    data['test_count'] = _count + 1
    data['combo_success'] = _combo_success + 1
    # Round after scaling so float noise (e.g. 57.699999999999996) never
    # ends up in the stored percentage string.
    data['success_rate'] = str(round(success_rate * 100, 1)) + '%'
    data['stability'] = stability
    # Never send '_id' along with the fields to update.
    data.pop('_id', None)
    self.db.update({'ip': ip, 'port': port}, data)
async def async_visit_target(self, db, url, proxy, bullet, sem, session, scan=True):
    """
    Coroutine that checks one proxy against a target URL and records the result.

    Sends a HEAD request to ``url`` through ``proxy['http']`` while holding
    ``sem``. On status code 200 the running score is updated and the record
    is passed to ``self.update`` (scan) or ``self.success`` (first check).
    On any other status code ``self.fail`` is called, but only when scanning.

    :param db: database object the result is written through
    :param url: target site URL
    :param proxy: proxy to validate against the target URL (dict; its
        ``'http'`` entry is used for the request)
    :param bullet: all stored data of the single proxy (dict)
    :param sem: semaphore limiting coroutine concurrency
    :param session: session used for the asynchronous request
    :param scan: True when scanning the target collection, False for the
        first validation before insertion
    """
    # Coerce the stored count once so the new count and the running
    # average below are computed from the same integer value.
    prev_count = int(bullet['test_count'])
    data = {
        'ip': bullet['ip'],
        'port': bullet['port'],
        'anony_type': bullet['anony_type'],
        'address': bullet['address'],
        'createdTime': bullet['createdTime'],
        'score': bullet['score'],
        'test_count': prev_count + 1,
        'url': url,
    }
    db_name = gen_target_db_name(url)
    async with sem:
        ret = await send_async_http(session, 'head', url,
                                    retries=RETRIES,
                                    headers=headers,
                                    proxy=proxy['http'],
                                    timeout=TIMEOUT)
        t, code = ret['cost'], ret['code']
        if code == 200:
            # Score of this probe; 15 is presumably the slowest acceptable
            # response time in seconds -- TODO confirm.
            probe_score = round((1 - t / 15) * 100, 2)
            # Running average over all probes, including this one.
            data['score'] = round(
                (bullet['score'] * prev_count + probe_score)
                / data['test_count'], 2)
            data['total'] = round(data['score'] * data['test_count'], 2)
            data['resp_time'] = str(t) + 's'
            data['valid_time'] = time_to_date(int(time.time()))
            if scan:
                self.update(db, data, db_name)
            else:
                self.success(db, data, db_name)
        elif scan:
            # Failures are only recorded for proxies already in the
            # target collection.
            self.fail(db, data, db_name)
def update(self, db, bullet, tname):
    """
    Refresh a proxy record that already exists in the target collection
    after a successful validation.

    Fills in an empty ``createdTime`` and an empty/unknown ``address``
    before writing the record back.

    :param db: database object that performs the operation
    :param bullet: all data of the single proxy (dict), mutated in place
    :param tname: name of the collection that belongs to the target url
    """
    host = bullet['ip']
    port_no = bullet['port']

    if bullet['createdTime'] == '':
        bullet['createdTime'] = time_to_date(int(time.time()))

    # Look the address up only when the record has no usable one.
    if bullet['address'] in ('unknown', ''):
        bullet['address'] = get_ip_addr(host)

    db.update({'ip': host, 'port': port_no}, bullet, tname=tname)
def success(self, db, bullet, tname):
    """
    Store a proxy whose first check against the target url succeeded.

    If the proxy is already present in the collection its stored ``_id``
    is copied onto ``bullet`` and the work is handed to ``update``;
    otherwise the record is stamped with a creation time and saved.
    A failing save is logged and not re-raised.

    :param db: database object that performs the operation
    :param bullet: all data of the single proxy (dict), mutated in place
    :param tname: name of the collection that belongs to the target url
    """
    host = bullet['ip']
    port_no = bullet['port']
    existing = db.select({'ip': host, 'port': port_no}, tname=tname)

    # Look the address up only when the record has no usable one.
    if bullet['address'] in ('unknown', ''):
        bullet['address'] = get_ip_addr(host)

    if existing:
        bullet['_id'] = existing[0]['_id']
        self.update(db, bullet, tname)
    else:
        bullet['createdTime'] = time_to_date(int(time.time()))
        try:
            db.save(bullet, tname=tname)
        except Exception as err:
            logger.error('%s,msg: %s ' % (err.__class__, err))