Esempio n. 1
0
 def mark_fail(self, data):
     """
     Apply a failed-validation scoring update to one stored proxy record.

     Increments ``combo_fail``, ``test_count`` and ``fail_count``, resets
     ``combo_success`` to 0, lowers the score and recomputes
     ``success_rate`` and ``stability``. The record is then deleted when it
     meets the removal condition, otherwise it is written back.

     :param data: a single proxy record (dict); an empty or None value is
         ignored
     """
     if not data:
         return
     ip = data['ip']
     port = data['port']
     proxy = ':'.join([ip, port])
     _score = data['score']
     _count = data['test_count']
     _f_count = data['fail_count']
     _success_rate = data['success_rate']
     _combo_fail = data['combo_fail']

     fail_ratio = (_f_count + 1) / (_count + 1)
     success_rate = round(1 - fail_ratio, 3)

     # The penalty scales with the overall failure ratio and with the length
     # of the current failure streak.
     data['score'] = round(
         _score - FAIL_BASIC * fail_ratio * (_combo_fail + 1), 2)
     data['combo_fail'] = _combo_fail + 1
     data['combo_success'] = 0
     data['test_count'] = _count + 1
     data['fail_count'] = _f_count + 1
     data['valid_time'] = time_to_date(int(time.time()))
     data['success_rate'] = str(success_rate * 100) + '%'
     data['stability'] = round(
         data['score'] * data['test_count'] * success_rate / PRECISION, 4)

     # The removal decision uses the statistics from before this failure.
     # The stored rate is a percent string such as '66.7%'; it is converted
     # to a number because comparing the raw strings is lexicographic
     # ('9.0%' would rank above '50.0%').
     too_unreliable = (
         _count >= 100 and
         float(str(_success_rate).replace('%', '')) <= MIN_SUCCESS_RATE * 100)
     if too_unreliable or int(_score) < 0:
         logger.warning('Deleting unstable proxy: %s ', proxy)
         self.db.delete({'ip': ip, 'port': port})
     else:
         self.db.update({'ip': ip, 'port': port}, data)
Esempio n. 2
0
 def mark_success(self, data):
     """
     Store a proxy that has just passed validation, giving it a first score.

     When a record with the same ip/port is already stored, the work is
     handed to ``mark_update`` and nothing is saved here.

     :param data: a single proxy record to store (dict); ``resp_time`` is
         read as an integer string with an optional 'ms' marker
     """
     ip, port = data['ip'], data['port']
     if self.db.select({'ip': ip, 'port': port}):
         self.mark_update(data)
         return

     address = get_ip_addr_03(ip)
     millis = int(data['resp_time'].replace('ms', ''))
     elapsed = round(millis / 1000, 3)
     # 100 points at an elapsed value of 1, minus 10 per extra unit.
     score = round(100 - 10 * (elapsed - 1), 2)
     now = time_to_date(int(time.time()))
     data.update({
         'createdTime': now,
         'valid_time': now,
         'address': address,
         'score': score,
         'test_count': 1,
         'stability': round(score / PRECISION, 4),
         'success_rate': '100%',
     })
     self.db.save(data)
Esempio n. 3
0
 def mark_update(self, data, collected=True):
     """
     Apply a successful-validation scoring update to one proxy record.

     Increments ``combo_success`` and ``test_count``, resets ``combo_fail``
     to 0, raises the score and writes the record back. A record whose
     previous score satisfies ``int(score) < 0`` is deleted instead.

     :param data: a single proxy record (dict) carrying at least ``ip``,
         ``port`` and ``resp_time``
     :param collected: when True the previous statistics are read from the
         database by ip/port; when False ``data`` itself supplies them
     """
     ip = data['ip']
     port = data['port']
     proxy = ':'.join([ip, port])
     data['valid_time'] = time_to_date(int(time.time()))
     elapsed = round(int(data['resp_time'].replace('ms', '')) / 1000, 3)
     # Score of this single check: 100 at an elapsed value of 1, minus 10
     # for every extra unit.
     score = round(100 - 10 * (elapsed - 1), 2)

     if collected:
         try:
             _one_data = self.db.select({'ip': ip, 'port': port})[0]
         except IndexError:
             # No stored record for this proxy: nothing to update.
             return
         except Exception as e:
             # Still best effort, but leave a trace instead of failing
             # silently.
             logger.error('%s,msg: %s ' % (e.__class__, e))
             return
     else:
         _one_data = data
     if not _one_data:
         return

     _score = _one_data['score']
     if int(_score) < 0:
         logger.warning('Deleting unstable proxy: %s ' % proxy)
         self.db.delete({'ip': ip, 'port': port})
         return

     _count = _one_data['test_count']
     _f_count = _one_data['fail_count']
     _address = _one_data['address']
     _combo_success = _one_data['combo_success']
     _created_time = _one_data['createdTime']
     # The stored rate is a percent string such as '66.7%'.
     _success_rate = round(
         float(_one_data['success_rate'].replace('%', '')) / 100, 4)

     # Running average over all checks plus a bonus that grows with the
     # success streak and the previous success rate.
     score = round((score + _score * _count) / (_count + 1) +
                   SUCCESS_BASIC * (_combo_success + 1) * _success_rate,
                   2)
     address = get_ip_addr_03(ip)
     # Keep the previously stored address when the lookup gives 'unknown'.
     address = _address if address == 'unknown' else address
     success_rate = round(1 - (_f_count / (_count + 1)), 3)
     stability = round(score * (_count + 1) * success_rate / PRECISION, 4)

     data['fail_count'] = _f_count
     data['createdTime'] = _created_time
     data['combo_fail'] = 0
     data['address'] = address
     data['score'] = score
     data['test_count'] = _count + 1
     data['combo_success'] = _combo_success + 1
     data['success_rate'] = str(success_rate * 100) + '%'
     data['stability'] = stability
     # Drop a truthy '_id' so it is not part of the update payload.
     if data.get('_id', False):
         del data['_id']
     self.db.update({'ip': ip, 'port': port}, data)
Esempio n. 4
0
 async def async_visit_target(self, db, url, proxy, bullet, sem, session,
                              scan=True):
     """
     Coroutine that checks one proxy against a target url and records the
     outcome.

     A HEAD request is sent through the proxy while holding ``sem``. On a
     200 response the score is recomputed as a running average and the
     record is passed to ``self.update`` (``scan`` true) or ``self.success``
     (``scan`` false). On any other status ``self.fail`` is called, but only
     when ``scan`` is true.

     :param db: database object handed on to update/success/fail
     :param url: target site url
     :param proxy: proxy settings (dict); the ``'http'`` entry is used
     :param bullet: the full stored data of the proxy being checked (dict)
     :param sem: semaphore limiting concurrent requests
     :param session: session object passed to ``send_async_http``
     :param scan: True when scanning an existing target collection, False
         for a first-time validation
     """
     # Coerce once so the new count and the weighted average below agree
     # even when the stored count is not an int.
     prev_count = int(bullet['test_count'])
     data = {
         'ip': bullet['ip'],
         'port': bullet['port'],
         'anony_type': bullet['anony_type'],
         'address': bullet['address'],
         'createdTime': bullet['createdTime'],
         'score': bullet['score'],
         'test_count': prev_count + 1,
         'url': url,
     }
     db_name = gen_target_db_name(url)
     async with sem:
         ret = await send_async_http(session,
                                     'head',
                                     url,
                                     retries=RETRIES,
                                     headers=headers,
                                     proxy=proxy['http'],
                                     timeout=TIMEOUT)
         t, code = ret['cost'], ret['code']
         if code != 200:
             if scan:
                 self.fail(db, data, db_name)
             return
         # Score of this single check: 100 at zero cost, falling linearly to
         # 0 at a cost of 15.
         this_score = round((1 - t / 15) * 100, 2)
         data['score'] = round(
             (bullet['score'] * prev_count + this_score) /
             data['test_count'], 2)
         data['total'] = round(data['score'] * data['test_count'], 2)
         data['resp_time'] = str(t) + 's'
         data['valid_time'] = time_to_date(int(time.time()))
         if scan:
             self.update(db, data, db_name)
         else:
             self.success(db, data, db_name)
Esempio n. 5
0
 def update(self, db, bullet, tname):
     """
     Write a successfully validated proxy back to its target collection.

     An empty ``createdTime`` is filled with the current time and an
     address of 'unknown' or '' is looked up again before the update.

     :param db: database object performing the update
     :param bullet: the full data of a single proxy (dict)
     :param tname: collection name belonging to the target url
     """
     ip, port = bullet['ip'], bullet['port']
     if bullet['createdTime'] == '':
         bullet['createdTime'] = time_to_date(int(time.time()))
     if bullet['address'] in ('unknown', ''):
         bullet['address'] = get_ip_addr(ip)
     db.update({'ip': ip, 'port': port}, bullet, tname=tname)
Esempio n. 6
0
 def success(self, db, bullet, tname):
     """
     Store a proxy after its first successful check against a target url.

     If the target collection already holds the ip/port pair, the stored
     ``_id`` is copied onto ``bullet`` and the work is handed to
     ``self.update``. Otherwise ``createdTime`` is set to now and the record
     is saved; a failing save is logged and otherwise ignored.

     :param db: database object performing the operations
     :param bullet: the full data of a single proxy (dict)
     :param tname: collection name belonging to the target url
     """
     ip, port = bullet['ip'], bullet['port']
     existing = db.select({'ip': ip, 'port': port}, tname=tname)
     # Look the address up again when it is 'unknown' or empty.
     if bullet['address'] in ('unknown', ''):
         bullet['address'] = get_ip_addr(ip)
     if existing:
         bullet['_id'] = existing[0]['_id']
         self.update(db, bullet, tname)
     else:
         bullet['createdTime'] = time_to_date(int(time.time()))
         try:
             db.save(bullet, tname=tname)
         except Exception as e:
             logger.error('%s,msg: %s ' % (e.__class__, e))