コード例 #1
0
 def _update_src(self):
     for info in self._src_apis:
         try:
             platform = info.get('platform')
             api = info.get('api')
             parse_mould = info.get('parse_mould')
             rsp = requests.get(api)
             if not rsp:
                 log.error('Exception(%s): ' % platform + api)
                 continue
             if not parse_mould:
                 log.error('Exception: parse_mould is None.')
                 continue
             all_ips = parse_mould(rsp.text)
             http_ips = self._proxy_active_check(all_ips.get('HTTP', []))
             CacheManager().smadd('%s:http' % self.proxy_conf.source_key,
                                  http_ips)
             log.info('Proxies To HTTP Was Growth:%d' % len(http_ips))
             https_ips = self._proxy_active_check(all_ips.get('HTTPS', []))
             CacheManager().smadd('%s:https' % self.proxy_conf.source_key,
                                  https_ips)
             CacheManager().smadd('%s:http' % self.proxy_conf.source_key,
                                  https_ips)
             log.info('Proxies To HTTPS Was Growth:%d' % len(https_ips))
         except Exception as e:
             log.error('Exception[IP_SOURCE]:')
             log.exception(e)
コード例 #2
0
ファイル: test.py プロジェクト: Lockeysama/DistributedCrawler
def redis():
    """
    Call a series of methods on the tddc Record/Cache/Status managers.

    Writes three hash values through ``StatusManager``, reads two of them
    back, then queries ``RecordManager``, ``CacheManager`` and
    ``StatusManager`` and prints every result.  Each manager's ``logger``
    is used once as well.
    """
    from tddc import RecordManager
    from tddc import CacheManager
    from tddc import StatusManager

    # Arguments shared by the hash set/get calls below.
    table = 'test:event:status:test'
    event_id = 'test_id_10'
    value_key = 'test:event:status:value:test_id_10'
    writes = (
        ('client1', '1000'),
        ('client1', '1200'),
        ('tddc_worker_monitor_host_id', '1200'),
    )

    RecordManager()
    for name, value in writes:
        StatusManager().set_the_hash_value_for_the_hash(
            table, event_id, value_key, name, value)

    single = StatusManager().get_the_hash_value_for_the_hash(
        table, event_id, 'client2')
    print(single)
    whole = StatusManager().get_the_hash_value_for_the_hash(table, event_id)
    print(whole)

    record = RecordManager().get_record_sync(
        'tddc.event.record.crawler.a', '1-1505811162.56-1799', _callback)
    print(record)
    RecordManager().logger.debug('Record')

    CacheManager()
    picked = CacheManager().get_random('tddc:proxy:pool:che300')
    print(picked)
    CacheManager().logger.info('Cache')

    StatusManager()
    status = StatusManager().get_status(
        'tddc.Task.Status.che300.1200', 'XRTDepEFx255UPyqEZEJsG')
    print(status)
    StatusManager().logger.error('Status')
コード例 #3
0
 def _check(self, tag, proxy_type):
     """
     Loop forever, testing random source proxies against the rule moulds.

     After an initial 5 second sleep, each round:

     * sleeps 10 seconds if ``self._rules_moulds[proxy_type]`` is empty;
     * otherwise draws a random proxy from ``<source_key>:<proxy_type>``
       and, when none is available, sleeps 10 seconds (the worker with
       ``tag == 1`` logs a warning on every 6th consecutive-or-not miss);
     * otherwise instantiates every mould with the proxy and stores the
       proxy under ``<pool_key>:<platform>`` when the result is ``useful``.

     Any exception raised inside a round is logged and the loop continues.

     :param tag: worker tag; only the value ``1`` emits "No Proxy" warnings
     :param proxy_type: key into ``self._rules_moulds`` and suffix of the
         source cache key
     """
     misses = 0
     gevent.sleep(5)
     while True:
         try:
             if len(self._rules_moulds[proxy_type]) == 0:
                 gevent.sleep(10)
                 continue

             cache = CacheManager()
             source_name = '%s:%s' % (self.proxy_conf.source_key, proxy_type)
             proxy = cache.get_random(source_name)
             if not proxy:
                 # Throttle the warning: one worker, every 6th miss.
                 if tag == 1 and misses % 6 == 0:
                     log.warning('No Proxy(%s).' % proxy_type)
                 misses += 1
                 gevent.sleep(10)
                 continue

             for platform, cls in self._rules_moulds[proxy_type].items():
                 outcome = cls(proxy)
                 if not outcome.useful:
                     continue
                 pool = CacheManager()
                 pool_name = '%s:%s' % (self.proxy_conf.pool_key, platform)
                 pool.set(pool_name, proxy)
                 log.debug('[%s:%s:%s]' % (proxy_type, platform, proxy))
         except Exception as e:
             log.exception(e)
コード例 #4
0
 def remove_proxy(self, task, proxy):
     """
     Remove the given proxy from the proxy cache pool.

     :param task: task the proxy belongs to; when ``task.proxy`` equals
         ``'ADSL'`` the ``tddc:proxy:adsl`` pool is used, otherwise the
         pool ``<task_conf.pool_key>:<task.platform>``
     :param proxy: proxy string such as ``scheme://host:port``; a bare
         ``host:port`` without scheme is accepted as well.  Nothing is
         done when it is falsy.
     :return: None
     """
     if not proxy:
         return
     # Strip the ``scheme://`` prefix.  A proxy without a scheme used to
     # raise IndexError here; it is now used as-is.
     parts = proxy.split('//')
     address = parts[1] if len(parts) > 1 else parts[0]
     if task.proxy == 'ADSL':
         CacheManager().remove('tddc:proxy:adsl', address)
     else:
         CacheManager().remove(
             '%s:%s' % (task_conf.pool_key, task.platform), address)
コード例 #5
0
 def _get_adsl_proxy(self):
     """
     Ask the ADSL server for its IP until that succeeds and publish it.

     ``self.adsl_server.get_ip()`` is retried immediately (no delay) for
     as long as it raises.  On success the proxy ``<ip>:52460`` is stored
     under the cache key ``tddc:proxy:adsl``.

     :return: the proxy string ``<ip>:52460``
     """
     while True:
         try:
             ip = self.adsl_server.get_ip()
         except Exception as e:
             # ``e.message`` does not exist on Python 3 exceptions; using
             # it here would raise AttributeError and abort the retry loop.
             self.warning(str(e))
         else:
             proxy = '%s:52460' % ip
             CacheManager().set('tddc:proxy:adsl', proxy)
             return proxy
コード例 #6
0
 def _redial_adsl_proxy(self):
     """
     Redial the ADSL server until it yields an IP and publish it.

     ``self.adsl_server.redial()`` is retried immediately (no delay) for
     as long as it raises or returns a falsy value.  On success the proxy
     ``<ip>:52460`` is stored under the cache key ``tddc:proxy:adsl``.

     :return: the proxy string ``<ip>:52460``
     """
     while True:
         try:
             ip = self.adsl_server.redial()
         except Exception as e:
             # ``e.message`` does not exist on Python 3 exceptions; using
             # it here would raise AttributeError and abort the retry loop.
             self.warning(str(e))
         else:
             if not ip:
                 continue
             proxy = '%s:52460' % ip
             CacheManager().set('tddc:proxy:adsl', proxy)
             return proxy
コード例 #7
0
 def _update_adsl(self):
     """
     Redial for a new ADSL proxy when the ADSL cache entry is empty.

     Does nothing when ``tddc:proxy:adsl`` still yields a proxy or when
     ``self.switching`` is already set.  Otherwise ``self.switching`` is
     raised for the duration of ``self._redial_adsl_proxy()`` and the
     result is stored in ``self.adsl_proxy``.

     :return: None
     """
     proxy = CacheManager().get_random('tddc:proxy:adsl', False)
     if proxy or self.switching:
         return
     self.switching = True
     try:
         self.adsl_proxy = self._redial_adsl_proxy()
         if self.adsl_proxy:
             log.info('ADSL Proxy(%s) Was Updated.' % self.adsl_proxy)
     except Exception as e:
         # ``e.message`` does not exist on Python 3 exceptions.
         log.warning(str(e))
     finally:
         # Always clear the flag, even when the redial is interrupted by
         # something that is not an ``Exception`` subclass; otherwise no
         # later call could ever redial again.
         self.switching = False
コード例 #8
0
ファイル: proxy.py プロジェクト: hejunling/tddc_crawler
 def process_request(self, request, spider):
     """
     Attach a proxy to the request when it does not carry one yet.

     Presumably a Scrapy downloader-middleware hook (TODO confirm against
     the enclosing class).  The task is taken from the first element of
     ``request.meta['item']``:

     * no ``task.proxy``: the request is left untouched;
     * ``task.proxy`` is not one of the http/https/adsl keywords: it is
       used verbatim as ``request.meta['proxy']``;
     * ``task.proxy == 'ADSL'``: an address is drawn from
       ``tddc:proxy:adsl``;
     * otherwise an address is drawn from ``<pool_key>:<task.platform>``
       and its host is also sent as ``X-Forwarded-For``.

     :param request: request being processed; ``meta`` and ``headers``
         may be modified
     :param spider: unused
     :return: ``None`` normally, or a ``Response`` with status ``-1000``
         when no proxy address could be drawn from the cache
     """
     proxy = request.meta.get('proxy')
     if proxy:
         return
     task, _ = request.meta.get('item')
     if not task.proxy:
         return
     if task.proxy not in ['http', 'https', 'HTTP', 'HTTPS', 'ADSL', 'adsl']:
         # An explicit proxy address was configured on the task.
         request.meta['proxy'] = task.proxy
         return
     # NOTE(review): lowercase 'adsl' passes the keyword check above but
     # does not match here, so it is served from the platform pool --
     # confirm this is intended.
     if task.proxy == 'ADSL':
         ip_port = CacheManager().get_random('tddc:proxy:adsl', False)
     else:
         ip_port = CacheManager().get_random('%s:%s' % (self.proxy_conf.pool_key,
                                                        task.platform))
         if ip_port:
             request.headers['X-Forwarded-For'] = ip_port.split(':')[0]
     if not ip_port:
         return Response(url=request.url, status=-1000, request=request)
     ip, port = ip_port.split(':')
     # ``str.lower()`` replaces the free function ``lower()``, which only
     # exists as ``string.lower`` on Python 2.
     scheme = getattr(task, 'proxy_type', 'http').lower()
     proxy = '{}://{}:{}'.format(scheme, ip, port)
     request.meta['proxy'] = proxy
コード例 #9
0
 def _back_pool():
     """
     Store the enclosing scope's proxy in the task's platform pool.

     The part of ``proxy`` after ``//`` is written to the cache key
     ``<task_conf.pool_key>:<task.platform>``.
     """
     cache = CacheManager()
     pool_name = '%s:%s' % (task_conf.pool_key, task.platform)
     address = proxy.split('//')[1]
     cache.set(pool_name, address)