def _update_src(self): for info in self._src_apis: try: platform = info.get('platform') api = info.get('api') parse_mould = info.get('parse_mould') rsp = requests.get(api) if not rsp: log.error('Exception(%s): ' % platform + api) continue if not parse_mould: log.error('Exception: parse_mould is None.') continue all_ips = parse_mould(rsp.text) http_ips = self._proxy_active_check(all_ips.get('HTTP', [])) CacheManager().smadd('%s:http' % self.proxy_conf.source_key, http_ips) log.info('Proxies To HTTP Was Growth:%d' % len(http_ips)) https_ips = self._proxy_active_check(all_ips.get('HTTPS', [])) CacheManager().smadd('%s:https' % self.proxy_conf.source_key, https_ips) CacheManager().smadd('%s:http' % self.proxy_conf.source_key, https_ips) log.info('Proxies To HTTPS Was Growth:%d' % len(https_ips)) except Exception as e: log.error('Exception[IP_SOURCE]:') log.exception(e)
def redis():
    """Manual smoke test for the tddc record, cache and status managers.

    Writes three hash values under a test status key, reads them back, then
    queries a record, a random cached proxy and a task status, printing each
    result and emitting one log line per manager. Requires the services
    behind the ``tddc`` managers to be reachable.
    """
    from tddc import RecordManager, CacheManager, StatusManager

    RecordManager()

    status_name = 'test:event:status:test'
    item_id = 'test_id_10'
    value_name = 'test:event:status:value:test_id_10'
    writes = (
        ('client1', '1000'),
        ('client1', '1200'),
        ('tddc_worker_monitor_host_id', '1200'),
    )
    for field, value in writes:
        StatusManager().set_the_hash_value_for_the_hash(
            status_name, item_id, value_name, field, value)

    print(StatusManager().get_the_hash_value_for_the_hash(
        status_name, item_id, 'client2'))
    print(StatusManager().get_the_hash_value_for_the_hash(
        status_name, item_id))

    # ``_callback`` is resolved from the surrounding module.
    print(RecordManager().get_record_sync(
        'tddc.event.record.crawler.a', '1-1505811162.56-1799', _callback))
    RecordManager().logger.debug('Record')

    CacheManager()
    print(CacheManager().get_random('tddc:proxy:pool:che300'))
    CacheManager().logger.info('Cache')

    StatusManager()
    print(StatusManager().get_status(
        'tddc.Task.Status.che300.1200', 'XRTDepEFx255UPyqEZEJsG'))
    StatusManager().logger.error('Status')
def _check(self, tag, proxy_type):
    """Endlessly validate source proxies of ``proxy_type`` against the rules.

    After an initial 5 second delay the loop repeatedly takes a random proxy
    from ``<source_key>:<proxy_type>``, runs every rule class registered in
    ``self._rules_moulds[proxy_type]`` on it and stores the proxy under
    ``<pool_key>:<platform>`` for each rule whose result is ``useful``.
    When there are no rules or no proxy, it waits 10 seconds and retries.
    Any exception is logged and the loop carries on; the method never returns.

    :param tag: worker number; only the worker with ``tag == 1`` emits the
        "No Proxy" warning (on every sixth empty round).
    :param proxy_type: proxy kind used as key suffix and rule selector.
    """
    empty_rounds = 0
    gevent.sleep(5)
    while True:
        try:
            if len(self._rules_moulds[proxy_type]) == 0:
                gevent.sleep(10)
                continue

            proxy = CacheManager().get_random(
                '%s:%s' % (self.proxy_conf.source_key, proxy_type))
            if not proxy:
                # Warn from a single worker and only once every six rounds.
                if empty_rounds % 6 == 0 and tag == 1:
                    log.warning('No Proxy(%s).' % proxy_type)
                empty_rounds += 1
                gevent.sleep(10)
                continue

            for platform, cls in self._rules_moulds[proxy_type].items():
                result = cls(proxy)
                if not result.useful:
                    continue
                CacheManager().set(
                    '%s:%s' % (self.proxy_conf.pool_key, platform), proxy)
                log.debug('[%s:%s:%s]' % (proxy_type, platform, proxy))
        except Exception as e:
            log.exception(e)
def remove_proxy(self, task, proxy):
    """Remove ``proxy`` from the proxy cache it belongs to.

    The part after the ``//`` scheme separator is what gets removed, so
    ``http://1.2.3.4:80`` is removed as ``1.2.3.4:80``. A value without a
    separator is removed as given instead of raising ``IndexError``.

    :param task: task whose ``proxy`` attribute selects the cache: ``'ADSL'``
        targets ``tddc:proxy:adsl``, anything else targets
        ``<task_conf.pool_key>:<task.platform>``.
    :param proxy: proxy URL to remove; a falsy value makes this a no-op.
    :return: None
    """
    if not proxy:
        return
    pieces = proxy.split('//')
    # Keep the historical result for ``scheme://host:port`` and fall back to
    # the whole string when there is no scheme separator.
    address = pieces[1] if len(pieces) > 1 else pieces[0]
    if task.proxy == 'ADSL':
        CacheManager().remove('tddc:proxy:adsl', address)
    else:
        CacheManager().remove(
            '%s:%s' % (task_conf.pool_key, task.platform), address)
def _get_adsl_proxy(self):
    """Ask the ADSL server for its current IP and cache it as a proxy.

    Retries until ``self.adsl_server.get_ip()`` yields a non-empty IP;
    failures are reported through ``self.warning``. The resulting
    ``<ip>:52460`` address is stored under ``tddc:proxy:adsl``.

    :return: the cached proxy address, ``'<ip>:52460'``.
    """
    while True:
        try:
            ip = self.adsl_server.get_ip()
        except Exception as e:
            self.warning(e.message)
            continue
        if not ip:
            # Same guard as _redial_adsl_proxy: never cache an address
            # built from an empty answer (e.g. 'None:52460').
            continue
        proxy = '%s:52460' % ip
        CacheManager().set('tddc:proxy:adsl', proxy)
        return proxy
def _redial_adsl_proxy(self):
    """Redial the ADSL line and cache the new IP as a proxy.

    Keeps calling ``self.adsl_server.redial()`` until it returns a non-empty
    IP; exceptions are reported through ``self.warning`` and retried. The
    resulting ``<ip>:52460`` address is stored under ``tddc:proxy:adsl``.

    :return: the cached proxy address, ``'<ip>:52460'``.
    """
    while True:
        try:
            new_ip = self.adsl_server.redial()
        except Exception as e:
            self.warning(e.message)
            continue
        if new_ip:
            address = '%s:52460' % new_ip
            CacheManager().set('tddc:proxy:adsl', address)
            return address
def _update_adsl(self):
    """Redial for a new ADSL proxy when none is cached.

    Does nothing if ``tddc:proxy:adsl`` still holds a proxy or if another
    update is already in progress (``self.switching``). Otherwise the line
    is redialled and the new address stored in ``self.adsl_proxy``.

    :return: None
    """
    proxy = CacheManager().get_random('tddc:proxy:adsl', False)
    if proxy or self.switching:
        return
    self.switching = True
    try:
        self.adsl_proxy = self._redial_adsl_proxy()
        if self.adsl_proxy:
            log.info('ADSL Proxy(%s) Was Updated.' % self.adsl_proxy)
    except Exception as e:
        log.warning(e.message)
    finally:
        # Always release the guard: if it stayed True after an error that
        # is not caught above, every later update would be skipped.
        self.switching = False
def process_request(self, request, spider):
    """Choose and attach a proxy for ``request`` if it does not have one.

    The task is taken from ``request.meta['item']`` (a ``(task, _)`` pair).
    ``task.proxy`` decides what happens:

    * empty: the request is left untouched;
    * any value other than the http/https/ADSL keywords: used verbatim as
      the proxy;
    * ``'ADSL'``: a proxy is read from ``tddc:proxy:adsl``;
    * otherwise: a proxy is read from ``<pool_key>:<task.platform>`` and its
      IP is also written to the ``X-Forwarded-For`` header.

    :param request: request whose ``meta`` / ``headers`` are updated.
    :param spider: unused.
    :return: ``None`` normally; a ``Response`` with status ``-1000`` when no
        proxy could be obtained from the cache.
    """
    if request.meta.get('proxy'):
        return

    task, _ = request.meta.get('item')
    if not task.proxy:
        return

    keywords = ('http', 'https', 'HTTP', 'HTTPS', 'ADSL', 'adsl')
    if task.proxy not in keywords:
        # An explicit proxy address was supplied by the task.
        request.meta['proxy'] = task.proxy
        return

    cache = CacheManager()
    if task.proxy == 'ADSL':
        # NOTE: no Proxy-Authorization header is sent for the ADSL proxy;
        # the basic-auth code that used to live here was disabled.
        ip_port = cache.get_random('tddc:proxy:adsl', False)
    else:
        ip_port = cache.get_random(
            '%s:%s' % (self.proxy_conf.pool_key, task.platform))
        if ip_port:
            request.headers['X-Forwarded-For'] = ip_port.split(':')[0]

    if not ip_port:
        return Response(url=request.url, status=-1000, request=request)

    ip, port = ip_port.split(':')
    scheme = lower(getattr(task, 'proxy_type', 'http'))
    request.meta['proxy'] = '{}://{}:{}'.format(scheme, ip, port)
def _back_pool():
    """Put the current proxy back into its platform pool.

    Stores the part of ``proxy`` after the ``//`` scheme separator under
    ``<task_conf.pool_key>:<task.platform>``. ``task`` and ``proxy`` are not
    parameters; they are resolved from the surrounding scope.
    """
    cache = CacheManager()
    pool_name = '%s:%s' % (task_conf.pool_key, task.platform)
    address = proxy.split('//')[1]
    cache.set(pool_name, address)