def load(self) -> list:
    """Scrape the 66ip proxy pool page and return a list of Proxy objects.

    Returns an empty list immediately when ``self._num`` is None. Each
    ip:port pair found on the page yields two Proxy entries, one for
    'http' and one for 'https'.

    Raises:
        Re-raises any error from the HTTP request after logging it via
        the context logger (when one is configured).
    """
    proxies = []
    if self._num is None:
        return proxies
    if self._context and self._context.logger:
        self._context.logger.info('SixSixIPProxySpider: loading proxy list.')
    url = SixSixIPProxySpider._POOL_URL.format(self._num)
    # The page lists entries as "ip:port<br />"; the lookahead keeps the
    # "<br />" out of the captured groups.
    pattern = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+)(?=<br />)')
    try:
        res = requests.get(url, proxies=self._sys_proxy, timeout=self._timeout)
        for match in pattern.finditer(res.text):
            for protocol in ('http', 'https'):
                # try/except moved INSIDE the protocol loop: a failure while
                # building one entry no longer skips its sibling protocol.
                try:
                    proxy = Proxy()
                    proxy.ip = match.group(1)
                    proxy.port = match.group(2)
                    proxy.protocol = protocol
                    proxy.proxy_url = self.proxy_url(proxy.ip, proxy.port, proxy.protocol)
                    proxy.collect_time = Datetime.now()
                    proxy.local = Config.local
                    proxies.append(proxy)
                except Exception:
                    # Best-effort scrape: drop the malformed entry, keep going.
                    # (Was a bare "except:", which also swallowed KeyboardInterrupt.)
                    continue
        return proxies
    except Exception:
        if self._context and self._context.logger:
            self._context.logger.exception('SixSixIPProxySpider: Failed to load proxy list.')
        raise
def load(self) -> list:
    """Fetch the fatezero proxy pool (one JSON object per line) as Proxy objects.

    Returns all parsed proxies when ``self._num`` is None, otherwise at
    most ``self._num`` of them.

    Raises:
        Re-raises any error from the HTTP request after logging it via
        the context logger (when one is configured).
    """
    proxies = []
    if self._context and self._context.logger:
        self._context.logger.info('FatezeroProxySpider: loading proxy list.')
    try:
        res = requests.get(FatezeroProxySpider._POOL_URL,
                           proxies=self._sys_proxy, timeout=self._timeout)
        for line in res.text.split('\n'):
            try:
                # BUG FIX: json.loads(..., encoding='utf-8') raises TypeError on
                # Python 3.9+ (the keyword was removed); requests has already
                # decoded res.text, so no encoding argument is needed.
                record = json.loads(line)
                proxy = Proxy()
                proxy.ip = record['host']
                proxy.port = record['port']
                proxy.protocol = record['type']
                proxy.proxy_url = self.proxy_url(proxy.ip, proxy.port, proxy.protocol)
                proxy.collect_time = Datetime.now()
                proxy.local = Config.local
                proxies.append(proxy)
            except Exception:
                # Skip blank lines and malformed JSON records.
                # (Was a bare "except:", which also swallowed KeyboardInterrupt.)
                continue
        return proxies if self._num is None else proxies[:self._num]
    except Exception:
        if self._context and self._context.logger:
            self._context.logger.exception('FatezeroProxySpider: Failed to load proxy list.')
        raise