def build_check_request(self, item: Proxy):
    """Build the request used to check a single proxy item.

    The check URL template and the response parser are both obtained from
    ``self.get_check_approach(scheme)``; the template is then filled in with
    the item's scheme. The item itself and the parser are carried in the
    request ``meta`` under ``_item_obj`` and ``_response_parser``.

    :param item: proxy item; its ``scheme`` and ``url`` fields are read
    :return: Request whose callback is ``self.check_ip``
    """
    scheme = item.get('scheme')
    proxy_url = item.get('url')
    # Lazy %-args: the message is only rendered if DEBUG is enabled.
    self.logger.debug('Checking %s', proxy_url)

    url_template, response_parser = self.get_check_approach(scheme)
    meta = {
        'proxy': proxy_url,  # the request is sent through the proxy under test
        'max_retry_times': 5,
        'download_timeout': 20,
        '_item_obj': item,
        '_response_parser': response_parser,
    }
    req = Request(
        url_template.format(scheme=scheme),
        callback=self.check_ip,
        meta=meta,
        dont_filter=True,
    )
    # Only the spider named 'checker' attaches a failure handler.
    if self.name == 'checker':
        req.errback = self.check_ip_failed
    return req
def build_check_recipient(self, ip, port, scheme, user=None, password=None):
    """Build an availability-check request for a newly found proxy.

    1. stop the spider if ``self.complete_condition()`` is already met
    2. drop the proxy if ``self.already_exists`` reports it as known
    3. otherwise wrap it in a ``Proxy`` item and build the check request

    :param ip: proxy host
    :param port: proxy port
    :param scheme: proxy scheme
    :param user: optional user name; only used together with ``password``
    :param password: optional password; only used together with ``user``
    :return: the Request from ``self.build_check_request``, or an empty
        dict when the proxy is a duplicate and is dropped
    :raises exceptions.CloseSpider: when ``self.complete_condition()`` is true
    """
    if self.complete_condition():
        raise exceptions.CloseSpider('Enough items')

    spec = dict(ip=ip, port=port, scheme=scheme)
    if self.already_exists(spec):
        # list(...) keeps the message readable (no ``dict_values(...)``
        # wrapper); lazy %-args defer rendering until DEBUG is enabled.
        self.logger.debug('Dropped duplicated: %s', list(spec.values()))
        return {}  # drop it

    proxy_url = utils.build_proxy_url(ip, port, scheme, user, password)
    # Credentials count only when both parts are present.
    need_auth = bool(user and password)
    item = Proxy(
        ip=ip,
        scheme=scheme,
        port=port,
        need_auth=need_auth,
        url=proxy_url,
    )
    if need_auth:
        item['user'], item['password'] = user, password
    return self.build_check_request(item)
def start_requests(self):
    """Yield check requests for stored proxies that are due for a re-check.

    Every key returned by ``self.srv.get_all_keys()`` is loaded with
    ``self.srv.hgetall_dict``. Records rejected by ``valid_format`` are
    deleted from the store; for the rest, a request is produced only when
    ``exceed_check_period(last_check)`` is true.

    :return: generator of Requests built by ``self.build_check_request``
    """
    for key in self.srv.get_all_keys():
        data = self.srv.hgetall_dict(key)

        # Validate before touching the record, so a malformed entry is
        # deleted instead of raising on ``data.get``.
        if not valid_format(data):
            # One-element tuple keeps %-formatting correct even if ``data``
            # happens to be a tuple.
            self.srv.delete(key, 'Error format %s' % (data,))
            continue

        last_check = data.get('last_check', 0)  # never checked -> 0
        if exceed_check_period(last_check):
            yield self.build_check_request(Proxy(**data))
def _validate_type(self):
    """Ensure ``self._item`` is a ``Proxy``.

    An item that is already a ``Proxy`` is left untouched; anything else is
    passed to the ``Proxy`` constructor and the result is stored back.
    """
    current = self._item
    if isinstance(current, Proxy):
        return
    self._item = Proxy(current)