def process_request(self, request, spider):
    """Set an OAuth2 ``Authorization`` header on ``request``, throttling near quota.

    A ``(token, used)`` pair is obtained from ``one_valid_token``. When
    ``used`` exceeds ``self.per_token_hours_limit - self.buffer_size``,
    ``calibration`` is called, a token is selected again, and
    ``token_status`` is queried for that token; if it reports fewer than
    ``BUFFER_SIZE`` remaining calls, this method sleeps for the reported
    ``reset_time_in`` seconds before continuing.

    Args:
        request: Outgoing request whose ``Authorization`` header is set.
        spider: Spider issuing the request; passed through to the logger.

    Returns:
        None (implicit).

    Raises:
        CloseSpider: If ``one_valid_token`` returns ``None``, either on the
            first lookup or on the lookup that follows calibration.
    """
    token_and_used = one_valid_token(self.req_count, self.tk_alive)
    if token_and_used is None:
        log.msg(format='No token alive', level=log.INFO, spider=spider)
        raise CloseSpider('No Token Alive')
    token, used = token_and_used

    if used > self.per_token_hours_limit - self.buffer_size:
        # presumably re-syncs the local counters with the API's view of the
        # quota before a token is chosen again -- confirm against calibration().
        calibration(self.req_count, self.tk_alive, self.per_token_hours_limit)

        # The pool may be empty after calibration; fail the same way as the
        # first lookup instead of crashing on tuple-unpacking None.
        token_and_used = one_valid_token(self.req_count, self.tk_alive)
        if token_and_used is None:
            log.msg(format='No token alive', level=log.INFO, spider=spider)
            raise CloseSpider('No Token Alive')
        # Keep the refreshed count so the log line below describes the token
        # that is actually attached to the request.
        token, used = token_and_used

        reset_time_in, remaining = token_status(token)
        # NOTE(review): this compares against the module-level BUFFER_SIZE
        # while the check above uses self.buffer_size -- confirm they agree.
        if remaining < BUFFER_SIZE:
            log.msg(
                format='REACH API REQUEST BUFFER, SLEEP %(reset_time_in)s SECONDS',
                level=log.WARNING, spider=spider, reset_time_in=reset_time_in)
            # NOTE(review): time.sleep blocks the calling thread; inside a
            # Scrapy middleware this presumably stalls all other requests too.
            time.sleep(reset_time_in)

    log.msg(format='Request token: %(token)s used: %(used)s',
            level=log.INFO, spider=spider, token=token, used=used)
    request.headers['Authorization'] = 'OAuth2 %s' % token
def process_request(self, request, spider):
    """Sign ``request`` with an OAuth token, waiting while quotas are exhausted.

    Two independent budgets are checked before the request is signed:

    * Per-token: a ``(token, used)`` pair comes from ``one_valid_token``.
      While ``self.per_token_hours_limit - used`` is below
      ``self.buffer_size``, the method sleeps ``SLEEP_TIME_CHECK`` seconds
      and selects a token again.
    * Per-IP: ``one_ip_request_count`` yields ``(ip_addr, ip_used)`` for
      ``self.localIp``. While ``ip_used`` exceeds
      ``self.per_ip_hours_limit - self.buffer_size``, the method sleeps
      ``SLEEP_TIME_CHECK`` seconds and re-reads the count.

    The token string is then split on ``'_'`` into key and secret, and the
    headers produced by ``OAuth4Scrapy.getAuthHeaders`` for ``request.url``
    are merged into ``request.headers``.

    Args:
        request: Outgoing request to sign; its headers are updated in place.
        spider: Spider issuing the request; passed through to the logger.

    Returns:
        None (implicit).

    Raises:
        CloseSpider: If ``one_valid_token`` returns ``None``, on the first
            lookup or on any re-check inside the wait loop.
    """
    token_and_used = one_valid_token(self.req_count, self.tk_alive)
    if token_and_used is None:
        log.msg(format='No token alive', level=log.INFO, spider=spider)
        raise CloseSpider('No Token Alive')
    token, used = token_and_used

    # NOTE(review): the time.sleep calls below block the calling thread;
    # inside a Scrapy middleware this presumably stalls all other requests.
    if used > self.per_token_hours_limit - self.buffer_size:
        while True:
            # Tokens can disappear while we wait; fail like the first lookup
            # instead of crashing on tuple-unpacking None.
            token_and_used = one_valid_token(self.req_count, self.tk_alive)
            if token_and_used is None:
                log.msg(format='No token alive', level=log.INFO, spider=spider)
                raise CloseSpider('No Token Alive')
            token, used = token_and_used

            remaining = self.per_token_hours_limit - used
            if remaining >= self.buffer_size:
                # Once remaining has recovered, leave the sleep loop.
                break
            log.msg(format='[Token] %(token)s REACH API REQUEST BUFFER, SLEEP %(reset_time_in)s SECONDS',
                    level=log.WARNING, spider=spider, token=token,
                    reset_time_in=SLEEP_TIME_CHECK)
            time.sleep(SLEEP_TIME_CHECK)

    ip_threshold = self.per_ip_hours_limit - self.buffer_size
    ip_addr, ip_used = one_ip_request_count(self.req_ip_count, self.localIp)
    if ip_used > ip_threshold:
        while True:
            ip_addr, ip_used = one_ip_request_count(self.req_ip_count, self.localIp)
            # Exit on the exact complement of the entry test, so a count
            # sitting precisely on the threshold is treated the same way
            # whether it is seen before or during the wait.
            if ip_used <= ip_threshold:
                break
            log.msg(format='[Token] %(ip)s REACH IP REQUEST BUFFER, SLEEP %(reset_time_in)s SECONDS',
                    level=log.WARNING, spider=spider, ip=ip_addr,
                    reset_time_in=SLEEP_TIME_CHECK)
            time.sleep(SLEEP_TIME_CHECK)

    log.msg(format='Request token: %(token)s used: %(used)s, Request IP: %(ip)s used: %(ip_used)s',
            level=log.INFO, spider=spider, token=token, used=used,
            ip=ip_addr, ip_used=ip_used)

    # Tokens are stored as "<key>_<secret>"; the two-name unpacking raises
    # ValueError if the string does not contain exactly one underscore.
    token_key, token_secret = token.split('_')
    signer = OAuth4Scrapy(self.api_key, self.api_secret)
    signer.setToken(token_key, token_secret)
    request.headers.update(signer.getAuthHeaders(request.url))
def process_request(self, request, spider):
    """Attach an OAuth2 bearer token to ``request``, pausing when quota is low.

    ``one_valid_token`` supplies a ``(token, used)`` pair. If ``used`` is
    above ``self.per_token_hours_limit - self.buffer_size``, the method calls
    ``calibration``, selects a token again, and asks ``token_status`` for
    ``(reset_time_in, remaining)``. When ``remaining`` is below
    ``BUFFER_SIZE`` it sleeps ``reset_time_in`` seconds. Finally the chosen
    token is written to the request's ``Authorization`` header.

    Args:
        request: Outgoing request whose ``Authorization`` header is set.
        spider: Spider issuing the request; passed through to the logger.

    Returns:
        None (implicit).

    Raises:
        CloseSpider: If ``one_valid_token`` returns ``None``, either on the
            initial lookup or on the lookup performed after calibration.
    """
    selection = one_valid_token(self.req_count, self.tk_alive)
    if selection is None:
        log.msg(format='No token alive', level=log.INFO, spider=spider)
        raise CloseSpider('No Token Alive')
    token, used = selection

    if used > self.per_token_hours_limit - self.buffer_size:
        # presumably refreshes the stored usage counters before re-selecting
        # a token -- confirm against calibration().
        calibration(self.req_count, self.tk_alive, self.per_token_hours_limit)

        # Calibration may leave no usable token; raise CloseSpider as on the
        # initial lookup rather than failing on tuple-unpacking None.
        selection = one_valid_token(self.req_count, self.tk_alive)
        if selection is None:
            log.msg(format='No token alive', level=log.INFO, spider=spider)
            raise CloseSpider('No Token Alive')
        # Take the refreshed count as well, so the log line below reports the
        # usage of the token that is actually sent.
        token, used = selection

        reset_time_in, remaining = token_status(token)
        # NOTE(review): module-level BUFFER_SIZE is used here but
        # self.buffer_size above -- confirm both hold the same value.
        if remaining < BUFFER_SIZE:
            log.msg(format='REACH API REQUEST BUFFER, SLEEP %(reset_time_in)s SECONDS',
                    level=log.WARNING, spider=spider,
                    reset_time_in=reset_time_in)
            # NOTE(review): time.sleep blocks the calling thread; inside a
            # Scrapy middleware this presumably stalls all other requests too.
            time.sleep(reset_time_in)

    log.msg(format='Request token: %(token)s used: %(used)s',
            level=log.INFO, spider=spider, token=token, used=used)
    request.headers['Authorization'] = 'OAuth2 %s' % token