Example #1
0
    def process_request(self, request, spider):
        """Attach an OAuth2 ``Authorization`` header to *request*.

        A token is picked with ``one_valid_token``. When its usage count
        exceeds ``self.per_token_hours_limit - self.buffer_size``, the
        counters are handed to ``calibration``, a token is picked again and,
        if ``token_status`` reports fewer than ``BUFFER_SIZE`` remaining
        calls, the method sleeps for the reported reset interval before
        continuing.

        Args:
            request: outgoing request whose ``headers`` mapping is updated.
            spider: spider the request belongs to; only passed to the logger.

        Raises:
            CloseSpider: if ``one_valid_token`` returns ``None``, either on
                the first selection or on the one made after calibration.
        """
        token_and_used = one_valid_token(self.req_count, self.tk_alive)
        if token_and_used is None:
            log.msg(format='No token alive', level=log.INFO, spider=spider)

            raise CloseSpider('No Token Alive')
        token, used = token_and_used

        if used > self.per_token_hours_limit - self.buffer_size:
            calibration(self.req_count, self.tk_alive,
                        self.per_token_hours_limit)

            # The selection can come back empty here as well; handle it the
            # same way as the first selection instead of failing on unpack.
            token_and_used = one_valid_token(self.req_count, self.tk_alive)
            if token_and_used is None:
                log.msg(format='No token alive', level=log.INFO, spider=spider)

                raise CloseSpider('No Token Alive')
            # Keep the refreshed count so the log line below describes the
            # token that is actually attached to the request.
            token, used = token_and_used

            reset_time_in, remaining = token_status(token)
            # NOTE(review): this compares against the module-level BUFFER_SIZE
            # while the check above uses self.buffer_size - confirm both are
            # meant to carry the same value.
            if remaining < BUFFER_SIZE:
                log.msg(
                    format=
                    'REACH API REQUEST BUFFER, SLEEP %(reset_time_in)s SECONDS',
                    level=log.WARNING,
                    spider=spider,
                    reset_time_in=reset_time_in)

                # NOTE(review): time.sleep blocks the calling thread; if this
                # runs on the crawler's event-loop thread every other request
                # waits too - confirm that is intended.
                time.sleep(reset_time_in)

        log.msg(format='Request token: %(token)s used: %(used)s',
                level=log.INFO,
                spider=spider,
                token=token,
                used=used)
        request.headers['Authorization'] = 'OAuth2 %s' % token
Example #2
0
    def process_request(self, request, spider):
        """Throttle on token and IP usage, then sign *request* with OAuth.

        The method waits, polling every ``SLEEP_TIME_CHECK`` seconds, while
        the selected token's usage count is above
        ``self.per_token_hours_limit - self.buffer_size``. It then does the
        same for the usage count that ``one_ip_request_count`` reports for
        ``self.localIp``, against
        ``self.per_ip_hours_limit - self.buffer_size``. Finally the token is
        split on ``'_'`` into key and secret, and the headers produced by
        ``OAuth4Scrapy.getAuthHeaders`` for ``request.url`` are merged into
        ``request.headers``.

        Args:
            request: outgoing request; its ``url`` is signed and its
                ``headers`` mapping is updated.
            spider: spider the request belongs to; only passed to the logger.

        Raises:
            CloseSpider: if ``one_valid_token`` returns ``None``, either on
                the first selection or while waiting for quota.
        """
        token_and_used = one_valid_token(self.req_count, self.tk_alive)
        if token_and_used is None:
            log.msg(format='No token alive',
                    level=log.INFO, spider=spider)

            raise CloseSpider('No Token Alive')
        token, used = token_and_used

        # "remaining < buffer_size" is the same test as "used > limit - buffer_size".
        token_threshold = self.per_token_hours_limit - self.buffer_size
        if used > token_threshold:
            while True:
                token_and_used = one_valid_token(self.req_count, self.tk_alive)
                if token_and_used is None:
                    # The selection can come back empty while we wait; stop the
                    # spider instead of failing on the tuple unpack.
                    log.msg(format='No token alive',
                            level=log.INFO, spider=spider)

                    raise CloseSpider('No Token Alive')
                token, used = token_and_used

                if used <= token_threshold:
                    # Leave the sleep loop once the remaining quota has recovered.
                    break

                log.msg(format='[Token] %(token)s REACH API REQUEST BUFFER, SLEEP %(reset_time_in)s SECONDS',
                        level=log.WARNING, spider=spider, token=token, reset_time_in=SLEEP_TIME_CHECK)

                # NOTE(review): time.sleep blocks the calling thread; if this
                # runs on the crawler's event-loop thread every other request
                # waits too - confirm that is intended.
                time.sleep(SLEEP_TIME_CHECK)

        ip_threshold = self.per_ip_hours_limit - self.buffer_size
        ip_addr, ip_used = one_ip_request_count(self.req_ip_count, self.localIp)
        if ip_used > ip_threshold:
            while True:
                ip_addr, ip_used = one_ip_request_count(self.req_ip_count, self.localIp)
                # Exit on the exact negation of the entry test so a count equal
                # to the threshold, which would not have entered the loop, does
                # not keep it sleeping.
                if ip_used <= ip_threshold:
                    break

                # NOTE(review): the '[Token]' prefix on this IP message looks
                # copy-pasted; left unchanged in case log consumers match on it.
                log.msg(format='[Token] %(ip)s REACH IP REQUEST BUFFER, SLEEP %(reset_time_in)s SECONDS',
                        level=log.WARNING, spider=spider, ip=ip_addr, reset_time_in=SLEEP_TIME_CHECK)

                time.sleep(SLEEP_TIME_CHECK)

        log.msg(format='Request token: %(token)s used: %(used)s, Request IP: %(ip)s used: %(ip_used)s',
                level=log.INFO, spider=spider, token=token, used=used, ip=ip_addr, ip_used=ip_used)

        # The stored token is "<key>_<secret>"; the unpack requires exactly one '_'.
        token_key, token_secret = token.split('_')
        oauth4Scrapy = OAuth4Scrapy(self.api_key, self.api_secret)
        oauth4Scrapy.setToken(token_key, token_secret)
        headers = oauth4Scrapy.getAuthHeaders(request.url)
        request.headers.update(headers)
Example #3
0
    def process_request(self, request, spider):
        """Attach an OAuth2 ``Authorization`` header to *request*.

        A token is picked with ``one_valid_token``. When its usage count
        exceeds ``self.per_token_hours_limit - self.buffer_size``, the
        counters are handed to ``calibration``, a token is picked again and,
        if ``token_status`` reports fewer than ``BUFFER_SIZE`` remaining
        calls, the method sleeps for the reported reset interval before
        continuing.

        Args:
            request: outgoing request whose ``headers`` mapping is updated.
            spider: spider the request belongs to; only passed to the logger.

        Raises:
            CloseSpider: if ``one_valid_token`` returns ``None``, either on
                the first selection or on the one made after calibration.
        """
        token_and_used = one_valid_token(self.req_count, self.tk_alive)
        if token_and_used is None:
            log.msg(format='No token alive',
                    level=log.INFO, spider=spider)

            raise CloseSpider('No Token Alive')
        token, used = token_and_used

        if used > self.per_token_hours_limit - self.buffer_size:
            calibration(self.req_count, self.tk_alive, self.per_token_hours_limit)

            # The selection can come back empty here as well; handle it the
            # same way as the first selection instead of failing on unpack.
            token_and_used = one_valid_token(self.req_count, self.tk_alive)
            if token_and_used is None:
                log.msg(format='No token alive',
                        level=log.INFO, spider=spider)

                raise CloseSpider('No Token Alive')
            # Keep the refreshed count so the log line below describes the
            # token that is actually attached to the request.
            token, used = token_and_used

            reset_time_in, remaining = token_status(token)
            # NOTE(review): this compares against the module-level BUFFER_SIZE
            # while the check above uses self.buffer_size - confirm both are
            # meant to carry the same value.
            if remaining < BUFFER_SIZE:
                log.msg(format='REACH API REQUEST BUFFER, SLEEP %(reset_time_in)s SECONDS',
                        level=log.WARNING, spider=spider, reset_time_in=reset_time_in)

                # NOTE(review): time.sleep blocks the calling thread; if this
                # runs on the crawler's event-loop thread every other request
                # waits too - confirm that is intended.
                time.sleep(reset_time_in)

        log.msg(format='Request token: %(token)s used: %(used)s',
                level=log.INFO, spider=spider, token=token, used=used)
        request.headers['Authorization'] = 'OAuth2 %s' % token