def process_request(self, request, spider):
    """Attach an HTTP proxy to the request when the spider needs one.

    Downloader-middleware hook.  Only acts on spiders that declare a
    known ``_operation``; requests that already carry a proxy are left
    untouched.  Returns ``None`` in every case so Scrapy continues
    processing the request.
    """
    # The proxy only works if the request comes from a spider that
    # has an operation associated (`catalog`, `collection`, etc.)
    has_operation = hasattr(spider, '_operation')
    operations = ('catalog', 'collection', 'latest', 'manga')
    if not has_operation or spider._operation not in operations:
        return

    # Lazy %-style args: the message is only built if DEBUG is enabled.
    logger.debug('Process request - proxy: %s, url: %s',
                 request.meta.get('proxy', 'no'), request.url)

    # If the proxy is already set, we are done
    if 'proxy' in request.meta:
        return

    if needs_proxy(spider.name):
        proxy = Proxy.objects.get_one(spider.name)
        if proxy:
            logger.info('Using proxy <%s> for request', proxy)
            request.meta['proxy'] = 'http://%s' % proxy.proxy
            # Disable redirection when a proxy is in use
            request.meta['dont_redirect'] = True
        else:
            logger.error('No proxy found for %s', spider.name)
def process_request(self, request, spider):
    """Attach a random HTTP proxy to the request when the spider needs one.

    Downloader-middleware hook.  Only acts on spiders that declare a
    known ``_operation``; requests that already carry a proxy are left
    untouched.  Picks one proxy at random from those registered for the
    spider's source.
    """
    # The proxy only works if the operation is fetch an issue or a
    # collection
    has_operation = hasattr(spider, '_operation')
    operations = ('catalog', 'collection', 'latest', 'manga')
    if not has_operation or spider._operation not in operations:
        return

    # Lazy %-style args: the message is only built if DEBUG is enabled.
    logger.debug('Process request - proxy: %s, url: %s',
                 request.meta.get('proxy', 'no'), request.url)

    # If the proxy is already set, we are done
    if 'proxy' in request.meta:
        return

    if needs_proxy(spider.name):
        # `order_by('?')` asks the database for a random ordering, so
        # `first()` yields a random proxy (or None when there is none).
        proxy = Proxy.objects.filter(source__spider=spider.name)
        proxy = proxy.order_by('?').first()
        if proxy:
            logger.info('Using proxy <%s> for request', proxy)
            request.meta['proxy'] = 'http://%s' % proxy.proxy
            # Disable redirection when a proxy is in use
            request.meta['dont_redirect'] = True
        else:
            logger.error('No proxy found for %s', spider.name)
def send(issues, user, accounts=None, loglevel=logging.WARNING):
    """Send a list of issues to a user.

    Marks every issue as PROCESSING, then enqueues scraping jobs:
    issues whose source can be fetched directly go through the fast
    queue, those that require a proxy go through the slow queue.  Each
    scrape job is chained to a `create_mobi_and_send` job that also
    updates the Result status.
    """
    for issue in issues:
        issue.create_result_if_needed(user, Result.PROCESSING)

    # Split the issues in `fast` (direct access) and `slow` (needs
    # proxy).  Single stable pass, so `needs_proxy` is called exactly
    # once per issue instead of twice.
    fast_issues, slow_issues = [], []
    for issue in issues:
        if needs_proxy(issue.manga.source.spider):
            slow_issues.append(issue)
        else:
            fast_issues.append(issue)

    if fast_issues:
        scrape_job = scrape_issues.delay(fast_issues, accounts, loglevel)
        # This job also updates the Result status
        create_mobi_and_send.delay(fast_issues, user, depends_on=scrape_job)

    if slow_issues:
        scrape_job = scrape_issues_slow.delay(slow_issues, accounts,
                                              loglevel)
        # This job also updates the Result status
        create_mobi_and_send.delay(slow_issues, user, depends_on=scrape_job)
def _list_spiders(self, scrapy):
    """Print the current spiders that can be activated.

    Spiders that require a proxy are flagged with a `(PROXY)` suffix.
    """
    header = 'List of current spiders:'
    self.stdout.write(header)
    self.stdout.write('=' * len(header))
    self.stdout.write('')
    for name in scrapy.spider_list():
        suffix = ' (PROXY)' if needs_proxy(name) else ''
        self.stdout.write('- %s%s' % (name, suffix))