def init_proxies_queue(self, proxies=None):
    """Reset the Redis-backed HTTP proxy queue and repopulate it.

    Deletes the existing queue key, pushes the string form of every proxy,
    then triggers the availability check.

    :param proxies: iterable of proxy objects; when ``None`` (the default)
        a fresh list is fetched via ``getHttpProxy()`` at call time.

    BUG FIX: the original signature was ``proxies=getHttpProxy()``, which
    evaluates ONCE at import time and silently reuses that stale proxy
    list on every later default-argument call.
    """
    if proxies is None:
        proxies = getHttpProxy()
    self.redis.delete(self.http_proxies_queue_redis_key)
    buffer = [str(proxy) for proxy in proxies]
    # rpush with zero values raises; skip the push when there are no proxies.
    if buffer:
        self.redis.rpush(self.http_proxies_queue_redis_key, *buffer)
    # NOTE(review): method name is typo'd ("avaliable"/"proxiex") but is
    # defined elsewhere in the project under that name — do not rename here.
    self.check_avaliable_proxiex()
def init_proxies_queue(self, proxies=None):
    """Delegate queue initialisation to the parent class.

    :param proxies: iterable of proxy objects; when ``None`` (the default)
        a fresh list is fetched via ``getHttpProxy()`` at call time.

    BUG FIX: the original default ``proxies=getHttpProxy()`` was evaluated
    once at import time, freezing the proxy list.
    """
    if proxies is None:
        proxies = getHttpProxy()
    super(ContinueMaster, self).init_proxies_queue(proxies=proxies)
def init_proxies_queue(self, proxies=None):
    """Delegate queue initialisation to the parent class.

    :param proxies: iterable of proxy objects; when ``None`` (the default)
        a fresh list is fetched via ``getHttpProxy()`` at call time.

    BUG FIX: the original default ``proxies=getHttpProxy()`` was evaluated
    once at import time, freezing the proxy list.
    """
    if proxies is None:
        proxies = getHttpProxy()
    super(FirstMaster, self).init_proxies_queue(proxies=proxies)
# -*- coding: utf-8 -*-
# Downloader middleware: rotates User-Agent headers and HTTP proxies drawn
# from a per-spider Redis queue.
# See: http://doc.scrapy.org/topics/item-pipeline.html
import logging

from fake_useragent import UserAgent
from scrapy_redis.connection import get_redis_from_settings

from multiprocess.core.HttpProxy import getHttpProxy, getHttpsProxy

# Module-level snapshot of proxies, taken once at import time.
http_proxies, https_proxies = getHttpProxy(), getHttpsProxy()


class CustomHeadersDownLoadMiddleware(object):
    """Scrapy downloader middleware supplying custom headers and proxies.

    Reads the per-spider Redis key name from the
    ``HTTP_PROXIES_QUEUE_REDIS_KEY`` setting (default
    ``"%(name)s:http_proxies_queue"``) and holds a UserAgent generator,
    a Redis connection, and the currently active proxy.
    """

    def __init__(self, crawler):
        """Wire the middleware to the running crawler.

        :param crawler: the Scrapy ``Crawler`` owning settings and spider.
        """
        self.crawler = crawler
        self.setting = crawler.settings
        # BUG FIX: ``self.logger`` was used below but never assigned,
        # which raised AttributeError on construction.
        self.logger = logging.getLogger(__name__)
        # NOTE(review): crawler.spider may be None this early in the
        # crawler lifecycle — verify this middleware is built after the
        # spider is opened.
        self.spider = crawler.spider
        self.spider_name = self.spider.name
        # Per-spider Redis key, e.g. "myspider:http_proxies_queue".
        self.http_proxies_queue_redis_key = self.setting.get(
            "HTTP_PROXIES_QUEUE_REDIS_KEY",
            "%(name)s:http_proxies_queue") % {"name": self.spider_name}
        self.logger.info(self.http_proxies_queue_redis_key)
        self.user_agent = UserAgent()
        self.redis = get_redis_from_settings(self.setting)
        # get_new_proxy is defined elsewhere in the project (not visible
        # in this chunk); it presumably pops a proxy from the Redis queue.
        self.current_proxy = self.get_new_proxy()

    @classmethod
    def from_crawler(cls, crawler):
        """Standard Scrapy factory hook."""
        return cls(crawler)