Example 1
 def init_proxies_queue(self, proxies=None):
     # getHttpProxy comes from multiprocess.core.HttpProxy (see Example 4).
     # Fetching at call time avoids the pitfall of a getHttpProxy() call in
     # the default argument, which would run only once, at definition time.
     if proxies is None:
         proxies = getHttpProxy()
     # Rebuild the Redis proxy queue: drop the stale key, push all proxies.
     self.redis.delete(self.http_proxies_queue_redis_key)
     buffer = [str(proxy) for proxy in proxies]
     if buffer:  # rpush with no values raises an error
         self.redis.rpush(self.http_proxies_queue_redis_key, *buffer)
     self.check_avaliable_proxiex()
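The pattern in Example 1 is a full queue rebuild: delete the old Redis key, then push every proxy string in a single RPUSH round trip. A minimal standalone sketch of the same pattern with redis-py, assuming a local Redis instance (the helper and key name here are illustrative, not from the source):

import redis

def seed_proxy_queue(r, key, proxies):
    # Drop any stale queue, then push all proxies in one command.
    r.delete(key)
    if proxies:
        r.rpush(key, *[str(p) for p in proxies])

r = redis.Redis(host="localhost", port=6379)
seed_proxy_queue(r, "myspider:http_proxies_queue",
                 ["1.2.3.4:8080", "5.6.7.8:3128"])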
Example 2
 def init_proxies_queue(self, proxies=None):
     # Delegate to the parent class; passing None lets it fetch a fresh list.
     super(ContinueMaster, self).init_proxies_queue(proxies=proxies)
Example 3
 def init_proxies_queue(self, proxies=None):
     # Delegate to the parent class; passing None lets it fetch a fresh list.
     super(FirstMaster, self).init_proxies_queue(proxies=proxies)
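Examples 2 and 3 are thin overrides that delegate to a shared parent implementation. They use the explicit two-argument form of super(), which works on both Python 2 and 3; under Python 3 alone, the zero-argument form is equivalent (BaseMaster below is an assumed name for the parent class, which the source does not show):

class ContinueMaster(BaseMaster):
    def init_proxies_queue(self, proxies=None):
        # Python 3 only; identical to super(ContinueMaster, self).
        super().init_proxies_queue(proxies=proxies)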
Example 4
# -*- coding: utf-8 -*-
# Define your downloader middlewares here
#
# Don't forget to add your middleware to the DOWNLOADER_MIDDLEWARES setting
# See: http://doc.scrapy.org/topics/downloader-middleware.html
import logging

from fake_useragent import UserAgent
from scrapy_redis.connection import get_redis_from_settings

from multiprocess.core.HttpProxy import getHttpProxy, getHttpsProxy

http_proxies, https_proxies = getHttpProxy(), getHttpsProxy()


class CustomHeadersDownLoadMiddleware(object):
    def __init__(self, crawler):
        self.crawler = crawler
        self.setting = crawler.settings
        self.spider = crawler.spider
        self.spider_name = self.spider.name
        # Per-spider Redis key, e.g. "myspider:http_proxies_queue".
        self.http_proxies_queue_redis_key = self.setting.get(
            "HTTP_PROXIES_QUEUE_REDIS_KEY",
            "%(name)s:http_proxies_queue") % {"name": self.spider_name}
        # Downloader middlewares get no built-in self.logger (unlike spiders),
        # so one must be created explicitly before logging.
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.info(self.http_proxies_queue_redis_key)
        self.user_agent = UserAgent()
        self.redis = get_redis_from_settings(self.setting)
        self.current_proxy = self.get_new_proxy()

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy's factory hook: build the middleware with crawler access.
        return cls(crawler)
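The __init__ above only wires up state: the Redis connection, the per-spider queue key, a UserAgent pool, and the current proxy. A typical process_request for such a middleware, which the excerpt does not show, would rotate the User-Agent and route the request through the held proxy; this is a sketch, not the project's actual method:

    def process_request(self, request, spider):
        # fake_useragent's .random yields a random real-world UA string.
        request.headers["User-Agent"] = self.user_agent.random
        # Send the request through the currently held proxy, if any.
        if self.current_proxy:
            request.meta["proxy"] = "http://%s" % self.current_proxy

To take effect, the middleware must also be enabled in the project settings; a sketch, assuming a hypothetical myproject.middlewares module path and scrapy_redis's standard connection settings:

DOWNLOADER_MIDDLEWARES = {
    "myproject.middlewares.CustomHeadersDownLoadMiddleware": 543,
}
REDIS_URL = "redis://localhost:6379"  # read by get_redis_from_settings
HTTP_PROXIES_QUEUE_REDIS_KEY = "%(name)s:http_proxies_queue"  # optional override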