def __init__(self, settings, crawler):
    """Initialize the retry middleware and seed the cookie pool in Redis.

    :param settings: Scrapy settings object (forwarded to RetryMiddleware).
    :param crawler: running Crawler; its spider name keys the cookie pool.
    """
    RetryMiddleware.__init__(self, settings)
    # Build the Redis connection from settings and fail fast if the
    # server is unreachable.
    self.redis = connection.from_settings(settings)
    self.redis.ping()
    # Seed cookies for this spider so outgoing requests can pick them up.
    # (Removed dead commented-out alternative that built its own
    # redis.Redis client with a misspelled default host.)
    initCookie(self.redis, crawler.spider.name)
def __init__(self, settings, crawler):
    """Initialize retry handling and push initial cookies into Redis.

    :param settings: Scrapy settings (also consulted for a pre-built RCONN).
    :param crawler: running Crawler; spider name namespaces the cookies.
    """
    RetryMiddleware.__init__(self, settings)
    # Reuse an injected connection ("RCONN") if present, otherwise connect
    # via REDIS_HOST/REDIS_PORT. Default host typo fixed: 'localhsot'.
    self.rconn = settings.get(
        "RCONN",
        redis.Redis(crawler.settings.get('REDIS_HOST', 'localhost'),
                    crawler.settings.get('REDIS_PORT', 6379)))
    initCookie(self.rconn, crawler.spider.name)
def __init__(self, settings, crawler):
    """Set up retry behaviour and load the cookie pool into Redis."""
    RetryMiddleware.__init__(self, settings)
    host = crawler.settings.get('REDIS_HOST', '192.168.195.1')
    port = crawler.settings.get('REDIS_PORT', 6379)
    # An externally supplied connection under 'RCONN' wins over a fresh one.
    self.rconn = settings.get('RCONN', redis.Redis(host, port))
    initCookie(self.rconn, crawler.spider.name)
def __init__(self, settings, crawler):
    """Initialize retries and seed cookies via a password-protected Redis.

    :param settings: Scrapy settings (also consulted for a pre-built RCONN).
    :param crawler: running Crawler; spider name namespaces the cookies.
    """
    RetryMiddleware.__init__(self, settings)
    # Default host typo fixed: 'localhsot' -> 'localhost'.
    self.rconn = settings.get(
        "RCONN",
        redis.Redis(host=crawler.settings.get('REDIS_HOST', 'localhost'),
                    port=crawler.settings.get('REDIS_PORT', 6379),
                    password=crawler.settings.get('REDIS_PASSWORD', '')))
    initCookie(self.rconn, crawler.spider.name)
def __init__(self, settings, crawler):
    """Initialize retries and cache a freshly obtained cookie set in Redis.

    Performs the first login via QichabaoCookie and stores the resulting
    cookies under the "qichabao:Cookies" key.
    """
    RetryMiddleware.__init__(self, settings)
    # First login: fetch the cookies once at startup.
    # (Removed dead commented-out redis.from_url alternative.)
    res = QichabaoCookie().init_cookie()
    # Persist them in Redis as a JSON string (keep non-ASCII readable).
    PyRedis().get_redis().set("qichabao:Cookies",
                              json.dumps(res, ensure_ascii=False))
def __init__(self, settings, crawler):
    """Initialize retry middleware with a self-sourced proxy retry budget."""
    # Retry budget when using self-sourced proxy IPs.
    self.TIMES = 10
    RetryMiddleware.__init__(self, settings)
    # Default host typo fixed: 'localhsot' -> 'localhost'.
    self.rconn = settings.get(
        "RCONN",
        redis.Redis(crawler.settings.get('REDIS_HOST', 'localhost'),
                    crawler.settings.get('REDIS_PORT', 6379)))
def __init__(self, settings, crawler):
    """Connect to Redis (db 1) and seed the cookie pool for this spider."""
    RetryMiddleware.__init__(self, settings)
    # xiaoman cookie pool lives in db 1; decode_responses makes values come
    # back as str. (The weixin pool would be db 4.)
    self.rconn = redis.from_url(settings['REDIS_URL'], db=1,
                                decode_responses=True)
    self.cookie_tool = CookieTool()
    self.cookie_tool.init_cookie(self.rconn, crawler.spider.name)
def __init__(self, settings, crawler):
    """Hook up the parent retry logic and preload cookies into Redis."""
    # Delegate standard retry setup to the parent class.
    RetryMiddleware.__init__(self, settings)
    # settings['REDIS_URL'] reads the Scrapy settings; decode_responses
    # returns str instead of bytes. Cookies live in db 14.
    self.redis_connection = redis.from_url(settings['REDIS_URL'], db=14,
                                           decode_responses=True)
    # Push the cookies into Redis, keyed by the spider's name.
    init_cookie(self.redis_connection, crawler.spider.name)
def __init__(self, settings, crawler):
    """Open the cookie Redis connection and initialize all cookies."""
    RetryMiddleware.__init__(self, settings)
    # Cookie store connection; default host typo fixed ('localhsot').
    self.rconn = settings.get(
        "RCONN",
        redis.Redis(settings.get('COOKIE_REDIS_HOST', 'localhost'),
                    settings.get('COOKIE_REDIS_PORT', 6379),
                    settings.get('COOKIE_REDIS_DB', 2)))
    # Load every cookie into Redis up front.
    init_cookie(self.rconn, crawler.spider.name)
def __init__(self, settings, crawler):
    """Create the cookie Redis client and populate the spider's cookies."""
    RetryMiddleware.__init__(self, settings)
    # decode_responses=True yields str values instead of bytes.
    self.r = redis.Redis(host=settings.get('REDIS_HOST'),
                         port=settings.get('REDIS_PORT'),
                         db=settings.get('COOKIES_DB'),
                         decode_responses=True)
    self.init_cookie(crawler.spider.name)
def __init__(self, settings, crawler):
    """Connect to Redis (db 2) and seed cookies under a unified pool name."""
    RetryMiddleware.__init__(self, settings)
    self.rconn = settings.get(
        "RCONN",
        redis.Redis(crawler.settings.get('REDIS_HOST', 'localhost'),
                    crawler.settings.get('REDIS_PORT', 6379), 2))
    # Weibo cookies are keyed per spider name; to share a single pool,
    # every spider except SoGouSpider/QxjSpider registers as "SinaSpider".
    if crawler.spider.name not in ["SoGouSpider", "QxjSpider"]:
        name = "SinaSpider"
        initCookie(self.rconn, name)
def __init__(self, settings, crawler):
    """Load the in-memory cookie pool from MongoDB.

    Cookies must already exist in the collection as rows of
    (username: str, cookies: JSON string), e.g.
    | username          | cookies                                  |
    | "*****@*****.**"  | '{"SSOLoginState": "...", "SUB": "..."}' |
    """
    self.logger = logging.getLogger("---Cookies池---")
    RetryMiddleware.__init__(self, settings)
    # Simulated-login bootstrap (init_cookies()) is disabled; cookies are
    # written to the database manually instead.
    client = pymongo.MongoClient(MONGO_HOST, MONGO_PORT)
    collection = client[MONGO_DB_NAME][COOKIES_COLLECTION_NAME]
    # Decode every stored JSON cookie string into the pool.
    self.cookies_pool = [json.loads(doc['cookies'])
                         for doc in collection.find()]
def __init__(self, settings):
    """Parse PROXY_LIST entries into a {proxy_url: user_pass} mapping.

    Each entry looks like "scheme://user:pass@host:port"; the credential
    part is optional.
    """
    RetryMiddleware.__init__(self, settings)
    self.proxy_list = settings.get('PROXY_LIST')
    self.proxies = {}
    for line in self.proxy_list:
        # Raw string: '\w' in a plain literal is an invalid escape
        # (DeprecationWarning, SyntaxError in future Python).
        parts = re.match(r'(\w+://)(\w+:\w+@)?(.+)', line)
        # Strip the trailing '@' from the credential group, if present.
        user_pass = parts.group(2)[:-1] if parts.group(2) else ''
        self.proxies[parts.group(1) + parts.group(3)] = user_pass
def __init__(self, settings):
    """Read the PROXY_LIST file and build {proxy_url: user_pass}.

    Each line looks like "scheme://user:pass@host:port"; credentials
    are optional.
    """
    RetryMiddleware.__init__(self, settings)
    self.proxy_list = settings.get('PROXY_LIST')
    self.proxies = {}
    # 'with' guarantees the file is closed even if parsing raises
    # (the original leaked the handle on error).
    with open(self.proxy_list) as fin:
        for line in fin:
            # Raw string avoids the invalid '\w' escape warning.
            parts = re.match(r'(\w+://)(\w+:\w+@)?(.+)', line)
            # Strip the trailing '@' from the credential group.
            user_pass = parts.group(2)[:-1] if parts.group(2) else ''
            self.proxies[parts.group(1) + parts.group(3)] = user_pass
def __init__(self, settings):
    """Configure proxy selection according to PROXY_MODE.

    Modes:
      * RANDOMIZE_PROXY_EVERY_REQUESTS / RANDOMIZE_PROXY_ONCE: load
        proxies from the PROXY_LIST file; ONCE also pre-picks one proxy.
      * SET_CUSTOM_PROXY: use the single CUSTOM_PROXY setting.

    Raises:
        KeyError: PROXY_LIST is missing in a randomized mode.
        ValueError: CUSTOM_PROXY is malformed.
    """
    RetryMiddleware.__init__(self, settings)
    self.mode = settings.get('PROXY_MODE')
    self.proxy_list = settings.get('PROXY_LIST')
    self.chosen_proxy = ''
    if self.mode in (Mode.RANDOMIZE_PROXY_EVERY_REQUESTS,
                     Mode.RANDOMIZE_PROXY_ONCE):
        if self.proxy_list is None:
            raise KeyError('PROXY_LIST setting is missing')
        self.proxies = {}
        # Raw regex strings fix invalid '\w' escapes; 'with' replaces the
        # manual try/finally close.
        with open(self.proxy_list) as fin:
            for line in fin:
                parts = re.match(r'(\w+://)(\w+:\w+@)?(.+)', line.strip())
                if not parts:
                    continue
                # Cut the trailing '@' off the credential group.
                user_pass = parts.group(2)[:-1] if parts.group(2) else ''
                self.proxies[parts.group(1) + parts.group(3)] = user_pass
        if self.mode == Mode.RANDOMIZE_PROXY_ONCE:
            self.chosen_proxy = random.choice(list(self.proxies.keys()))
    elif self.mode == Mode.SET_CUSTOM_PROXY:
        custom_proxy = settings.get('CUSTOM_PROXY')
        self.proxies = {}
        parts = re.match(r'(\w+://)([^:]+?:[^@]+?@)?(.+)',
                         custom_proxy.strip())
        if not parts:
            raise ValueError('CUSTOM_PROXY is not well formatted')
        user_pass = parts.group(2)[:-1] if parts.group(2) else ''
        self.proxies[parts.group(1) + parts.group(3)] = user_pass
        self.chosen_proxy = parts.group(1) + parts.group(3)
def __init__(self, setting, crawler):
    """Connect to Redis (db 1) and seed cookies for the running spider."""
    RetryMiddleware.__init__(self, setting)
    # Fixed keyword: redis.from_url expects 'decode_responses' (plural);
    # the original 'decode_response' is not a recognized option.
    self.rconn = redis.from_url(setting['REDIS_URL'], db=1,
                                decode_responses=True)
    init_cookie(self.rconn, crawler.spider.name)
def __init__(self, settings):
    """Delegate all initialization to the base RetryMiddleware."""
    RetryMiddleware.__init__(self, settings)
def __init__(self, settings):
    """Initialize both parent middlewares with the same settings.

    Order matters only in that RetryMiddleware runs first, matching the
    original code; both parents receive the identical settings object.
    """
    RetryMiddleware.__init__(self, settings)
    DownloaderBaseMiddleware.__init__(self, settings)
def __init__(self, settings):
    """Connect to the local cookie store and load every account's cookies."""
    RetryMiddleware.__init__(self, settings)
    # db 3 is reserved for cookie data.
    self.rconn = redis.Redis(host='127.0.0.1', port=6379, db=3)
    # Fetch cookies for all accounts once, at startup.
    initCookie(self.rconn)
def __init__(self, settings):
    """Delegate all initialization to the base RetryMiddleware."""
    RetryMiddleware.__init__(self, settings)
def __init__(self, settings):
    """Set up retries and open the Redis connection (cookie db 3)."""
    RetryMiddleware.__init__(self, settings)
    # REDIS_HOST / REDIS_PORT are module-level constants; db 3 holds cookies.
    self.rconn = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=3)
def __init__(self, settings, crawler):
    """Initialize retries, connect to Redis, and bootstrap cookies."""
    RetryMiddleware.__init__(self, settings)
    redis_host = crawler.settings.get('REDIS_HOST', 'localhost')
    redis_port = crawler.settings.get('REDIS_PORT', 6379)
    self.rconn = redis.Redis(host=redis_host, port=redis_port)
    # Cookie bootstrap takes no arguments here.
    init_cookies()
def __init__(self, settings, crawler):
    """Initialize retry handling and register cookies for this spider."""
    RetryMiddleware.__init__(self, settings)
    cookies.initCookie(crawler.spider.name)
def __init__(self, settings, crawler):
    """Connect to Redis (db 1, str responses) and seed the cookie pool."""
    RetryMiddleware.__init__(self, settings)
    # decode_responses=True makes Redis return str rather than bytes.
    self.rconn = redis.from_url(settings['REDIS_URL'], db=1,
                                decode_responses=True)
    init_cookie(self.rconn, crawler.spider.name)
def __init__(self, settings):
    """Record which HTTP status codes trigger a proxy retry."""
    RetryMiddleware.__init__(self, settings)
    # Settings lists arrive as strings; normalize to a set of ints.
    codes = settings.getlist('RETRY_PROXY_CODES')
    self.retry_proxy_codes = {int(code) for code in codes}
def __init__(self, settings):
    """Open Redis and load all cookies through the cookie manager."""
    RetryMiddleware.__init__(self, settings)
    # Default host typo fixed: 'localhsot' -> 'localhost'.
    self.rconn = redis.Redis(settings.get('REDIS_HOST', 'localhost'),
                             settings.get('REDIS_PORT', 6379))
    self.cookiemanager = CookiesManager()
    self.cookiemanager.init_all_cookies(self.rconn)
def __init__(self, settings):
    """Initialize retries and track form data that must not be retried."""
    RetryMiddleware.__init__(self, settings)
    # Requests whose formdata lands in this set are never retried again.
    self.cant_retry_formdata_set = set()
def __init__(self, settings, crawler):
    """Set up retries, grab a Redis handle, and seed the cookie pool."""
    RetryMiddleware.__init__(self, settings)
    # Prefer an injected connection under "RCONN"; otherwise localhost:6379.
    default_conn = redis.Redis("localhost", 6379)
    self.rconn = settings.get("RCONN", default_conn)
    initCookie(self.rconn, crawler.spider.name)
def __init__(self, crawler):
    """Initialize retry behaviour and random user-agent selection.

    RetryMiddleware gets the crawler's settings; RandomUserAgentBase gets
    the crawler object itself.
    """
    RetryMiddleware.__init__(self, crawler.settings)
    RandomUserAgentBase.__init__(self, crawler)
def __init__(self, settings):
    """Initialize retries and connect to the configured Redis instance."""
    RetryMiddleware.__init__(self, settings)
    host = settings.get('REDIS_HOST')
    port = settings.get('REDIS_PORT')
    self.redis_conn = redis.Redis(host=host, port=port)
def __init__(self, settings, crawler):
    """Delegate to RetryMiddleware; the crawler argument is unused here."""
    RetryMiddleware.__init__(self, settings)
def __init__(self, settings, crawler):
    """Initialize retries and preload cookies into Redis.

    :param settings: Scrapy settings (also consulted for a pre-built RCONN).
    :param crawler: running Crawler; spider name namespaces the cookies.
    """
    RetryMiddleware.__init__(self, settings)
    # Default host typo fixed: 'localhsot' -> 'localhost'.
    self.rconn = settings.get(
        "RCONN",
        redis.Redis(crawler.settings.get('REDIS_HOST', 'localhost'),
                    crawler.settings.get('REDIS_PORT', 6379)))
    initCookie(self.rconn, crawler.spider.name)
def __init__(self, settings, crawler):
    """Initialize retries with a (possibly injected) Redis connection."""
    RetryMiddleware.__init__(self, settings)
    # "RCONN" may carry a shared connection; fall back to localhost:6379.
    fallback = redis.Redis("localhost", 6379)
    self.rconn = settings.get("RCONN", fallback)
    initCookie(self.rconn, crawler.spider.name)
def __init__(self, settings, crawler):
    """Initialize retries and create the XueQiu cookie helper."""
    RetryMiddleware.__init__(self, settings)
    # Cookie provider used for subsequent requests.
    self.cookie = XueQiuCookie()
def __init__(self, settings):
    """Initialize retries and attach the middleware logger.

    NOTE(review): reads self.crawler, which is not assigned in this
    method and is not set by this call to RetryMiddleware.__init__ —
    presumably set elsewhere (e.g. a from_crawler hook) before use;
    confirm, otherwise this raises AttributeError.
    """
    RetryMiddleware.__init__(self, settings)
    self.set_logger(self.crawler)
def __init__(self, settings):
    """Initialize retries and read the per-retry wait interval."""
    RetryMiddleware.__init__(self, settings)
    # Seconds between retry attempts, from the RETRY_TIME_INTERVAL setting.
    self.retry_intervals = settings.getint('RETRY_TIME_INTERVAL')