def removeCookie(accountText, rconn, spider):
    """Delete the stored cookie of a single account.

    Args:
        accountText: Account identifier used as the suffix of the Redis key.
        rconn: Redis connection; ``delete`` and ``keys`` are called on it.
        spider: Running spider; supplies the key prefix and the logger.

    The number of remaining cookie keys is logged afterwards. When none is
    left an error is logged; nothing is actually stopped here.
    """
    key_prefix = BaseHelper.get_cookie_key_prefix(spider)
    rconn.delete("%s:%s" % (key_prefix, accountText))

    remaining = len(rconn.keys("{}:*".format(key_prefix)))
    log = spider.logger
    log.warning("The num of the cookies left is %s" % remaining)
    if remaining != 0:
        return
    log.error('removeCookie, cookie is used up. Stopping...')
def getCookie_mapi(account, password, spider):
    """Log in through the mobile Weibo SSO endpoint and return the cookies.

    Args:
        account: Login name, sent as the ``username`` form field.
        password: Password, sent unmodified as the ``password`` form field.
        spider: Running spider; only its logger is used.

    Returns:
        The session cookies serialized as a JSON object string when the
        login succeeds, otherwise ``""``.
    """
    session = requests.Session()
    headers = requests.utils.default_headers()
    headers.update(BaseHelper.get_login_headers())

    login_url = "https://passport.weibo.cn/sso/login"
    postdata = {
        'entry': 'mweibo',
        'savestate': '1',
        'username': account,
        'password': password,
        'r': 'http://m.weibo.cn/',
        'ec': '0',
        'pagerefer': "https://passport.weibo.cn/signin/welcome?entry=mweibo&r=http%3A%2F%2Fm.weibo.cn%2F",
        'wentry': '',
        'loginfrom': '',
        'client_id': '',
        'code': '',
        'qq': '',
        'mainpageflag': '1',
        'hff': '',
        'hfp': '',
    }
    login_page = session.post(login_url, data=postdata, headers=headers)
    info = json.loads(login_page.content.decode("GBK"))

    # Compare the return code as text so that both the numeric and the string
    # form of a code are recognised.
    retcode = str(info.get("retcode"))
    if retcode == "20000000":
        spider.logger.warning("Get Cookie Success!( Account:%s )" % account)
        return json.dumps(session.cookies.get_dict())
    if retcode == "4049":
        # NOTE(review): 4049 looks like the "captcha required" code; captcha
        # entry is not supported on this path, so the login counts as failed.
        return ""

    # NOTE(review): this endpoint may report the failure text under "msg"
    # rather than "reason" - confirm against a real failure response.
    reason = info.get("reason", info.get("msg"))
    spider.logger.warning("Failed!( Reason:%s )" % reason)
    return ""
def updateCookie(accountText, rconn, spider):
    """Refresh the stored cookie of one account, dropping it on failure.

    Args:
        accountText: ``"<account>--<password>"`` identifier; it is also the
            suffix of the Redis key.
        rconn: Redis connection used to store or delete the cookie.
        spider: Running spider; supplies the key prefix and the logger.

    Raises:
        IndexError: If ``accountText`` contains no ``"--"`` separator.
    """
    prefix = BaseHelper.get_cookie_key_prefix(spider)

    # Split only on the first separator so a password that itself contains
    # "--" is kept whole.
    credentials = accountText.split("--", 1)
    account = credentials[0]
    password = credentials[1]

    # The login helpers take the spider as third argument (they log through
    # it), exactly as initCookie calls them.
    cookie = getCookie(account, password, spider)
    if len(cookie) > 0:
        spider.logger.warning(
            "The cookie of %s has been updated successfully!" % account)
        rconn.set("%s:%s" % (prefix, accountText), cookie)
    else:
        spider.logger.error(
            "The cookie of %s updated failed! Remove it!" % accountText)
        removeCookie(accountText, rconn, spider)
def initCookie(rconn, spider):
    """Fetch cookies for the configured accounts and store them in Redis.

    Accounts are read from ``weiBo_str``, one ``account----password`` entry
    per line. An account whose cookie is already present in Redis is not
    logged in again.

    Args:
        rconn: Redis connection used to look up and store the cookies.
        spider: Running spider; supplies the key prefix and the logger.
    """
    prefix = BaseHelper.get_cookie_key_prefix(spider)

    accounts = []
    for line in weiBo_str.split('\n'):
        line = line.strip()
        if not line:
            continue
        fields = line.split('----')
        if len(fields) < 2:
            # The line content is deliberately not logged: it may hold
            # credentials.
            spider.logger.warning(
                "initCookie: skipping malformed account line")
            continue
        accounts.append((fields[0], fields[1]))

    # NOTE(review): the original indentation of the throttle bookkeeping was
    # lost; it is assumed to cap the number of login attempts per call
    # (six with the current value) - confirm against the intended behavior.
    throttle = 5
    for account, password in accounts:
        # Key layout: '<prefix>:<account>--<password>'; a missing key reads
        # back as None.
        key = "%s:%s--%s" % (prefix, account, password)
        if rconn.get(key) is not None:
            continue
        cookie = getCookie(account, password, spider)
        if len(cookie) > 0:
            rconn.set(key, cookie)
        throttle -= 1
        if throttle < 0:
            break

    cookieNum = len(rconn.keys("{}:*".format(prefix)))
    spider.logger.warning("The num of the cookies is %s" % cookieNum)
    if cookieNum == 0:
        spider.logger.error('initCookie: Stopping...')
def getCookie_old(account, password, spider):
    """Fetch the cookies of one account through the sina.com.cn SSO endpoint.

    Args:
        account: Login name; sent base64-encoded as the ``su`` form field.
        password: Password; sent unmodified as the ``sp`` form field.
        spider: Running spider; only its logger is used.

    Returns:
        The session cookies serialized as a JSON object string when the
        login succeeds, otherwise ``""``.
    """
    loginURL = "https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)"
    username = base64.b64encode(account.encode("utf-8")).decode("utf-8")
    postData = {
        "entry": "sso",
        "gateway": "1",
        "from": "null",
        "savestate": "30",
        "useticket": "0",
        "pagerefer": "",
        "vsnf": "1",
        "su": username,
        "service": "sso",
        "sp": password,
        "sr": "1440*900",
        "encoding": "UTF-8",
        "cdult": "3",
        "domain": "sina.com.cn",
        "prelt": "0",
        "returntype": "TEXT",
    }

    session = requests.Session()
    headers = requests.utils.default_headers()
    headers.update(BaseHelper.get_headers())

    # The request is sent directly; no proxy is involved on this path.
    r = session.post(loginURL, data=postData, headers=headers)
    info = json.loads(r.content.decode("gbk"))

    # Compare as text so a numeric 0 is accepted as well as the string "0".
    if str(info.get("retcode")) == "0":
        spider.logger.warning("Get Cookie Success!( Account:%s )" % account)
        return json.dumps(session.cookies.get_dict())

    # .get() keeps an unexpected payload from turning into a KeyError.
    spider.logger.warning("Failed!( Reason:%s )" % info.get("reason"))
    return ""
def process_request(self, request, spider):
    """Attach the cookie of one stored account to the outgoing request.

    Accounts are drawn at random from the cookie keys that are not yet in
    the ``cycle:<prefix>`` Redis set; once every key has been used the set
    is cleared and all keys become eligible again.

    Cookies already present on the request override the stored ones. The
    chosen account is recorded in ``request.meta["accountText"]``.

    Args:
        request: Outgoing request; its ``cookies`` and ``meta`` are updated.
        spider: Running spider; supplies the key prefix.

    Returns:
        None in every case.
    """
    prefix = BaseHelper.get_cookie_key_prefix(spider)
    cycle_key = "cycle:{}".format(prefix)

    redisKeys = self.rconn.keys("{}:*".format(prefix))
    if not redisKeys:
        return

    used_keys = self.rconn.smembers(cycle_key)
    candidates = list(set(redisKeys) - used_keys)
    if not candidates:
        # Every account was used in this round: start a new one.
        candidates = redisKeys
        self.rconn.delete(cycle_key)

    elem = random.choice(candidates)
    raw_cookie = self.rconn.get(elem)
    if raw_cookie is None:
        # The key vanished between keys() and get(); send the request
        # without a stored cookie instead of failing in json.loads.
        return

    cookie = json.loads(raw_cookie)
    if request.cookies:
        cookie.update(request.cookies)
    request.cookies = cookie

    # Strip "<prefix>:" (separator included) so that
    # "%s:%s" % (prefix, accountText) reproduces the Redis key exactly.
    request.meta["accountText"] = elem.split("{}:".format(prefix), 1)[-1]
    self.rconn.sadd(cycle_key, elem)
def _read_captcha(prompt):
    """Read one line from the console on both Python 2 and Python 3."""
    try:
        reader = raw_input  # Python 2
    except NameError:
        reader = input  # Python 3 renamed raw_input to input
    return reader(prompt)


def getCookie_api(account, password, spider):
    """Log in through the weibo.com web SSO flow and return the cookies.

    When the server asks for a captcha (``showpin`` set in the pre-login
    data, or return code ``"4049"`` after the first attempt) the captcha is
    fetched with ``get_cha`` and its text is read from the console, so this
    function can block waiting for input.

    Args:
        account: Login name.
        password: Password; encrypted with ``get_password`` before sending.
        spider: Running spider; only its logger is used.

    Returns:
        The session cookies serialized as a JSON object string when the
        login succeeds, otherwise ``""``.
    """
    session = requests.Session()
    headers = requests.utils.default_headers()
    headers.update(BaseHelper.get_login_headers())

    # Visit the landing page first so the session carries its cookies.
    index_url = "http://weibo.com/login.php"
    try:
        session.get(index_url, headers=headers, timeout=2)
    except requests.exceptions.RequestException:
        # Quick attempt failed or timed out: retry once without a timeout.
        session.get(index_url, headers=headers)

    # su is the encoded user name (per the original author's note).
    su = get_su(account)
    sever_data = get_server_data(su, session, headers)
    servertime = sever_data["servertime"]
    nonce = sever_data['nonce']
    rsakv = sever_data["rsakv"]
    pubkey = sever_data["pubkey"]
    showpin = sever_data["showpin"]
    password_secret = get_password(password, servertime, nonce, pubkey)

    postdata = {
        'entry': 'weibo',
        'gateway': '1',
        'from': '',
        'savestate': '7',
        'useticket': '1',
        'pagerefer': "http://login.sina.com.cn/sso/logout.php?entry=miniblog&r=http%3A%2F%2Fweibo.com%2Flogout.php%3Fbackurl",
        'vsnf': '1',
        'su': su,
        'service': 'miniblog',
        'servertime': servertime,
        'nonce': nonce,
        'pwencode': 'rsa2',
        'rsakv': rsakv,
        'sp': password_secret,
        'sr': '1366*768',
        'encoding': 'UTF-8',
        'prelt': '115',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
        'returntype': 'TEXT'
    }
    login_url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'

    # At most two submissions: the initial one, plus one retry with a
    # captcha when the server answers "4049".
    needs_captcha = showpin != 0
    info = {}
    for _ in range(2):
        if needs_captcha:
            pcid = sever_data["pcid"]
            get_cha(pcid, session, headers)
            postdata['door'] = _read_captcha(u"请输入验证码")
        login_page = session.post(login_url, data=postdata, headers=headers)
        info = json.loads(login_page.content.decode("GBK"))
        if str(info.get("retcode")) != "4049":
            break
        needs_captcha = True

    # The outcome of the captcha retry is evaluated like a first attempt
    # instead of being discarded.
    if str(info.get("retcode")) == "0":
        spider.logger.warning("Get Cookie Success!( Account:%s )" % account)
        return json.dumps(session.cookies.get_dict())

    spider.logger.warning("Failed!( Reason:%s )" % info.get("reason"))
    return ""