def get_session(name, password):
    """Log in through a proxy and return the session, or None on failure.

    A proxy is taken from ``getip.getIP`` and passed to ``do_login`` and to
    every request made here. Login counts as successful only when the page
    behind the URL returned by ``do_login`` contains a non-empty ``uniqueid``
    and a fixed probe page is not reported as 403 by ``is_403``.

    :param name: account name, used for logging and as the cookie key
    :param password: account password, forwarded to ``do_login``
    :return: the logged-in session, or None when login fails or the
        account is forbidden
    """
    proxy = getip.getIP("")
    redirect_url, _yundama, _cid, session = do_login(name, password, proxy)

    uid_match = None
    if redirect_url != '':
        landing = session.get(redirect_url, headers=headers, proxies=proxy)
        uid_match = re.search(r'"uniqueid":"(.*)",', landing.text)

    # No redirect URL, no uniqueid in the page, or an empty uniqueid.
    if not (uid_match and uid_match.group(1)):
        other.error('login failed for {}'.format(name))
        return None

    # Probe a fixed page to check that the account is still usable.
    probe = session.get('http://weibo.com/2671109275/about',
                        headers=headers, proxies=proxy)
    if is_403(probe.text):
        other.error('account {} has been forbidden'.format(name))
        LoginInfoOper.freeze_account(name, 0)
        return None

    other.info('Login successful! The login account is {}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict(), proxy['http'])
    return session
def get_session(name, password, need_verify):
    """Log in (retrying on captcha errors) and return the session, or None.

    ``do_login`` is called repeatedly while it reports ``'pinerror'`` together
    with a non-None yundama object; each such failure is reported back through
    ``report_error`` before the next attempt.

    :param name: account name, used for logging and as the cookie key
    :param password: account password, forwarded to ``do_login``
    :param need_verify: forwarded unchanged to ``do_login``
    :return: the logged-in session, or None when login fails or the
        account is frozen
    """
    # Captcha-solving error handling: report the bad answer and try again.
    while True:
        url, yundama_obj, cid, session = do_login(name, password, need_verify)
        if url != 'pinerror' or yundama_obj is None:
            break
        yundama_obj.report_error(cid)

    uid_match = None
    if url != '':
        landing = session.get(url, headers=headers)
        uid_match = re.search(r'"uniqueid":"(.*)",', landing.text)

    if not (uid_match and uid_match.group(1)):
        other.error('本次账号{}登陆失败'.format(name))
        return None

    # Probe an arbitrary page to see whether it is accessible. This check
    # depends on external conditions, but no better approach was found: a
    # problematic account can sometimes still open its own home page, so
    # checking the account's own home page is not a reliable test.
    probe = session.get(
        'http://weibo.com/p/1005051764222885/info?mod=pedit_more',
        headers=headers)
    # Experiments so far suggest that accounts which have not passed phone
    # verification cannot be recovered.
    if is_403(probe.text):
        other.error('账号{}已被冻结'.format(name))
        crawler.warning('账号{}已经被冻结'.format(name))
        freeze_account(name, 0)
        return None

    other.info('本次登陆账号为:{}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
def get_session(name, password):
    """Return a logged-in, proxied session for *name*, or None on failure.

    The proxy returned by ``getip.getIP`` is handed to ``do_login`` and used
    for the follow-up requests. The session is returned only if the page at
    the URL from ``do_login`` carries a non-empty ``uniqueid`` and the probe
    page is not flagged by ``is_403``; in that case the cookies are stored
    together with ``proxy['http']``.

    :param name: account name, used for logging and as the cookie key
    :param password: account password, forwarded to ``do_login``
    :return: the session on success, otherwise None
    """
    proxy = getip.getIP("")
    login_url, _yundama, _cid, session = do_login(name, password, proxy)

    logged_in = False
    if login_url != '':
        page_text = session.get(login_url, headers=headers, proxies=proxy).text
        found = re.search(r'"uniqueid":"(.*)",', page_text)
        logged_in = bool(found and found.group(1))

    if not logged_in:
        other.error('login failed for {}'.format(name))
        return None

    # Check that the account is valid by requesting a fixed page.
    check_resp = session.get('http://weibo.com/2671109275/about',
                             headers=headers, proxies=proxy)
    if is_403(check_resp.text):
        other.error('account {} has been forbidden'.format(name))
        LoginInfoOper.freeze_account(name, 0)
        return None

    other.info('Login successful! The login account is {}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict(), proxy['http'])
    return session
def login(self):
    """Prompt for credentials, log in to Zhihu and store the session cookies.

    The login name decides the endpoint: names matching the phone-number
    pattern are posted to the phone login URL, everything else to the
    e-mail login URL. If the first request raises, a captcha obtained from
    ``self.getCaptcha()`` is added and the request is sent once more.
    The cookies of ``self.session`` are then stored under the login name
    and the ``msg`` field of the server reply is printed.
    """
    import json

    user_name = input('请输入登陆名: ')
    password = input('请输入密码: ')

    # Decide which login method to use.
    if re.match(r'\d{4}-\d{7}|1[34578]\d{9}', user_name):
        print('当前登录方式:手机登陆')
        post_url = 'https://www.zhihu.com/login/phone_num'
        post_data = {
            'phone_num': user_name,
            'password': password,
            '_xsrf': self.getXsrf()
        }
    else:
        print('当前登录方式:邮箱登陆')
        post_url = 'https://www.zhihu.com/login/email'
        post_data = {
            'email': user_name,
            'password': password,
            '_xsrf': self.getXsrf()
        }

    try:
        login_page = self.session.post(post_url, headers=headers, data=post_data)
        # The response attribute is `status_code`; the former `.status`
        # raised AttributeError, which the bare `except` swallowed, so the
        # captcha path below was taken on every call.
        print(login_page.status_code)
        print('直接登陆成功!')
    except Exception:
        # Best-effort fallback kept from the original, but no longer
        # trapping KeyboardInterrupt / SystemExit.
        print('直接登陆失败!')
        post_data['captcha'] = self.getCaptcha()
        login_page = self.session.post(post_url, headers=headers, data=post_data)

    Cookies.store_cookies(user_name, self.session.cookies.get_dict())
    # SECURITY: the reply body comes from the network and used to be passed
    # to eval(), which would execute whatever the server (or a
    # man-in-the-middle) sent. It is parsed as JSON instead.
    # NOTE(review): assumes the endpoint replies with JSON -- confirm.
    login_code = json.loads(login_page.text)
    print(login_code['msg'])
def get_session(name, password):
    """Log in with ``do_login`` and return the session, or None on failure.

    Login counts as successful only when the page behind the URL returned by
    ``do_login`` contains a non-empty ``uniqueid`` and the probe page is not
    reported as 403 by ``is_403``. On success the session cookies are stored
    under *name*.

    :param name: account name, used for logging and as the cookie key
    :param password: account password, forwarded to ``do_login``
    :return: the logged-in session, or None when login fails or the
        account is frozen
    """
    login_url, _yundama, _cid, session = do_login(name, password)

    uid_match = None
    if login_url != '':
        page_text = session.get(login_url, headers=headers).text
        uid_match = re.search(r'"uniqueid":"(.*)",', page_text)

    if not (uid_match and uid_match.group(1)):
        other.error('本次账号{}登陆失败'.format(name))
        return None

    # Visit an official Weibo account page to see whether access is normal.
    probe = session.get('http://weibo.com/2671109275/about', headers=headers)
    # Experiments so far suggest that accounts which have not passed phone
    # verification cannot be recovered.
    if is_403(probe.text):
        other.error('账号{}已被冻结'.format(name))
        freeze_account(name, 0)
        return None

    other.info('本次登陆账号为:{}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
def send_personal_message(target_uid, adver_message, user_verify=True, need_login=True):
    """
    Send a private message to a Weibo user with an account from the cookie pool.

    :param target_uid: uid of the user the message is sent to
    :param adver_message: text of the message
    :param user_verify: not used by the current implementation; kept so that
        existing callers keep working
    :param need_login: when True, cookies fetched from ``Cookies`` are used to
        post the message; when False nothing is sent and None is returned
    :return: None once a message has been posted without an ``error`` in the
        reply (or when ``need_login`` is False); '' when the retries are
        exhausted or the cookie pool is empty
    """
    crawler.info('the send_personal_message uid is {uid}'.format(uid=str(target_uid)))
    count = 0

    while count < max_retries:
        if need_login:
            # name_cookies is indexed below as: [0] account name, [1] cookies,
            # [2] value sent as the `source` query parameter, [3] counter
            # compared against adver_timers.
            name_cookies = Cookies.fetch_cookies()
            print(name_cookies)

            # The empty-pool check has to come before any indexing; it used
            # to sit after `name_cookies[3]`, so an empty pool raised
            # TypeError instead of reaching this handler.
            if name_cookies is None:
                crawler.warning('no cookies in cookies pool, please find out the reason')
                send_email()
                os.kill(os.getppid(), signal.SIGTERM)
                # Only the parent was signalled; stop here rather than
                # falling through and indexing None.
                return ''

            # Skip accounts that have already reached the message quota.
            # NOTE(review): this `continue` does not consume a retry; if
            # fetch_cookies keeps handing out exhausted accounts the loop
            # will not terminate -- confirm that the pool rotates.
            if int(name_cookies[3]) >= int(adver_timers):
                continue

        try:
            if need_login:
                resp = requests.post(
                    'http://api.weibo.com/webim/2/direct_messages/new.json?source=' + str(name_cookies[2]),
                    data={'text': adver_message, 'uid': str(target_uid)},
                    cookies=name_cookies[1],
                    headers=personal_message_headers)

                if "error" in resp.text:
                    crawler.warning('account {} has been banned, resp.text is: {}'.format(name_cookies[0], resp.text))
                    freeze_account(name_cookies[0], 0)
                    Cookies.delete_cookies(name_cookies[0])
                    continue

                # Message posted: store the cookies again with a trailing 1
                # (the original comment calls this "update adver_times").
                Cookies.store_cookies(name_cookies[0], name_cookies[1], name_cookies[2], 1)
                return None
        except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError, AttributeError) as e:
            crawler.warning('excepitons happens when send_personal_message {},specific infos are {}'.format(target_uid, e))
            count += 1
            time.sleep(excp_interal)
        else:
            # Reached only when need_login is False: nothing was sent.
            return None

    crawler.warning('max tries for {},check the target_uid in redis db2'.format(target_uid))
    return ''
def get_session(name, password):
    """Perform the Sina SSO login and return the session, or None on failure.

    The encoded user name and the pre-login server data are used to build the
    login form, which ``get_redirect`` posts to the SSO endpoint. Login counts
    as successful only when the page behind the returned URL contains a
    non-empty ``uniqueid`` and the probe page is not reported as 403 by
    ``is_403``. On success the session cookies are stored under *name*.

    :param name: account name
    :param password: account password
    :return: the logged-in ``requests.Session``, or None when login fails or
        the account is frozen
    """
    session = requests.Session()
    su = get_encodename(name)
    prelogin = get_server_data(su, session)
    servertime = prelogin["servertime"]
    nonce = prelogin['nonce']
    rsakv = prelogin["rsakv"]
    pubkey = prelogin["pubkey"]
    sp = get_password(password, servertime, nonce, pubkey)

    # The fields to submit can be worked out by capturing the login traffic.
    form = {
        'encoding': 'UTF-8',
        'entry': 'weibo',
        'from': '',
        'gateway': '1',
        'nonce': nonce,
        'pagerefer': "",
        'prelt': 67,
        'pwencode': 'rsa2',
        "returntype": "META",
        'rsakv': rsakv,
        'savestate': '7',
        'servertime': servertime,
        'service': 'miniblog',
        'sp': sp,
        'sr': '1920*1080',
        'su': su,
        'useticket': '1',
        'vsnf': '1',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack'
    }
    sso_url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'
    url = get_redirect(form, sso_url, session)

    uid_match = None
    if url != '':
        landing = session.get(url, headers=headers)
        uid_match = re.search(r'"uniqueid":"(.*)",', landing.text)

    # Covers all three failure cases: no redirect URL, no uniqueid in the
    # page, or an empty uniqueid.
    if not (uid_match and uid_match.group(1)):
        other.error('本次账号{}登陆失败'.format(name))
        return None

    # Probe an arbitrary page to see whether it is accessible. This check
    # depends on external conditions, but no better approach was found: a
    # problematic account can sometimes still open its own home page, so
    # checking the account's own home page is not a reliable test.
    probe = session.get(
        'http://weibo.com/p/1005051764222885/info?mod=pedit_more',
        headers=headers)
    if is_403(probe.text):
        other.error('账号{}已被冻结'.format(name))
        crawler.warning('账号{}已经被冻结'.format(name))
        freeze_account(name)
        return None

    other.info('本次登陆账号为:{}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
def test_store_and_fetch_cookies(self):
    """fetch_cookies returns None before a store and a value after it."""
    before_store = Cookies.fetch_cookies()
    assert before_store is None

    Cookies.store_cookies(FAKE_STR, FAKE_STR)

    after_store = Cookies.fetch_cookies()
    assert after_store is not None