def get_session(name, password, need_verify):
    """Log the account in and return its authenticated session.

    The login itself is delegated to ``do_login``. After a successful login
    the account is probed against a fixed Weibo page; if that page reports a
    403 the account is marked frozen.

    :param name: account name, also used in log messages and as cookie key
    :param password: account password, passed through to ``do_login``
    :param need_verify: passed through to ``do_login`` unchanged
    :return: the logged-in session, or ``None`` when the login failed or the
        account turned out to be frozen
    """
    url, yundama_obj, cid, session = do_login(name, password, need_verify)

    # Captcha-solving error handling: report the rejected captcha id and
    # log in again for as long as 'pinerror' keeps coming back.
    while url == 'pinerror' and yundama_obj is not None:
        yundama_obj.report_error(cid)
        url, yundama_obj, cid, session = do_login(name, password, need_verify)

    if url == '':
        other.error('本次账号{}登陆失败'.format(name))
        return None

    login_page = session.get(url, headers=headers).text
    uid_match = re.search(r'"uniqueid":"(.*)",', login_page)
    if not (uid_match and uid_match.group(1)):
        other.error('本次账号{}登陆失败'.format(name))
        return None

    # Probe an arbitrary page to see whether it is accessible. This check
    # depends on external conditions, but no better way has been found: in
    # some cases a problematic account can still open its own home page, so
    # validating the account through its own home page is not appropriate.
    probe = session.get(
        'http://weibo.com/p/1005051764222885/info?mod=pedit_more',
        headers=headers,
    )

    # Experiments so far suggest that accounts which never passed phone
    # verification cannot be recovered...
    if is_403(probe.text):
        other.error('账号{}已被冻结'.format(name))
        crawler.warning('账号{}已经被冻结'.format(name))
        freeze_account(name, 0)
        return None

    other.info('本次登陆账号为:{}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
def get_session(name, password):
    """Log the account in, send a test direct message and return the session.

    :param name: account name, used for login and in log messages
    :param password: account password, passed through to ``do_login``
    :return: the logged-in session, or ``None`` when the login failed or the
        account is reported as forbidden (403)
    """
    # Only the redirect url and the session are needed here.
    url, _yundama_obj, _cid, session = do_login(name, password)

    if url == '':
        other.error('login failed for {}'.format(name))
        return None

    login_page = session.get(url, headers=headers).text
    uid_match = re.search(r'"uniqueid":"(.*)",', login_page)
    if not (uid_match and uid_match.group(1)):
        other.error('login failed for {}'.format(name))
        return None

    # check if account is valid
    probe = session.get('http://weibo.com/2671109275/about', headers=headers)
    if is_403(probe.text):
        other.error('account {} has been forbidden'.format(name))
        freeze_account(name, 0)
        return None

    other.info('The login account is {}'.format(name))

    # NOTE(review): the direct-message POST and the print below look like
    # leftover experimentation, and cookie persistence is disabled --
    # confirm this is intended before relying on this variant.
    message_fields = {'text': 'what are u doing', 'uid': 1794652091}
    reply = requests.post(
        'http://api.weibo.com/webim/2/direct_messages/new.json?source=209678993',
        data=message_fields,
        cookies=session.cookies,
        headers=personal_message_headers,
    )
    print(reply.text)
    # Cookies.store_cookies(name, session.cookies.get_dict())
    return session
def get_session(name, password):
    """Log the account in, validate it and persist its cookies.

    :param name: account name, used for login, logging and as cookie key
    :param password: account password, passed through to ``do_login``
    :return: the logged-in session, or ``None`` when the login failed or the
        account is reported as forbidden (403)
    """
    # Only the redirect url and the session are needed here.
    url, _yundama_obj, _cid, session = do_login(name, password)

    if url == '':
        other.error('login failed for {}'.format(name))
        return None

    login_page = session.get(url, headers=headers).text
    uid_match = re.search(r'"uniqueid":"(.*)",', login_page)
    if not (uid_match and uid_match.group(1)):
        other.error('login failed for {}'.format(name))
        return None

    # check if account is valid
    probe = session.get('http://weibo.com/2671109275/about', headers=headers)
    if is_403(probe.text):
        other.error('account {} has been forbidden'.format(name))
        freeze_account(name, 0)
        return None

    other.info('Login successful! The login account is {}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
def get_session():
    """Perform the Sina SSO login for the configured Weibo account.

    Credentials come from ``get_weibo_args()``; the encoded user name and
    password are produced with the JS runtime loaded from
    ``wblogin/sinalogin.js`` under the current working directory.

    :return: ``{'session': <session>, 'cookie': <dict of cookies>}`` on
        success, ``None`` when the login failed
    """
    credentials = get_weibo_args()
    session = requests.Session()

    js_runtime = get_runntime(os.path.join(os.getcwd(), 'wblogin/sinalogin.js'))
    account = credentials['name']
    su = get_encodename(account, js_runtime)

    post_url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'
    prelogin_url = (
        'http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&'
        'su=' + su + '&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.18)'
    )
    prelogin = get_prelogin_info(prelogin_url, session)
    sp = get_pass(credentials['password'], prelogin, js_runtime)

    # The submitted form fields can be obtained by capturing the login traffic.
    form = {
        'encoding': 'UTF-8',
        'entry': 'weibo',
        'from': '',
        'gateway': '1',
        'nonce': prelogin['nonce'],
        'pagerefer': '',
        'prelt': 67,
        'pwencode': 'rsa2',
        'returntype': 'META',
        'rsakv': prelogin['rsakv'],
        'savestate': '7',
        'servertime': prelogin['servertime'],
        'service': 'miniblog',
        'sp': sp,
        'sr': '1920*1080',
        'su': su,
        'useticket': '1',
        'vsnf': '1',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
    }

    redirect_result = get_redirect(form, post_url, session)
    url = redirect_result[0]
    if url == '':
        other.error('本次账号{name}登陆失败'.format(name=account))
        return None

    post_cookies = redirect_result[1]
    landing = session.get(url)
    landing_cookies = requests.utils.dict_from_cookiejar(landing.cookies)
    merged_cookies = requests.utils.add_dict_to_cookiejar(post_cookies, landing_cookies)

    uid_match = re.search(r'"uniqueid":"(.*)",', landing.text)
    if uid_match and uid_match.group(1):
        other.info('本次登陆账号为:{name}'.format(name=account))
        return {'session': session, 'cookie': dict(merged_cookies)}

    other.error('本次账号{name}登陆失败'.format(name=account))
    return None
def get_session(d):
    """Keep the simulated login refreshed forever; never returns.

    Calls ``_get_session(d)`` in an endless loop: after a successful call it
    sleeps ten hours before logging in again, after a failed one it logs a
    message and retries one minute later.

    :param d: passed through to ``_get_session`` unchanged
    """
    refresh_interval = 10 * 60 * 60  # seconds between successful logins
    retry_interval = 60  # seconds to wait after a failed login

    while True:
        if _get_session(d):
            time.sleep(refresh_interval)
            continue
        other.info('一分钟后重试模拟登录')
        time.sleep(retry_interval)
def start_scheduler():
    """Create a background scheduler that runs ``fetch_email_task`` every 10 minutes.

    The scheduler is cleared of jobs, the start time is logged, the interval
    job is registered (up to 10 concurrent instances) and the scheduler is
    started. The scheduler object is not returned.
    """
    scheduler = BackgroundScheduler()
    scheduler.remove_all_jobs()

    started_at = dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
    logger.info("启动定时,启动时间:%s" % started_at)

    scheduler.add_job(fetch_email_task, 'interval', minutes=10, max_instances=10)
    # One-shot alternative kept from the original for reference:
    # scheduler.add_job(fetch_email_task, 'date', run_date='2017-02-13 12:21:00')

    # NOTE(review): presumably start() returns immediately for a background
    # scheduler, so this handler may never fire -- confirm against the caller.
    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
def get_session(name, password):
    """Log the account in, validate it and persist its cookies.

    :param name: account name, used for login, logging and as cookie key
    :param password: account password, passed through to ``do_login``
    :return: the logged-in session, or ``None`` when the login failed or the
        account is reported as frozen (403)
    """
    # Only the redirect url and the session are needed here.
    url, _yundama_obj, _cid, session = do_login(name, password)

    if url == '':
        other.error('本次账号{}登陆失败'.format(name))
        return None

    login_page = session.get(url, headers=headers).text
    uid_match = re.search(r'"uniqueid":"(.*)",', login_page)
    if not (uid_match and uid_match.group(1)):
        other.error('本次账号{}登陆失败'.format(name))
        return None

    # Visit an official Weibo account page to see whether access is normal.
    probe = session.get('http://weibo.com/2671109275/about', headers=headers)

    # Experiments so far suggest that accounts which never passed phone
    # verification cannot be recovered...
    if is_403(probe.text):
        other.error('账号{}已被冻结'.format(name))
        freeze_account(name, 0)
        return None

    other.info('本次登陆账号为:{}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
def get_session(name, password):
    """Return a logged-in session for the account, or ``None`` on failure.

    On success the session cookies are stored under the account name. An
    account whose probe page reports a 403 is frozen and yields ``None``.

    :param name: account name, used for login, logging and as cookie key
    :param password: account password, passed through to ``do_login``
    :return: the logged-in session, or ``None``
    """
    login_result = do_login(name, password)
    # do_login yields four values; only the url and the session are used.
    url, _yundama_obj, _cid, session = login_result

    def _login_failed():
        # Single place for the failure log + sentinel return value.
        other.error('login failed for {}'.format(name))
        return None

    if url == '':
        return _login_failed()

    response = session.get(url, headers=headers)
    uid_match = re.search(r'"uniqueid":"(.*)",', response.text)
    if not uid_match or not uid_match.group(1):
        return _login_failed()

    # check if account is valid
    check_response = session.get('http://weibo.com/2671109275/about', headers=headers)
    if is_403(check_response.text):
        other.error('account {} has been forbidden'.format(name))
        freeze_account(name, 0)
        return None

    other.info('Login successful! The login account is {}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
def get_session(name, password):
    """Perform the Sina SSO login for one account and return its session.

    After login the account is probed against a fixed Weibo page; a 403
    there marks the account as frozen. On success the session cookies are
    stored under the account name.

    :param name: account name, used for login, logging and as cookie key
    :param password: plain password, encrypted via ``get_password``
    :return: the logged-in session, or ``None`` when the login failed or the
        account is frozen
    """
    session = requests.Session()
    su = get_encodename(name)

    server_data = get_server_data(su, session)
    servertime = server_data["servertime"]
    nonce = server_data['nonce']
    rsakv = server_data["rsakv"]
    pubkey = server_data["pubkey"]
    sp = get_password(password, servertime, nonce, pubkey)

    # The submitted form fields can be obtained by capturing the login traffic.
    form = {
        'encoding': 'UTF-8',
        'entry': 'weibo',
        'from': '',
        'gateway': '1',
        'nonce': nonce,
        'pagerefer': '',
        'prelt': 67,
        'pwencode': 'rsa2',
        'returntype': 'META',
        'rsakv': rsakv,
        'savestate': '7',
        'servertime': servertime,
        'service': 'miniblog',
        'sp': sp,
        'sr': '1920*1080',
        'su': su,
        'useticket': '1',
        'vsnf': '1',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
    }
    post_url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'

    url = get_redirect(form, post_url, session)
    if url == '':
        other.error('本次账号{}登陆失败'.format(name))
        return None

    login_page = session.get(url, headers=headers).text
    uid_match = re.search(r'"uniqueid":"(.*)",', login_page)
    if not uid_match or not uid_match.group(1):
        other.error('本次账号{}登陆失败'.format(name))
        return None

    # Probe an arbitrary page to see whether it is accessible. This check
    # depends on external conditions, but no better way has been found: in
    # some cases a problematic account can still open its own home page, so
    # validating the account through its own home page is not appropriate.
    probe = session.get(
        'http://weibo.com/p/1005051764222885/info?mod=pedit_more',
        headers=headers,
    )
    if is_403(probe.text):
        other.error('账号{}已被冻结'.format(name))
        crawler.warning('账号{}已经被冻结'.format(name))
        freeze_account(name)
        return None

    other.info('本次登陆账号为:{}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
# -*-coding:utf-8 -*-
# Fetch repost information.
from multiprocessing import Process, Manager
from time import sleep, ctime
from get_cookie import get_session
from task.repost import get_all
from logger.log import other


if __name__ == '__main__':
    # NOTE(review): is_sleep is assigned but never read in the visible code.
    is_sleep = 1

    while True:
        # Each round gets a fresh manager-backed dict that is handed to both
        # the login process and the crawl process.
        manager = Manager()
        shared = manager.dict()

        login_proc = Process(target=get_session, args=(shared, ))
        login_proc.daemon = True
        crawl_proc = Process(target=get_all, args=(shared, ))

        other.info('本轮抓取开始,开始时间为{endtime}'.format(endtime=ctime()))
        try:
            login_proc.start()
            crawl_proc.start()
        except Exception as e:
            other.error(e)
            is_sleep = 0

        # Wait for the crawl to finish, then stop the login process.
        crawl_proc.join()
        login_proc.terminate()
        other.info('本轮抓取已经结束,结束时间为{endtime}'.format(endtime=ctime()))

        # Join so that the terminated process can update its status.
        login_proc.join()