def get_session(name, password, need_verify):
    """Log in with the given account and return a usable session.

    Calls ``do_login`` (repeating it while it reports ``'pinerror'`` and a
    ``yundama_obj`` is available), follows the returned redirect URL, checks
    the response for a ``uniqueid`` field, and finally probes a fixed Weibo
    page to detect a frozen account.

    :param name: account name passed to ``do_login``; also used in log
        messages and as the key for ``Cookies.store_cookies``.
    :param password: account password passed to ``do_login``.
    :param need_verify: forwarded unchanged to ``do_login``.
    :return: the logged-in session on success; ``None`` when login fails or
        when the probe page is reported as 403 by ``is_403``.
    """
    login_url, captcha_client, captcha_id, session = do_login(name, password, need_verify)

    # Captcha-recognition error handling: report the bad result and log in again.
    while login_url == 'pinerror' and captcha_client is not None:
        captcha_client.report_error(captcha_id)
        login_url, captcha_client, captcha_id, session = do_login(name, password, need_verify)

    if login_url != '':
        login_page = session.get(login_url, headers=headers).text
        uid_match = re.search(r'"uniqueid":"(.*)",', login_page)
        if uid_match and uid_match.group(1):
            # Probe an arbitrary page to see whether it is accessible. This
            # check depends on external conditions, but no better approach
            # was found (a problematic account can sometimes still open its
            # own home page, so checking the own home page is not reliable).
            probe_url = 'http://weibo.com/p/1005051764222885/info?mod=pedit_more'
            probe = session.get(probe_url, headers=headers)

            if not is_403(probe.text):
                other.info('本次登陆账号为:{}'.format(name))
                Cookies.store_cookies(name, session.cookies.get_dict())
                return session

            # Experiments so far suggest that accounts which never passed
            # phone verification cannot be recovered.
            other.error('账号{}已被冻结'.format(name))
            crawler.warning('账号{}已经被冻结'.format(name))
            freeze_account(name, 0)
            return None

    other.error('本次账号{}登陆失败'.format(name))
    return None
def wrapper(*args, **kwargs):
    """Call ``func`` and convert any failure into an ``e`` exception.

    ``func``, ``logger``, ``e`` and ``platform`` are not defined here; they
    are presumably supplied by an enclosing decorator scope (not visible in
    this chunk).

    :return: whatever ``func(*args, **kwargs)`` returns.
    :raises: ``e(<formatted traceback>, platform)`` whenever ``func`` raises.
    """
    try:
        return func(*args, **kwargs)
    except BaseException:
        # BaseException is what a bare ``except:`` catches, so this also
        # intercepts KeyboardInterrupt / SystemExit exactly as before.
        trace_text = traceback.format_exc()
        logger.error(trace_text)
        raise e(trace_text, platform)
def get_session(name, password):
    """Log in, verify the account, then send a test direct message.

    After a successful login and account check this variant posts a fixed
    direct message to the Weibo webim API using the session cookies and
    prints the raw response. Cookies are NOT persisted here.

    :param name: account name passed to ``do_login`` and used in log messages.
    :param password: account password passed to ``do_login``.
    :return: the logged-in session on success; ``None`` when login fails or
        the check page is reported as 403 by ``is_403``.
    """
    login_url, _captcha_client, _captcha_id, session = do_login(name, password)

    if login_url != '':
        login_page = session.get(login_url, headers=headers).text
        uid_match = re.search(r'"uniqueid":"(.*)",', login_page)
        if uid_match and uid_match.group(1):
            # Check whether the account is valid.
            check_resp = session.get('http://weibo.com/2671109275/about', headers=headers)
            if is_403(check_resp.text):
                other.error('account {} has been forbidden'.format(name))
                freeze_account(name, 0)
                return None

            other.info('The login account is {}'.format(name))

            message_api = 'http://api.weibo.com/webim/2/direct_messages/new.json?source=209678993'
            payload = {'text': 'what are u doing', 'uid': 1794652091}
            reply = requests.post(message_api,
                                  data=payload,
                                  cookies=session.cookies,
                                  headers=personal_message_headers)
            print(reply.text)
            # NOTE: cookie persistence is intentionally left disabled here:
            # Cookies.store_cookies(name, session.cookies.get_dict())
            return session

    other.error('login failed for {}'.format(name))
    return None
def get_session(name, password):
    """Log in with the given account and return a verified session.

    :param name: account name passed to ``do_login``; also used in log
        messages and as the key for ``Cookies.store_cookies``.
    :param password: account password passed to ``do_login``.
    :return: the logged-in session with its cookies stored, or ``None`` when
        login fails or the check page is reported as 403 by ``is_403``.
    """
    redirect_url, _, _, session = do_login(name, password)

    # Login counts as successful only when the redirect page carries a
    # non-empty "uniqueid" value.
    logged_in = False
    if redirect_url != '':
        page_text = session.get(redirect_url, headers=headers).text
        found = re.search(r'"uniqueid":"(.*)",', page_text)
        logged_in = bool(found and found.group(1))

    if not logged_in:
        other.error('login failed for {}'.format(name))
        return None

    # Check whether the account is valid.
    about_page = session.get('http://weibo.com/2671109275/about', headers=headers)
    if is_403(about_page.text):
        other.error('account {} has been forbidden'.format(name))
        freeze_account(name, 0)
        return None

    other.info('Login successful! The login account is {}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
def fetch_email_task():
    """Queue one Celery task per mail configuration returned by ``DBStore``.

    For every object from ``DBStore.objects()`` the string form of its
    ``_id`` is sent, together with the constant ``100``, to the
    ``apps.celery_init.start_add_task`` task on the ``start_spider_task``
    queue.

    Dispatch is best effort: a failure for one configuration is logged with
    its traceback and the loop moves on to the next one.

    :return: ``None``.
    """
    for mail_conf in DBStore.objects():
        try:
            flask_celery.send_task(
                "apps.celery_init.start_add_task",
                queue='start_spider_task',
                args=(str(mail_conf._id), 100))
        except Exception:
            # The original template used "%s" together with str.format(), so
            # the traceback was never inserted; "{}" makes it appear in the log.
            logger.error("调用celery执行邮件接收失败,{}.".format(traceback.format_exc()))
def get_session():
    """Log in to Weibo through the Sina SSO endpoint.

    Credentials come from ``get_weibo_args()``. The user name and password
    are encoded with helpers driven by the ``wblogin/sinalogin.js`` script
    (resolved relative to the current working directory), the login form is
    posted, and the redirect page is checked for a ``uniqueid`` value.

    :return: ``{'session': <session>, 'cookie': <dict of cookies>}`` on
        success, ``None`` on any login failure.
    """
    credentials = get_weibo_args()
    account = credentials['name']
    session = requests.Session()

    runntime = get_runntime(os.path.join(os.getcwd(), 'wblogin/sinalogin.js'))
    su = get_encodename(account, runntime)

    post_url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'
    prelogin_url = ('http://login.sina.com.cn/sso/prelogin.php?entry=weibo'
                    '&callback=sinaSSOController.preloginCallBack&'
                    'su=' + su + '&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.18)')
    pre_obj = get_prelogin_info(prelogin_url, session)
    sp = get_pass(credentials['password'], pre_obj, runntime)

    # The fields to submit can be obtained by capturing the login traffic.
    form = {
        'encoding': 'UTF-8',
        'entry': 'weibo',
        'from': '',
        'gateway': '1',
        'nonce': pre_obj['nonce'],
        'pagerefer': "",
        'prelt': 67,
        'pwencode': 'rsa2',
        "returntype": "META",
        'rsakv': pre_obj['rsakv'],
        'savestate': '7',
        'servertime': pre_obj['servertime'],
        'service': 'miniblog',
        'sp': sp,
        'sr': '1920*1080',
        'su': su,
        'useticket': '1',
        'vsnf': '1',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
    }

    redirect_info = get_redirect(form, post_url, session)
    redirect_url = redirect_info[0]

    if redirect_url != '':
        post_cookies = redirect_info[1]
        landing = session.get(redirect_url)
        # Merge the cookies of the landing page into the jar from the POST.
        landing_cookies = requests.utils.dict_from_cookiejar(landing.cookies)
        merged_jar = requests.utils.add_dict_to_cookiejar(post_cookies, landing_cookies)

        uid_match = re.search(r'"uniqueid":"(.*)",', landing.text)
        if uid_match and uid_match.group(1):
            other.info('本次登陆账号为:{name}'.format(name=account))
            return {'session': session, 'cookie': dict(merged_jar)}

    # Every failure path (empty redirect, no match, empty uniqueid) ends here.
    other.error('本次账号{name}登陆失败'.format(name=account))
    return None
def jd_seckill_task(skuId):
    """Dispatch a Celery flash-sale task for every (user, address) pair.

    Users are read from the ``JD`` database in up to 100 pages of 100
    documents; users whose ``cookies`` field is falsy are skipped. For each
    remaining user every document of the ``Address`` collection is combined
    with it and sent to the
    ``celery_tasks.jd_seckill.jd_seckill.jd_seckill_task`` task on the
    ``jd_seckill_task`` queue, together with ``skuId`` and a proxy obtained
    from the proxy pool.

    Dispatch is best effort: a failure for one pair is printed and logged
    with its traceback, then the loop continues.

    :param skuId: forwarded unchanged as the third task argument.
    :return: ``None``.
    """
    mydb = DBStore.get_datastores()['JD']
    ppool = ProxyStore.get_proxyPoolstores()

    for page in range(100):
        # TODO: the user/address strategy needs adjusting; for now the tasks
        # are the Cartesian product of users and addresses.
        jd_users = mydb.Users.find({}).limit(100).skip(100 * page)
        for jd_user in jd_users:
            if not jd_user['cookies']:
                continue

            # The decoded bytes were never used by the original code; the call
            # is kept so malformed base64 still raises here.
            # NOTE(review): the task receives the still-encoded value - confirm
            # that this is what the consumer expects.
            base64.b64decode(jd_user["cookies"])

            jd_user_dict = {
                "_id": str(jd_user["_id"]),
                "password": jd_user["password"],
                "cookies": jd_user["cookies"].decode(),
                "last_refresh": jd_user["last_refresh"],
                "last_pool": jd_user["last_pool"],
                "alive": jd_user["alive"],
                "username": jd_user["username"],
                "created_time": jd_user["created_time"],
                "eid": jd_user["eid"],
                "fp": jd_user["fp"],
            }

            for address in mydb.Address.find({}):
                try:
                    address_string = json.dumps(address, cls=DateEncoder)
                    flask_celery.send_task(
                        "celery_tasks.jd_seckill.jd_seckill.jd_seckill_task",
                        queue='jd_seckill_task',
                        args=(json.dumps(jd_user_dict), address_string,
                              skuId, ppool.getProxy()))
                except Exception:
                    trace_text = traceback.format_exc()
                    print(trace_text)
                    # The original template used "%s" together with
                    # str.format(), so the traceback was never inserted.
                    logger.error("调用celery执行京东秒杀任务,{}.".format(trace_text))
def get_session(name, password):
    """Log in with the given account and return a verified session.

    :param name: account name passed to ``do_login``; also used in log
        messages and as the key for ``Cookies.store_cookies``.
    :param password: account password passed to ``do_login``.
    :return: the logged-in session with its cookies stored, or ``None`` when
        login fails or the check page is reported as 403 by ``is_403``.
    """
    login_url, _yundama, _cid, session = do_login(name, password)

    if login_url != '':
        landing_text = session.get(login_url, headers=headers).text
        hit = re.search(r'"uniqueid":"(.*)",', landing_text)
        if hit and hit.group(1):
            # Visit an official Weibo account page to see whether this
            # account still works normally.
            official_page = session.get('http://weibo.com/2671109275/about',
                                        headers=headers)

            if not is_403(official_page.text):
                other.info('本次登陆账号为:{}'.format(name))
                Cookies.store_cookies(name, session.cookies.get_dict())
                return session

            # Experiments so far suggest that accounts which never passed
            # phone verification cannot be recovered.
            other.error('账号{}已被冻结'.format(name))
            freeze_account(name, 0)
            return None

    other.error('本次账号{}登陆失败'.format(name))
    return None
def get_session(name, password):
    """Log in with the given account and return a verified session.

    :param name: account name passed to ``do_login``; also used in log
        messages and as the key for ``Cookies.store_cookies``.
    :param password: account password passed to ``do_login``.
    :return: the logged-in session with its cookies stored, or ``None`` when
        login fails or the check page is reported as 403 by ``is_403``.
    """
    target, _unused_obj, _unused_cid, session = do_login(name, password)

    # Extract the "uniqueid" value from the redirect page; stays None when
    # there is no redirect URL or the pattern is absent.
    unique_id = None
    if target != '':
        response = session.get(target, headers=headers)
        match = re.search(r'"uniqueid":"(.*)",', response.text)
        if match:
            unique_id = match.group(1)

    if unique_id:
        # Check whether the account is valid.
        verdict = session.get('http://weibo.com/2671109275/about', headers=headers)
        if is_403(verdict.text):
            other.error('account {} has been forbidden'.format(name))
            freeze_account(name, 0)
            return None
        other.info('Login successful! The login account is {}'.format(name))
        Cookies.store_cookies(name, session.cookies.get_dict())
        return session

    other.error('login failed for {}'.format(name))
    return None
def get_session(name, password):
    """Log in to Weibo through the Sina SSO endpoint and verify the account.

    Builds the SSO login form from the pre-login server data, posts it,
    follows the redirect and checks the page for a ``uniqueid`` value. A
    fixed Weibo page is then probed to detect a frozen account.

    :param name: account name; encoded for the form, used in log messages
        and as the key for ``Cookies.store_cookies``.
    :param password: account password, encrypted via ``get_password``.
    :return: the logged-in session with its cookies stored, or ``None`` when
        login fails or the probe page is reported as 403 by ``is_403``.
    """
    session = requests.Session()
    su = get_encodename(name)

    server_data = get_server_data(su, session)
    servertime = server_data["servertime"]
    nonce = server_data['nonce']
    rsakv = server_data["rsakv"]
    pubkey = server_data["pubkey"]
    sp = get_password(password, servertime, nonce, pubkey)

    # The fields to submit can be obtained by capturing the login traffic.
    form = {
        'encoding': 'UTF-8',
        'entry': 'weibo',
        'from': '',
        'gateway': '1',
        'nonce': nonce,
        'pagerefer': "",
        'prelt': 67,
        'pwencode': 'rsa2',
        "returntype": "META",
        'rsakv': rsakv,
        'savestate': '7',
        'servertime': servertime,
        'service': 'miniblog',
        'sp': sp,
        'sr': '1920*1080',
        'su': su,
        'useticket': '1',
        'vsnf': '1',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
    }
    post_url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'
    redirect_url = get_redirect(form, post_url, session)

    # Login succeeded only if the redirect page exposes a non-empty uniqueid.
    logged_in = False
    if redirect_url != '':
        landing_text = session.get(redirect_url, headers=headers).text
        uid_match = re.search(r'"uniqueid":"(.*)",', landing_text)
        logged_in = bool(uid_match and uid_match.group(1))

    if not logged_in:
        other.error('本次账号{}登陆失败'.format(name))
        return None

    # Probe an arbitrary page to see whether it is accessible. This check
    # depends on external conditions, but no better approach was found (a
    # problematic account can sometimes still open its own home page, so
    # checking the own home page is not reliable).
    probe_url = 'http://weibo.com/p/1005051764222885/info?mod=pedit_more'
    probe = session.get(probe_url, headers=headers)
    if is_403(probe.text):
        other.error('账号{}已被冻结'.format(name))
        crawler.warning('账号{}已经被冻结'.format(name))
        freeze_account(name)
        return None

    other.info('本次登陆账号为:{}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
from get_cookie import get_session
from task.repost import get_all
from logger.log import other

# Entry point: run crawl rounds forever. Each round starts a cookie-refresh
# process (get_session) and a crawl process (get_all) that share a
# Manager-backed dict, waits for the crawl to finish, stops the cookie
# process, and then pauses for two hours before the next round.
if __name__ == '__main__':
    while 1:
        # Reset every round: previously the flag was initialised once outside
        # the loop, so a single failed start disabled the pause for good.
        is_sleep = 1
        mgr = Manager()
        d = mgr.dict()
        pw = Process(target=get_session, args=(d, ))
        pw.daemon = True
        pr = Process(target=get_all, args=(d, ))
        other.info('本轮抓取开始,开始时间为{endtime}'.format(endtime=ctime()))
        try:
            pw.start()
            pr.start()
        except Exception as e:
            other.error(e)
            # Skip the pause so the next round is attempted immediately.
            is_sleep = 0
        else:
            # Only wait for the crawler when both processes really started;
            # join() on an unstarted Process raises.
            pr.join()

        # Process.pid stays None until start() has succeeded; terminate() and
        # join() must not be called on a process that never started.
        pw_started = pw.pid is not None
        if pw_started:
            pw.terminate()
        other.info('本轮抓取已经结束,结束时间为{endtime}'.format(endtime=ctime()))
        if pw_started:
            # Join so the terminated process can update its status.
            pw.join()

        if is_sleep:
            sleep(2 * 60 * 60)