Esempio n. 1
0
def get_session(name, password, need_verify):
    """Log in with the given account and return a verified session.

    Calls ``do_login`` (repeating it while it reports ``'pinerror'`` and a
    captcha client object is available), follows the returned URL, and
    looks for a non-empty ``uniqueid`` in the response. The session is then
    used to fetch a fixed Weibo page; if ``is_403`` flags that response the
    account is frozen via ``freeze_account`` and ``None`` is returned.

    :param name: account name passed to ``do_login``
    :param password: account password passed to ``do_login``
    :param need_verify: forwarded unchanged to ``do_login``
    :return: the logged-in session, or ``None`` when login fails or the
        account is frozen
    """
    login_url, captcha_client, captcha_id, session = do_login(name, password, need_verify)
    # Captcha-solving error handling: report the wrong answer, then log in again.
    while login_url == 'pinerror' and captcha_client is not None:
        captcha_client.report_error(captcha_id)
        login_url, captcha_client, captcha_id, session = do_login(name, password, need_verify)

    uid_match = None
    if login_url != '':
        login_page = session.get(login_url, headers=headers).text
        uid_match = re.search(r'"uniqueid":"(.*)",', login_page)

    if uid_match is None or not uid_match.group(1):
        other.error('本次账号{}登陆失败'.format(name))
        return None

    # Probe an arbitrary page to see whether it is accessible. This check
    # depends on external conditions, but no better way was found: a
    # problematic account can sometimes still open its own home page, so
    # verifying against the account's own home page is not appropriate.
    probe = session.get('http://weibo.com/p/1005051764222885/info?mod=pedit_more',
                        headers=headers)
    # Experiments so far show that accounts without phone verification
    # cannot be recovered.
    if is_403(probe.text):
        other.error('账号{}已被冻结'.format(name))
        crawler.warning('账号{}已经被冻结'.format(name))
        freeze_account(name, 0)
        return None

    other.info('本次登陆账号为:{}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
Esempio n. 2
0
 def wrapper(*args, **kwargs):
     """Call ``func`` and convert any ordinary failure into ``e``.

     ``func``, ``e`` and ``platform`` are taken from the enclosing scope,
     which is not visible here (``e`` is presumably an exception class).

     :return: whatever ``func(*args, **kwargs)`` returns
     :raises e: built from the formatted traceback and ``platform`` when
         ``func`` raises an ``Exception``
     """
     try:
         return func(*args, **kwargs)
     except Exception:
         # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
         # and SystemExit propagate untouched instead of being rewrapped.
         trace_text = traceback.format_exc()
         logger.error(trace_text)
         raise e(trace_text, platform)
def get_session(name, password):
    """Log in, verify the account, send a test direct message, return the session.

    After ``do_login`` the returned URL is fetched and searched for a
    non-empty ``uniqueid``. A fixed Weibo page is then requested; if
    ``is_403`` flags the response the account is frozen and ``None`` is
    returned. On success a direct message is posted with the session
    cookies and the response body is printed.

    :param name: account name
    :param password: account password
    :return: the logged-in session, or ``None`` on failure
    """
    url, _captcha_client, _captcha_id, session = do_login(name, password)

    matched = None
    if url != '':
        login_info = session.get(url, headers=headers).text
        matched = re.search(r'"uniqueid":"(.*)",', login_info)

    if matched is None or not matched.group(1):
        other.error('login failed for {}'.format(name))
        return None

    # check if account is valid
    validity_resp = session.get('http://weibo.com/2671109275/about', headers=headers)
    if is_403(validity_resp.text):
        other.error('account {} has been forbidden'.format(name))
        freeze_account(name, 0)
        return None

    other.info('The login account is {}'.format(name))

    reply = requests.post(
        'http://api.weibo.com/webim/2/direct_messages/new.json?source=209678993',
        data={'text': 'what are u doing', 'uid': 1794652091},
        cookies=session.cookies,
        headers=personal_message_headers,
    )
    print(reply.text)
    # NOTE: this variant does not persist the cookies via Cookies.store_cookies.
    return session
def get_session(name, password):
    """Log in with the given account and return a verified session.

    The URL returned by ``do_login`` is fetched and searched for a
    non-empty ``uniqueid``. A fixed Weibo page is then requested; if
    ``is_403`` flags the response the account is frozen and ``None`` is
    returned. Otherwise the session cookies are stored under ``name``.

    :param name: account name
    :param password: account password
    :return: the logged-in session, or ``None`` on failure
    """
    url, _, _, session = do_login(name, password)

    logged_in = False
    if url != '':
        body = session.get(url, headers=headers).text
        found = re.search(r'"uniqueid":"(.*)",', body)
        logged_in = bool(found and found.group(1))

    if not logged_in:
        other.error('login failed for {}'.format(name))
        return None

    # check if account is valid
    about_page = session.get('http://weibo.com/2671109275/about', headers=headers)
    if is_403(about_page.text):
        other.error('account {} has been forbidden'.format(name))
        freeze_account(name, 0)
        return None

    other.info('Login successful! The login account is {}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
Esempio n. 5
0
def fetch_email_task():
    """Dispatch one Celery task per object returned by ``DBStore.objects()``.

    For every object a ``apps.celery_init.start_add_task`` task is sent to
    the ``start_spider_task`` queue with the object's ``_id`` (as a string)
    and ``100`` as arguments. A failure for one object is logged with its
    traceback and does not stop the remaining objects.
    """
    for mail_conf_ in DBStore.objects():
        try:
            flask_celery.send_task(
                "apps.celery_init.start_add_task",
                queue='start_spider_task',
                args=(str(mail_conf_._id), 100))
        except Exception:
            # The placeholder must be "{}" for str.format; the former "%s"
            # was left verbatim and the traceback never reached the log.
            logger.error("调用celery执行邮件接收失败,{}.".format(traceback.format_exc()))
Esempio n. 6
0
def get_session():
    """Perform the Sina SSO login and return the session with its cookies.

    Credentials come from ``get_weibo_args()``. The user name and password
    are encoded with helpers driven by ``wblogin/sinalogin.js`` (resolved
    against the current working directory), the login form is posted via
    ``get_redirect``, and the resulting URL is fetched and searched for a
    non-empty ``uniqueid``.

    :return: ``{'session': session, 'cookie': dict}`` on success, otherwise
        ``None``
    """
    credentials = get_weibo_args()
    session = requests.Session()
    runntime = get_runntime(os.path.join(os.getcwd(), 'wblogin/sinalogin.js'))

    su = get_encodename(credentials['name'], runntime)
    prelogin_url = 'http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&' \
                   'su=' + su + '&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.18)'
    post_url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'
    pre_obj = get_prelogin_info(prelogin_url, session)
    sp = get_pass(credentials['password'], pre_obj, runntime)

    # The form fields to submit can be obtained by capturing the login traffic.
    form = {
        'encoding': 'UTF-8',
        'entry': 'weibo',
        'from': '',
        'gateway': '1',
        'nonce': pre_obj['nonce'],
        'pagerefer': "",
        'prelt': 67,
        'pwencode': 'rsa2',
        "returntype": "META",
        'rsakv': pre_obj['rsakv'],
        'savestate': '7',
        'servertime': pre_obj['servertime'],
        'service': 'miniblog',
        'sp': sp,
        'sr': '1920*1080',
        'su': su,
        'useticket': '1',
        'vsnf': '1',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack'
    }

    redirect_result = get_redirect(form, post_url, session)
    url = redirect_result[0]

    merged_jar = None
    uid_match = None
    if url != '':
        post_cookies = redirect_result[1]
        rs_cont = session.get(url)
        # Fold the cookies of the final response into the jar from the login POST.
        merged_jar = requests.utils.add_dict_to_cookiejar(
            post_cookies, requests.utils.dict_from_cookiejar(rs_cont.cookies))
        uid_match = re.search(r'"uniqueid":"(.*)",', rs_cont.text)

    if uid_match is not None and uid_match.group(1):
        other.info('本次登陆账号为:{name}'.format(name=credentials['name']))
        return {'session': session, 'cookie': dict(merged_jar)}

    other.error('本次账号{name}登陆失败'.format(name=credentials['name']))
    return None
Esempio n. 7
0
def jd_seckill_task(skuId):
    """Dispatch a Celery seckill task for every (user, address) pair.

    Users are read from the ``JD`` database in up to 100 pages of 100
    documents; users with empty ``cookies`` are skipped. For each remaining
    user, one ``celery_tasks.jd_seckill.jd_seckill.jd_seckill_task`` task
    per address document is sent to the ``jd_seckill_task`` queue, with the
    JSON-encoded user, the JSON-encoded address, ``skuId`` and a proxy from
    the proxy pool. A failure for one pair is printed and logged and does
    not stop the remaining pairs.

    :param skuId: forwarded unchanged to every dispatched task
    """
    mongdb_conn = DBStore.get_datastores()
    mydb = mongdb_conn['JD']
    ppool = ProxyStore.get_proxyPoolstores()
    copied_fields = ("last_refresh", "last_pool", "alive", "username",
                     "created_time", "eid", "fp")
    for i in range(100):
        # TODO: the user/address strategy needs adjusting; tasks are currently
        # the Cartesian product of users and addresses.
        jd_users = mydb.Users.find({}).limit(100).skip(100 * i)
        for jd_user in jd_users:
            if not jd_user['cookies']:
                continue

            jd_user_dict = {
                "_id": str(jd_user["_id"]),
                "password": jd_user["password"],
            }
            # NOTE(review): the decoded value was never used; the call is kept
            # so malformed base64 still raises here as before. Confirm whether
            # the decoded cookies were meant to be forwarded instead.
            base64.b64decode(jd_user["cookies"])
            jd_user_dict["cookies"] = jd_user["cookies"].decode()
            for field in copied_fields:
                jd_user_dict[field] = jd_user[field]

            for address in mydb.Address.find({}):
                try:
                    address_string = json.dumps(address, cls=DateEncoder)
                    flask_celery.send_task(
                        "celery_tasks.jd_seckill.jd_seckill.jd_seckill_task",
                        queue='jd_seckill_task',
                        args=(json.dumps(jd_user_dict), address_string, skuId,
                              ppool.getProxy()))
                except Exception:
                    error_text = traceback.format_exc()
                    print(error_text)
                    # The placeholder must be "{}" for str.format; the former
                    # "%s" was left verbatim and the traceback was dropped.
                    logger.error("调用celery执行京东秒杀任务,{}.".format(error_text))
Esempio n. 8
0
def get_session(name, password):
    """Log in with the given account and return a verified session.

    The URL returned by ``do_login`` is fetched and searched for a
    non-empty ``uniqueid``. A fixed Weibo page is then requested; if
    ``is_403`` flags the response the account is frozen and ``None`` is
    returned. Otherwise the session cookies are stored under ``name``.

    :param name: account name
    :param password: account password
    :return: the logged-in session, or ``None`` on failure
    """
    url, _, _, session = do_login(name, password)

    unique_id = ''
    if url != '':
        hit = re.search(r'"uniqueid":"(.*)",', session.get(url, headers=headers).text)
        if hit:
            unique_id = hit.group(1)

    if not unique_id:
        other.error('本次账号{}登陆失败'.format(name))
        return None

    # Visit an official Weibo account page to see whether access is normal.
    official_page = session.get('http://weibo.com/2671109275/about', headers=headers)
    # Experiments so far show that accounts without phone verification
    # cannot be recovered.
    if is_403(official_page.text):
        other.error('账号{}已被冻结'.format(name))
        freeze_account(name, 0)
        return None

    other.info('本次登陆账号为:{}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
Esempio n. 9
0
def get_session(name, password):
    """Log in with the given account and return a verified session.

    The URL returned by ``do_login`` is fetched and searched for a
    non-empty ``uniqueid``. A fixed Weibo page is then requested; if
    ``is_403`` flags the response the account is frozen and ``None`` is
    returned. Otherwise the session cookies are stored under ``name``.

    :param name: account name
    :param password: account password
    :return: the logged-in session, or ``None`` on failure
    """
    url, _unused_client, _unused_cid, session = do_login(name, password)

    login_ok = url != ''
    if login_ok:
        login_page = session.get(url, headers=headers)
        uid = re.search(r'"uniqueid":"(.*)",', login_page.text)
        login_ok = uid is not None and bool(uid.group(1))

    if login_ok:
        # check if account is valid
        check_resp = session.get('http://weibo.com/2671109275/about', headers=headers)
        if is_403(check_resp.text):
            other.error('account {} has been forbidden'.format(name))
            freeze_account(name, 0)
            return None

        other.info('Login successful! The login account is {}'.format(name))
        Cookies.store_cookies(name, session.cookies.get_dict())
        return session

    other.error('login failed for {}'.format(name))
    return None
Esempio n. 10
0
def get_session(name, password):
    """Perform the Sina SSO login and return a verified session.

    The encoded user name and password are posted with the values from
    ``get_server_data``. The URL returned by ``get_redirect`` is fetched
    and searched for a non-empty ``uniqueid``. A fixed Weibo page is then
    requested; if ``is_403`` flags the response the account is frozen and
    ``None`` is returned. Otherwise the session cookies are stored under
    ``name``.

    :param name: account name
    :param password: account password
    :return: the logged-in session, or ``None`` on failure
    """
    session = requests.Session()
    su = get_encodename(name)

    sever_data = get_server_data(su, session)
    servertime, nonce, rsakv, pubkey = (
        sever_data[key] for key in ("servertime", 'nonce', "rsakv", "pubkey"))

    sp = get_password(password, servertime, nonce, pubkey)

    # The form fields to submit can be obtained by capturing the login traffic.
    form = {
        'encoding': 'UTF-8',
        'entry': 'weibo',
        'from': '',
        'gateway': '1',
        'nonce': nonce,
        'pagerefer': "",
        'prelt': 67,
        'pwencode': 'rsa2',
        "returntype": "META",
        'rsakv': rsakv,
        'savestate': '7',
        'servertime': servertime,
        'service': 'miniblog',
        'sp': sp,
        'sr': '1920*1080',
        'su': su,
        'useticket': '1',
        'vsnf': '1',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack'
    }
    url = get_redirect(
        form, 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)', session)

    uid_match = None
    if url != '':
        login_info = session.get(url, headers=headers).text
        uid_match = re.search(r'"uniqueid":"(.*)",', login_info)

    if uid_match is None or not uid_match.group(1):
        other.error('本次账号{}登陆失败'.format(name))
        return None

    # Probe an arbitrary page to see whether it is accessible. This check
    # depends on external conditions, but no better way was found: a
    # problematic account can sometimes still open its own home page, so
    # verifying against the account's own home page is not appropriate.
    probe = session.get('http://weibo.com/p/1005051764222885/info?mod=pedit_more',
                        headers=headers)
    if is_403(probe.text):
        other.error('账号{}已被冻结'.format(name))
        crawler.warning('账号{}已经被冻结'.format(name))
        freeze_account(name)
        return None

    other.info('本次登陆账号为:{}'.format(name))
    Cookies.store_cookies(name, session.cookies.get_dict())
    return session
Esempio n. 11
0
from get_cookie import get_session
from task.repost import get_all
from logger.log import other

if __name__ == '__main__':
    # Endless crawl loop: each round starts a daemon process running
    # get_session and a process running get_all, both sharing one managed
    # dict, waits for get_all to finish, then stops the get_session process.
    while 1:
        # Reset every round so that one failed start does not disable the
        # pause for all later rounds.
        is_sleep = 1
        mgr = Manager()
        d = mgr.dict()
        pw = Process(target=get_session, args=(d, ))
        pw.daemon = True
        pr = Process(target=get_all, args=(d, ))
        other.info('本轮抓取开始,开始时间为{endtime}'.format(endtime=ctime()))

        try:
            pw.start()
            pr.start()
        except Exception as e:
            other.error(e)
            is_sleep = 0

        # join() and terminate() fail on a process that was never started, and
        # ``pid`` stays None until start() succeeds, so only touch the
        # processes that are actually running.
        if pr.pid is not None:
            pr.join()

        if pw.pid is not None:
            pw.terminate()
        other.info('本轮抓取已经结束,结束时间为{endtime}'.format(endtime=ctime()))
        if pw.pid is not None:
            # Join after terminate so the process status gets updated.
            pw.join()

        if is_sleep:
            sleep(2 * 60 * 60)