Exemple #1
0
def get_session(name, password):
    proxy = getip.getIP("")

    url, yundama_obj, cid, session = do_login(name, password, proxy)

    if url != '':
        rs_cont = session.get(url, headers=headers, proxies=proxy)
        login_info = rs_cont.text

        u_pattern = r'"uniqueid":"(.*)",'
        m = re.search(u_pattern, login_info)
        if m and m.group(1):
            # check if account is valid
            check_url = 'http://weibo.com/2671109275/about'
            resp = session.get(check_url, headers=headers, proxies=proxy)

            if is_403(resp.text):
                other.error('account {} has been forbidden'.format(name))
                LoginInfoOper.freeze_account(name, 0)
                return None
            other.info(
                'Login successful! The login account is {}'.format(name))
            Cookies.store_cookies(name, session.cookies.get_dict(),
                                  proxy['http'])
            return session

    other.error('login failed for {}'.format(name))
    return None
Exemple #2
0
def get_session(name, password, need_verify):
    url, yundama_obj, cid, session = do_login(name, password, need_verify)
    # 打码出错处理
    while url == 'pinerror' and yundama_obj is not None:
        yundama_obj.report_error(cid)
        url, yundama_obj, cid, session = do_login(name, password, need_verify)

    if url != '':
        rs_cont = session.get(url, headers=headers)
        login_info = rs_cont.text

        u_pattern = r'"uniqueid":"(.*)",'
        m = re.search(u_pattern, login_info)
        if m and m.group(1):
            # 任意验证一个页面看能否访问,使用这个方法验证比较依赖外部条件,但是没找到更好的方式(有的情况下,
            # 账号存在问题,但是可以访问自己的主页,所以通过自己的主页验证账号是否正常不恰当)
            check_url = 'http://weibo.com/p/1005051764222885/info?mod=pedit_more'
            resp = session.get(check_url, headers=headers)
            # 通过实验,目前发现未经过手机验证的账号是救不回来了...
            if is_403(resp.text):
                other.error('账号{}已被冻结'.format(name))
                crawler.warning('账号{}已经被冻结'.format(name))
                freeze_account(name, 0)
                return None
            other.info('本次登陆账号为:{}'.format(name))
            Cookies.store_cookies(name, session.cookies.get_dict())
            return session

    other.error('本次账号{}登陆失败'.format(name))
    return None
Exemple #3
0
def get_session(name, password):
    proxy = getip.getIP("")

    url, yundama_obj, cid, session = do_login(name, password, proxy)

    if url != '':
        rs_cont = session.get(url, headers=headers, proxies=proxy)
        login_info = rs_cont.text

        u_pattern = r'"uniqueid":"(.*)",'
        m = re.search(u_pattern, login_info)
        if m and m.group(1):
            # check if account is valid
            check_url = 'http://weibo.com/2671109275/about'
            resp = session.get(check_url, headers=headers, proxies=proxy)

            if is_403(resp.text):
                other.error('account {} has been forbidden'.format(name))
                LoginInfoOper.freeze_account(name, 0)
                return None
            other.info('Login successful! The login account is {}'.format(name))
            Cookies.store_cookies(name, session.cookies.get_dict(), proxy['http'])
            return session
        
    other.error('login failed for {}'.format(name))
    return None
Exemple #4
0
    def login(self):
        user_name = input('请输入登陆名: ')
        password = input('请输入密码: ')

        #判断登录方式
        if re.match(r'\d{4}-\d{7}|1[34578]\d{9}', user_name):
            print('当前登录方式:手机登陆')
            post_url = 'https://www.zhihu.com/login/phone_num'
            post_data = {
                'phone_num': user_name,
                'password': password,
                '_xsrf': self.getXsrf()
            }
        else:
            print('当前登录方式:邮箱登陆')
            post_url = 'https://www.zhihu.com/login/email'
            post_data = {
                'email': user_name,
                'password': password,
                '_xsrf': self.getXsrf()
            }
        try:
            login_page = self.session.post(post_url,
                                           headers=headers,
                                           data=post_data)
            print(login_page.status)
            print('直接登陆成功!')
        except:
            print('直接登陆失败!')
            post_data['captcha'] = self.getCaptcha()
            login_page = self.session.post(post_url,
                                           headers=headers,
                                           data=post_data)
            Cookies.store_cookies(user_name, self.session.cookies.get_dict())
            login_code = eval(login_page.text)
            print(login_code['msg'])
Exemple #5
0
def get_session(name, password):
    url, yundama_obj, cid, session = do_login(name, password)

    if url != '':
        rs_cont = session.get(url, headers=headers)
        login_info = rs_cont.text

        u_pattern = r'"uniqueid":"(.*)",'
        m = re.search(u_pattern, login_info)
        if m and m.group(1):
            # 访问微博官方账号看是否正常
            check_url = 'http://weibo.com/2671109275/about'
            resp = session.get(check_url, headers=headers)
            # 通过实验,目前发现未经过手机验证的账号是救不回来了...
            if is_403(resp.text):
                other.error('账号{}已被冻结'.format(name))
                freeze_account(name, 0)
                return None
            other.info('本次登陆账号为:{}'.format(name))
            Cookies.store_cookies(name, session.cookies.get_dict())
            return session

    other.error('本次账号{}登陆失败'.format(name))
    return None
Exemple #6
0
def send_personal_message(target_uid, adver_message, user_verify=True, need_login=True):
    """
    :param url: url to be crawled
    :param user_verify: if it's ajax url, the value is False, else True
    :param need_login: if the url is need to login, the value is True, else False
    :return: return '' if exception happens or status_code != 200
    """
    crawler.info('the send_personal_message uid is {uid}'.format(uid=str(target_uid)))
    count = 0

    while count < max_retries:
        if need_login:
            name_cookies = Cookies.fetch_cookies()
            print(name_cookies)
            # check adver_timers
            if int(name_cookies[3]) >= int(adver_timers):
                continue

            if name_cookies is None:
                crawler.warning('no cookies in cookies pool, please find out the reason')
                send_email()
                os.kill(os.getppid(), signal.SIGTERM)
        try:
            if need_login:
                resp = requests.post('http://api.weibo.com/webim/2/direct_messages/new.json?source='+str(name_cookies[2]),
                              data={'text': adver_message, 'uid':str(target_uid)},
                              cookies=name_cookies[1], headers=personal_message_headers)

                if "error" in resp.text:
                    crawler.warning('account {} has been banned, resp.text is: {}'.format(name_cookies[0], resp.text))
                    freeze_account(name_cookies[0], 0)
                    Cookies.delete_cookies(name_cookies[0])
                    continue
                else:
                    # update adver_times
                    Cookies.store_cookies(name_cookies[0], name_cookies[1], name_cookies[2], 1)
                    return None

            #     if "$CONFIG['islogin'] = '******'" in resp.text:
            #         crawler.warning('account {} has been banned'.format(name_cookies[0]))
            #         freeze_account(name_cookies[0], 0)
            #         Cookies.delete_cookies(name_cookies[0])
            #         continue
            # # else:
            # #     resp = requests.get(url, headers=headers, timeout=time_out, verify=False)
            #
            # page = resp.text
            #
            # if page:
            #     page = page.encode('utf-8', 'ignore').decode('utf-8')
            # else:
            #     continue
            #
            # # slow down to aviod being banned
            # time.sleep(interal)
            #
            # if user_verify:
            #     if is_banned(resp.url) or is_403(page):
            #         crawler.warning('account {} has been banned'.format(name_cookies[0]))
            #         freeze_account(name_cookies[0], 0)
            #         Cookies.delete_cookies(name_cookies[0])
            #         count += 1
            #         continue
            #
            #     if 'verifybmobile' in resp.url:
            #         crawler.warning('account {} has been locked,you should use your phone to unlock it'.
            #                         format(name_cookies[0]))
            #
            #         freeze_account(name_cookies[0], -1)
            #         Cookies.delete_cookies(name_cookies[0])
            #         continue
            #
            #     if not is_complete(page):
            #         count += 1
            #         continue
            #
            #     if is_404(page):
            #         crawler.warning('send_personal_message{uid} seems to be 404'.format(uid=str(target_uid)))
            #         return ''

        except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError, AttributeError) as e:
            crawler.warning('excepitons happens when send_personal_message {},specific infos are {}'.format(target_uid, e))
            count += 1
            time.sleep(excp_interal)

        else:
            # Urls.store_crawl_url(url, 1)
            # return page
            return None

    crawler.warning('max tries for {},check the target_uid in redis db2'.format(target_uid))
    # Urls.store_crawl_url(url, 0)
    return ''
Exemple #7
0
def get_session(name, password):
    session = requests.Session()
    su = get_encodename(name)

    sever_data = get_server_data(su, session)
    servertime = sever_data["servertime"]
    nonce = sever_data['nonce']
    rsakv = sever_data["rsakv"]
    pubkey = sever_data["pubkey"]

    sp = get_password(password, servertime, nonce, pubkey)

    # 提交的数据可以根据抓包获得
    data = {
        'encoding': 'UTF-8',
        'entry': 'weibo',
        'from': '',
        'gateway': '1',
        'nonce': nonce,
        'pagerefer': "",
        'prelt': 67,
        'pwencode': 'rsa2',
        "returntype": "META",
        'rsakv': rsakv,
        'savestate': '7',
        'servertime': servertime,
        'service': 'miniblog',
        'sp': sp,
        'sr': '1920*1080',
        'su': su,
        'useticket': '1',
        'vsnf': '1',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack'
    }
    post_url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'
    url = get_redirect(data, post_url, session)

    if url != '':
        rs_cont = session.get(url, headers=headers)
        login_info = rs_cont.text

        u_pattern = r'"uniqueid":"(.*)",'
        m = re.search(u_pattern, login_info)
        if m:
            if m.group(1):
                # 任意验证一个页面看能否访问,使用这个方法验证比较依赖外部条件,但是没找到更好的方式(有的情况下,
                # 账号存在问题,但是可以访问自己的主页,所以通过自己的主页验证账号是否正常不恰当)
                check_url = 'http://weibo.com/p/1005051764222885/info?mod=pedit_more'
                resp = session.get(check_url, headers=headers)

                if is_403(resp.text):
                    other.error('账号{}已被冻结'.format(name))
                    crawler.warning('账号{}已经被冻结'.format(name))
                    freeze_account(name)
                    return None
                other.info('本次登陆账号为:{}'.format(name))
                Cookies.store_cookies(name, session.cookies.get_dict())
                return session
            else:
                other.error('本次账号{}登陆失败'.format(name))
                return None
        else:
            other.error('本次账号{}登陆失败'.format(name))
            return None
    else:
        other.error('本次账号{}登陆失败'.format(name))
        return None
Exemple #8
0
 def test_store_and_fetch_cookies(self):
     assert Cookies.fetch_cookies() is None
     Cookies.store_cookies(FAKE_STR, FAKE_STR)
     assert Cookies.fetch_cookies() is not None
 def test_store_and_fetch_cookies(self):
     assert Cookies.fetch_cookies() is None
     Cookies.store_cookies(FAKE_STR, FAKE_STR)
     assert Cookies.fetch_cookies() is not None