コード例 #1
0
def get_url_from_web(user_id):
    """
    Get user info according to user id.
    If user domain is 100505,the url is just 100505+userid;
    If user domain is 103505 or 100306, we need to request once more to get his info
    If user type is enterprise or service, we just crawl their home page info
    :param: user id
    :return: user entity
    """
    if not user_id:
        return None

    url = BASE_URL.format('100505', user_id)
    html = get_page(url, auth_level=1)

    if not is_404(html):
        domain = public.get_userdomain(html)

        # writers(special users)
        if domain == '103505' or domain == '100306':
            url = BASE_URL.format(domain, user_id)
            html = get_page(url)
            user = get_user_detail(user_id, html)
        # normal users
        elif domain == '100505':
            user = get_user_detail(user_id, html)
            samefollow_uid = get_samefollow_uid()
            if samefollow_uid.strip() != '':
                samefollow_uid = samefollow_uid.split(',')
                url = SAMEFOLLOW_URL.format(user_id)
                isFanHtml = get_page(url, auth_level=2)
                person.get_isFan(isFanHtml, samefollow_uid, user_id)
        # enterprise or service
        else:
            user = get_enterprise_detail(user_id, html)

        if user is None:
            return None

        user.name = public.get_username(html)
        user.head_img = public.get_headimg(html)
        user.verify_type = public.get_verifytype(html)
        user.verify_info = public.get_verifyreason(html, user.verify_type)
        user.level = public.get_level(html)

        if user.name:
            UserOper.add_one(user)
            storage.info(
                'Has stored user {id} info successfully'.format(id=user_id))
            return user
        else:
            return None

    else:
        return None
コード例 #2
0
ファイル: user.py プロジェクト: ResolveWang/WeiboSpider
def get_url_from_web(user_id):
    """
    Get user info according to user id.
    If user domain is 100505,the url is just 100505+userid;
    If user domain is 103505 or 100306, we need to request once more to get his info
    If user type is enterprise or service, we just crawl their home page info
    :param: user id
    :return: user entity
    """
    if not user_id:
        return None

    url = BASE_URL.format('100505', user_id)
    html = get_page(url, auth_level=1)

    if not is_404(html):
        domain = public.get_userdomain(html)

        # writers(special users)
        if domain == '103505' or domain == '100306':
            url = BASE_URL.format(domain, user_id)
            html = get_page(url)
            user = get_user_detail(user_id, html)
        # normal users
        elif domain == '100505':
            user = get_user_detail(user_id, html)
            samefollow_uid = get_samefollow_uid()
            if samefollow_uid.strip() != '':
                samefollow_uid = samefollow_uid.split(',')
                url = SAMEFOLLOW_URL.format(user_id)
                isFanHtml = get_page(url, auth_level=2)
                person.get_isFan(isFanHtml, samefollow_uid, user_id)
        # enterprise or service
        else:
            user = get_enterprise_detail(user_id, html)

        if user is None:
            return None

        user.name = public.get_username(html)
        user.head_img = public.get_headimg(html)
        user.verify_type = public.get_verifytype(html)
        user.verify_info = public.get_verifyreason(html, user.verify_type)
        user.level = public.get_level(html)

        if user.name:
            UserOper.add_one(user)
            storage.info('Has stored user {id} info successfully'.format(id=user_id))
            return user
        else:
            return None

    else:
        return None