Ejemplo n.º 1
0
def get_user_detail(user_id, html):
    """
    Build a user object from a profile detail page.
    :param user_id: uid stored on the parsed user
    :param html: page source handed to the ``person`` parsers
    :return: the parsed user with uid and follow/fan/status counts filled in,
             or None when ``person.get_detail`` returns None
    """
    profile = person.get_detail(html)

    if profile is None:
        # Nothing could be parsed from the page: flag the seed with status 2.
        set_seed_crawled(user_id, 2)
        return None

    profile.uid = user_id
    profile.follows_num = person.get_friends(html)
    profile.fans_num = person.get_fans(html)
    profile.wb_num = person.get_status(html)
    return profile
Ejemplo n.º 2
0
def get_url_from_web(user_id):
    """
    Fetch a user's profile by user id.

    The first request always uses domain '100505'. If the domain read from
    that page is '100505', the page is parsed directly as the detail page.
    If it is '103505' or '100306', one more request is made with that domain,
    because the base_url form only lands on the user's home page rather than
    the detail page. For any other domain (enterprise and service accounts,
    etc.) the first page is parsed as-is: the detail page of such users is of
    little value, so it is not requested.
    :param user_id: user id
    :return: user entity; None when user_id is empty, the page is a 404,
             or no user could be parsed
    """
    if not user_id:
        return None

    page = get_page(base_url.format('100505', user_id))

    if is_404(page):
        # Missing page: flag the seed with status 2 and give up.
        set_seed_crawled(user_id, 2)
        return None

    domain = public.get_userdomain(page)

    if domain == '100505':
        profile = get_user_detail(user_id, page)
    elif domain in ('103505', '100306'):
        # These domains need a second request built with their own domain;
        # the public fields below are then read from this second page.
        page = get_page(base_url.format(domain, user_id))
        profile = get_user_detail(user_id, page)
    else:
        profile = get_enterprise_detail(user_id, page)

    if profile is None:
        return None

    # verify_info depends on verify_type, so verify_type is assigned first.
    profile.name = public.get_username(page)
    profile.head_img = public.get_headimg(page)
    profile.verify_type = public.get_verifytype(page)
    profile.verify_info = public.get_verifyreason(page, profile.verify_type)
    profile.level = public.get_level(page)

    # Save the user and flag the seed with status 1.
    save_user(profile)
    set_seed_crawled(user_id, 1)
    storage.info('已经成功保存ID为{id}的用户信息'.format(id=user_id))

    return profile
Ejemplo n.º 3
0
def get_profile(user_id):
    """
    Return the user for ``user_id``, fetching it from the web when
    ``get_user_by_uid`` does not return one.
    :param user_id: uid
    :return: the user returned by ``get_user_by_uid`` or ``get_url_from_web``;
             None when the web fetch produced no user
    """
    user = get_user_by_uid(user_id)

    if user:
        storage.info('user {id} has already crawled'.format(id=user_id))
        set_seed_crawled(user_id, 1)
        return user

    user = get_url_from_web(user_id)
    # Status 1 marks a successful crawl, 2 a failed one. The failure path
    # previously also wrote 1, which recorded a failed crawl as a success.
    set_seed_crawled(user_id, 1 if user is not None else 2)
    return user
Ejemplo n.º 4
0
def get_profile(user_id):
    """
    Return the user for ``user_id``, fetching it from the web when it is not
    already stored.
    :param user_id: uid
    :return: the user returned by ``get_user_by_uid`` or ``get_url_from_web``;
             None when the web fetch produced no user
    """
    # Check whether this user's information already exists in the database.
    user = get_user_by_uid(user_id)

    if user:
        storage.info('ID为{id}的用户信息已经存在于数据库中'.format(id=user_id))
        set_seed_crawled(user_id, 1)
        return user

    user = get_url_from_web(user_id)
    # Status 1 marks a successful crawl, 2 a failed one. The failure path
    # previously also wrote 1, which recorded a failed crawl as a success.
    set_seed_crawled(user_id, 1 if user is not None else 2)
    return user
Ejemplo n.º 5
0
def get_profile(user_id):
    """
    Look up a user by uid, falling back to a web fetch.
    :param user_id: uid
    :return: tuple ``(user, is_crawled)``; ``is_crawled`` is 1 when
             ``get_user_by_uid`` already returned the user, and 0 when
             ``get_url_from_web`` had to be called (``user`` may then be None)
    """
    stored = get_user_by_uid(user_id)
    if stored:
        storage.info('user {id} has already crawled'.format(id=user_id))
        set_seed_crawled(user_id, 1)
        return stored, 1

    fetched = get_url_from_web(user_id)
    # Seed status: 1 when the fetch produced a user, 2 when it did not.
    seed_status = 2 if fetched is None else 1
    set_seed_crawled(user_id, seed_status)
    return fetched, 0
Ejemplo n.º 6
0
def get_profile(user_id, domain):
    """
    Look up a user by uid, falling back to a web fetch.
    :param user_id: uid
    :param domain: passed through unchanged to ``get_url_from_web``
    :return: tuple ``(user, is_crawled)``; ``is_crawled`` is 1 when
             ``get_user_by_uid`` already returned the user, and 0 when
             ``get_url_from_web`` had to be called (``user`` may then be None)
    """
    known_user = get_user_by_uid(user_id)
    if known_user:
        storage.info('user {id} has already crawled'.format(id=user_id))
        set_seed_crawled(user_id, 1)
        return known_user, 1

    crawled_user = get_url_from_web(user_id, domain)
    if crawled_user is None:
        # The fetch produced no user: flag the seed with status 2.
        set_seed_crawled(user_id, 2)
    else:
        set_seed_crawled(user_id, 1)
    return crawled_user, 0