예제 #1
0
def get_fans_or_followers_ids(user_id, crawl_type, verify_type):
    """
    Get the fans or the follows of a user by crawling the relation pages.

    :param user_id: user id
    :param crawl_type: 1 stands for fans, 2 stands for follows
    :param verify_type: 1 stands for 100505 (normal users), 2 stands for
        100606 (special users, such as writers)
    :return: list accumulated from ``public.get_fans_or_follows`` over every
        crawled page
    :raises ValueError: if the ``(crawl_type, verify_type)`` combination is not
        one of the four supported ones
    """

    # todo deal with conditions that fans and followers more than 5 pages
    if crawl_type == 1 and verify_type == 1:
        fans_or_follows_url = 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    elif crawl_type == 2 and verify_type == 1:
        fans_or_follows_url = 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60'
    elif crawl_type == 1 and verify_type == 2:
        fans_or_follows_url = 'http://weibo.com/p/100606{}/follow?relate=fans&page={}#Pl_Official_HisRelation__47'
    elif crawl_type == 2 and verify_type == 2:
        fans_or_follows_url = 'http://weibo.com/p/100606{}/follow?page={}#Pl_Official_HisRelation__47'
    else:
        # Without this branch the URL template stays unbound and the loop
        # below fails with an opaque UnboundLocalError; fail early instead.
        raise ValueError(
            'unsupported crawl_type/verify_type combination: '
            'crawl_type={!r}, verify_type={!r}'.format(crawl_type, verify_type)
        )

    cur_page = 1
    # Exclusive upper bound: at most pages 1..5 are requested.
    max_page = 6
    user_ids = list()
    while cur_page < max_page:
        url = fans_or_follows_url.format(user_id, cur_page)
        page = get_page(url)
        if cur_page == 1:
            # The first page reports how many pages can be crawled; shrink the
            # bound when fewer than the default are available.
            urls_length = public.get_max_crawl_pages(page)
            if max_page > urls_length:
                max_page = urls_length + 1
        # get ids and store relations
        user_ids.extend(public.get_fans_or_follows(page, user_id, crawl_type))

        cur_page += 1

    return user_ids
예제 #2
0
def get_fans_or_followers_ids(user_id, crawl_type):
    """
    Get the fans or the follows of a user by crawling the relation pages.

    :param user_id: user id
    :param crawl_type: 1 stands for fans, any other value (documented as 2)
        stands for follows
    :return: list accumulated from ``public.get_fans_or_follows`` over every
        crawled page
    """

    # todo verify that this also works for special users such as writers;
    #  handle the case where fans or follows span more than 5 pages
    if crawl_type == 1:
        ff_url = 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    else:
        ff_url = 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60'

    cur_page = 1
    # Exclusive upper bound: at most pages 1..5 are requested.
    max_page = 6
    user_ids = list()
    while cur_page < max_page:
        url = ff_url.format(user_id, cur_page)
        page = get_page(url)
        if cur_page == 1:
            # The first page reports how many pages can be crawled; shrink the
            # bound when fewer than the default are available.
            urls_length = public.get_max_crawl_pages(page)
            if max_page > urls_length:
                max_page = urls_length + 1
        # Collect ids from every fetched page. Previously this ran only for the
        # first page, so later pages were downloaded and then thrown away.
        user_ids.extend(public.get_fans_or_follows(page))

        cur_page += 1

    return user_ids
예제 #3
0
파일: user.py 프로젝트: dittoyi/weibospider
def get_fans_or_followers_ids(user_id, crawl_type):
    """
    Crawl the relation pages of a user and collect the ids found on them.

    :param user_id: user id
    :param crawl_type: 1 stands for fans, any other value (documented as 2)
        stands for follows
    :return: list accumulated from ``public.get_fans_or_follows`` over every
        crawled page
    """

    # todo check fans and followers the special users,such as writers
    # todo deal with conditions that fans and followers more than 5 pages
    if crawl_type == 1:
        url_template = 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    else:
        url_template = 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60'

    collected = []

    # The first page is always fetched: besides its ids it also tells how many
    # pages can be crawled, which bounds the loop below.
    first_page = get_page(url_template.format(user_id, 1))
    stop_page = 6  # exclusive bound, i.e. at most pages 1..5
    available_pages = public.get_max_crawl_pages(first_page)
    if stop_page > available_pages:
        stop_page = available_pages + 1
    # get ids and store relations
    collected.extend(public.get_fans_or_follows(first_page, user_id, crawl_type))

    # Remaining pages, if any.
    page_no = 2
    while page_no < stop_page:
        html = get_page(url_template.format(user_id, page_no))
        collected.extend(public.get_fans_or_follows(html, user_id, crawl_type))
        page_no += 1

    return collected
예제 #4
0
def get_fans_or_followers_ids(user_id, domain, crawl_type):
    """
    Crawl the relation pages of a user and collect the ids found on them.

    :param user_id: user id
    :param domain: value placed directly before ``user_id`` in the page URL
        (presumably the account-type prefix such as 100505 -- confirm with caller)
    :param crawl_type: 1 stands for fans, any other value (documented as 2)
        stands for follows
    :return: list accumulated from ``public.get_fans_or_follows`` over every
        crawled page
    """

    # todo check fans and followers the special users,such as writers
    # todo deal with conditions that fans and followers more than 5 pages
    url_template = (
        'https://weibo.com/p/{}{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
        if crawl_type == 1 else
        'https://weibo.com/p/{}{}/follow?page={}#Pl_Official_HisRelation__60'
    )

    collected = []
    # stop_page is an exclusive bound, so by default pages 1..5 are requested.
    page_no, stop_page = 1, 6
    while page_no < stop_page:
        html = get_page(url_template.format(domain, user_id, page_no))
        if page_no == 1:
            # Only the first page is asked for the crawlable page count; the
            # bound is lowered when fewer pages than the default exist.
            available_pages = public.get_max_crawl_pages(html)
            stop_page = available_pages + 1 if stop_page > available_pages else stop_page
        # get ids and store relations
        collected.extend(public.get_fans_or_follows(html, user_id, crawl_type))
        page_no += 1

    return collected