Example 1
 def test_get_fans(self):
     from page_parse.user import public
     with open('./tests/fans.html', encoding='utf-8') as f:
         cont = f.read()
     ids, cur_urls = public.get_fans_or_follows(cont)
     self.assertEqual(len(ids), 9)
     self.assertEqual(len(cur_urls), 5)
Example 2
 def test_get_fans(self):
     """
     测试用户粉丝获取功能
     """
     from page_parse.user import public
     with open('./tests/fans.html', encoding='utf-8') as f:
         cont = f.read()
     public.get_fans_or_follows(cont)
     ids = public.get_fans_or_follows(cont)
     self.assertEqual(len(ids), 9)
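
Both tests above only count the ids that public.get_fans_or_follows extracts from the saved fans.html fixture. The real parser lives in page_parse.user.public; the snippet below is a rough, hypothetical sketch of what such an extractor could look like, assuming each listed user shows up in a usercard="id=..." attribute (the attribute name, the optional escaped quote, and the order-preserving dedup are all assumptions, not the project's confirmed implementation).

import re


def extract_fan_ids(html):
    """Hypothetical sketch: pull distinct user ids from a fans/follows page."""
    # Weibo often embeds page markup inside FM.view(...) JSON, so the attribute
    # may appear with an escaped quote: usercard=\"id=123...
    seen = set()
    ids = []
    for uid in re.findall(r'usercard=\\?"id=(\d+)', html):
        if uid not in seen:  # keep the first occurrence, drop duplicates
            seen.add(uid)
            ids.append(uid)
    return ids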
Example 3
def get_fans_or_followers_ids(user_id, crawl_type, verify_type):
    """
    Get followers or fans
    :param user_id: user id
    :param crawl_type: 1 stands for fans, 2 stands for follows
    :param verify_type: 1 stands for 100505(normal users), 2 stands for 100606(special users,such as writers)
    :return: lists of fans or followers
    """

    # todo deal with conditions that fans and followers more than 5 pages
    if crawl_type == 1 and verify_type == 1:
        fans_or_follows_url = 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    elif crawl_type == 2 and verify_type == 1:
        fans_or_follows_url = 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60'
    elif crawl_type == 1 and verify_type == 2:
        fans_or_follows_url = 'http://weibo.com/p/100606{}/follow?relate=fans&page={}#Pl_Official_HisRelation__47'
    elif crawl_type == 2 and verify_type == 2:
        fans_or_follows_url = 'http://weibo.com/p/100606{}/follow?page={}#Pl_Official_HisRelation__47'

    cur_page = 1
    max_page = 6
    user_ids = list()
    while cur_page < max_page:
        url = fans_or_follows_url.format(user_id, cur_page)
        page = get_page(url)
        if cur_page == 1:
            urls_length = public.get_max_crawl_pages(page)
            if max_page > urls_length:
                max_page = urls_length + 1
        # get ids and store relations
        user_ids.extend(public.get_fans_or_follows(page, user_id, crawl_type))

        cur_page += 1

    return user_ids
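
The crawl_type/verify_type pair only selects which of the four URL templates gets formatted; note that if either argument falls outside {1, 2}, no branch assigns fans_or_follows_url and the later format call raises NameError. A minimal usage sketch (the uid is made up):

# Hypothetical calls; '1234567890' is a made-up uid.
fan_ids = get_fans_or_followers_ids('1234567890', crawl_type=1, verify_type=1)     # fans of a normal user
follow_ids = get_fans_or_followers_ids('1234567890', crawl_type=2, verify_type=2)  # follows of a special user (100606 domain)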
Example 4
def get_fans_or_followers_ids(user_id, crawl_type):
    """
    Get followers or fans
    :param user_id: user id
    :param crawl_type: 1 stands for fans,2 stands for follows
    :return: lists of fans or followers
    """

    # todo check fans and followers the special users,such as writers
    # todo deal with conditions that fans and followers more than 5 pages
    if crawl_type == 1:
        fans_or_follows_url = 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    else:
        fans_or_follows_url = 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60'

    cur_page = 1
    max_page = 6
    user_ids = list()
    while cur_page < max_page:
        url = fans_or_follows_url.format(user_id, cur_page)
        page = get_page(url)
        if cur_page == 1:
            urls_length = public.get_max_crawl_pages(page)
            if max_page > urls_length:
                max_page = urls_length + 1
        # get ids and store relations
        user_ids.extend(public.get_fans_or_follows(page, user_id, crawl_type))

        cur_page += 1

    return user_ids
Example 5
def get_fans_or_followers_ids(user_id, crawl_type):
    """
    获取用户的粉丝和关注用户
    :param user_id: 用户id
    :param crawl_type: 1表示获取粉丝,2表示获取关注
    :return: 获取的关注或者粉丝列表
    """

    # todo 验证作家等用户的粉丝和关注是否满足;处理粉丝或者关注5页的情况
    if crawl_type == 1:
        ff_url = 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    else:
        ff_url = 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60'

    cur_page = 1
    max_page = 6
    user_ids = list()
    while cur_page < max_page:
        url = ff_url.format(user_id, cur_page)
        page = get_page(url)
        if cur_page == 1:
            urls_length = public.get_max_crawl_pages(page)
            if max_page > urls_length:
                max_page = urls_length + 1
        # collect ids from every page, not just the first one
        user_ids.extend(public.get_fans_or_follows(page))

        cur_page += 1

    return user_ids
Example 6
def get_fans_or_followers_ids(user_id, domain, crawl_type):
    """
    Get followers or fans
    :param user_id: user id
    :param crawl_type: 1 stands for fans,2 stands for follows
    :return: lists of fans or followers
    """

    # todo check fans and followers the special users,such as writers
    # todo deal with conditions that fans and followers more than 5 pages
    if crawl_type == 1:
        fans_or_follows_url = 'https://weibo.com/p/{}{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    else:
        fans_or_follows_url = 'https://weibo.com/p/{}{}/follow?page={}#Pl_Official_HisRelation__60'

    cur_page = 1
    max_page = 6
    user_ids = list()
    while cur_page < max_page:
        url = fans_or_follows_url.format(domain, user_id, cur_page)
        page = get_page(url)
        if cur_page == 1:
            urls_length = public.get_max_crawl_pages(page)
            if max_page > urls_length:
                max_page = urls_length + 1
        # get ids and store relations
        user_ids.extend(public.get_fans_or_follows(page, user_id, crawl_type))

        cur_page += 1

    return user_ids
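
Compared with Example 3, this version moves the 100505/100606 decision out of the function: the caller passes the page domain directly, so only the fans-vs-follows branch remains inside. A hedged sketch of how a caller might map a verify_type onto that argument (the helper name and uid are made up):

# Hypothetical helper; the mapping mirrors Example 3's verify_type handling.
def domain_for(verify_type):
    return '100505' if verify_type == 1 else '100606'

fan_ids = get_fans_or_followers_ids('1234567890', domain_for(1), crawl_type=1)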
Example 7
 def test_get_fans(self):
     """
     test parsing fans pages
     """
     from page_parse.user import public
     url = TEST_SERVER + 'fans.html'
     resp = requests.get(url)
     resp.encoding = 'utf-8'
     cont = resp.text
     ids = public.get_fans_or_follows(cont, '2036911095', 1)
     self.assertEqual(len(ids), 9)
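
This test fetches the fixture over HTTP from a TEST_SERVER constant instead of reading it from disk. One way to provide such a server locally (a sketch, not the project's actual test setup) is to serve the tests/ directory with Python's built-in http.server in a background thread:

import threading
from functools import partial
from http.server import HTTPServer, SimpleHTTPRequestHandler

# Serve ./tests/ on an ephemeral port, then point TEST_SERVER at it.
handler = partial(SimpleHTTPRequestHandler, directory='./tests')
httpd = HTTPServer(('127.0.0.1', 0), handler)
threading.Thread(target=httpd.serve_forever, daemon=True).start()
TEST_SERVER = 'http://127.0.0.1:{}/'.format(httpd.server_address[1])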