def get_fans_or_followers_ids(user_id, crawl_type, verify_type):
    """
    Get followers or fans.

    Requests the relation pages of ``user_id`` one by one through ``get_page``
    (at most five pages) and concatenates whatever
    ``public.get_fans_or_follows`` returns for each of them.

    :param user_id: user id
    :param crawl_type: 1 stands for fans, 2 stands for follows
    :param verify_type: 1 stands for 100505 (normal users), 2 stands for
        100606 (special users, such as writers)
    :return: list of fans or followers
    :raises ValueError: if ``(crawl_type, verify_type)`` is not one of the
        four supported combinations
    """
    # todo deal with conditions that fans and followers more than 5 pages
    # Keyed by (crawl_type, verify_type).
    url_templates = {
        (1, 1): 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60',
        (2, 1): 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60',
        (1, 2): 'http://weibo.com/p/100606{}/follow?relate=fans&page={}#Pl_Official_HisRelation__47',
        (2, 2): 'http://weibo.com/p/100606{}/follow?page={}#Pl_Official_HisRelation__47',
    }
    try:
        fans_or_follows_url = url_templates[(crawl_type, verify_type)]
    except (KeyError, TypeError):
        # Fail early with a clear message; previously an unsupported pair left
        # the url template unassigned and crashed later with UnboundLocalError.
        raise ValueError(
            'unsupported crawl_type/verify_type combination: {!r}, {!r}'.format(
                crawl_type, verify_type))

    cur_page = 1
    max_page = 6  # exclusive upper bound, i.e. never more than five pages
    user_ids = []

    while cur_page < max_page:
        page = get_page(fans_or_follows_url.format(user_id, cur_page))
        if cur_page == 1:
            # The first page is used to tighten the upper bound
            # (presumably the helper reports how many pages exist).
            urls_length = public.get_max_crawl_pages(page)
            if max_page > urls_length:
                max_page = urls_length + 1
        # get ids and store relations
        user_ids.extend(public.get_fans_or_follows(page, user_id, crawl_type))
        cur_page += 1

    return user_ids
def get_fans_or_followers_ids(user_id, crawl_type):
    """
    Get the fans or the followed users of a user.

    Requests the relation pages of ``user_id`` one by one through ``get_page``
    (at most five pages) and concatenates whatever
    ``public.get_fans_or_follows`` returns for each of them.

    :param user_id: user id
    :param crawl_type: 1 means fetch fans; any other value fetches follows
    :return: list of the fetched follows or fans
    """
    # todo verify that this also works for the fans and follows of users such
    #  as writers; handle the case of 5 pages of fans or follows
    if crawl_type == 1:
        ff_url = 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    else:
        ff_url = 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60'

    cur_page = 1
    max_page = 6  # exclusive upper bound, i.e. never more than five pages
    user_ids = []

    while cur_page < max_page:
        page = get_page(ff_url.format(user_id, cur_page))
        # Collect ids from every fetched page. Previously this ran only for
        # the first page, so later pages were downloaded and then discarded.
        user_ids.extend(public.get_fans_or_follows(page))
        if cur_page == 1:
            # The first page is used to tighten the upper bound
            # (presumably the helper reports how many pages exist).
            urls_length = public.get_max_crawl_pages(page)
            if max_page > urls_length:
                max_page = urls_length + 1
        cur_page += 1

    return user_ids
def get_fans_or_followers_ids(user_id, crawl_type):
    """
    Collect the fans or the follows of one user.

    The first relation page is requested through ``get_page`` and also used
    to decide how many further pages to request (never beyond page five).
    The results of ``public.get_fans_or_follows`` for every requested page
    are concatenated and returned.

    :param user_id: user id
    :param crawl_type: 1 stands for fans; any other value is treated as follows
    :return: list of fans or followers
    """
    # todo check fans and followers the special users,such as writers
    # todo deal with conditions that fans and followers more than 5 pages
    fans_template = 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    follows_template = 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60'
    url_template = fans_template if crawl_type == 1 else follows_template

    # Page 1 is always requested; it also determines the stop bound.
    first_page = get_page(url_template.format(user_id, 1))
    page_total = public.get_max_crawl_pages(first_page)
    stop_before = page_total + 1 if 6 > page_total else 6

    # get ids and store relations
    collected = []
    collected.extend(public.get_fans_or_follows(first_page, user_id, crawl_type))

    page_no = 2
    while page_no < stop_before:
        html = get_page(url_template.format(user_id, page_no))
        collected.extend(public.get_fans_or_follows(html, user_id, crawl_type))
        page_no += 1

    return collected
def get_fans_or_followers_ids(user_id, domain, crawl_type):
    """
    Collect the fans or the follows of one user.

    Relation pages are requested one at a time through ``get_page``, never
    beyond page five. The results of ``public.get_fans_or_follows`` for every
    requested page are concatenated and returned.

    :param user_id: user id
    :param domain: value placed directly in front of ``user_id`` in the
        ``/p/`` segment of the url
    :param crawl_type: 1 stands for fans; any other value is treated as follows
    :return: list of fans or followers
    """
    # todo check fans and followers the special users,such as writers
    # todo deal with conditions that fans and followers more than 5 pages
    url_template = (
        'https://weibo.com/p/{}{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
        if crawl_type == 1 else
        'https://weibo.com/p/{}{}/follow?page={}#Pl_Official_HisRelation__60'
    )

    gathered = []
    page_index, stop_before = 1, 6
    while page_index < stop_before:
        html = get_page(url_template.format(domain, user_id, page_index))
        if page_index == 1:
            # Only the first page is consulted to tighten the stop bound.
            page_total = public.get_max_crawl_pages(html)
            if stop_before > page_total:
                stop_before = page_total + 1
        # get ids and store relations
        gathered.extend(public.get_fans_or_follows(html, user_id, crawl_type))
        page_index += 1

    return gathered