def crawl_follower_fans(uid):
    """Collect the fan and follower ids of ``uid`` and store them as seeds.

    Nothing is fetched when the seed row for ``uid`` already carries a
    non-zero ``other_crawled`` flag.

    :param uid: id of the user whose fans and followers are collected
    :return: None
    """
    seed = get_seed_by_id(uid)
    # A missing seed row counts as "not crawled yet" instead of failing with
    # AttributeError on ``None.other_crawled``.
    if seed is not None and seed.other_crawled != 0:
        return

    # The second argument selects which relation list is fetched; 1 and 2
    # presumably mean fans and followers -- confirm against user_get.
    related_ids = user_get.get_fans_or_followers_ids(uid, 1)
    related_ids.extend(user_get.get_fans_or_followers_ids(uid, 2))

    # Collapse duplicate ids. Original note: if the data already exists it is
    # just skipped. The insert is not attempted when no ids were found.
    unique_ids = set(related_ids)
    if unique_ids:
        insert_seeds(unique_ids)

    # Flag the seed even when no ids were found, so it is not fetched again.
    set_seed_other_crawled(uid)
def crawl_follower_fans(uid):
    """Fetch the fans and followers of ``uid`` and insert their ids as seeds.

    The work is skipped when the seed row for ``uid`` has a non-zero
    ``other_crawled`` flag.

    :param uid: id of the user whose fans and followers are fetched
    :return: None
    """
    seed = get_seed_by_id(uid)
    # Treat a missing seed row like an uncrawled one rather than raising
    # AttributeError when reading ``other_crawled`` from ``None``.
    not_crawled_yet = seed is None or seed.other_crawled == 0
    if not_crawled_yet:
        # Two fetches, one per relation type (selector values 1 and 2;
        # presumably fans and followers -- confirm against user_get).
        collected = user_get.get_fans_or_followers_ids(uid, 1)
        collected.extend(user_get.get_fans_or_followers_ids(uid, 2))

        # Original note: duplicate data is skipped on insert. The set removes
        # ids that appear in both lists; nothing is inserted when it is empty.
        deduped = set(collected)
        if deduped:
            insert_seeds(deduped)
        set_seed_other_crawled(uid)
def crawl_person_infos(uid):
    """Fetch a user's profile and queue the crawl of their fans and followers.

    Because of limits on the Weibo server side, only the first five pages of
    fans/followers are crawled by default, and the fans and followers of
    enterprise accounts cannot be viewed at all.

    :param uid: user id; a falsy value makes the call a no-op
    :return: None
    """
    if not uid:
        return

    # Original note: the data table is shared with other tasks, so the
    # database is checked for this user's info before anything is crawled.
    user = user_get.get_profile(uid)

    # Enterprise accounts (verify_type == 2) are not crawled; the seed is
    # flagged and no follow-up task is sent. A missing profile is treated as
    # non-enterprise instead of raising AttributeError on ``None``.
    if user and user.verify_type == 2:
        set_seed_other_crawled(uid)
        return

    app.send_task(
        'tasks.user.crawl_follower_fans',
        args=(uid,),
        queue='fans_followers',
        routing_key='for_fans_followers',
    )
def crawl_person_infos(uid):
    """Fetch a user's profile, then store their fan and follower ids as seeds.

    Because of limits on the Weibo server side, only the first five pages of
    fans/followers are crawled by default, and the fans and followers of
    enterprise accounts cannot be viewed at all.

    :param uid: user id; a falsy value makes the call a no-op
    :return: None
    """
    if not uid:
        return

    # Original note: the data table is shared with other tasks, so the
    # database is checked for this user's info before anything is crawled.
    user = user_get.get_profile(uid)

    # Enterprise accounts (verify_type == 2) are not crawled. A missing
    # profile is treated as non-enterprise instead of raising AttributeError.
    if user and user.verify_type == 2:
        return

    seed = get_seed_by_id(uid)
    if seed is not None and seed.other_crawled != 0:
        # Fans and followers were already collected for this seed.
        return

    # The second argument selects which relation list is fetched; 1 and 2
    # presumably mean fans and followers -- confirm against user_get.
    related_ids = user_get.get_fans_or_followers_ids(uid, 1)
    related_ids.extend(user_get.get_fans_or_followers_ids(uid, 2))

    # Original note: duplicate data is skipped on insert. The set removes
    # repeated ids, and the insert is not attempted when no ids were found.
    unique_ids = set(related_ids)
    if unique_ids:
        insert_seeds(unique_ids)
    set_seed_other_crawled(uid)
def crawl_person_infos(uid):
    """Crawl a user's profile and schedule the crawl of their fans/followers.

    Weibo's backend limits fan and follower listings to 5 pages, and the
    fans and followers of enterprise accounts cannot be viewed at all.

    :param uid: current user id; a falsy value makes the call a no-op
    :return: None
    """
    if uid:
        profile, already_crawled = user_get.get_profile(uid)
        if profile and profile.verify_type == 2:
            # Enterprise user: flag the seed and do not schedule anything.
            set_seed_other_crawled(uid)
        elif not already_crawled:
            # Hand the fans/followers crawl over to its dedicated queue.
            app.send_task(
                'tasks.user.crawl_follower_fans',
                args=(uid,),
                queue='fans_followers',
                routing_key='for_fans_followers',
            )
def crawl_person_infos(uid):
    """Crawl a user's profile and schedule the crawl of their fans/followers.

    Weibo's backend limits fan and follower listings to 5 pages, and the
    fans and followers of enterprise accounts cannot be viewed at all.

    :param uid: current user id; a falsy value makes the call a no-op
    :return: None
    """
    if not uid:
        return

    user = user_get.get_profile(uid)

    # Enterprise users (verify_type == 2) are skipped: the seed is flagged and
    # no follow-up task is sent. A missing profile is treated as
    # non-enterprise instead of raising AttributeError on ``None``.
    if user and user.verify_type == 2:
        set_seed_other_crawled(uid)
        return

    # Hand the fans/followers crawl over to its dedicated queue.
    celery.send_task(
        'celery_tasks.weibo.user.crawl_follower_fans',
        args=(uid,),
        queue='fans_followers',
        routing_key='for_fans_followers',
    )