Exemple #1
0
 def test_bulk_insert_with_duplicates(self):
     """
     Check whether a bulk insert ends up writing duplicate rows.

     Nothing is asserted here; per the original author, the outcome has to
     be inspected in the MySQL database.
     """
     from db.seed_ids import insert_seeds
     insert_seeds(['2891529877', '2891529878', '281296709'])
Exemple #2
0
 def test_bulk_insert_with_duplicates(self):
     """
     Exercise the bulk seed insert to see whether duplicates get inserted.

     The method makes no assertions; the original author's note says to
     check the result in the MySQL database.
     """
     from db.seed_ids import insert_seeds
     seed_ids = ['2891529877', '2891529878', '281296709']
     insert_seeds(seed_ids)
Exemple #3
0
def crawl_person_infos(uid):
    """
    Fetch a user's profile, then collect the ids of the user's followers and fans.

    Notes translated from the original author: because of Weibo server-side
    limits only the first five pages are crawled by default, and the
    follower/fan lists of enterprise accounts cannot be viewed.

    :param uid: user id; a falsy value makes the function return immediately
    :return: None
    """
    if not uid:
        return

    # The table is shared with other tasks, so (per the original author) the
    # database is checked for this user's info before anything is crawled.
    profile = user_get.get_profile(uid)

    # Enterprise accounts (verify_type == 2) are not crawled any further; the
    # seed is only flagged as handled.
    if profile.verify_type == 2:
        set_seed_other_crawled(uid)
        return

    seed_row = get_seed_by_id(uid)
    if seed_row.other_crawled != 0:
        # Followers/fans were already processed for this seed.
        return

    related_ids = user_get.get_fans_or_followers_ids(uid, 1)
    related_ids.extend(user_get.get_fans_or_followers_ids(uid, 2))
    # set() removes ids that show up in both lookups; the original comment
    # says duplicate data is skipped on insert (presumably inside
    # insert_seeds -- verify there).
    unique_ids = set(related_ids)
    if unique_ids:
        insert_seeds(unique_ids)
    set_seed_other_crawled(uid)
Exemple #4
0
def crawl_follower_fans(uid):
    """
    Collect follower/fan ids for ``uid`` and store them as new seeds.

    Does nothing when the seed's ``other_crawled`` flag is already non-zero.
    Otherwise the two id lookups are merged, de-duplicated, inserted as
    seeds (when there are any), and the seed is flagged as crawled.

    :param uid: user id of the seed to process
    :return: None
    """
    seed_row = get_seed_by_id(uid)
    if seed_row.other_crawled != 0:
        return

    collected = user_get.get_fans_or_followers_ids(uid, 1)
    collected.extend(user_get.get_fans_or_followers_ids(uid, 2))
    unique_ids = set(collected)
    # Original note: data that already exists is just skipped (presumably
    # by insert_seeds -- verify there).
    if unique_ids:
        insert_seeds(unique_ids)
    set_seed_other_crawled(uid)
Exemple #5
0
def crawl_follower_fans(uid):
    """
    Gather the follower and fan ids of ``uid`` and insert them as seeds.

    The work is skipped entirely when the seed's ``other_crawled`` flag is
    not 0. After a run the seed is flagged via ``set_seed_other_crawled``.

    :param uid: user id of the seed to process
    :return: None
    """
    seed_record = get_seed_by_id(uid)
    if seed_record.other_crawled != 0:
        return

    id_list = user_get.get_fans_or_followers_ids(uid, 1)
    id_list.extend(user_get.get_fans_or_followers_ids(uid, 2))
    distinct_ids = set(id_list)
    # Translated original note: duplicate data is skipped on insert
    # (presumably handled by insert_seeds -- verify there).
    if distinct_ids:
        insert_seeds(distinct_ids)
    set_seed_other_crawled(uid)
Exemple #6
0
def crawl_follower_fans(uid):
    """
    Insert the follower/fan ids of ``uid`` as seeds, once per seed.

    A seed whose ``other_crawled`` flag is already non-zero is left alone.
    Otherwise both id lookups are combined into a set, inserted when
    non-empty, and the seed is then flagged as crawled.

    :param uid: user id of the seed to process
    :return: None
    """
    current_seed = get_seed_by_id(uid)
    if current_seed.other_crawled != 0:
        return

    found_ids = user_get.get_fans_or_followers_ids(uid, 1)
    found_ids.extend(user_get.get_fans_or_followers_ids(uid, 2))
    new_seed_ids = set(found_ids)
    # Original note: already-existing data is just skipped (presumably
    # inside insert_seeds -- verify there).
    if new_seed_ids:
        insert_seeds(new_seed_ids)
    set_seed_other_crawled(uid)
Exemple #7
0
 def test_bulk_insert_with_duplicates(self):
     """Call ``insert_seeds`` with a fixed list of three id strings; nothing is asserted."""
     from db.seed_ids import insert_seeds
     insert_seeds(['2891529877', '2891529878', '281296709'])