예제 #1
0
파일: ed_snowball.py 프로젝트: wtgme/ohsn
def re_snowball_friends(olddbname, oldcomname, newdbname, newcomname):
    """Re-seed a new database from an old collection and snowball at level 1.

    The ``id`` values of the ``{'level': 1}`` documents in
    ``olddbname.oldcomname`` are fetched and handed, 100 at a time, to
    ``lookup.lookup_user_list`` together with the new user collection, the
    value 1 and the mode ``'N'``.  Afterwards ``following.snowball_following``
    and ``follower.snowball_follower`` are called repeatedly with the new user
    collection and the new database's ``net`` collection until both of them
    return ``False`` in the same round.  ``level`` stays at 1 for the whole
    run.

    Args:
        olddbname: name of the database that holds the existing users.
        oldcomname: name of the collection in ``olddbname`` to read ids from.
        newdbname: name of the database to connect to and populate.
        newcomname: name of the user collection inside ``newdbname``.
    """
    newdb = dbt.db_connect_no_auth(newdbname)
    newcom = newdb[newcomname]
    newnet = newdb['net']

    asc = pymongo.ASCENDING
    newcom.create_index("id", unique=True)
    # One compound (level, <direction>_prelevel_node) index per direction.
    for prelevel_field in ('following_prelevel_node', 'follower_prelevel_node'):
        newcom.create_index([('level', asc), (prelevel_field, asc)],
                            unique=False)
    newnet.create_index([("user", asc), ("follower", asc)], unique=True)

    # Retrieve ED core users and look them up in batches of 100.
    ed_users = iot.get_values_one_field(olddbname, oldcomname, 'id', {'level': 1})
    batch_size = 100
    for start in xrange(0, len(ed_users), batch_size):
        lookup.lookup_user_list(ed_users[start:start + batch_size], newcom, 1, 'N')

    stamp_format = "%Y-%m-%d-%H-%M-%S"
    level = 1
    keep_going = True
    while keep_going:
        # Per the original note, each snowball call only processes up to 200
        # users, hence the repeated rounds at the same level.
        # Prints use one parenthesised argument; the output is the same as
        # the comma-separated print statement.
        stamp = datetime.datetime.now().strftime(stamp_format)
        print('%s %s %s' % (stamp, 'Snowball followings of seeds for sample db', level))
        following_flag = following.snowball_following(newcom, newnet, level, 'N')

        stamp = datetime.datetime.now().strftime(stamp_format)
        print('%s %s %s' % (stamp, 'Snowball followees of seeds for sample db', level))
        follower_flag = follower.snowball_follower(newcom, newnet, level, 'N')

        # Stop only when both directions explicitly report False.
        keep_going = not (following_flag == False and follower_flag == False)
예제 #2
0
파일: re_collect.py 프로젝트: wtgme/ohsn
def seed():
    """Look up the ids returned by ``getuid("fed", "com")`` into ``ed.com``.

    Prints the number of ids, then passes them 100 at a time to
    ``lookup.lookup_user_list`` together with the ``com`` collection of the
    ``ed`` database, the value 1 and the mode ``"N"``.
    """
    sample_user = dbt.db_connect_no_auth("ed")["com"]
    # An earlier variant loaded the ids from 'ygtimeuid.pick' instead.
    neiblist = getuid("fed", "com")
    total = len(neiblist)
    print(total)

    batch_size = 100
    for start in xrange(0, total, batch_size):
        batch = neiblist[start:start + batch_size]
        lookup.lookup_user_list(batch, sample_user, 1, "N")
예제 #3
0
def timeline_sampling(dbname, mode='N'):
    """Grow the ``tcom`` user collection through timeline interactions.

    Seeds returned by ``profiles_check.seed_all_profile(db['poi'], 5)`` are
    handed to ``lookup.trans_seed_to_poi``.  Then, starting at level 1, each
    round calls ``timelines.monitor_timeline`` and
    ``timeline_network_miner.network_mining`` and, for every ``tcom`` user at
    the current level, collects the distinct ``id1`` values of its
    'retweet' / 'reply-to' / 'dmentioned' documents in ``bnet`` and passes
    them, 100 at a time, to ``lookup.lookup_user_list`` with ``level + 1``.
    The rounds stop once ``tcom`` holds more than 4000 documents.

    NOTE(review): when no seeds are found the function still enters the
    sampling loop, and the loop has no exit other than the 4000-document
    threshold -- confirm both are intended.

    Args:
        dbname: name of the database to connect to.
        mode: passed through unchanged to the ``lookup`` helpers.
    """
    db = dbt.db_connect_no_auth(dbname)
    poi, timel, bnet = db['tcom'], db['times'], db['bnet']
    stream_users = db['poi']

    asc, desc = pymongo.ASCENDING, pymongo.DESCENDING
    poi.create_index("id", unique=True)
    poi.create_index("level", unique=False)
    timel.create_index([('user.id', asc), ('id', desc)], unique=False)
    timel.create_index([('id', asc)], unique=True)
    bnet_key = [(field, asc)
                for field in ("id0", "id1", "relationship", "statusid")]
    bnet.create_index(bnet_key, unique=True)

    # Seeding runs once; the retry loop that used to wrap it is disabled.
    ed_seed = profiles_check.seed_all_profile(stream_users, 5)
    seed_count = len(ed_seed)
    # Prints use one parenthesised argument; the output is the same as the
    # comma-separated print statement.
    if seed_count != 0:
        print('%s %s' % ('seed users: ', seed_count))
        lookup.trans_seed_to_poi(ed_seed, poi, mode)
    else:
        stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        print('%s %s' % (stamp, 'no seed users, finished!'))

    batch_size = 100
    level = 1
    while True:
        timelines.monitor_timeline(poi, timel, 1)
        timeline_network_miner.network_mining(poi, timel, bnet, level)

        for user in poi.find({'level': level}):
            edge_query = {
                'id0': user['id'],
                'relationship': {'$in': ['retweet', 'reply-to', 'dmentioned']},
            }
            # De-duplicate the interaction targets before looking them up.
            neiblist = list({relate['id1'] for relate in bnet.find(edge_query)})
            for start in xrange(0, len(neiblist), batch_size):
                lookup.lookup_user_list(neiblist[start:start + batch_size],
                                        poi, level + 1, mode)

        if poi.count() > 4000:
            break
        level += 1
예제 #4
0
def seed():
    """Look up the ids returned by ``getuid('fed', 'com')`` into ``ed.com``.

    Prints the number of ids, then passes them 100 at a time to
    ``lookup.lookup_user_list`` together with the ``com`` collection of the
    ``ed`` database, the value 1 and the mode ``'N'``.
    """
    sample_user = dbt.db_connect_no_auth('ed')['com']
    # An earlier variant loaded the ids from 'ygtimeuid.pick' instead.
    neiblist = getuid('fed', 'com')
    total = len(neiblist)
    print(total)

    batch_size = 100
    for start in xrange(0, total, batch_size):
        batch = neiblist[start:start + batch_size]
        lookup.lookup_user_list(batch, sample_user, 1, 'N')
예제 #5
0
def timeline_sampling(dbname, mode='N'):
    """Grow the ``tcom`` user collection through timeline interactions.

    Seeds returned by ``profiles_check.seed_all_profile(db['poi'], 5)`` are
    handed to ``lookup.trans_seed_to_poi``.  Then, starting at level 1, each
    round calls ``timelines.monitor_timeline`` and
    ``timeline_network_miner.network_mining`` and, for every ``tcom`` user at
    the current level, collects the distinct ``id1`` values of its
    'retweet' / 'reply-to' / 'dmentioned' documents in ``bnet`` and passes
    them, 100 at a time, to ``lookup.lookup_user_list`` with ``level + 1``.
    The rounds stop once ``tcom`` holds more than 4000 documents.

    NOTE(review): when no seeds are found the function still enters the
    sampling loop, and the loop has no exit other than the 4000-document
    threshold -- confirm both are intended.

    Args:
        dbname: name of the database to connect to.
        mode: passed through unchanged to the ``lookup`` helpers.
    """
    db = dbt.db_connect_no_auth(dbname)
    poi, timel, bnet = db['tcom'], db['times'], db['bnet']
    stream_users = db['poi']

    asc, desc = pymongo.ASCENDING, pymongo.DESCENDING
    poi.create_index("id", unique=True)
    poi.create_index("level", unique=False)
    timel.create_index([('user.id', asc), ('id', desc)], unique=False)
    timel.create_index([('id', asc)], unique=True)
    bnet_key = [(field, asc)
                for field in ("id0", "id1", "relationship", "statusid")]
    bnet.create_index(bnet_key, unique=True)

    # Seeding runs once; the retry loop that used to wrap it is disabled.
    ed_seed = profiles_check.seed_all_profile(stream_users, 5)
    seed_count = len(ed_seed)
    # Prints use one parenthesised argument; the output is the same as the
    # comma-separated print statement.
    if seed_count != 0:
        print('%s %s' % ('seed users: ', seed_count))
        lookup.trans_seed_to_poi(ed_seed, poi, mode)
    else:
        stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        print('%s %s' % (stamp, 'no seed users, finished!'))

    batch_size = 100
    level = 1
    while True:
        timelines.monitor_timeline(poi, timel, 1)
        timeline_network_miner.network_mining(poi, timel, bnet, level)

        for user in poi.find({'level': level}):
            edge_query = {
                'id0': user['id'],
                'relationship': {'$in': ['retweet', 'reply-to', 'dmentioned']},
            }
            # De-duplicate the interaction targets before looking them up.
            neiblist = list({relate['id1'] for relate in bnet.find(edge_query)})
            for start in xrange(0, len(neiblist), batch_size):
                lookup.lookup_user_list(neiblist[start:start + batch_size],
                                        poi, level + 1, mode)

        if poi.count() > 4000:
            break
        level += 1
예제 #6
0
def snowball_friends(dbname, com_name, net_name):
    """Seed ``com_name`` from ``stream_user`` and snowball level by level.

    The ``id`` values of the ``stream_user`` collection in ``dbname`` are
    fetched and handed, 100 at a time, to ``lookup.lookup_user_list`` together
    with the user collection, the value 1 and the mode ``'DP'``.  After that,
    ``following.snowball_following`` and ``follower.snowball_follower`` are
    called once per level, starting at level 1 and moving to the next level
    after each round, until both return ``False`` in the same round.

    Args:
        dbname: name of the database to connect to.
        com_name: name of the user collection inside that database.
        net_name: name of the network collection inside that database.
    """
    db = dbt.db_connect_no_auth(dbname)
    ed_poi = db[com_name]
    ed_net = db[net_name]

    asc = pymongo.ASCENDING
    ed_poi.create_index("id", unique=True)
    # One compound (level, <direction>_prelevel_node) index per direction.
    for prelevel_field in ('following_prelevel_node', 'follower_prelevel_node'):
        ed_poi.create_index([('level', asc), (prelevel_field, asc)],
                            unique=False)
    ed_net.create_index([("user", asc), ("follower", asc), ("type", asc)],
                        unique=True)

    # Seeds come straight from 'stream_user'; an earlier seeding path based on
    # profiles_check.seed_all_profile / lookup.trans_seed_to_poi is disabled.
    ed_users = iot.get_values_one_field(dbname, 'stream_user', 'id')
    batch_size = 100
    for start in xrange(0, len(ed_users), batch_size):
        lookup.lookup_user_list(ed_users[start:start + batch_size], ed_poi, 1, 'DP')

    stamp_format = "%Y-%m-%d-%H-%M-%S"
    level = 1
    while True:
        # Prints use one parenthesised argument; the output is the same as
        # the comma-separated print statement.
        stamp = datetime.datetime.now().strftime(stamp_format)
        print('%s %s %s' % (stamp, 'Snowball followings of seeds for sample db', level))
        following_flag = following.snowball_following(ed_poi, ed_net, level, 'DP')

        stamp = datetime.datetime.now().strftime(stamp_format)
        print('%s %s %s' % (stamp, 'Snowball followees of seeds for sample db', level))
        follower_flag = follower.snowball_follower(ed_poi, ed_net, level, 'DP')

        # Stop only when both directions explicitly report False.
        if following_flag == False and follower_flag == False:
            break
        level += 1
예제 #7
0
def re_snowball_friends(olddbname, oldcomname, newdbname, newcomname):
    """Re-seed a new database from an old collection and snowball at level 1.

    All ``id`` values of ``olddbname.oldcomname`` are fetched, their count is
    printed, and they are handed, 100 at a time, to
    ``lookup.lookup_user_list`` together with the new user collection, the
    value 1 and the mode ``'N'``.  Afterwards ``following.snowball_following``
    and ``follower.snowball_follower`` are called repeatedly at level 1 with
    the new user collection and the new database's ``net`` collection; the
    level is advanced (which ends the loop) once both return ``False`` in the
    same round.

    Args:
        olddbname: name of the database that holds the existing users.
        oldcomname: name of the collection in ``olddbname`` to read ids from.
        newdbname: name of the database to connect to and populate.
        newcomname: name of the user collection inside ``newdbname``.
    """
    newdb = dbt.db_connect_no_auth(newdbname)
    newcom = newdb[newcomname]
    newnet = newdb['net']

    asc = pymongo.ASCENDING
    newcom.create_index("id", unique=True)
    # One compound (level, <direction>_prelevel_node) index per direction.
    for prelevel_field in ('following_prelevel_node', 'follower_prelevel_node'):
        newcom.create_index([('level', asc), (prelevel_field, asc)],
                            unique=False)
    newnet.create_index([("user", asc), ("follower", asc)], unique=True)

    # Retrieve ED core users and look them up in batches of 100.
    ed_users = iot.get_values_one_field(olddbname, oldcomname, 'id')
    user_total = len(ed_users)
    print('%d users to process' % user_total)
    batch_size = 100
    for start in xrange(0, user_total, batch_size):
        lookup.lookup_user_list(ed_users[start:start + batch_size], newcom, 1, 'N')

    # Snowball sampling round: only level 1 is processed.
    stamp_format = "%Y-%m-%d-%H-%M-%S"
    level = 1
    while level < 2:
        # Per the original note, each snowball call only processes up to 200
        # users, hence the repeated rounds at the same level.
        # Prints use one parenthesised argument; the output is the same as
        # the comma-separated print statement.
        stamp = datetime.datetime.now().strftime(stamp_format)
        print('%s %s %s' % (stamp, 'Snowball followings of seeds for sample db', level))
        following_flag = following.snowball_following(newcom, newnet, level, 'N')

        stamp = datetime.datetime.now().strftime(stamp_format)
        print('%s %s %s' % (stamp, 'Snowball followees of seeds for sample db', level))
        follower_flag = follower.snowball_follower(newcom, newnet, level, 'N')

        # Advance only when both directions explicitly report False.
        level_done = following_flag == False and follower_flag == False
        if level_done:
            level += 1