def re_snowball_friends(olddbname, oldcomname, newdbname, newcomname):
    """Re-crawl the level-1 users of an existing collection into a new database.

    The ``id`` values fetched from ``oldcomname`` in ``olddbname`` with the
    filter ``{'level': 1}`` are looked up in batches of 100 into
    ``newcomname`` of ``newdbname`` (stored as level 1, mode ``'N'``).
    The following/follower snowball passes are then repeated against the
    ``net`` collection of the new database until both calls return False.

    :param olddbname: name of the database holding the existing sample.
    :param oldcomname: collection in ``olddbname`` that supplies user ids.
    :param newdbname: name of the database to populate.
    :param newcomname: user collection inside ``newdbname``.
    """
    target_db = dbt.db_connect_no_auth(newdbname)
    user_coll = target_db[newcomname]
    net_coll = target_db['net']

    user_coll.create_index("id", unique=True)
    user_coll.create_index(
        [('level', pymongo.ASCENDING),
         ('following_prelevel_node', pymongo.ASCENDING)],
        unique=False)
    user_coll.create_index(
        [('level', pymongo.ASCENDING),
         ('follower_prelevel_node', pymongo.ASCENDING)],
        unique=False)
    net_coll.create_index(
        [("user", pymongo.ASCENDING), ("follower", pymongo.ASCENDING)],
        unique=True)

    # Retrieve ED core users.
    seed_ids = iot.get_values_one_field(olddbname, oldcomname, 'id', {'level': 1})
    total = len(seed_ids)
    for start in range(0, total, 100):
        stop = min(total, start + 100)
        lookup.lookup_user_list(seed_ids[start:stop], user_coll, 1, 'N')

    level = 1
    while True:
        # Original author's note: each call of snowball_following and
        # snowball_follower only processes up to 200 users, hence the
        # repeated calls at the same level.
        stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        print('%s %s %s' % (stamp, 'Snowball followings of seeds for sample db', level))
        following_flag = following.snowball_following(user_coll, net_coll, level, 'N')

        stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        print('%s %s %s' % (stamp, 'Snowball followees of seeds for sample db', level))
        follower_flag = follower.snowball_follower(user_coll, net_coll, level, 'N')

        # Explicit comparison is kept on purpose: a None return does not
        # compare equal to False and therefore does not end the loop.
        if following_flag == False and follower_flag == False:
            break
def seed():
    """Look up the ids returned by ``getuid("fed", "com")`` into ``ed.com``.

    The ids are sent to ``lookup.lookup_user_list`` in batches of 100 and
    stored as level 1 with mode ``"N"``.  The total number of ids is
    printed before the lookups start.
    """
    ed_db = dbt.db_connect_no_auth("ed")
    target_coll = ed_db["com"]

    # An earlier revision loaded the ids from the pickle file
    # 'ygtimeuid.pick' instead of calling getuid.
    user_ids = getuid("fed", "com")
    total = len(user_ids)
    print(total)

    for start in range(0, total, 100):
        stop = min(total, start + 100)
        lookup.lookup_user_list(user_ids[start:stop], target_coll, 1, "N")
def timeline_sampling(dbname, mode='N'):
    """Grow the ``tcom`` user sample level by level from timeline interactions.

    Seeds obtained from ``profiles_check.seed_all_profile`` on the ``poi``
    collection are transferred into ``tcom``.  Each round then monitors
    timelines, mines the interaction network into ``bnet`` and looks up,
    for every user of the current level, the distinct ``id1`` targets of
    that user's 'retweet', 'reply-to' and 'dmentioned' relations as users
    of the next level.  Sampling stops once ``tcom`` holds more than 4000
    documents.

    :param dbname: name of the database to work in.
    :param mode: passed through unchanged to the ``lookup`` helpers.
    """
    db = dbt.db_connect_no_auth(dbname)
    sample = db['tcom']
    timeline_coll = db['times']
    behaviour_net = db['bnet']
    stream_users = db['poi']

    sample.create_index("id", unique=True)
    sample.create_index("level", unique=False)
    timeline_coll.create_index(
        [('user.id', pymongo.ASCENDING), ('id', pymongo.DESCENDING)],
        unique=False)
    timeline_coll.create_index([('id', pymongo.ASCENDING)], unique=True)
    behaviour_net.create_index(
        [("id0", pymongo.ASCENDING),
         ("id1", pymongo.ASCENDING),
         ("relationship", pymongo.ASCENDING),
         ("statusid", pymongo.ASCENDING)],
        unique=True)

    # Seeding runs once; the surrounding retry loop is disabled in the
    # original, so an empty seed list is only reported, not acted upon.
    seeds = profiles_check.seed_all_profile(stream_users, 5)
    seed_count = len(seeds)
    if seed_count != 0:
        print('%s %s' % ('seed users: ', seed_count))
        lookup.trans_seed_to_poi(seeds, sample, mode)
    else:
        stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        print('%s %s' % (stamp, 'no seed users, finished!'))

    level = 1
    while True:
        timelines.monitor_timeline(sample, timeline_coll, 1)
        timeline_network_miner.network_mining(
            sample, timeline_coll, behaviour_net, level)

        for user in sample.find({'level': level}):
            relation_query = {
                'id0': user['id'],
                'relationship': {'$in': ['retweet', 'reply-to', 'dmentioned']},
            }
            # De-duplicate the interaction partners before looking them up.
            neighbours = list(
                {edge['id1'] for edge in behaviour_net.find(relation_query)})
            neighbour_count = len(neighbours)
            for start in range(0, neighbour_count, 100):
                stop = min(neighbour_count, start + 100)
                lookup.lookup_user_list(
                    neighbours[start:stop], sample, level + 1, mode)

        if sample.count() > 4000:
            break
        level += 1
def seed():
    """Populate the ``com`` collection of database ``ed`` with seed users.

    User ids come from ``getuid('fed', 'com')``; their count is printed and
    they are then passed, 100 at a time, to ``lookup.lookup_user_list``
    with level 1 and mode ``'N'``.
    """
    connection = dbt.db_connect_no_auth('ed')
    seed_coll = connection['com']

    # A pickle file ('ygtimeuid.pick') was the id source in an earlier
    # revision; getuid replaces it here.
    id_list = getuid('fed', 'com')
    id_count = len(id_list)
    print(id_count)

    batch_start = 0
    while batch_start < id_count:
        batch_end = min(id_count, batch_start + 100)
        lookup.lookup_user_list(id_list[batch_start:batch_end], seed_coll, 1, 'N')
        batch_start += 100
def timeline_sampling(dbname, mode='N'):
    """Sample users by following timeline interactions outward from seeds.

    Works on four collections of ``dbname``: ``tcom`` (the sample),
    ``times`` (timelines), ``bnet`` (interaction edges) and ``poi``
    (source of seeds).  After seeding, each round calls
    ``timelines.monitor_timeline`` and
    ``timeline_network_miner.network_mining`` and then looks up the
    retweet / reply-to / dmentioned partners of every user at the current
    level as members of the next level.  The loop ends when ``tcom``
    contains more than 4000 documents.

    :param dbname: name of the database to work in.
    :param mode: forwarded unchanged to ``lookup.trans_seed_to_poi`` and
        ``lookup.lookup_user_list``.
    """
    database = dbt.db_connect_no_auth(dbname)
    poi = database['tcom']
    timel = database['times']
    bnet = database['bnet']
    stream_users = database['poi']

    poi.create_index("id", unique=True)
    poi.create_index("level", unique=False)
    timel.create_index(
        [('user.id', pymongo.ASCENDING), ('id', pymongo.DESCENDING)],
        unique=False)
    timel.create_index([('id', pymongo.ASCENDING)], unique=True)
    bnet.create_index(
        [("id0", pymongo.ASCENDING),
         ("id1", pymongo.ASCENDING),
         ("relationship", pymongo.ASCENDING),
         ("statusid", pymongo.ASCENDING)],
        unique=True)

    # One-shot seeding: the retry loop around this step is commented out
    # in the original, so execution continues even without seeds.
    ed_seed = profiles_check.seed_all_profile(stream_users, 5)
    n_seeds = len(ed_seed)
    if n_seeds == 0:
        now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        print('%s %s' % (now, 'no seed users, finished!'))
    else:
        print('%s %s' % ('seed users: ', n_seeds))
        lookup.trans_seed_to_poi(ed_seed, poi, mode)

    interaction_types = ['retweet', 'reply-to', 'dmentioned']
    level = 1
    while True:
        timelines.monitor_timeline(poi, timel, 1)
        timeline_network_miner.network_mining(poi, timel, bnet, level)

        for member in poi.find({'level': level}):
            partners = set()
            edges = bnet.find({'id0': member['id'],
                               'relationship': {'$in': interaction_types}})
            for edge in edges:
                partners.add(edge['id1'])
            partners = list(partners)

            n_partners = len(partners)
            for begin in range(0, n_partners, 100):
                end = min(n_partners, begin + 100)
                lookup.lookup_user_list(partners[begin:end], poi, level + 1, mode)

        if poi.count() > 4000:
            break
        level += 1
def snowball_friends(dbname, com_name, net_name):
    """Snowball followings and followers starting from the stream users.

    The ``id`` values fetched from the ``stream_user`` collection of
    ``dbname`` are looked up in batches of 100 into ``com_name`` (level 1,
    mode ``'DP'``).  Afterwards the following and follower snowball passes
    run once per level, with the level increasing after every round, until
    both passes return False in the same round.

    :param dbname: name of the database to work in.
    :param com_name: name of the user collection to fill.
    :param net_name: name of the collection receiving network edges.
    """
    db = dbt.db_connect_no_auth(dbname)
    user_coll = db[com_name]
    edge_coll = db[net_name]

    user_coll.create_index("id", unique=True)
    user_coll.create_index(
        [('level', pymongo.ASCENDING),
         ('following_prelevel_node', pymongo.ASCENDING)],
        unique=False)
    user_coll.create_index(
        [('level', pymongo.ASCENDING),
         ('follower_prelevel_node', pymongo.ASCENDING)],
        unique=False)
    edge_coll.create_index(
        [("user", pymongo.ASCENDING),
         ("follower", pymongo.ASCENDING),
         ("type", pymongo.ASCENDING)],
        unique=True)

    # The original keeps a disabled alternative here that seeded through
    # profiles_check.seed_all_profile / lookup.trans_seed_to_poi; the
    # active path reads the ids straight from 'stream_user'.
    stream_ids = iot.get_values_one_field(dbname, 'stream_user', 'id')
    total = len(stream_ids)
    for start in range(0, total, 100):
        stop = min(total, start + 100)
        lookup.lookup_user_list(stream_ids[start:stop], user_coll, 1, 'DP')

    level = 1
    while True:
        stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        print('%s %s %s' % (stamp, 'Snowball followings of seeds for sample db', level))
        following_flag = following.snowball_following(user_coll, edge_coll, level, 'DP')

        stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        print('%s %s %s' % (stamp, 'Snowball followees of seeds for sample db', level))
        follower_flag = follower.snowball_follower(user_coll, edge_coll, level, 'DP')

        # Explicit comparison is kept on purpose: only a value equal to
        # False from both passes stops the crawl.
        if following_flag == False and follower_flag == False:
            break
        level = level + 1
def re_snowball_friends(olddbname, oldcomname, newdbname, newcomname):
    """Re-crawl the users of an existing collection into a new database.

    All ``id`` values fetched from ``oldcomname`` in ``olddbname`` are
    looked up in batches of 100 into ``newcomname`` of ``newdbname``
    (level 1, mode ``'N'``).  The following/follower snowball passes for
    level 1 are then repeated against the ``net`` collection until both
    return False, which advances the level and ends the ``level < 2`` loop.

    :param olddbname: name of the database holding the existing sample.
    :param oldcomname: collection in ``olddbname`` that supplies user ids.
    :param newdbname: name of the database to populate.
    :param newcomname: user collection inside ``newdbname``.
    """
    new_db = dbt.db_connect_no_auth(newdbname)
    new_users = new_db[newcomname]
    new_edges = new_db['net']

    new_users.create_index("id", unique=True)
    new_users.create_index(
        [('level', pymongo.ASCENDING),
         ('following_prelevel_node', pymongo.ASCENDING)],
        unique=False)
    new_users.create_index(
        [('level', pymongo.ASCENDING),
         ('follower_prelevel_node', pymongo.ASCENDING)],
        unique=False)
    new_edges.create_index(
        [("user", pymongo.ASCENDING), ("follower", pymongo.ASCENDING)],
        unique=True)

    # Retrieve ED core users.
    core_ids = iot.get_values_one_field(olddbname, oldcomname, 'id')
    total = len(core_ids)
    print('%d users to process' % total)
    for start in range(0, total, 100):
        stop = min(total, start + 100)
        lookup.lookup_user_list(core_ids[start:stop], new_users, 1, 'N')

    # Snowball sampling round.
    level = 1
    while level < 2:
        # Original author's note: each call of snowball_following and
        # snowball_follower only processes up to 200 users, so the same
        # level is retried until both report False.
        stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        print('%s %s %s' % (stamp, 'Snowball followings of seeds for sample db', level))
        following_flag = following.snowball_following(new_users, new_edges, level, 'N')

        stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        print('%s %s %s' % (stamp, 'Snowball followees of seeds for sample db', level))
        follower_flag = follower.snowball_follower(new_users, new_edges, level, 'N')

        # Explicit comparison is kept on purpose: a None return does not
        # compare equal to False and therefore does not advance the level.
        if following_flag == False and follower_flag == False:
            level += 1