def user_important_rank_test(date, window_size):
    """Feed per-user scores from LevelDB into ``user_rank`` and print 5 uids.

    With ``window_size == 1`` the scores come from the 'important' store of
    ``date``.  Otherwise the values of the 'active' stores for ``date`` and
    the preceding ``window_size - 1`` days are summed per uid.  The resulting
    mapping is ranked with ``user_rank(..., 'important', 500, ...)`` and the
    first five entries of the result are printed.

    :param date: date passed to ``datetime2ts`` and on to ``user_rank``.
    :param window_size: number of days to aggregate.
    """
    # presumably a Unix timestamp in seconds, since whole days are subtracted
    # from it below -- verify against datetime2ts
    base_ts = datetime2ts(date)
    seconds_per_day = 24 * 60 * 60
    cache_bytes = 8 * (2 << 25)
    scores = {}

    if window_size == 1:
        store_path = os.path.join(LEVELDBPATH,
                                  get_leveldb('important', base_ts))
        bucket = leveldb.LevelDB(store_path,
                                 block_cache_size=cache_bytes,
                                 write_buffer_size=cache_bytes)
        for raw_uid, raw_score in bucket.RangeIter():
            key = int(raw_uid)
            value = float(raw_score)
            scores[key] = value
    else:
        # NOTE(review): this branch reads the 'active' stores although the
        # single-day branch and the ranking label both use 'important' --
        # confirm whether that is intended.
        for day_offset in range(window_size):
            day_ts = base_ts - day_offset * seconds_per_day
            store_path = os.path.join(LEVELDBPATH,
                                      get_leveldb('active', day_ts))
            bucket = leveldb.LevelDB(store_path,
                                     block_cache_size=cache_bytes,
                                     write_buffer_size=cache_bytes)
            for raw_uid, raw_score in bucket.RangeIter():
                key = int(raw_uid)
                value = float(raw_score)
                scores[key] = scores.get(key, 0) + value

    ranked = user_rank(scores, 'important', 500, date, window_size)
    for top_uid in ranked[:5]:
        print(top_uid)
def burst_important_task(top_n, date, window_size):
    """Rank users by the absolute change of their 'important' score.

    For every uid in the 'important' store of ``date`` the absolute
    difference to the value stored for the same uid in the 'important' store
    ``window2time(window_size)`` earlier is computed.  A uid that is missing
    from the earlier store, or whose earlier value cannot be parsed as a
    float, is treated as having a previous score of 0.

    :param top_n: maximum number of uids to return.
    :param date: date passed to ``datetime2ts`` and on to ``user_rank``.
    :param window_size: window passed to ``window2time`` to locate the
        earlier store.
    :returns: when fewer than 100,000,000 uids were collected, a list of at
        most ``top_n`` integer uids ordered by descending score change;
        otherwise whatever ``user_rank`` returns.
    """
    date_time = datetime2ts(date)
    cache_bytes = 8 * (2 << 25)
    uid_important = {}

    db_name = get_leveldb('important', date_time)
    previous_db_name = get_leveldb('important',
                                   date_time - window2time(window_size))
    current_bucket = leveldb.LevelDB(os.path.join(LEVELDBPATH, db_name),
                                     block_cache_size=cache_bytes,
                                     write_buffer_size=cache_bytes)
    previous_bucket = leveldb.LevelDB(
        os.path.join(LEVELDBPATH, previous_db_name),
        block_cache_size=cache_bytes,
        write_buffer_size=cache_bytes)

    for uid, important in current_bucket.RangeIter():
        uid = int(uid)
        important = float(important)
        try:
            # The key is rebuilt from the parsed int, as in the lookup the
            # stores are expected to answer.
            previous_important = float(previous_bucket.Get(str(uid)))
        except (KeyError, ValueError):
            # Only "no earlier record" / "unparsable earlier record" fall
            # back to 0; interrupts and storage errors must not be hidden.
            previous_important = 0
        uid_important[uid] = math.fabs(important - previous_important)

    if len(uid_important) < 100000000:
        ranked_pairs = sorted(uid_important.iteritems(),
                              key=operator.itemgetter(1), reverse=True)
        sorted_uids = []
        for uid, _ in ranked_pairs:
            if len(sorted_uids) >= top_n:
                break
            sorted_uids.append(uid)
    else:
        # NOTE(review): the label is 'whole_active' although the values are
        # importance deltas -- confirm against user_rank.
        sorted_uids = user_rank(uid_important, 'whole_active', top_n, date,
                                window_size)
    return sorted_uids
def important_rank(top_n, date, window_size):
    """Return the uids with the highest summed 'important' score.

    The 'important' stores for ``date`` and the preceding
    ``window_size - 1`` days are read and their values summed per uid (for
    ``window_size == 1`` this is just the single store of ``date``).

    :param top_n: maximum number of uids to return.
    :param date: date passed to ``datetime2ts`` and on to ``user_rank``.
    :param window_size: number of days to aggregate.
    :returns: when fewer than 100,000,000 uids were collected, a list of at
        most ``top_n`` integer uids in descending score order, skipping every
        uid for which ``is_in_trash_list`` is true; otherwise whatever
        ``user_rank`` returns.
    """
    date_time = datetime2ts(date)
    seconds_per_day = 24 * 60 * 60
    cache_bytes = 8 * (2 << 25)
    uid_important = {}

    # range(1) covers the single-day case, so one loop serves every window.
    for day_offset in range(window_size):
        db_name = get_leveldb('important',
                              date_time - day_offset * seconds_per_day)
        bucket = leveldb.LevelDB(os.path.join(LEVELDBPATH, db_name),
                                 block_cache_size=cache_bytes,
                                 write_buffer_size=cache_bytes)
        for uid, important in bucket.RangeIter():
            uid = int(uid)
            uid_important[uid] = uid_important.get(uid, 0) + float(important)

    if len(uid_important) < 100000000:
        ranked_pairs = sorted(uid_important.iteritems(),
                              key=operator.itemgetter(1), reverse=True)
        sorted_uids = []
        for uid, _ in ranked_pairs:
            # Stop before consulting the trash list once the result is full.
            if len(sorted_uids) >= top_n:
                break
            if is_in_trash_list(uid):
                continue
            sorted_uids.append(uid)
    else:
        # NOTE(review): the label is 'whole_active' although the values are
        # importance scores -- confirm against user_rank.
        sorted_uids = user_rank(uid_important, 'whole_active', top_n, date,
                                window_size)
    return sorted_uids
def whole_active_task(top_n, date, window_size):
    """Return the uids with the highest 'active' score over a day window.

    With ``window_size == 1`` the values of the 'active' store of ``date``
    are used directly; otherwise the 'active' stores of ``date`` and the
    preceding ``window_size - 1`` days are summed per uid.

    :param top_n: maximum number of uids to return.
    :param date: date passed to ``datetime2ts`` and on to ``user_rank``.
    :param window_size: number of days to aggregate.
    :returns: when fewer than 100,000,000 uids were collected, a list of at
        most ``top_n`` integer uids in descending score order; otherwise
        whatever ``user_rank`` returns.
    """
    base_ts = datetime2ts(date)
    cache_bytes = 8 * (2 << 25)

    def open_active_store(timestamp):
        # Open the 'active' LevelDB that get_leveldb names for `timestamp`.
        store_name = get_leveldb('active', timestamp)
        return leveldb.LevelDB(os.path.join(LEVELDBPATH, store_name),
                               block_cache_size=cache_bytes,
                               write_buffer_size=cache_bytes)

    totals = {}
    if window_size == 1:
        for raw_uid, raw_score in open_active_store(base_ts).RangeIter():
            key = int(raw_uid)
            value = float(raw_score)
            totals[key] = value
    else:
        for day_offset in range(window_size):
            day_store = open_active_store(base_ts - day_offset * 24 * 60 * 60)
            for raw_uid, raw_score in day_store.RangeIter():
                key = int(raw_uid)
                value = float(raw_score)
                totals[key] = totals.get(key, 0) + value

    if len(totals) >= 100000000:
        # Too many entries for the in-memory sort below; delegate instead.
        return user_rank(totals, 'whole_active', top_n, date, window_size)

    ranked_pairs = sorted(totals.iteritems(), key=operator.itemgetter(1),
                          reverse=True)
    best_uids = []
    for key, _ in ranked_pairs:
        if len(best_uids) >= top_n:
            break
        best_uids.append(key)
    return best_uids
def rank_simulation(uid_value):
    """Run ``user_rank`` on ``uid_value`` with fixed simulation settings.

    The call uses the label 'test', a limit of 500, the date '2012-12-22'
    and a window size of 1.

    :param uid_value: passed through as the first argument of ``user_rank``
        (presumably a uid -> score mapping; verify against user_rank).
    :returns: the value returned by ``user_rank``.
    """
    date = '2012-12-22'
    window_size = 1
    return user_rank(uid_value, 'test', 500, date, window_size)