コード例 #1
0
def main():
    """Scan Redis for ``be_retweet_*`` hashes and summarise their sizes.

    For every key returned by ``r.scan`` that starts with ``be_retweet_``,
    the number of fields in that hash is taken as the user's count.  Users
    whose count is at least 1000 are written to ``core_list.csv`` as
    ``uid, count`` rows, largest count first.  Afterwards a table is printed
    with one line per threshold in ``range_list``: the threshold, the number
    of users whose count exceeds it (N) and the accumulated ``Fw`` value.

    The scan stops when Redis reports a cursor of 0 (iteration complete) or
    after 1000 batches of ``count=1000``, whichever comes first.
    """
    key_prefix = 'be_retweet_'
    range_list = [100000, 10000, 1000, 100, 0]
    dis_dict = {}   # count value -> number of users having that count
    save_list = []  # (uid, count) for users with count >= 1000

    scan_count = 0
    scan_cursor = 0
    while scan_count < 1000000:
        scan_cursor, keys = r.scan(scan_cursor, count=1000)
        scan_count += 1000
        for key in keys:
            if not key.startswith(key_prefix):
                continue
            uid = key[len(key_prefix):]
            # HLEN returns the field count without transferring the hash.
            be_retweet_user_count = r.hlen(key)
            dis_dict[be_retweet_user_count] = (
                dis_dict.get(be_retweet_user_count, 0) + 1)
            if be_retweet_user_count >= 1000:
                save_list.append((uid, be_retweet_user_count))
        # A returned cursor of 0 means the full iteration is finished;
        # scanning again would restart it and count every key twice.
        if int(scan_cursor) == 0:
            break

    save_list.sort(key=lambda pair: pair[1], reverse=True)
    with open(
            '/home/ubuntu8/huxiaoqian/user_portrait/user_portrait/cron/recommentation_in/core_list.csv',
            'wb') as f:
        writer = csv.writer(f)
        for uid, count in save_list:
            # Write the count itself, not the whole (uid, count) tuple.
            writer.writerow([uid, count])

    n_dict = {}
    fw_dict = {}
    for rf, dis_count in dis_dict.items():
        for range_up in range_list:
            if rf > range_up:
                # NOTE(review): rf is added once per distinct value, not
                # weighted by dis_count -- confirm this is the intended Fw.
                fw_dict[range_up] = fw_dict.get(range_up, 0) + rf
                n_dict[range_up] = n_dict.get(range_up, 0) + dis_count

    print('Rf, N, Fw:')
    for range_up in range_list:
        print('%s %s %s' % (range_up,
                            n_dict.get(range_up, 0),
                            fw_dict.get(range_up, 0)))
コード例 #2
0
def cal_class_ratio():
    """Print how many users fall into each three-flag class for 2013-09-07.

    Iterates the ``activity_<ts>`` hash on ``r_cluster`` (``ts`` comes from
    ``datetime2ts('2013-09-07')``).  Each field is a uid whose value is a
    JSON object; its values are summed as the user's weibo count.  The class
    key is the concatenation of three '0'/'1' flags:

    * flag 1: the ``retweet_<uid>`` hash on ``r`` has at least 8 fields
    * flag 2: the ``be_retweet_<uid>`` hash on ``r`` has at least 9 fields
    * flag 3: the weibo count is at least 6

    The resulting ``{key: user count}`` dict is printed.  Iteration stops
    when HSCAN returns cursor 0 or after 1000 batches of ``count=1000``.
    """
    date = '2013-09-07'
    activity_key = 'activity_' + str(datetime2ts(date))
    ratio_results = {}

    scan_count = 0
    scan_cursor = 0
    while scan_count < 1000000:
        scan_cursor, activity_by_uid = r_cluster.hscan(
            activity_key, scan_cursor, count=1000)
        scan_count += 1000
        # HSCAN already returns the field values, so no per-uid HGET needed.
        for uid, activity_dict_string in activity_by_uid.items():
            activity_dict = json.loads(activity_dict_string)
            weibo_count = sum(int(num) for num in activity_dict.values())

            # HLEN gives the field count without fetching the whole hash.
            retweet_count = r.hlen('retweet_' + str(uid))
            be_retweet_count = r.hlen('be_retweet_' + str(uid))

            indic_1 = '1' if retweet_count >= 8 else '0'
            indic_2 = '1' if be_retweet_count >= 9 else '0'
            indic_3 = '1' if weibo_count >= 6 else '0'
            key = indic_1 + indic_2 + indic_3
            ratio_results[key] = ratio_results.get(key, 0) + 1
            # A per-class CSV export used to sit here as disabled code; it
            # referenced writers that are never created in this function.
        # Cursor 0 means the hash has been fully iterated; continuing would
        # restart the iteration and count every user again.
        if int(scan_cursor) == 0:
            break

    print('ratio_results: %s' % (ratio_results,))
コード例 #3
0
def cal_class_ratio():
    """Print how many users fall into each three-flag class for 2013-09-07.

    Iterates the ``activity_<ts>`` hash on ``r_cluster`` (``ts`` comes from
    ``datetime2ts('2013-09-07')``).  Each field is a uid whose value is a
    JSON object; its values are summed as the user's weibo count.  The class
    key is the concatenation of three '0'/'1' flags:

    * flag 1: the ``retweet_<uid>`` hash on ``r`` has at least 8 fields
    * flag 2: the ``be_retweet_<uid>`` hash on ``r`` has at least 9 fields
    * flag 3: the weibo count is at least 6

    The resulting ``{key: user count}`` dict is printed.  Iteration stops
    when HSCAN returns cursor 0 or after 1000 batches of ``count=1000``.
    """
    date = '2013-09-07'
    activity_key = 'activity_' + str(datetime2ts(date))
    ratio_results = {}

    scan_count = 0
    scan_cursor = 0
    while scan_count < 1000000:
        scan_cursor, activity_by_uid = r_cluster.hscan(
            activity_key, scan_cursor, count=1000)
        scan_count += 1000
        # HSCAN already returns the field values, so no per-uid HGET needed.
        for uid, activity_dict_string in activity_by_uid.items():
            activity_dict = json.loads(activity_dict_string)
            weibo_count = sum(int(num) for num in activity_dict.values())

            # HLEN gives the field count without fetching the whole hash.
            retweet_count = r.hlen('retweet_' + str(uid))
            be_retweet_count = r.hlen('be_retweet_' + str(uid))

            indic_1 = '1' if retweet_count >= 8 else '0'
            indic_2 = '1' if be_retweet_count >= 9 else '0'
            indic_3 = '1' if weibo_count >= 6 else '0'
            key = indic_1 + indic_2 + indic_3
            ratio_results[key] = ratio_results.get(key, 0) + 1
            # A per-class CSV export used to sit here as disabled code; it
            # referenced writers that are never created in this function.
        # Cursor 0 means the hash has been fully iterated; continuing would
        # restart the iteration and count every user again.
        if int(scan_cursor) == 0:
            break

    print('ratio_results: %s' % (ratio_results,))
コード例 #4
0
def main():
    """Scan Redis for ``be_retweet_*`` hashes and summarise their sizes.

    For every key returned by ``r.scan`` that starts with ``be_retweet_``,
    the number of fields in that hash is taken as the user's count.  Users
    whose count is at least 1000 are written to ``core_list.csv`` as
    ``uid, count`` rows, largest count first.  Afterwards a table is printed
    with one line per threshold in ``range_list``: the threshold, the number
    of users whose count exceeds it (N) and the accumulated ``Fw`` value.

    The scan stops when Redis reports a cursor of 0 (iteration complete) or
    after 1000 batches of ``count=1000``, whichever comes first.
    """
    key_prefix = 'be_retweet_'
    range_list = [100000, 10000, 1000, 100, 0]
    dis_dict = {}   # count value -> number of users having that count
    save_list = []  # (uid, count) for users with count >= 1000

    scan_count = 0
    scan_cursor = 0
    while scan_count < 1000000:
        scan_cursor, keys = r.scan(scan_cursor, count=1000)
        scan_count += 1000
        for key in keys:
            if not key.startswith(key_prefix):
                continue
            uid = key[len(key_prefix):]
            # HLEN returns the field count without transferring the hash.
            be_retweet_user_count = r.hlen(key)
            dis_dict[be_retweet_user_count] = (
                dis_dict.get(be_retweet_user_count, 0) + 1)
            if be_retweet_user_count >= 1000:
                save_list.append((uid, be_retweet_user_count))
        # A returned cursor of 0 means the full iteration is finished;
        # scanning again would restart it and count every key twice.
        if int(scan_cursor) == 0:
            break

    save_list.sort(key=lambda pair: pair[1], reverse=True)
    with open('/home/ubuntu8/huxiaoqian/user_portrait/user_portrait/cron/recommentation_in/core_list.csv', 'wb') as f:
        writer = csv.writer(f)
        for uid, count in save_list:
            # Write the count itself, not the whole (uid, count) tuple.
            writer.writerow([uid, count])

    n_dict = {}
    fw_dict = {}
    for rf, dis_count in dis_dict.items():
        for range_up in range_list:
            if rf > range_up:
                # NOTE(review): rf is added once per distinct value, not
                # weighted by dis_count -- confirm this is the intended Fw.
                fw_dict[range_up] = fw_dict.get(range_up, 0) + rf
                n_dict[range_up] = n_dict.get(range_up, 0) + dis_count

    print('Rf, N, Fw:')
    for range_up in range_list:
        print('%s %s %s' % (range_up,
                            n_dict.get(range_up, 0),
                            fw_dict.get(range_up, 0)))
コード例 #5
0
def cal_core_class():
    """Classify every uid listed in ``core_list.csv`` and write the result.

    The first column of each row in ``core_list.csv`` is read as a uid.  For
    each uid the function collects:

    * the field count of the ``retweet_<uid>`` hash on ``r``
    * the field count of the ``be_retweet_<uid>`` hash on ``r``
    * the average daily weibo count over the 7 days ending at
      ``datetime2ts('2013-09-07')``, summed from the JSON values stored in
      the ``activity_<ts>`` hashes on ``r_cluster`` (missing days count 0)

    A three-character class key is built from the thresholds 8, 9 and 6
    respectively.  Rows ``[uid, key, retweet_count, be_retweet_count,
    ave_weibo_count]`` are written to ``core_class.csv`` sorted by
    ``be_retweet_count`` descending, and the number of users in class
    ``'011'`` is printed.
    """
    date = '2013-09-07'
    timestamp = datetime2ts(date)
    result_list = []
    count011 = 0

    with open(
            '/home/ubuntu8/huxiaoqian/user_portrait/user_portrait/cron/recommentation_in/core_list.csv',
            'rb') as f_r:
        for line in csv.reader(f_r):
            if not line:
                # Blank rows carry no uid; skip them instead of crashing.
                continue
            uid = line[0]
            # HLEN gives the field count without fetching the whole hash.
            retweet_count = r.hlen('retweet_' + str(uid))
            be_retweet_count = r.hlen('be_retweet_' + str(uid))

            weibo_count = 0
            for i in range(0, 7):
                ts = timestamp - 24 * 3600 * i
                activity_string = r_cluster.hget('activity_' + str(ts),
                                                 str(uid))
                if not activity_string:
                    continue
                activity_dict = json.loads(activity_string)
                # int() matches how cal_class_ratio reads the same data and
                # tolerates counts that were serialised as strings.
                weibo_count += sum(
                    int(num) for num in activity_dict.values())
            ave_weibo_count = float(weibo_count) / 7

            indic_1 = '1' if retweet_count >= 8 else '0'
            indic_2 = '1' if be_retweet_count >= 9 else '0'
            indic_3 = '1' if ave_weibo_count >= 6 else '0'
            key = indic_1 + indic_2 + indic_3
            if key == '011':
                count011 += 1
            result_list.append(
                [uid, key, retweet_count, be_retweet_count, ave_weibo_count])

    result_list.sort(key=lambda row: row[3], reverse=True)
    with open(
            '/home/ubuntu8/huxiaoqian/user_portrait/user_portrait/cron/recommentation_in/core_class.csv',
            'wb') as f_w:
        csv.writer(f_w).writerows(result_list)

    print('count011: %s' % count011)
コード例 #6
0
def cal_core_class():
    """Classify every uid listed in ``core_list.csv`` and write the result.

    The first column of each row in ``core_list.csv`` is read as a uid.  For
    each uid the function collects:

    * the field count of the ``retweet_<uid>`` hash on ``r``
    * the field count of the ``be_retweet_<uid>`` hash on ``r``
    * the average daily weibo count over the 7 days ending at
      ``datetime2ts('2013-09-07')``, summed from the JSON values stored in
      the ``activity_<ts>`` hashes on ``r_cluster`` (missing days count 0)

    A three-character class key is built from the thresholds 8, 9 and 6
    respectively.  Rows ``[uid, key, retweet_count, be_retweet_count,
    ave_weibo_count]`` are written to ``core_class.csv`` sorted by
    ``be_retweet_count`` descending, and the number of users in class
    ``'011'`` is printed.
    """
    date = '2013-09-07'
    timestamp = datetime2ts(date)
    result_list = []
    count011 = 0

    with open('/home/ubuntu8/huxiaoqian/user_portrait/user_portrait/cron/recommentation_in/core_list.csv', 'rb') as f_r:
        for line in csv.reader(f_r):
            if not line:
                # Blank rows carry no uid; skip them instead of crashing.
                continue
            uid = line[0]
            # HLEN gives the field count without fetching the whole hash.
            retweet_count = r.hlen('retweet_' + str(uid))
            be_retweet_count = r.hlen('be_retweet_' + str(uid))

            weibo_count = 0
            for i in range(0, 7):
                ts = timestamp - 24 * 3600 * i
                activity_string = r_cluster.hget('activity_' + str(ts),
                                                 str(uid))
                if not activity_string:
                    continue
                activity_dict = json.loads(activity_string)
                # int() matches how cal_class_ratio reads the same data and
                # tolerates counts that were serialised as strings.
                weibo_count += sum(
                    int(num) for num in activity_dict.values())
            ave_weibo_count = float(weibo_count) / 7

            indic_1 = '1' if retweet_count >= 8 else '0'
            indic_2 = '1' if be_retweet_count >= 9 else '0'
            indic_3 = '1' if ave_weibo_count >= 6 else '0'
            key = indic_1 + indic_2 + indic_3
            if key == '011':
                count011 += 1
            result_list.append(
                [uid, key, retweet_count, be_retweet_count, ave_weibo_count])

    result_list.sort(key=lambda row: row[3], reverse=True)
    with open('/home/ubuntu8/huxiaoqian/user_portrait/user_portrait/cron/recommentation_in/core_class.csv', 'wb') as f_w:
        csv.writer(f_w).writerows(result_list)

    print('count011: %s' % count011)
コード例 #7
0
def cal_ave_fans():
    """Print the average field count of the ``be_retweet_*`` hashes in ``r``.

    Keys are walked with ``r.scan``; for each key starting with
    ``be_retweet_`` the hash's field count is added to a running total.  The
    total is divided by the number of such hashes seen and printed as
    ``ave_count``.  When no matching hash is found, 0.0 is printed.

    The average is taken over the hashes actually summed rather than over
    the nominal scan size: SCAN's ``count`` is only a hint, and the keyspace
    also holds keys with other prefixes.

    The scan stops when Redis reports cursor 0 or after 1000 batches of
    ``count=1000``, whichever comes first.
    """
    # Original author's note: "test there should use r_dict".
    key_prefix = 'be_retweet_'
    scan_count = 0
    scan_cursor = 0
    all_count = 0
    user_count = 0
    while scan_count < 1000000:
        scan_cursor, keys = r.scan(scan_cursor, count=1000)
        scan_count += 1000
        for key in keys:
            if key.startswith(key_prefix):
                # HLEN gives the field count without fetching the hash.
                all_count += r.hlen(key)
                user_count += 1
        # Cursor 0 means the keyspace has been fully iterated; continuing
        # would restart the scan and count every hash again.
        if int(scan_cursor) == 0:
            break

    ave_count = float(all_count) / user_count if user_count else 0.0
    print('ave_count: %s' % ave_count)
コード例 #8
0
def cal_ave_fans():
    """Print the average field count of the ``be_retweet_*`` hashes in ``r``.

    Keys are walked with ``r.scan``; for each key starting with
    ``be_retweet_`` the hash's field count is added to a running total.  The
    total is divided by the number of such hashes seen and printed as
    ``ave_count``.  When no matching hash is found, 0.0 is printed.

    The average is taken over the hashes actually summed rather than over
    the nominal scan size: SCAN's ``count`` is only a hint, and the keyspace
    also holds keys with other prefixes.

    The scan stops when Redis reports cursor 0 or after 1000 batches of
    ``count=1000``, whichever comes first.
    """
    # Original author's note: "test there should use r_dict".
    key_prefix = 'be_retweet_'
    scan_count = 0
    scan_cursor = 0
    all_count = 0
    user_count = 0
    while scan_count < 1000000:
        scan_cursor, keys = r.scan(scan_cursor, count=1000)
        scan_count += 1000
        for key in keys:
            if key.startswith(key_prefix):
                # HLEN gives the field count without fetching the hash.
                all_count += r.hlen(key)
                user_count += 1
        # Cursor 0 means the keyspace has been fully iterated; continuing
        # would restart the scan and count every hash again.
        if int(scan_cursor) == 0:
            break

    ave_count = float(all_count) / user_count if user_count else 0.0
    print('ave_count: %s' % ave_count)