count_map = {}
    count_user_item_map = {}
    for user in user_item_map:
        for item, cnt in user_item_map[user].items():
            count_map.setdefault(cnt, 0)
            count_map[cnt] += 1
            count_user_item_map.setdefault(cnt, {})
            count_user_item_map[cnt].setdefault(user, set())
            count_user_item_map[cnt][user].add(item)
    # for cnt in count_map:
    #     print 'the number of behavior which occurs %d is %d' %(cnt, count_map[cnt])
    return count_map, count_user_item_map

if __name__ == '__main__':
    id_position_map = DataSplit.itemIDMap('../File/tianchi_mobile_recommend_train_item.csv')
    test_user_item_map, test_user_item_map_size = DataSplit.userOperate('../File/user_test_data.csv', 4, id_position_map)
    fp_behavior_occur_count = open('../File/AnalysisResult/behavior_occur_count.csv', 'w')
    total1 = 0
    for i in xrange(1, 5):
        fp_behavior_occur_count.write('behavior type is %d: \n' %i)
        count_map, count_user_item_map = userBuyAppetite('../File/user_train_data.csv', i, id_position_map)
        total = 0
        for cnt in count_map:
            # print count_map[cnt], len(count_user_item_map[cnt])
            bingos, test_set_num = DataSplit.computeRatio1(count_user_item_map[cnt], test_user_item_map)
            fp_behavior_occur_count.write('behavior occurs = %d, in train data(%d,%d,%.2lf), in test data(%d,%d,%.2lf)\n' \
                                          %(cnt, bingos, count_map[cnt], 100.0 * bingos / count_map[cnt], bingos, test_set_num, 100.0 * bingos / test_set_num))
            total += bingos
            # total1 += len(count_user_item_map[cnt])
        fp_behavior_occur_count.write('%d\n' %total)
        print i, total
    for user in user_item_map:
        for item, cnt in user_item_map[user].items():
            count_map.setdefault(cnt, 0)
            count_map[cnt] += 1
            count_user_item_map.setdefault(cnt, {})
            count_user_item_map[cnt].setdefault(user, set())
            count_user_item_map[cnt][user].add(item)
    # for cnt in count_map:
    #     print 'the number of behavior which occurs %d is %d' %(cnt, count_map[cnt])
    return count_map, count_user_item_map


if __name__ == '__main__':
    # Script entry point (this copy of the block is cut off after the inner
    # loop in the visible source, so only the visible part is described).
    #
    # For every behavior code 1..4 it groups the training (user, item) pairs
    # by occurrence count, compares each group against the test map with
    # DataSplit.computeRatio1, and writes one summary line per count.
    # NOTE(review): the semantics of the DataSplit helpers and of the
    # behavior codes are defined elsewhere -- confirm against that module.
    id_position_map = DataSplit.itemIDMap(
        '../File/tianchi_mobile_recommend_train_item.csv')
    # Test map built with behavior code 4; reused for every comparison below.
    test_user_item_map, test_user_item_map_size = DataSplit.userOperate(
        '../File/user_test_data.csv', 4, id_position_map)
    # NOTE(review): this handle is not closed anywhere in the visible lines;
    # confirm it is closed further down or switch to a `with` block.
    fp_behavior_occur_count = open(
        '../File/AnalysisResult/behavior_occur_count.csv', 'w')
    # Only referenced by commented-out code in the visible source.
    total1 = 0
    for i in xrange(1, 5):
        fp_behavior_occur_count.write('behavior type is %d: \n' % i)
        count_map, count_user_item_map = userBuyAppetite(
            '../File/user_train_data.csv', i, id_position_map)
        # Running sum of `bingos` over all occurrence counts for behavior i.
        total = 0
        for cnt in count_map:
            # print count_map[cnt], len(count_user_item_map[cnt])
            bingos, test_set_num = DataSplit.computeRatio1(
                count_user_item_map[cnt], test_user_item_map)
            # Writes bingos as a percentage of the training pairs with this
            # count and as a percentage of test_set_num.
            # NOTE(review): divides by test_set_num without a zero check.
            fp_behavior_occur_count.write('behavior occurs = %d, in train data(%d,%d,%.2lf), in test data(%d,%d,%.2lf)\n' \
                                          %(cnt, bingos, count_map[cnt], 100.0 * bingos / count_map[cnt], bingos, test_set_num, 100.0 * bingos / test_set_num))
            total += bingos
Esempio n. 3
0
 """
 # Accumulates, per behavior code and per file position, the `bingos` value
 # returned by DataSplit.computeRatio1 for the files under rootDir.
 # NOTE(review): the enclosing scope and the definition of id_position_map
 # are outside the visible source -- confirm where this fragment lives.
 import os
 # 5 rows x 8 columns of zeros, indexed [behavior code][file position].
 # Only rows 1..4 and columns 1..7 are written below.
 total_user_item_behavior_list = [[0 for i in xrange(0, 8)] for i in xrange(0, 5)]
 rootDir = '../File/EverydayData/'
 # xrange(0, 1) yields only cnt == 0, so this outer loop runs exactly once.
 for cnt in xrange(0, 1):
     print 'process %02d' %cnt
     # tag counts directory entries seen so far; ii counts entries processed.
     tag = 0
     ii = 0
     # NOTE(review): os.listdir does not guarantee any ordering, yet the
     # logic below treats the first entry specially -- confirm the intended
     # order (e.g. sorted by file name).
     for file_name in os.listdir(rootDir):
         file_path = os.path.join(rootDir, file_name)
         # Skips the first `cnt` entries; with cnt == 0 this never triggers.
         if tag < cnt:
             tag += 1
             continue
         print '%s proceed begin %d %d %d' %(file_path, ii, tag, cnt)
         if ii == 0:
             # The first processed file becomes the test map (behavior code 4).
             test_user_item_map, test_user_item_map_size = DataSplit.userOperate(file_path, 4, id_position_map, True)
         elif ii <= 7:
             # The next seven files are each compared with the test map, once
             # per behavior code 1..4.
             for i in xrange(1, 5):
                 everyday_user_item_map, everyday_user_item_map_size = DataSplit.userOperate(file_path, i, id_position_map, True)
                 print 'behavior %d' %i
                 # print 'train data: user_item_numbers = %d' %everyday_user_item_map_size
                 # `total` is assigned here but not read in the visible lines.
                 bingos, total = DataSplit.computeRatio1(everyday_user_item_map, test_user_item_map)
                 # print '(%d,%d,%.2lf)' %(bingos, everyday_user_item_map_size, 100.0 * bingos / everyday_user_item_map_size)
                 total_user_item_behavior_list[i][ii] += bingos
         else:
             # Stop after the test file plus seven comparison files.
             break
         ii += 1
         tag += 1
 # test_user_item_map, test_user_item_map_size = DataSplit.userOperate('../File/user_test_data.csv', 4, id_position_map)
 # print 'test data: user_item_numbers = %d' %test_user_item_map_size
 # import os