count_map = {} count_user_item_map = {} for user in user_item_map: for item, cnt in user_item_map[user].items(): count_map.setdefault(cnt, 0) count_map[cnt] += 1 count_user_item_map.setdefault(cnt, {}) count_user_item_map[cnt].setdefault(user, set()) count_user_item_map[cnt][user].add(item) # for cnt in count_map: # print 'the number of behavior which occurs %d is %d' %(cnt, count_map[cnt]) return count_map, count_user_item_map if __name__ == '__main__': id_position_map = DataSplit.itemIDMap('../File/tianchi_mobile_recommend_train_item.csv') test_user_item_map, test_user_item_map_size = DataSplit.userOperate('../File/user_test_data.csv', 4, id_position_map) fp_behavior_occur_count = open('../File/AnalysisResult/behavior_occur_count.csv', 'w') total1 = 0 for i in xrange(1, 5): fp_behavior_occur_count.write('behavior type is %d: \n' %i) count_map, count_user_item_map = userBuyAppetite('../File/user_train_data.csv', i, id_position_map) total = 0 for cnt in count_map: # print count_map[cnt], len(count_user_item_map[cnt]) bingos, test_set_num = DataSplit.computeRatio1(count_user_item_map[cnt], test_user_item_map) fp_behavior_occur_count.write('behavior occurs = %d, in train data(%d,%d,%.2lf), in test data(%d,%d,%.2lf)\n' \ %(cnt, bingos, count_map[cnt], 100.0 * bingos / count_map[cnt], bingos, test_set_num, 100.0 * bingos / test_set_num)) total += bingos # total1 += len(count_user_item_map[cnt]) fp_behavior_occur_count.write('%d\n' %total) print i, total
for user in user_item_map: for item, cnt in user_item_map[user].items(): count_map.setdefault(cnt, 0) count_map[cnt] += 1 count_user_item_map.setdefault(cnt, {}) count_user_item_map[cnt].setdefault(user, set()) count_user_item_map[cnt][user].add(item) # for cnt in count_map: # print 'the number of behavior which occurs %d is %d' %(cnt, count_map[cnt]) return count_map, count_user_item_map if __name__ == '__main__': id_position_map = DataSplit.itemIDMap( '../File/tianchi_mobile_recommend_train_item.csv') test_user_item_map, test_user_item_map_size = DataSplit.userOperate( '../File/user_test_data.csv', 4, id_position_map) fp_behavior_occur_count = open( '../File/AnalysisResult/behavior_occur_count.csv', 'w') total1 = 0 for i in xrange(1, 5): fp_behavior_occur_count.write('behavior type is %d: \n' % i) count_map, count_user_item_map = userBuyAppetite( '../File/user_train_data.csv', i, id_position_map) total = 0 for cnt in count_map: # print count_map[cnt], len(count_user_item_map[cnt]) bingos, test_set_num = DataSplit.computeRatio1( count_user_item_map[cnt], test_user_item_map) fp_behavior_occur_count.write('behavior occurs = %d, in train data(%d,%d,%.2lf), in test data(%d,%d,%.2lf)\n' \ %(cnt, bingos, count_map[cnt], 100.0 * bingos / count_map[cnt], bingos, test_set_num, 100.0 * bingos / test_set_num)) total += bingos
""" import os total_user_item_behavior_list = [[0 for i in xrange(0, 8)] for i in xrange(0, 5)] rootDir = '../File/EverydayData/' for cnt in xrange(0, 1): print 'process %02d' %cnt tag = 0 ii = 0 for file_name in os.listdir(rootDir): file_path = os.path.join(rootDir, file_name) if tag < cnt: tag += 1 continue print '%s proceed begin %d %d %d' %(file_path, ii, tag, cnt) if ii == 0: test_user_item_map, test_user_item_map_size = DataSplit.userOperate(file_path, 4, id_position_map, True) elif ii <= 7: for i in xrange(1, 5): everyday_user_item_map, everyday_user_item_map_size = DataSplit.userOperate(file_path, i, id_position_map, True) print 'behavior %d' %i # print 'train data: user_item_numbers = %d' %everyday_user_item_map_size bingos, total = DataSplit.computeRatio1(everyday_user_item_map, test_user_item_map) # print '(%d,%d,%.2lf)' %(bingos, everyday_user_item_map_size, 100.0 * bingos / everyday_user_item_map_size) total_user_item_behavior_list[i][ii] += bingos else: break ii += 1 tag += 1 # test_user_item_map, test_user_item_map_size = DataSplit.userOperate('../File/user_test_data.csv', 4, id_position_map) # print 'test data: user_item_numbers = %d' %test_user_item_map_size # import os