count_user_item_map.setdefault(cnt, {}) count_user_item_map[cnt].setdefault(user, set()) count_user_item_map[cnt][user].add(item) # for cnt in count_map: # print 'the number of behavior which occurs %d is %d' %(cnt, count_map[cnt]) return count_map, count_user_item_map if __name__ == '__main__': id_position_map = DataSplit.itemIDMap('../File/tianchi_mobile_recommend_train_item.csv') test_user_item_map, test_user_item_map_size = DataSplit.userOperate('../File/user_test_data.csv', 4, id_position_map) fp_behavior_occur_count = open('../File/AnalysisResult/behavior_occur_count.csv', 'w') total1 = 0 for i in xrange(1, 5): fp_behavior_occur_count.write('behavior type is %d: \n' %i) count_map, count_user_item_map = userBuyAppetite('../File/user_train_data.csv', i, id_position_map) total = 0 for cnt in count_map: # print count_map[cnt], len(count_user_item_map[cnt]) bingos, test_set_num = DataSplit.computeRatio1(count_user_item_map[cnt], test_user_item_map) fp_behavior_occur_count.write('behavior occurs = %d, in train data(%d,%d,%.2lf), in test data(%d,%d,%.2lf)\n' \ %(cnt, bingos, count_map[cnt], 100.0 * bingos / count_map[cnt], bingos, test_set_num, 100.0 * bingos / test_set_num)) total += bingos # total1 += len(count_user_item_map[cnt]) fp_behavior_occur_count.write('%d\n' %total) print i, total # fp_behavior_occur_count.write('\n') # for cnt in count_map: # fp_behavior_occur_count.write('%d,' %count_map[cnt]) # fp_behavior_occur_count.write('\n') print total1
if __name__ == '__main__':
    # Script entry point. For each behavior type 1..4 it calls
    # userBuyAppetite on the training file, hands every per-count map to
    # DataSplit.computeRatio1 together with the test-data map, and writes one
    # report line per occurrence count plus a per-type total.
    id_position_map = DataSplit.itemIDMap(
        '../File/tianchi_mobile_recommend_train_item.csv')
    # NOTE(review): the second argument (4) is presumably the behavior type
    # to keep from the test file -- confirm against DataSplit.userOperate.
    test_user_item_map, test_user_item_map_size = DataSplit.userOperate(
        '../File/user_test_data.csv', 4, id_position_map)

    # total1 is never incremented (its update below is commented out), so the
    # final print always shows 0. Kept so the script's output is unchanged.
    total1 = 0

    # The with-block closes (and therefore flushes) the report file even if
    # one of the iterations raises; the original never closed the handle.
    with open('../File/AnalysisResult/behavior_occur_count.csv',
              'w') as fp_behavior_occur_count:
        for i in xrange(1, 5):
            fp_behavior_occur_count.write('behavior type is %d: \n' % i)
            count_map, count_user_item_map = userBuyAppetite(
                '../File/user_train_data.csv', i, id_position_map)

            # Sum of the first value returned by computeRatio1 over all
            # occurrence counts of this behavior type.
            total = 0
            for cnt in count_map:
                # print count_map[cnt], len(count_user_item_map[cnt])
                bingos, test_set_num = DataSplit.computeRatio1(
                    count_user_item_map[cnt], test_user_item_map)
                # Both percentages use the same numerator (bingos); only the
                # denominator differs (train count vs. test-set size).
                fp_behavior_occur_count.write(
                    'behavior occurs = %d, in train data(%d,%d,%.2lf), in test data(%d,%d,%.2lf)\n'
                    % (cnt, bingos, count_map[cnt],
                       100.0 * bingos / count_map[cnt],
                       bingos, test_set_num,
                       100.0 * bingos / test_set_num))
                total += bingos
                # total1 += len(count_user_item_map[cnt])

            fp_behavior_occur_count.write('%d\n' % total)
            # Same text as the original "print i, total"; this form also
            # parses under Python 3.
            print('%s %s' % (i, total))
            # fp_behavior_occur_count.write('\n')
            # for cnt in count_map:
            #     fp_behavior_occur_count.write('%d,' %count_map[cnt])
            # fp_behavior_occur_count.write('\n')

    print(total1)
# Walk the files under rootDir: skip the first `cnt` entries, use the next
# file to build test_user_item_map, then use each of the following seven
# files to accumulate per-behavior hit counts into
# total_user_item_behavior_list[behavior][ii].
# NOTE(review): os.listdir returns entries in arbitrary order, while this loop
# depends on position (which file is skipped, which becomes the test map) --
# confirm the directory listing order is what the analysis expects.
tag = 0   # number of directory entries consumed so far (skipped + processed)
ii = 0    # index of the current file within the processed window
for file_name in os.listdir(rootDir):
    file_path = os.path.join(rootDir, file_name)

    # Skip the leading `cnt` entries.
    if tag < cnt:
        tag += 1
        continue

    print('%s proceed begin %d %d %d' % (file_path, ii, tag, cnt))

    # Only window positions 0..7 are processed; stop at the first one beyond.
    if ii > 7:
        break

    if ii == 0:
        # First file of the window supplies the test map.
        test_user_item_map, test_user_item_map_size = DataSplit.userOperate(
            file_path, 4, id_position_map, True)
    else:
        # Window positions 1..7: one pass per behavior type 1..4.
        for i in xrange(1, 5):
            everyday_user_item_map, everyday_user_item_map_size = \
                DataSplit.userOperate(file_path, i, id_position_map, True)
            print('behavior %d' % i)
            # print 'train data: user_item_numbers = %d' %everyday_user_item_map_size
            bingos, total = DataSplit.computeRatio1(
                everyday_user_item_map, test_user_item_map)
            # print '(%d,%d,%.2lf)' %(bingos, everyday_user_item_map_size, 100.0 * bingos / everyday_user_item_map_size)
            total_user_item_behavior_list[i][ii] += bingos

    ii += 1
    tag += 1

# Commented-out code retained from the original:
# test_user_item_map, test_user_item_map_size = DataSplit.userOperate('../File/user_test_data.csv', 4, id_position_map)
# print 'test data: user_item_numbers = %d' %test_user_item_map_size
# import os
# rootDir = '../File/EverydayData/'
# total_user_item_behavior_list = []
# for file_name in os.listdir(rootDir):
#     user_item_behavior_list = []
#     file_path = os.path.join(rootDir, file_name)
#     print file_path