def run_rmse_test_wrapper(args):
    """Unpack ``args`` and run a single cross-validation job.

    Intended as a worker-pool entry point (hence the single tuple argument
    that is splatted into the real call). Any exception is logged together
    with its stack trace before being propagated, because pool workers often
    swallow tracebacks.

    :param args: tuple of positional arguments forwarded to
        ``recommender_evaluator.perform_cross_validation``
    :return: whatever ``perform_cross_validation`` returns
    """
    try:
        return recommender_evaluator.perform_cross_validation(*args)
    except Exception:
        print('Caught exception in worker thread')
        # This prints the type, value, and stack trace of the
        # current exception being handled.
        traceback.print_exc()
        print()
        # Bare `raise` re-raises the active exception and keeps the original
        # traceback intact (unlike `raise e`, which can rebuild it).
        raise
def run_rmse_test(records_file, recommenders, binary_reviews_file, reviews_type=None):
    """Cross-validate every recommender and dump a timestamped CSV report.

    NOTE(review): a second, near-identical definition of ``run_rmse_test``
    appears later in this module and shadows this one — consolidate.

    :param records_file: path to the records file to evaluate on
    :param recommenders: sequence of recommender objects to evaluate
    :param binary_reviews_file: path to a pickle of pre-processed reviews
        that must align 1:1 with the records
    :param reviews_type: optional review-type filter passed to the evaluator
    :raises ValueError: if the records and cached reviews differ in length
    """
    records = load_records(records_file)
    # records = extractor.remove_users_with_low_reviews(records, 2)

    with open(binary_reviews_file, 'rb') as read_file:
        binary_reviews = pickle.load(read_file)

    if len(records) != len(binary_reviews):
        raise ValueError("The records and reviews should have the same length")

    num_folds = 5
    # Metadata attached to every result row in the CSV log.
    dataset_info_map = {
        'dataset': records_file.split('/')[-1],
        'cache_reviews': binary_reviews_file.split('/')[-1],
        'num_records': len(records),
        'reviews_type': reviews_type,
        'cross_validation_folds': num_folds,
    }

    results_list = []
    results_log_list = []
    num_recommenders = len(recommenders)
    print('Total recommenders: %d' % num_recommenders)

    for count, recommender in enumerate(recommenders):
        print('\n**************\n%d/%d\n**************'
              % (count, num_recommenders))
        results = recommender_evaluator.perform_cross_validation(
            records, recommender, num_folds, binary_reviews, reviews_type)
        results_list.append(results)
        # Rough ETA: assumes the remaining recommenders take about as long
        # as the one that just finished.
        remaining_time = results['Execution time'] * (num_recommenders - count)
        remaining_time /= 3600
        print('Estimated remaining time: %.2f hours' % remaining_time)

    for recommender, results in zip(recommenders, results_list):
        results_log_list.append(
            process_rmse_results(recommender, results, dataset_info_map))

    timestamp = time.strftime("%Y%m%d-%H%M%S")
    file_name = 'recommender-rmse-results' + timestamp
    ETLUtils.save_csv_file(file_name + '.csv', results_log_list, RMSE_HEADERS, '\t')
def run_rmse_test(
        records_file, recommenders, binary_reviews_file, reviews_type=None):
    """Cross-validate every recommender and dump a timestamped CSV report.

    NOTE(review): this duplicates an earlier ``run_rmse_test`` definition in
    this module (and shadows it, being defined later) — consolidate.

    :param records_file: path to the records file to evaluate on
    :param recommenders: sequence of recommender objects to evaluate
    :param binary_reviews_file: path to a pickle of pre-processed reviews
        that must align 1:1 with the records
    :param reviews_type: optional review-type filter passed to the evaluator
    :raises ValueError: if the records and cached reviews differ in length
    """
    records = load_records(records_file)
    # records = extractor.remove_users_with_low_reviews(records, 2)

    with open(binary_reviews_file, 'rb') as read_file:
        binary_reviews = pickle.load(read_file)

    if len(records) != len(binary_reviews):
        raise ValueError("The records and reviews should have the same length")

    num_folds = 5
    # Metadata attached to every result row in the CSV log.
    dataset_info_map = {
        'dataset': records_file.split('/')[-1],
        'cache_reviews': binary_reviews_file.split('/')[-1],
        'num_records': len(records),
        'reviews_type': reviews_type,
        'cross_validation_folds': num_folds,
    }

    results_list = []
    results_log_list = []
    num_recommenders = len(recommenders)
    print('Total recommenders: %d' % num_recommenders)

    for count, recommender in enumerate(recommenders):
        print('\n**************\n%d/%d\n**************'
              % (count, num_recommenders))
        results = recommender_evaluator.perform_cross_validation(
            records, recommender, num_folds, binary_reviews, reviews_type)
        results_list.append(results)
        # Rough ETA: assumes the remaining recommenders take about as long
        # as the one that just finished.
        remaining_time = results['Execution time'] * (num_recommenders - count)
        remaining_time /= 3600
        print('Estimated remaining time: %.2f hours' % remaining_time)

    for recommender, results in zip(recommenders, results_list):
        results_log_list.append(
            process_rmse_results(recommender, results, dataset_info_map))

    timestamp = time.strftime("%Y%m%d-%H%M%S")
    file_name = 'recommender-rmse-results' + timestamp
    ETLUtils.save_csv_file(file_name + '.csv', results_log_list, RMSE_HEADERS, '\t')
def main():
    """Ad-hoc experiment driver: load records and cached reviews, build
    several KNN recommender variants, then run cross-validation and
    top-N recall comparisons, printing results to stdout.

    Hard-coded local file paths and the commented-out lines are alternative
    experiment configurations kept for reference.
    """
    # --- Load the raw review records ---
    # reviews_file = "/Users/fpena/tmp/yelp_training_set/yelp_training_set_review_hotels.json"
    reviews_file = "/Users/fpena/UCC/Thesis/datasets/context/yelp_training_set_review_hotels_shuffled.json"
    # reviews_file = "/Users/fpena/UCC/Thesis/datasets/context/yelp_training_set_review_restaurants_shuffled.json"
    # my_records = context_utils.load_reviews(reviews_file)
    my_records = load_data(reviews_file)
    print("records:", len(my_records))
    my_num_topics = 150
    print("\n***************************\n")
    # --- Alternative preprocessing paths kept for reference ---
    # my_records = load_data(reviews_file)
    # my_records = extractor.remove_users_with_low_reviews(my_records, 200)
    # my_records = extractor.remove_users_with_low_reviews(my_records, 2)
    # shuffle(my_records)
    # my_index = 0
    # my_reviews = []
    # for record in my_records:
    #     my_index += 1
    #     my_reviews.append(Review(record))
    #     print('index', my_index)
    # --- Load the pre-processed (cached) reviews pickle ---
    # my_file = '/Users/fpena/UCC/Thesis/datasets/context/reviews_context_restaurants_200.pkl'
    # my_file = '/Users/fpena/UCC/Thesis/datasets/context/reviews_context_hotel_2.pkl'
    my_file = '/Users/fpena/tmp/reviews_hotel_shuffled.pkl'
    # my_file = '/Users/fpena/tmp/reviews_restaurant_shuffled.pkl'
    # with open(my_file, 'wb') as write_file:
    #     pickle.dump(my_reviews, write_file, pickle.HIGHEST_PROTOCOL)
    # NOTE(review): pickle.load on an untrusted file can execute arbitrary
    # code — fine for this local cache, but do not point it at external data.
    with open(my_file, 'rb') as read_file:
        my_cache_reviews = pickle.load(read_file)
    print("reviews:", len(my_cache_reviews))
    # --- Build recommender components and variants ---
    context_knn = ContextKnn(my_num_topics)
    tknc = TopKNeighbourhoodCalculator()
    nc = ContextNeighbourhoodCalculator()
    ncc = NeighbourContributionCalculator()
    ubc = UserBaselineCalculator()
    usc = PBCSimilarityCalculator()
    # contextual_knn2 = ContextualKNN(my_num_topics, tknc, ncc, ubc, usc, my_reviews)
    bnc = BasicNeighbourhoodCalculator()
    bncc = BasicNeighbourContributionCalculator()
    bubc = BasicUserBaselineCalculator()
    busc = BasicUserSimilarityCalculator()
    # snc/chnc are constructed but unused below — presumably kept for other
    # experiment configurations; verify before removing.
    snc = SimpleNeighbourhoodCalculator()
    chnc = ContextHybridNeighbourhoodCalculator()
    contextual_knn = ContextualKNN(my_num_topics, nc, ncc, ubc, usc, has_context=True)
    contextual_knn2 = ContextualKNN(my_num_topics, nc, ncc, ubc, busc, has_context=True)
    contextual_knn3 = ContextualKNN(my_num_topics, bnc, bncc, bubc, busc)
    # basic_contextual_knn = BasicContextualKNN(my_num_topics, bnc, bncc, bubc, busc)
    # contextual_knn.threshold1 = 0.8
    # contextual_knn.threshold2 = 0.8
    # contextual_knn.threshold3 = 0.8
    # contextual_knn.threshold4 = 0.8
    # print('Context KNN')
    # context_knn.load(my_records)
    # recommender_evaluator.perform_cross_validation(my_records, context_knn, 5, True)
    basic_knn_rec = BasicKNN(None)
    # --- Cross-validation runs ---
    # print('Basic KNN')
    # recommender_evaluator.perform_cross_validation(my_records, basic_knn_rec, 5)
    # print('Context KNN')
    # recommender_evaluator.perform_cross_validation(my_records, context_knn, 5, my_cache_reviews)
    print('Contextual KNN')
    recommender_evaluator.perform_cross_validation(my_records, contextual_knn, 5, my_cache_reviews)
    print('Contextual KNN2')
    recommender_evaluator.perform_cross_validation(my_records, contextual_knn2, 5, my_cache_reviews)
    # NOTE(review): the 'Contextual KNN3' header is printed but its
    # evaluation calls below are all commented out — confirm intent.
    print('Contextual KNN3')
    # recommender_evaluator.perform_cross_validation(my_records, contextual_knn3, 5)
    # recommender_evaluator.perform_cross_validation(my_records, contextual_knn3, 5)
    # recommender_evaluator.perform_cross_validation(my_records, contextual_knn3, 5, True, my_cache_reviews)
    # --- Top-N recall comparisons ---
    # precision_in_top_n.calculate_recall_in_top_n(my_records, basic_contextual_knn, 10, 65)
    # precision_in_top_n.calculate_recall_in_top_n(my_records, basic_knn_rec, 10, 65)
    print('Basic KNN')
    precision_in_top_n.calculate_recall_in_top_n(my_records, basic_knn_rec, 10, 5, 5.0, my_cache_reviews)
    # precision_in_top_n.calculate_recall_in_top_n(my_records, basic_knn_rec, 10, 5, 5.0, False, my_cache_reviews)
    print('Context KNN')
    precision_in_top_n.calculate_recall_in_top_n(my_records, context_knn, 10, 5, 5.0, my_cache_reviews)
    print('Contextual KNN')
    precision_in_top_n.calculate_recall_in_top_n(my_records, contextual_knn, 10, 5, 5.0, my_cache_reviews)
    print('Contextual KNN 2')
    precision_in_top_n.calculate_recall_in_top_n(my_records, contextual_knn2, 10, 5, 5.0, my_cache_reviews)
    print('Contextual KNN 3')
    precision_in_top_n.calculate_recall_in_top_n(my_records, contextual_knn3, 10, 5, 5.0, my_cache_reviews)