def similarity_threshold(similarity_method, similarity_method_name,
                         similarity_method_name_short):
    """Run the prediction experiment for threshold-based neighborhoods.

    For every minimum-similarity cutoff, neighbors are assigned through
    ``rec.assign_neighbors_threshold`` and predictions are produced by
    ``rec.make_predictions``, each run being given its own output file
    name. The collected RMSE, time and coverage values are then handed to
    ``generate_graphs``.

    Besides its arguments, the function reads these module-level names:
    ``rec``, ``users``, ``item_count``, ``user_count``, ``generate_graphs``
    and ``cos_range_to_pearson_range``.

    Args:
        similarity_method: Similarity callable. When it compares equal to
            ``rec.cosine_similarity`` the cosine Resnick predictor is used,
            otherwise the Pearson one.
        similarity_method_name: Metric name that is printed and used in the
            graph label. The exact value ``"Pearson's similarity"`` makes
            the cutoffs go through ``cos_range_to_pearson_range``.
        similarity_method_name_short: Tag embedded in the output file names
            and in the graph file prefix.

    Returns:
        None.
    """
    banner = "---------------------------------------------------------------------------"

    prediction_method = (
        rec.resnicks_cosine_prediction
        if similarity_method == rec.cosine_similarity
        else rec.resnicks_pearson_prediction
    )

    print(banner)
    print(
        "* Please be patient. Generating predictions can take a few minutes.\n"
    )
    print("Generating predictions...")
    print("Neighborhoods created by:\tMinimum similarity threshold")
    print("Similarity metric used: \t", similarity_method_name, "\n")

    required_corated_items = 5
    # Only the similarity structure is used below; the mean and standard
    # deviation returned alongside it are not needed in this function.
    user_similarity, _sim_mean, _sim_stdev = rec.populate_user_similarity(
        similarity_method, required_corated_items, users)

    cutoffs = [0.95, 0.90, 0.80, 0.70, 0.60, 0.50]
    if similarity_method_name == "Pearson's similarity":
        # Cutoffs are written for the cosine range; convert them for Pearson.
        cutoffs = list(map(cos_range_to_pearson_range, cutoffs))

    rmse_per_run, time_per_run, coverage_per_run = [], [], []

    print(
        "\nGenerating predictions across different minimum similarity thresholds for purposes of comparison:"
    )
    for cutoff in cutoffs:
        print("- Minimum similarity threshold: ", cutoff)

        # Rebuild the neighborhoods for this cutoff before predicting.
        rec.assign_neighbors_threshold(users, user_similarity, cutoff)

        results_file = (
            f"resnicks_{similarity_method_name_short}{cutoff!s}"
            "_min_results.csv"
        )
        rmse, elapsed, coverage = rec.make_predictions(
            prediction_method, results_file, users, user_similarity,
            item_count, user_count)
        print("\tResults saved in:\t", results_file)

        rmse_per_run.append(rmse)
        time_per_run.append(elapsed)
        coverage_per_run.append(coverage)

    graph_prefix = f"resnicks_{similarity_method_name_short}min_similarity_"
    generate_graphs(
        cutoffs,
        rmse_per_run,
        time_per_run,
        coverage_per_run,
        "Resnick's Formula with " + similarity_method_name,
        graph_prefix,
        "Minimum Similarity Between Neighbors",
    )
    print(banner)
def top_percent(similarity_method, similarity_method_name,
                similarity_method_name_short):
    """Run the prediction experiment for top-percent neighborhoods.

    For every value in the percent list, neighbors are assigned through
    ``rec.assign_neighbors_percent`` and predictions are produced by
    ``rec.make_predictions``, each run being given its own output file
    name. The collected RMSE, time and coverage values are then handed to
    ``generate_graphs``.

    Besides its arguments, the function reads these module-level names:
    ``rec``, ``users``, ``item_count``, ``user_count`` and
    ``generate_graphs``.

    Args:
        similarity_method: Similarity callable. When it compares equal to
            ``rec.cosine_similarity`` the cosine Resnick predictor is used,
            otherwise the Pearson one. It is also forwarded to
            ``rec.assign_neighbors_percent``.
        similarity_method_name: Metric name that is printed and used in the
            graph label.
        similarity_method_name_short: Tag embedded in the output file names
            and in the graph file prefix.

    Returns:
        None.
    """
    banner = "---------------------------------------------------------------------------"

    prediction_method = (
        rec.resnicks_cosine_prediction
        if similarity_method == rec.cosine_similarity
        else rec.resnicks_pearson_prediction
    )

    print(banner)
    print(
        "* Please be patient. Generating predictions can take a few minutes.\n"
    )
    print("Generating predictions...")
    print("Neighborhoods created by:\tTop n percent of closest users")
    print("Similarity metric used: \t", similarity_method_name, "\n")

    required_corated_items = 5
    fractions = [0.025, .05, .10, .15, .20, .25]
    rmse_per_run, time_per_run, coverage_per_run = [], [], []

    # Only the similarity structure is used below; the mean and standard
    # deviation returned alongside it are not needed in this function.
    user_similarity, _sim_mean, _sim_stdev = rec.populate_user_similarity(
        similarity_method, required_corated_items, users)

    print(
        "\nGenerating predictions across different top percents for purposes of comparison:"
    )
    for fraction in fractions:
        # NOTE(review): the values look like fractions (0.025 = 2.5%?) yet
        # are printed next to "%" - confirm against
        # rec.assign_neighbors_percent before changing the message.
        print("Top", fraction, "% of closest users as neighbors")

        # Rebuild the neighborhoods for this setting before predicting.
        rec.assign_neighbors_percent(users, user_similarity, fraction,
                                     similarity_method)

        results_file = (
            f"resnicks_{similarity_method_name_short}{fraction!s}"
            "_percent_results.csv"
        )
        rmse, elapsed, coverage = rec.make_predictions(
            prediction_method, results_file, users, user_similarity,
            item_count, user_count)
        print("\tResults saved in:\t", results_file)

        rmse_per_run.append(rmse)
        time_per_run.append(elapsed)
        coverage_per_run.append(coverage)

    graph_prefix = f"resnicks_{similarity_method_name_short}top_percent_"
    generate_graphs(
        fractions,
        rmse_per_run,
        time_per_run,
        coverage_per_run,
        "Resnick's Formula with " + similarity_method_name,
        graph_prefix,
        "Top N% of Neighbors",
    )
    print(banner)
def k_nearest(similarity_method, similarity_method_name,
              similarity_method_name_short):
    """Run the prediction experiment for k-nearest neighborhoods.

    For every k in the list below, neighbors are assigned through
    ``rec.assign_neighbors_k_nearest`` and predictions are produced by
    ``rec.make_predictions``, each run being given its own output file
    name. The collected RMSE, time and coverage values are then handed to
    ``generate_graphs``.

    Besides its arguments, the function reads these module-level names:
    ``rec``, ``users``, ``item_count``, ``user_count`` and
    ``generate_graphs``.

    Args:
        similarity_method: Similarity callable. When it compares equal to
            ``rec.cosine_similarity`` the cosine Resnick predictor is used,
            otherwise the Pearson one. It is also forwarded to
            ``rec.assign_neighbors_k_nearest``.
        similarity_method_name: Metric name that is printed and used in the
            graph label.
        similarity_method_name_short: Tag embedded in the output file names
            and in the graph file prefix.

    Returns:
        None.
    """
    banner = "---------------------------------------------------------------------------"

    if similarity_method == rec.cosine_similarity:
        prediction_method = rec.resnicks_cosine_prediction
    else:
        prediction_method = rec.resnicks_pearson_prediction

    print(banner)
    print(
        "* Please be patient. Generating predictions can take a few minutes.\n"
    )
    print("Generating predictions...")
    print("Neighborhoods created by:\tK-nearest users")
    print("Similarity metric used: \t", similarity_method_name, "\n")

    min_corated_items = 5
    k_vals = [5, 10, 20, 30, 40, 50, 60]
    rmse_all = []
    time_all = []
    coverage_all = []

    # Only the similarity structure is used below; the mean and standard
    # deviation returned alongside it are intentionally ignored.
    user_similarity, _sim_mean, _sim_stdev = rec.populate_user_similarity(
        similarity_method, min_corated_items, users)

    # This header previously said "top percents" (copied from top_percent);
    # the loop below varies k, so the message now says so.
    print(
        "\nGenerating predictions across different values of k for purposes of comparison:"
    )
    for k in k_vals:
        print(k, "closest users as neighbors")

        # Rebuild the neighborhoods for this k before predicting.
        rec.assign_neighbors_k_nearest(users, user_similarity, k,
                                       similarity_method)

        output_file_name = ("resnicks_" + similarity_method_name_short +
                            str(k) + "_closest_results.csv")
        overall_rmse, total_time, coverage = rec.make_predictions(
            prediction_method, output_file_name, users, user_similarity,
            item_count, user_count)
        print("\tResults saved in:\t", output_file_name)

        rmse_all.append(overall_rmse)
        time_all.append(total_time)
        coverage_all.append(coverage)

    curve_label = "Resnick's Formula with " + similarity_method_name
    generate_graphs(
        k_vals,
        rmse_all,
        time_all,
        coverage_all,
        curve_label,
        "resnicks_" + similarity_method_name_short + "k_nearest_",
        "K Nearest Neighbors",
    )
    print(banner)