def similarity_threshold(similarity_method, similarity_method_name,
                         similarity_method_name_short):
    """Evaluate Resnick's predictions over several minimum-similarity cutoffs.

    For each cutoff, neighborhoods are rebuilt with
    ``rec.assign_neighbors_threshold``, predictions are produced with
    ``rec.make_predictions`` (which is handed a per-cutoff CSV file name),
    and the returned RMSE, time and coverage are collected. The collected
    series are finally passed to ``generate_graphs``.

    Relies on the module-level names ``rec``, ``users``, ``item_count``,
    ``user_count``, ``generate_graphs`` and ``cos_range_to_pearson_range``.

    Args:
        similarity_method: Similarity function. When it equals
            ``rec.cosine_similarity`` the cosine prediction formula is used;
            any other value selects the Pearson prediction formula.
        similarity_method_name: Display name used in console output and in
            the graph label. The exact value ``"Pearson's similarity"``
            also triggers conversion of the cutoffs through
            ``cos_range_to_pearson_range``.
        similarity_method_name_short: String embedded in output file names
            (concatenated directly, with no separator added here).

    Returns:
        None.
    """
    banner = "---------------------------------------------------------------------------"

    # Only one of the two prediction attributes is looked up, exactly as in
    # an if/else statement.
    prediction_method = (rec.resnicks_cosine_prediction
                         if similarity_method == rec.cosine_similarity else
                         rec.resnicks_pearson_prediction)

    print(banner)
    print(
        "* Please be patient. Generating predictions can take a few minutes.\n"
    )
    print("Generating predictions...")
    print("Neighborhoods created by:\tMinimum similarity threshold")
    print("Similarity metric used: \t", similarity_method_name, "\n")

    min_corated_items = 5
    # The second and third return values are not needed here.
    user_similarity, _sim_mean, _sim_stdev = rec.populate_user_similarity(
        similarity_method, min_corated_items, users)

    cutoffs = [0.95, 0.90, 0.80, 0.70, 0.60, 0.50]
    # NOTE(review): the conversion is keyed on the display name, whereas the
    # prediction formula above is keyed on the function object -- confirm
    # that callers always pass this exact string for Pearson.
    if similarity_method_name == "Pearson's similarity":
        cutoffs = list(map(cos_range_to_pearson_range, cutoffs))

    print(
        "\nGenerating predictions across different minimum similarity thresholds for purposes of comparison:"
    )
    # One (rmse, time, coverage) tuple per cutoff, in cutoff order.
    results = []
    for cutoff in cutoffs:
        print("- Minimum similarity threshold: ", cutoff)
        # Rebuild neighborhoods for this cutoff before predicting.
        rec.assign_neighbors_threshold(users, user_similarity, cutoff)
        output_file_name = ("resnicks_" + similarity_method_name_short +
                            str(cutoff) + "_min_results.csv")
        results.append(
            rec.make_predictions(prediction_method, output_file_name, users,
                                 user_similarity, item_count, user_count))
        print("\tResults saved in:\t", output_file_name)

    rmse_all = [rmse for rmse, _, _ in results]
    time_all = [elapsed for _, elapsed, _ in results]
    coverage_all = [coverage for _, _, coverage in results]

    curve_label = "Resnick's Formula with " + similarity_method_name
    graph_prefix = "resnicks_" + similarity_method_name_short + "min_similarity_"
    generate_graphs(cutoffs, rmse_all, time_all, coverage_all, curve_label,
                    graph_prefix, "Minimum Similarity Between Neighbors")
    print(banner)
def top_percent(similarity_method, similarity_method_name,
                similarity_method_name_short):
    """Evaluate Resnick's predictions over several "top percent" neighborhoods.

    For each value in a fixed list, neighborhoods are rebuilt with
    ``rec.assign_neighbors_percent``, predictions are produced with
    ``rec.make_predictions`` (which is handed a per-value CSV file name),
    and the returned RMSE, time and coverage are collected. The collected
    series are finally passed to ``generate_graphs``.

    Relies on the module-level names ``rec``, ``users``, ``item_count``,
    ``user_count`` and ``generate_graphs``.

    Args:
        similarity_method: Similarity function. When it equals
            ``rec.cosine_similarity`` the cosine prediction formula is used;
            any other value selects the Pearson prediction formula.
        similarity_method_name: Display name used in console output and in
            the graph label.
        similarity_method_name_short: String embedded in output file names
            (concatenated directly, with no separator added here).

    Returns:
        None.
    """
    if similarity_method == rec.cosine_similarity:
        prediction_method = rec.resnicks_cosine_prediction
    else:
        prediction_method = rec.resnicks_pearson_prediction

    separator = "---------------------------------------------------------------------------"
    print(separator)
    print(
        "* Please be patient. Generating predictions can take a few minutes.\n"
    )
    print("Generating predictions...")
    print("Neighborhoods created by:\tTop n percent of closest users")
    print("Similarity metric used: \t", similarity_method_name, "\n")

    min_corated_items = 5
    # NOTE(review): these look like fractions (0.025 -> 2.5%); presumably
    # rec.assign_neighbors_percent expects a fraction -- confirm.
    percent = [0.025, .05, .10, .15, .20, .25]
    rmse_all = []
    time_all = []
    coverage_all = []

    # The second and third return values are not needed here.
    user_similarity, _, _ = rec.populate_user_similarity(
        similarity_method, min_corated_items, users)

    print(
        "\nGenerating predictions across different top percents for purposes of comparison:"
    )
    for p in percent:
        # Show the fraction as a real percentage; printing the raw value next
        # to "%" (e.g. "0.025 %") understated it by a factor of 100.
        print("Top {:g}% of closest users as neighbors".format(p * 100))
        # Assign neighborhoods
        rec.assign_neighbors_percent(users, user_similarity, p,
                                     similarity_method)
        # The file name keeps the raw value so existing result files and
        # anything that reads them stay compatible.
        output_file_name = "resnicks_" + similarity_method_name_short + str(
            p) + "_percent_results.csv"
        # Make predictions
        overall_rmse, total_time, coverage = rec.make_predictions(
            prediction_method, output_file_name, users, user_similarity,
            item_count, user_count)
        print("\tResults saved in:\t", output_file_name)
        rmse_all.append(overall_rmse)
        time_all.append(total_time)
        coverage_all.append(coverage)

    curve_label = "Resnick's Formula with " + similarity_method_name
    # NOTE(review): the x values handed to the graph are still the raw
    # fractions although the axis label says "N%"; left unchanged here so
    # the generated graphs do not change.
    generate_graphs(
        percent, rmse_all, time_all, coverage_all, curve_label,
        "resnicks_" + similarity_method_name_short + "top_percent_",
        "Top N% of Neighbors")
    print(separator)
def k_nearest(similarity_method, similarity_method_name,
              similarity_method_name_short):
    """Evaluate Resnick's predictions over several k-nearest neighborhoods.

    For each k in a fixed list, neighborhoods are rebuilt with
    ``rec.assign_neighbors_k_nearest``, predictions are produced with
    ``rec.make_predictions`` (which is handed a per-k CSV file name), and
    the returned RMSE, time and coverage are collected. The collected series
    are finally passed to ``generate_graphs``.

    Relies on the module-level names ``rec``, ``users``, ``item_count``,
    ``user_count`` and ``generate_graphs``.

    Args:
        similarity_method: Similarity function. When it equals
            ``rec.cosine_similarity`` the cosine prediction formula is used;
            any other value selects the Pearson prediction formula.
        similarity_method_name: Display name used in console output and in
            the graph label.
        similarity_method_name_short: String embedded in output file names
            (concatenated directly, with no separator added here).

    Returns:
        None.
    """
    if similarity_method == rec.cosine_similarity:
        prediction_method = rec.resnicks_cosine_prediction
    else:
        prediction_method = rec.resnicks_pearson_prediction

    separator = "---------------------------------------------------------------------------"
    print(separator)
    print(
        "* Please be patient. Generating predictions can take a few minutes.\n"
    )
    print("Generating predictions...")
    print("Neighborhoods created by:\tK-nearest users")
    print("Similarity metric used: \t", similarity_method_name, "\n")

    min_corated_items = 5
    k_vals = [5, 10, 20, 30, 40, 50, 60]
    rmse_all = []
    time_all = []
    coverage_all = []

    # The second and third return values are not needed here.
    user_similarity, _, _ = rec.populate_user_similarity(
        similarity_method, min_corated_items, users)

    # This header used to say "different top percents", copied from the
    # top-percent experiment; this loop sweeps k values.
    print(
        "\nGenerating predictions across different values of k for purposes of comparison:"
    )
    for k in k_vals:
        print(k, "closest users as neighbors")
        # Assign neighborhoods
        rec.assign_neighbors_k_nearest(users, user_similarity, k,
                                       similarity_method)
        output_file_name = "resnicks_" + similarity_method_name_short + str(
            k) + "_closest_results.csv"
        # Make predictions
        overall_rmse, total_time, coverage = rec.make_predictions(
            prediction_method, output_file_name, users, user_similarity,
            item_count, user_count)
        print("\tResults saved in:\t", output_file_name)
        rmse_all.append(overall_rmse)
        time_all.append(total_time)
        coverage_all.append(coverage)

    curve_label = "Resnick's Formula with " + similarity_method_name
    generate_graphs(k_vals, rmse_all, time_all, coverage_all, curve_label,
                    "resnicks_" + similarity_method_name_short + "k_nearest_",
                    "K Nearest Neighbors")
    print(separator)