Beispiel #1
0
import numpy as np
from math260 import data_prep, recommend, score

# Paths (relative) to the two CSV inputs; both are handed to
# data_prep.parse_data in the script body below.
GAMES_FILE = "data/games.csv"
REVIEWS_FILE = "data/reviews.csv"

if __name__ == "__main__":
    # Parse the raw CSV files, then build the rating matrix and the boolean
    # matrix that are both passed to the scorer further down.
    games, users = data_prep.parse_data(
        GAMES_FILE, REVIEWS_FILE, verbose=True
    )
    review_matrices = data_prep.create_review_matrix(
        games, users, sparse=False, verbose=True
    )
    games_map, users_map, rating_matrix, bool_matrix = review_matrices

    def random_rating():
        """Return a random integer drawn from 0..9 (upper bound 10 is exclusive)."""
        return np.random.randint(0, 10)

    # Random baseline: the predictor is driven purely by random_rating().
    predictor = recommend.RandomPredictor(random_rating)

    # First argument to rmsecv; presumably the fraction of each user's
    # ratings that is held out (10%) -- confirm against score.rmsecv.
    holdout = 0.1
    rmse, errors = score.rmsecv(
        holdout, rating_matrix, bool_matrix, predictor.predict
    )

    print('RMSE:\t\t{}'.format(rmse))
Beispiel #2
0
def cosine_similarity(u1, u2, rating_matrix, bool_matrix):
    """Return the cosine similarity between two rows of ``rating_matrix``.

    The similarity is computed as ``1 - dist.cosine(row_u1, row_u2)``,
    i.e. one minus the cosine distance reported by ``dist.cosine``.

    Args:
        u1: Index of the first row in ``rating_matrix``.
        u2: Index of the second row in ``rating_matrix``.
        rating_matrix: Indexable collection of rating vectors.
        bool_matrix: Not used by this function; presumably kept so that all
            similarity functions share one signature (confirm with callers).

    Returns:
        One minus the cosine distance between the two selected rows.
    """
    first_ratings = rating_matrix[u1]
    second_ratings = rating_matrix[u2]
    cosine_distance = dist.cosine(first_ratings, second_ratings)
    return 1 - cosine_distance

if __name__ == "__main__":
    # Sweep the second SimilarityPredictor argument (k, reported as the
    # number of neighbors) and record the cross-validated RMSE of the cosine
    # and MSD similarity predictors for each setting. `msd_similarity` and
    # `output` are expected to be defined elsewhere in this file.
    games, users = data_prep.parse_data(GAMES_FILE, REVIEWS_FILE, verbose=True)
    games_map, users_map, rating_matrix, bool_matrix  \
        = data_prep.create_review_matrix(games, users, sparse=False, verbose=True)

    results = []

    # The same user subset is scored for every k, so build it once.
    eval_users = range(0, 1000)

    # k = 20, 40, ..., 500 (25 settings).
    for k in range(20, 501, 20):
        print(f'using {k} neighbors')
        cosine_predictor = recommend.SimilarityPredictor(cosine_similarity, k)
        msd_predictor = recommend.SimilarityPredictor(msd_similarity, k)
        # The second value returned by rmsecv is not needed here.
        cosine_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                                      cosine_predictor.predict, users=eval_users)
        sim_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                                   msd_predictor.predict, users=eval_users)
        result = {'k': k, 'cosine': cosine_rmse, 'sim': sim_rmse}
        results.append(result)
        print(result)

    # Use a context manager so the output file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(output, mode='w') as results_file:
        json.dump(results, results_file)



    
    


Beispiel #3
0
    # Build one predictor per approach. Every SimilarityPredictor is created
    # with 120 as its second argument (presumably the neighbor count -- confirm
    # against recommend.SimilarityPredictor); the average-based predictors
    # take the rating and boolean matrices directly.
    combo_predictor = recommend.SimilarityPredictor(combo_sim, 120)
    adjusted_cosine_predictor = recommend.SimilarityPredictor(adjusted_cosine_similarity, 120)
    cosine_predictor = recommend.SimilarityPredictor(cosine_similarity, 120)
    msd_predictor = recommend.SimilarityPredictor(msd_similarity, 120)
    jaccard_predictor = recommend.SimilarityPredictor(jaccard_similarity, 120)
    # NOTE(review): avg_predictor and uavg_predictor are constructed from the
    # same class with the same arguments -- possibly a copy-paste slip; confirm
    # whether uavg_predictor was meant to use a different predictor class.
    avg_predictor = recommend.AveragePredictor(rating_matrix, bool_matrix)
    uavg_predictor = recommend.AveragePredictor(rating_matrix, bool_matrix)
    gavg_predictor = recommend.GlobalAveragePredictor(rating_matrix, bool_matrix)
    tavg_predictor = recommend.TwoWayAveragePredictor(rating_matrix, bool_matrix)

    # The adjusted-cosine and cosine evaluations are currently disabled, so
    # the two corresponding predictors built above are not scored.
    ## adjusted_cosine_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
    ##                                        adjusted_cosine_predictor.predict, users=range(0, 1000))
    ## cosine_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
    ##                               cosine_predictor.predict, users=range(0, 1000))
    # Score each remaining predictor with identical rmsecv arguments (0.1,
    # both matrices, users 0..999); the second return value is discarded.
    combo_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                                 combo_predictor.predict, users=range(0, 1000))
    jaccard_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                                   jaccard_predictor.predict, users=range(0, 1000))
    sim_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                               msd_predictor.predict, users=range(0, 1000))
    avg_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                               avg_predictor.predict, users=range(0, 1000))
    uavg_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                                uavg_predictor.predict, users=range(0, 1000))
    gavg_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                                gavg_predictor.predict, users=range(0, 1000))
    tavg_rmse, _ = score.rmsecv(0.1, rating_matrix, bool_matrix,
                                tavg_predictor.predict, users=range(0, 1000))

    # Reporting for the two disabled evaluations is commented out as well.
    # print(f"Adjusted cosine predictor RMSE: {adjusted_cosine_rmse}")
    # print(f"Cosine predictor RMSE: {cosine_rmse}")