"""Plot how often each rating value occurs across all game reviews.

Loads the games/reviews data through ``math260.data_prep``, tallies every
review's rating into one of 11 integer bins, and saves the resulting bar
chart to ``figures/rating-dist.png``.
"""
import os

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from math260 import data_prep, recommend, score

# choose your datasets
GAMES_DATA_FILE = "data/games.csv"
REVIEWS_FILE = "data/reviews.csv"

# where the finished bar chart is written
FIGURE_FILE = 'figures/rating-dist.png'

# Third positional argument is presumably the verbose flag -- TODO confirm
# against data_prep.parse_data's signature.
games, users = data_prep.parse_data(GAMES_DATA_FILE, REVIEWS_FILE, True)

# NOTE(review): none of these four results is used by the plot below; the
# call is kept so the script's observable behaviour (verbose output, any
# validation done inside create_review_matrix) is unchanged.
games_map, users_map, rating_matrix, bool_matrix = \
    data_prep.create_review_matrix(games, users, sparse=False, verbose=True)

# creates a bar plot of the frequency of each rating
# 11 bins: assumes int(rating) always lands in 0..10 -- TODO confirm. A larger
# value would raise IndexError; a negative one would silently wrap around.
score_counts = np.zeros(11)
game_names = list(games.keys())
print('Iterating over all reviews')
for name in tqdm(game_names):
    for review in games[name]['reviews']:
        # int() truncates, so a fractional rating is counted in the bin of
        # its integer part.
        score_counts[int(review['rating'])] += 1

plt.bar(np.arange(11), score_counts)
plt.xlabel('Rating')
plt.ylabel('Number of Reviews')
plt.title('Distribution of Ratings')

# savefig does not create missing directories; make sure the target folder
# exists so the run does not fail right at the end, after all the counting.
os.makedirs(os.path.dirname(FIGURE_FILE), exist_ok=True)
plt.savefig(FIGURE_FILE)
# Compare simple average-based rating predictors by RMSE on held-out reviews.
import numpy as np

from math260 import data_prep, recommend, score

GAMES_FILE = "data/games.csv"
REVIEWS_FILE = "data/reviews.csv"

if __name__ == "__main__":
    games, users = data_prep.parse_data(
        GAMES_FILE, REVIEWS_FILE, verbose=True)
    review_matrices = data_prep.create_review_matrix(
        games, users, sparse=False, verbose=True)
    games_map, users_map, rating_matrix, bool_matrix = review_matrices

    # NOTE(review): this predictor is built from copies taken *before*
    # remove_fraction runs. If remove_fraction edits its arguments in place
    # (TODO confirm), the per-game predictor still sees the held-out ratings
    # that it is later scored on.
    full_ratings = np.copy(rating_matrix)
    full_mask = np.copy(bool_matrix)
    game_predictor = recommend.AveragePredictor(full_ratings, full_mask)

    # testing removing 10% from each user and predicting using average score
    removed = score.remove_fraction(0.1, rating_matrix, bool_matrix)

    global_predictor = recommend.GlobalAveragePredictor(
        rating_matrix, bool_matrix)
    # NOTE(review): unlike its siblings, this predictor is constructed with
    # no matrices -- verify against recommend.UserAveragePredictor.
    user_predictor = recommend.UserAveragePredictor()
    tw_predictor = recommend.TwoWayAveragePredictor(rating_matrix, bool_matrix)

    # Both evaluations share the same held-out data and the same user subset
    # (indices 0..999).
    eval_args = (removed, rating_matrix, bool_matrix)
    eval_users = range(1000)

    # NOTE(review): user_predictor and tw_predictor are never scored here,
    # and the two RMSE values are not printed in this part of the script.
    glob_avg_rmse = score.rmse(
        *eval_args, global_predictor.predict, users=eval_users)
    game_avg_rmse = score.rmse(
        *eval_args, game_predictor.predict, users=eval_users)