def load_movielens():
    """Load the MovieLens ratings data and popularity rankings.

    Returns
    -------
    tuple
        ``(lens, ratings_data, popularity)`` — the ``MovieLens`` helper
        instance, the loaded ratings dataset, and the per-movie popularity
        ranks (used later to measure recommendation novelty).
    """
    lens = MovieLens()

    print("Loading movie ratings...")
    ratings_data = lens.load()

    print("\nComputing movie popularity ranks so we can measure novelty later...")
    popularity = lens.get_popularity_ranks()

    return lens, ratings_data, popularity
def fit(self, trainset):
    """Train the content-based KNN model on ``trainset``.

    Parameters
    ----------
    trainset: The data used in training.

    Returns
    -------
    model: ContentKNNAlgorithm
        The fitted class instance (``self``).
    """
    AlgoBase.fit(self, trainset)

    # Content attributes (genre vectors and release years) come straight
    # from the MovieLens metadata, keyed by raw movie id.
    ml = MovieLens()
    genres = ml.get_genres()
    years = ml.get_years()

    print("Computing content-based similarity matrix...")

    # Pairwise item-item similarity matrix (n_items x n_items). The measure
    # is symmetric, so only the upper triangle is computed and then mirrored.
    item_count = self.trainset.n_items
    self.similarities = np.zeros((item_count, item_count))
    for i in range(item_count):
        if i % 100 == 0 and self.verbose:
            print(i, " of ", item_count)
        # Hoisted out of the inner loop: raw id of item i never changes there.
        raw_i = int(self.trainset.to_raw_iid(i))
        for j in range(i + 1, item_count):
            raw_j = int(self.trainset.to_raw_iid(j))
            # Overall similarity = genre similarity scaled by year proximity.
            sim = (self.genre_similarity(raw_i, raw_j, genres)
                   * self.year_similarity(raw_i, raw_j, years))
            self.similarities[i, j] = sim
            self.similarities[j, i] = sim

    print("...done.")
    return self
from common.movielens import MovieLens
from surprise import KNNBasic
from heapq import nlargest
from collections import defaultdict
from operator import itemgetter

# Raw user id we build recommendations for, and how many to suggest.
test_subject = '85'
k = 10  # Top k suggestion

if __name__ == '__main__':
    # Load our data set and compute the item similarity matrix
    # (user_based=False, so cosine similarity is between ITEMS, not users).
    ml = MovieLens()
    data = ml.load()
    trainset = data.build_full_trainset()
    sim_options = {'name': 'cosine', 'user_based': False}
    model = KNNBasic(sim_options=sim_options)
    model.fit(trainset)
    sims_matrix = model.compute_similarities()
    # Translate the raw user id into surprise's internal (inner) id.
    test_user_inner_id = trainset.to_inner_uid(test_subject)
    # Get the top K items we rated (ur entries are (item_inner_id, rating);
    # sort key t[1] picks the highest-rated ones)
    ratings = trainset.ur[test_user_inner_id]
    k_neighbors = nlargest(k, ratings, key=lambda t: t[1])
    # Alternate approach would be to select items up to some similarity threshold
    # k_neighbors = [rating for rating in ratings if rating[1] > 4.0]
    # Get similar items to stuff we liked (weighted by rating)
from common.movielens import MovieLens
from surprise import SVD, KNNBaseline
from surprise.model_selection import train_test_split, LeaveOneOut
from common.metrics import RecommenderMetrics as metrics

if __name__ == '__main__':
    ml = MovieLens()
    print("Loading movie ratings...")
    data = ml.load()
    print(
        "\nComputing movie popularity ranks so we can measure novelty later..."
    )
    rankings = ml.get_popularity_ranks()
    print("\nComputing item similarities so we can measure diversity later...")
    # Item-item similarities need the FULL data set (user_based=False makes
    # the pearson_baseline similarity item-based).
    full_trainset = data.build_full_trainset()
    options = {'name': 'pearson_baseline', 'user_based': False}
    knn_model = KNNBaseline(sim_options=options)
    knn_model.fit(full_trainset)
    print("\nBuilding recommendation model...")
    # Fixed random_state values keep the split and SVD run reproducible.
    train, test = train_test_split(data, test_size=.25, random_state=1)
    svd_model = SVD(random_state=10)
    svd_model.fit(train)
    print("\nComputing recommendations...")
    predictions = svd_model.test(test)