def load_movielens():
    """Load the MovieLens ratings together with the popularity ranks.

    Returns
    -------
    tuple
        ``(ml, data, rankings)``: the ``MovieLens`` instance, the value
        returned by its ``load()`` and the value returned by its
        ``get_popularity_ranks()``, in that order.
    """
    lens = MovieLens()

    # Ratings are loaded before the popularity ranks are requested.
    print("Loading movie ratings...")
    ratings_data = lens.load()

    print("\nComputing movie popularity ranks so we can measure novelty later...")
    popularity_ranks = lens.get_popularity_ranks()

    return lens, ratings_data, popularity_ranks
    def fit(self, trainset):
        """Build the item-to-item content similarity matrix.

        Every pair of distinct items in the training set is scored as the
        product of ``self.genre_similarity`` and ``self.year_similarity``.
        The scores are stored symmetrically in ``self.similarities``; the
        diagonal is left at zero.

        Parameters
        ----------
        trainset:
            The data used in training. It is handed to ``AlgoBase.fit``;
            ``self.trainset`` is read afterwards (presumably set by that
            call -- confirm against the base class).

        Returns
        -------
        model: ContentKNNAlgorithm
            Returns the class instance
        """

        AlgoBase.fit(self, trainset)

        # Load the genre and year lookups consumed by the similarity helpers
        ml = MovieLens()
        genres = ml.get_genres()
        years = ml.get_years()

        print("Computing content-based similarity matrix...")

        n_items = self.trainset.n_items

        # n_items x n_items matrix indexed by inner item id
        self.similarities = np.zeros((n_items, n_items))

        # Convert every inner id to its integer raw movie id once, instead of
        # repeating the conversion for each pair inside the nested loops.
        movie_ids = [int(self.trainset.to_raw_iid(inner_id))
                     for inner_id in range(n_items)]

        for rating1 in range(n_items):
            if rating1 % 100 == 0 and self.verbose:
                print(rating1, " of ", n_items)

            movie_id1 = movie_ids[rating1]

            # Only the upper triangle is computed; each score is mirrored
            # into the lower triangle.
            for rating2 in range(rating1 + 1, n_items):
                movie_id2 = movie_ids[rating2]
                genre_similarity = self.genre_similarity(
                    movie_id1, movie_id2, genres)
                year_similarity = self.year_similarity(
                    movie_id1, movie_id2, years)
                similarity = genre_similarity * year_similarity
                self.similarities[rating1, rating2] = similarity
                self.similarities[rating2, rating1] = similarity

        print("...done.")

        return self
Exemple #3
0
from common.movielens import MovieLens
from surprise import KNNBasic
from heapq import nlargest
from collections import defaultdict
from operator import itemgetter

# Raw user id (a string) whose ratings seed the recommendations below.
test_subject = '85'
k = 10  # Top k suggestion

if __name__ == '__main__':
    # Load our data set and compute the item similarity matrix
    # (sim_options sets 'user_based' to False, so similarities are item-item).
    ml = MovieLens()
    data = ml.load()

    # The full data set is used as the training set; nothing is held out here.
    trainset = data.build_full_trainset()
    sim_options = {'name': 'cosine', 'user_based': False}

    model = KNNBasic(sim_options=sim_options)
    model.fit(trainset)
    # NOTE(review): fit() may already compute the similarity matrix, which
    # would make this explicit call a second computation -- confirm.
    sims_matrix = model.compute_similarities()

    # Translate the raw user id into the trainset's inner user id.
    test_user_inner_id = trainset.to_inner_uid(test_subject)

    # Get the top K items we rated
    # (entries are ranked by their element at index 1, presumably the rating).
    ratings = trainset.ur[test_user_inner_id]
    k_neighbors = nlargest(k, ratings, key=lambda t: t[1])

    # Alternate approach would be to keep every item whose rating exceeds
    # some threshold:
    # k_neighbors = [rating for rating in ratings if rating[1] > 4.0]

    # Get similar items to stuff we liked (weighted by rating)
Exemple #4
0
from common.movielens import MovieLens
from surprise import SVD, KNNBaseline
from surprise.model_selection import train_test_split, LeaveOneOut
from common.metrics import RecommenderMetrics as metrics

# Script entry point: load the ratings, fit an item-based KNNBaseline model on
# the full data set, then fit an SVD model on a train split and predict the
# held-out test split.
if __name__ == '__main__':
    ml = MovieLens()

    print("Loading movie ratings...")
    data = ml.load()

    print(
        "\nComputing movie popularity ranks so we can measure novelty later..."
    )
    rankings = ml.get_popularity_ranks()

    print("\nComputing item similarities so we can measure diversity later...")
    # 'user_based': False requests item-item similarities; the model is
    # fitted on the full data set rather than on the train split below.
    full_trainset = data.build_full_trainset()
    options = {'name': 'pearson_baseline', 'user_based': False}
    knn_model = KNNBaseline(sim_options=options)
    knn_model.fit(full_trainset)

    print("\nBuilding recommendation model...")
    # Hold out 25% of the data for testing; the fixed random_state values
    # here and on SVD are presumably there to make runs repeatable.
    train, test = train_test_split(data, test_size=.25, random_state=1)

    svd_model = SVD(random_state=10)
    svd_model.fit(train)

    print("\nComputing recommendations...")
    # Predictions are made for the held-out test split only.
    predictions = svd_model.test(test)