def recommand(self):
        """Plot precision and recall curves for the KNN, NMF and SVD models.

        Each model is built with fixed hyperparameters and handed to
        ``self.test_with_t_and_k``, whose result is unpacked as
        ``(t_values, precisions, recalls)``. All six curves are then drawn
        on a single figure against the ``t_values`` returned by the last
        (SVD) run, and the figure is shown.
        """
        sim_options = {
            'name': 'pearson_baseline',
            'shrinkage': 0  # no shrinkage
        }

        # (msg, precision label, recall label, model factory).
        # Factories keep construction lazy: each model is only created
        # immediately before it is evaluated.
        experiments = (
            ('KNN', 'precisions_knn', 'recall_knn',
             lambda: knns.KNNWithMeans(k=20, sim_options=sim_options)),
            ('NMF', 'precisions_nmf', 'recall_nmf',
             lambda: matrix_factorization.NMF(n_factors=20, biased=False)),
            ('SVD', 'precisions_svd', 'recall_svd',
             lambda: SVD(20)),
        )

        t_values = None
        precision_series = []
        recall_series = []
        for msg, precision_label, recall_label, make_model in experiments:
            t_values, precisions, recalls = self.test_with_t_and_k(
                make_model(), msg=msg)
            precision_series.append((precision_label, precisions))
            recall_series.append((recall_label, recalls))

        # All precision curves first, then all recall curves.
        for curve_label, values in precision_series + recall_series:
            plt.plot(t_values, values, label=curve_label)

        plt.xlabel('t_value')
        plt.ylabel('percent')
        plt.legend(loc="best")
        plt.show()
    def run_and_test_all_models(self):
        """Evaluate KNN, NMF and SVD, overlay their ROC curves, run the naive filter.

        For each model family ``self.run_and_test_model`` is called with the
        algorithm class, its extra constructor kwargs, a pre-built model, a
        ``(2, stop, step_size)`` tuple and a display name. The tuple is
        presumably the range of k / factor counts to sweep; confirm against
        ``run_and_test_model``.

        Each call returns an indexable sequence whose entries expose the
        x-values at ``[0]`` and the y-values at ``[1]`` of a curve plotted
        on FPR/TPR axes. One figure is shown per position in those
        sequences, overlaying the three models and a diagonal reference
        line.

        Finally ``self.run_naive_filter`` is run without a filter and with
        each of the three trimming filters.
        """
        step_size = 2

        # KNN
        sim_options = {
            'name': 'pearson_baseline',
            'shrinkage': 0  # no shrinkage
        }
        roc_auc_KNN = self.run_and_test_model(
            knns.KNNWithMeans,
            {'sim_options': sim_options},
            knns.KNNWithMeans(k=20, sim_options=sim_options),
            (2, 101, step_size), 'KNN')

        # NMF
        roc_auc_NMF = self.run_and_test_model(
            matrix_factorization.NMF,
            {'biased': False},
            matrix_factorization.NMF(n_factors=20, biased=False),
            (2, 51, step_size), 'NMF')

        # SVD
        roc_auc_SVD = self.run_and_test_model(
            matrix_factorization.SVD,
            {},
            SVD(20),
            (2, 51, step_size), 'SVD')

        # all
        # zip stops at the shortest result list, so a model that produced
        # fewer curves cannot trigger an IndexError part-way through.
        for knn_curve, nmf_curve, svd_curve in zip(roc_auc_KNN, roc_auc_NMF,
                                                   roc_auc_SVD):
            # One colour per model: with a single shared colour the three
            # curves and their legend entries are indistinguishable.
            for curve, color, label in ((knn_curve, 'blue', 'KNN'),
                                        (nmf_curve, 'green', 'NMF'),
                                        (svd_curve, 'red', 'SVD')):
                plt.plot(curve[0],
                         curve[1],
                         color=color,
                         linewidth=2.0,
                         label=label)
            # Diagonal reference line from (0, 0) to (1, 1).
            plt.plot([0, 1], [0, 1], color='yellow', linewidth=2.0)
            plt.xlabel('FPR')
            plt.ylabel('TPR')
            plt.legend(loc="lower right")
            plt.show()

        # NaiveFilter
        self.run_naive_filter(msg='normal')
        self.run_naive_filter(test_filter=trimPopular, msg='trimPopular')
        self.run_naive_filter(test_filter=trimUnpopular, msg='trimUnpopular')
        self.run_naive_filter(test_filter=trimHighVariance,
                              msg='trimHighVariance')
    def non_negative_matrix_factorization(self):
        """Fit an NMF model on the full trainset and print info on its top items.

        Prints the shapes of the fitted factor matrices ``pu`` and ``qi``,
        then takes the ten row indices with the largest values in column 1
        of ``qi``. Each index that equals a ``movieId`` in ``movies.csv`` is
        paired with the third column of that CSV row (presumably the genres
        column; confirm against the file), and the resulting list of
        ``[index, value]`` pairs is printed.
        """
        algo = matrix_factorization.NMF(20)
        model = algo.fit(self.data.build_full_trainset())
        U = model.pu
        V = model.qi
        # Parenthesised form prints identically on Python 2 and Python 3.
        print(U.shape)
        print(V.shape)

        # Row indices of the ten largest entries in column 1, descending.
        top_ten = V[:, 1].argsort()[::-1][:10]

        # NOTE(review): the indices in top_ten are row positions in V but are
        # compared directly with raw movieId values below. If V is indexed by
        # the library's internal item ids, a mapping back to raw ids is
        # missing here; confirm before relying on the printed output.

        # movieId (as int) -> third CSV column; the header row is skipped.
        # `with` guarantees the file is closed even if a row fails to parse.
        with open('recommand/ml-latest-small/movies.csv') as f:
            movies = {
                int(movie_entry[0]): movie_entry[2]
                for movie_entry in csv.reader(f)
                if movie_entry[0] != "movieId"
            }

        # Direct lookup per index instead of scanning every movie each time.
        genre = []
        for i in top_ten:
            movie_id = int(i)
            if movie_id in movies:
                genre.append([i, movies[movie_id]])
        print(genre)
Esempio n. 4
0
 def _init_surprise_model(self):
     """Return a new ``matrix_factorization.NMF`` configured from this instance.

     The factor count, random state, epoch count and the two
     regularisation terms are read from the corresponding private
     attributes and forwarded as keyword arguments.
     """
     nmf_kwargs = {
         'n_factors': self._n_factors,
         'random_state': self._random_state,
         'n_epochs': self._n_epochs,
         'reg_pu': self._reg_pu,
         'reg_qi': self._reg_qi,
     }
     return matrix_factorization.NMF(**nmf_kwargs)