def movie_cluster_spec(self, rating_mat, movie_idx, movies, K):
    """Group the rows of ``rating_mat`` into ``K`` spectral clusters.

    The affinity between two rows is their cosine similarity, with negative
    values replaced by zero.

    Args:
        rating_mat: Matrix exposing ``toarray()`` (e.g. a SciPy sparse
            matrix). Presumably one row per movie -- confirm with caller.
        movie_idx: Passed through to ``utils.movie_cluster`` together with
            the column name ``'movieId_idx'``.
        movies: DataFrame containing at least ``movieId`` and ``title``.
        K: Number of clusters to produce.

    Returns:
        The result of ``utils.movie_cluster`` merged with the ``movieId``
        and ``title`` columns of ``movies`` on ``movieId``.
    """
    dense_ratings = rating_mat.toarray()
    cosine_dist = sk.metrics.pairwise_distances(dense_ratings, metric='cosine')
    affinity = 1 - cosine_dist

    # Zero out negative similarities so the affinity matrix is non-negative.
    affinity[affinity < 0] = 0

    labels = sk.cluster.spectral_clustering(
        affinity,
        n_clusters=K,
        eigen_solver='arpack',
        assign_labels='kmeans',
    )

    # NOTE(review): utils.movie_cluster is assumed to return a DataFrame with
    # a 'movieId' column; the merge below requires it.
    assignments = utils.movie_cluster(labels, movie_idx, 'movieId_idx')
    titles = movies[['movieId', 'title']]
    return pd.merge(assignments, titles, on='movieId')
def movie_cluster_svd_spec(self, rating_mat, movie_idx, movies, K,
                           n_components=10):
    """Spectrally cluster the rows of ``rating_mat`` in a reduced PCA space.

    The dense rating matrix is first projected onto ``n_components`` whitened
    principal components. Cosine similarity between the projected rows, with
    negative values replaced by zero, is then used as the affinity matrix
    for spectral clustering.

    Args:
        rating_mat: Matrix exposing ``toarray()`` (e.g. a SciPy sparse
            matrix). Presumably one row per movie -- confirm with caller.
        movie_idx: Passed through to ``utils.movie_cluster`` together with
            the column name ``'movieId_idx'``.
        movies: DataFrame containing at least ``movieId`` and ``title``.
        K: Number of clusters to produce.
        n_components: Number of principal components kept before computing
            similarities. Defaults to 10, the value that was previously
            hard-coded.

    Returns:
        The result of ``utils.movie_cluster`` merged with the ``movieId``
        and ``title`` columns of ``movies`` on ``movieId``.
    """
    # Despite the method name, the reduction is scikit-learn's PCA (with
    # whitening), not a plain truncated SVD.
    pca = sk.decomposition.PCA(n_components=n_components, whiten=True)
    movie_feat = pca.fit_transform(rating_mat.toarray())

    rating_sim = 1 - sk.metrics.pairwise_distances(movie_feat, metric='cosine')
    # Clamp the negative similarities so the affinity matrix is non-negative.
    rating_sim[rating_sim < 0] = 0

    spec_cluster = sk.cluster.spectral_clustering(
        rating_sim,
        n_clusters=K,
        eigen_solver='arpack',
        assign_labels='kmeans',
    )

    # NOTE(review): utils.movie_cluster is assumed to return a DataFrame with
    # a 'movieId' column; the merge below requires it.
    cluster_result = pd.merge(
        utils.movie_cluster(spec_cluster, movie_idx, 'movieId_idx'),
        movies[['movieId', 'title']],
        on='movieId',
    )
    return cluster_result