fill_value=0)  # closes a pivot/pivot_table call begun above this chunk — missing ratings become 0
# Inspect the user x restaurant utility matrix.
print(rating_crosstab.head(10))
print(rating_crosstab.shape)
# Transpose the Utility matrix
# Rows become restaurants, columns become users, so each restaurant gets a user-rating vector.
X = rating_crosstab.values.T
X.shape  # NOTE(review): bare expression — no effect outside a notebook
print(X.shape)
import sklearn
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import accuracy_score
import numpy as np
# Reduce each restaurant's user-rating vector to 12 latent dimensions.
# random_state fixed for reproducibility.
SVD = TruncatedSVD(n_components=12, random_state=17)
result_matrix = SVD.fit_transform(X)
result_matrix.shape  # NOTE(review): bare expression — no effect outside a notebook
print(result_matrix.shape)
# PearsonR coef
# Pairwise correlation between restaurants in the latent space.
corr_matrix = np.corrcoef(result_matrix)
corr_matrix.shape  # NOTE(review): bare expression — no effect outside a notebook
print(corr_matrix.shape)
# Locate the row/column index of the restaurant of interest in the correlation matrix.
restaurant_names = rating_crosstab.columns
restaurants_list = list(restaurant_names)
popular_rest = restaurants_list.index('Banzai Sushi')
print("index of the popular restaurant: ", popular_rest)  # restaurant of interest
# Using Matrix Factorization (Not perfect... think so used when data amount is large)
# Build a user x title utility matrix; unrated titles filled with 0.
# NOTE(review): assumes `data` (defined earlier in the file) has columns
# 'user_id', 'title', 'rating' — confirm against the loading code above.
pivot2 = data.pivot(index='user_id', columns='title', values='rating').fillna(0)
print(pivot2.head())
print(pivot2.shape)
# Transpose so each row is a title's vector of user ratings.
X = pivot2.values.T
print(X.shape)
import sklearn
from sklearn.decomposition import TruncatedSVD
# Compress each title's rating vector to 3 latent components (fixed seed for reproducibility).
SVD = TruncatedSVD(n_components=3, random_state=17)
svd_matrix = SVD.fit_transform(X)
print(svd_matrix.shape)
# We use Pearson's R correlation coefficient for every book pair in our final matrix
import warnings
# corrcoef warns (and yields NaN) for zero-variance rows; silence those RuntimeWarnings.
warnings.filterwarnings("ignore", category=RuntimeWarning)
# BUG FIX: original called np.corrcoef(matrix) — `matrix` is never defined
# (NameError); the SVD output is named svd_matrix.
corr = np.corrcoef(svd_matrix)
print(corr.shape)
# Find the correlation row for the book of interest.
title = pivot2.columns
book_list = list(title)
coffey_hands = book_list.index("The Alchemist")
print(coffey_hands)
corr_coffey_hands = corr[coffey_hands]
# Titles strongly correlated with "The Alchemist" (excluding itself at corr == 1.0).
# FIX: the original left this list as a discarded bare expression; print it so the
# recommendation is actually emitted, consistent with the rest of the script.
print(list(title[(corr_coffey_hands < 1.0) & (corr_coffey_hands > 0.9)]))