fill_value=0)
# Inspect the user x restaurant utility matrix built above.
print(rating_crosstab.head(10))
print(rating_crosstab.shape)

# Work with the transpose: one row per restaurant, one column per user.
X = rating_crosstab.values.T
print(X.shape)

import sklearn
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import accuracy_score
import numpy as np

# Compress each restaurant's rating vector into 12 latent factors.
SVD = TruncatedSVD(n_components=12, random_state=17)
result_matrix = SVD.fit_transform(X)
print(result_matrix.shape)

# Pearson correlation between every pair of restaurants in latent space.
corr_matrix = np.corrcoef(result_matrix)
print(corr_matrix.shape)

# Column order of the utility matrix gives each restaurant its row index
# in the correlation matrix.
restaurant_names = rating_crosstab.columns
restaurants_list = list(restaurant_names)

# Position of the restaurant we want to find neighbours for.
popular_rest = restaurants_list.index('Banzai Sushi')
print("index of the popular restaurant: ", popular_rest)

# restaurant of interest
# ----------------------------------------------------------------------
# Beispiel #2 (Example #2): book recommendations via matrix factorization
# (separator text from the scraped source, kept as a comment so the file
# remains valid Python)
# ----------------------------------------------------------------------
# Using Matrix Factorization (Not perfect... think so used when data amount is large)
# Matrix-factorization approach (approximate; best suited to large data).
# Utility matrix: one row per user, one column per title, 0 where unrated.
pivot2 = data.pivot(index='user_id', columns='title',
                    values='rating').fillna(0)

# Transpose so rows correspond to titles and columns to users.
X = pivot2.values.T

import sklearn
from sklearn.decomposition import TruncatedSVD

# Factor the title x user matrix into 3 latent components per title.
SVD = TruncatedSVD(n_components=3, random_state=17)
svd_matrix = SVD.fit_transform(X)

# Pearson's R correlation coefficient for every title pair in the
# factored matrix. np.corrcoef emits RuntimeWarnings for zero-variance
# rows, so those are suppressed here.
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

# BUG FIX: the original called np.corrcoef(matrix) on an undefined name;
# the factored matrix produced above is `svd_matrix`.
corr = np.corrcoef(svd_matrix)

# Titles in utility-matrix column order; each title's position is its
# row index in the correlation matrix.
title = pivot2.columns
book_list = list(title)
coffey_hands = book_list.index("The Alchemist")
print(coffey_hands)

# Titles strongly correlated with the query title in latent space:
# > 0.9 keeps close neighbours, < 1.0 excludes the title itself.
corr_coffey_hands = corr[coffey_hands]
# BUG FIX: the original left this list as a bare expression, which a
# plain script silently discards (only a notebook would display it).
print(list(title[(corr_coffey_hands < 1.0) & (corr_coffey_hands > 0.9)]))