def main():
    """Run a leave-one-user-out evaluation and print two RMSE summaries.

    Every row of the matrix returned by ``load()`` is held out in turn and
    handed to ``train_test`` together with the remaining rows.  ``train_test``
    is expected to return a pair of squared-error sums (model, null model);
    each sum is divided by the number of positive entries in the held-out
    row and square-rooted, giving one RMSE pair per row.

    The averages of those pairs are printed twice: once over all rows and
    once restricted to rows with more than 60 positive entries.
    """
    reviews = load()
    err = np.array([
        train_test(reviews[i], np.delete(reviews, i, 0))
        for i in range(reviews.shape[0])
    ])
    # Number of positive entries per row; used to turn error sums into means.
    revs = (reviews > 0).sum(1)
    rmse = np.sqrt(err / revs[:, None])

    # Shared template; the closing parenthesis was missing in the original
    # messages, which left the printed "(improvement: ..." unbalanced.
    summary = "{:.2}\t{:.2} (improvement: {:.1%})"

    rmse_model, rmse_null = np.mean(rmse, 0)
    print("Average of RMSE / Null-model RMSE")
    print(summary.format(rmse_model, rmse_null,
                         (rmse_null - rmse_model) / rmse_null))
    print()

    rmse_model, rmse_null = np.mean(rmse[revs > 60], 0)
    print("Average of RMSE / Null-model RMSE (users with more than 60 reviewed movies)")
    print(summary.format(rmse_model, rmse_null,
                         (rmse_null - rmse_model) / rmse_null))
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from load_ml100k import load
from matplotlib import pyplot as plt

# Convert the loaded matrix to a dense array and keep its top-left
# 200 x 200 corner for plotting.
data = load().toarray()
corner = data[:200, :200]

# Draw the corner with the gray colormap, one image pixel per matrix entry
# (no interpolation smoothing), then write the figure to disk.
plt.gray()
plt.imshow(corner, interpolation='nearest')
plt.xlabel('User ID')
plt.ylabel('Film ID')
plt.savefig('../1400_08_03+.png')
from __future__ import print_function
from all_correlations import all_correlations
import numpy as np
from scipy import sparse
from load_ml100k import load

# Loaded once at import time; read by cross_validate_all below.
reviews = load()


# Estimate a value for every column of `user` from the rows of `rest`.
#
# `user` and `rest` are thresholded at > 0 into boolean masks, the masks are
# scored with all_correlations (presumably one score per row of `rest` --
# confirm against that module), and the 100 top-scoring rows are averaged
# column-wise.
def estimate_user(user, rest):
    bu = user > 0
    br = rest > 0
    ws = all_correlations(bu,br)
    # argsort is ascending, so the last 100 indices are the highest scores.
    selected = ws.argsort()[-100:]
    estimates = rest[selected].mean(0)
    # Rescale each column mean by the fraction of selected rows that have a
    # positive entry in that column.  If non-positive entries are zeros
    # (assumption -- confirm against load()), this approximates the mean over
    # positive entries only.  The .1 keeps the denominator non-zero.
    estimates /= (.1+br[selected].mean(0))
    return estimates


# Score estimate_user against a baseline for one held-out `user`.
#
# Returns a pair (np.dot(err, err), np.dot(nerr, nerr)): the first from the
# estimate_user predictions, the second from a baseline built the same way
# but from every row of `rest`.  Both are restricted to the entries where
# user > 0; for 1-D inputs each value is a sum of squared errors.
def train_test(user, rest):
    estimates = estimate_user(user, rest)
    bu = user > 0
    br = rest > 0
    err = estimates[bu]-user[bu]
    # Baseline: column means over all of `rest`, rescaled like the estimates.
    null = rest.mean(0)
    null /= (.1+br.mean(0))
    nerr = null[bu]-user[bu]
    return np.dot(err,err), np.dot(nerr, nerr)


def cross_validate_all(): err = [] for i in xrange(reviews.shape[0]): err.append( train_test(reviews[i], np.delete(reviews, i, 0))
if reviews[uid, ell] > 0: pred = reviews[uid, ell] if c == k: return pred c += 1 return pred


# Build a matrix of estimates with the same shape as `reviews`.
#
# For every row u, an estimate is computed with nn_movie for each column m
# where reviews[u, m] > 0; all other entries of the result stay 0.
# `k` is decremented once before being passed on to nn_movie (presumably
# converting a 1-based neighbour rank into a 0-based one -- confirm against
# nn_movie).
def all_estimates(reviews, k=1):
    # astype returns a new float array, so the caller's matrix is not modified.
    reviews = reviews.astype(float)
    k -= 1
    nusers, nmovies = reviews.shape
    estimates = np.zeros_like(reviews)
    for u in range(nusers):
        # Work on the matrix with row u removed.
        ureviews = np.delete(reviews, u, 0)
        # Standardise each column; the 1e-4 guards against a zero std.
        ureviews -= ureviews.mean(0)
        ureviews /= (ureviews.std(0) + 1e-4)
        # Transpose so that each row of ureviews corresponds to one column
        # of `reviews`.
        ureviews = ureviews.T.copy()
        for m in np.where(reviews[u] > 0)[0]:
            estimates[u, m] = nn_movie(ureviews, reviews, u, m, k)
    return estimates


if __name__ == '__main__':
    reviews = load().toarray()
    estimates = all_estimates(reviews)
    error = (estimates - reviews)
    error **= 2
    # Only entries with a positive value in `reviews` are scored.
    error = error[reviews > 0]
    # NOTE(review): sqrt is applied element-wise before averaging, so this
    # prints the mean absolute error rather than an RMSE -- confirm which
    # metric is intended.
    print(np.sqrt(error).mean())
def __init__(self):
    """Load the review data once and cache it on ``AbstractEstimateBase``.

    The ``reviews`` class attribute is filled with the result of ``load()``
    only while it is still ``None``; later instantiations reuse the value
    that is already stored there.
    """
    shared = AbstractEstimateBase
    if shared.reviews is not None:
        return
    shared.reviews = load()
if ell == mid: continue if reviews[uid,ell] > 0: pred = reviews[uid,ell] if c == k: return pred c += 1 return pred


# Build a matrix of estimates with the same shape as `reviews`.
#
# For every row u, an estimate is computed with nn_movie for each column m
# where reviews[u, m] > 0; all other entries of the result stay 0.
# `k` is decremented once before being passed on to nn_movie (presumably
# converting a 1-based neighbour rank into a 0-based one -- confirm against
# nn_movie).
def all_estimates(reviews, k=1):
    # astype returns a new float array, so the caller's matrix is not modified.
    reviews = reviews.astype(float)
    k -= 1
    nusers, nmovies = reviews.shape
    estimates = np.zeros_like(reviews)
    for u in range(nusers):
        # Work on the matrix with row u removed.
        ureviews = np.delete(reviews, u, 0)
        # Standardise each column; the 1e-4 guards against a zero std.
        ureviews -= ureviews.mean(0)
        ureviews /= (ureviews.std(0)+1e-4)
        # Transpose so that each row of ureviews corresponds to one column
        # of `reviews`.
        ureviews = ureviews.T.copy()
        for m in np.where(reviews[u] > 0)[0]:
            estimates[u,m] = nn_movie(ureviews, reviews, u, m, k)
    return estimates


if __name__ == '__main__':
    reviews = load().toarray()
    estimates = all_estimates(reviews)
    error = (estimates-reviews)
    error **= 2
    # Only entries with a positive value in `reviews` are scored.
    error = error[reviews > 0]
    # NOTE(review): sqrt is applied element-wise before averaging, so this
    # prints the mean absolute error rather than an RMSE -- confirm which
    # metric is intended.
    print(np.sqrt(error).mean())
def __init__(self):
    """Ensure the class-level review data has been loaded.

    ``AbstractEstimateBase.reviews`` is assigned the result of ``load()``
    the first time an instance is created (while the attribute is still
    ``None``); afterwards the stored value is left untouched.
    """
    already_loaded = AbstractEstimateBase.reviews is not None
    if not already_loaded:
        AbstractEstimateBase.reviews = load()