Ejemplo n.º 1
0
def main():
    """Run a leave-one-user-out evaluation and print RMSE summaries.

    Every row of the matrix returned by ``load()`` is held out in turn and
    passed to ``train_test`` together with the remaining rows.  The two
    values returned for each row are treated as (model, null-model) error
    sums -- presumably sums of squared errors; confirm against
    ``train_test`` -- and are divided by that row's count of entries > 0
    before taking the square root.

    Two reports are printed: the average over all rows, and the average
    over rows with more than 60 entries > 0.
    """
    reviews = load()

    # One (model, null) pair per held-out row.
    err = np.array([
        train_test(reviews[i], np.delete(reviews, i, 0))
        for i in range(reviews.shape[0])
    ])
    # Number of entries > 0 in each row; used to normalise the error sums.
    revs = (reviews > 0).sum(1)
    rmse = np.sqrt(err / revs[:, None])

    # Shared report line: model value, null value, relative improvement.
    report = "{:.2}\t{:.2} (improvement: {:.1%})"

    rmse_model, rmse_null = np.mean(rmse, 0)

    print("Average of RMSE / Null-model RMSE")
    print(report.format(rmse_model, rmse_null, (rmse_null-rmse_model)/rmse_null))
    print()

    # Same summary restricted to rows with more than 60 entries > 0.
    rmse_model, rmse_null = np.mean(rmse[revs > 60], 0)
    print("Average of RMSE / Null-model RMSE (users with more than 60 reviewed movies)")
    print(report.format(rmse_model, rmse_null, (rmse_null-rmse_model)/rmse_null))
Ejemplo n.º 2
0
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from load_ml100k import load
from matplotlib import pyplot as plt
# Load the data and convert it to a dense array so it can be sliced and drawn.
data = load().toarray()

# Only the first 200 rows and 200 columns are drawn.
corner = data[:200, :200]

# Select the gray colormap before drawing the image.
plt.gray()
plt.imshow(corner, interpolation='nearest')
plt.xlabel('User ID')
plt.ylabel('Film ID')
plt.savefig('../1400_08_03+.png')
Ejemplo n.º 3
0
from __future__ import print_function
from all_correlations import all_correlations
import numpy as np
from scipy import sparse
from load_ml100k import load
# Loaded once at import time and kept as a module-level name;
# cross_validate_all below reads it directly instead of taking it as an argument.
reviews = load()

def estimate_user(user, rest):
    """Estimate one user's ratings from the most similar rows of ``rest``.

    Both inputs are binarised (entry > 0) and handed to
    ``all_correlations``; the 100 rows of ``rest`` with the largest
    resulting scores are selected.  The estimate is the column-wise mean
    of those rows divided by ``0.1 +`` the fraction of selected rows that
    have an entry > 0 in that column.

    Parameters
    ----------
    user : array-like
        Ratings of the user being estimated (entries > 0 count as rated).
    rest : array-like
        Ratings of the other users, one row per user -- assumed 2-D;
        confirm against the caller.

    Returns
    -------
    The per-column estimates.
    """
    user_rated = user > 0
    rest_rated = rest > 0
    scores = all_correlations(user_rated, rest_rated)
    # argsort is ascending, so the last 100 indices have the highest scores.
    neighbours = scores.argsort()[-100:]
    result = rest[neighbours].mean(0)
    # The 0.1 offset keeps the denominator non-zero for columns nobody rated.
    result /= (.1 + rest_rated[neighbours].mean(0))
    return result

def train_test(user, rest):
    """Score the neighbour-based estimate and a null baseline for one user.

    Parameters
    ----------
    user : array-like
        Ratings of the held-out user; only entries > 0 are scored.
    rest : array-like
        Ratings of the remaining users, one row per user.

    Returns
    -------
    tuple
        ``(model_sse, null_sse)``: the sum of squared differences between
        the user's entries > 0 and, respectively, the ``estimate_user``
        prediction and the baseline built from all of ``rest``.
    """
    predicted = estimate_user(user, rest)
    rated = user > 0
    rest_rated = rest > 0
    model_diff = predicted[rated] - user[rated]

    # Baseline: column mean over every row of ``rest``, rescaled by the
    # fraction of rows with an entry > 0 (plus 0.1 to avoid dividing by zero).
    baseline = rest.mean(0)
    baseline /= (.1 + rest_rated.mean(0))
    null_diff = baseline[rated] - user[rated]

    model_sse = np.dot(model_diff, model_diff)
    null_sse = np.dot(null_diff, null_diff)
    return model_sse, null_sse

def cross_validate_all():
    err = []
    for i in xrange(reviews.shape[0]):
        err.append(
            train_test(reviews[i], np.delete(reviews, i, 0))
Ejemplo n.º 4
0
        if reviews[uid, ell] > 0:
            pred = reviews[uid, ell]
            if c == k:
                return pred
            c += 1
    return pred


def all_estimates(reviews, k=1):
    """Fill an estimate for every entry > 0 of ``reviews`` via ``nn_movie``.

    Parameters
    ----------
    reviews : 2-D array (users x movies)
        Rating matrix; it is copied to float and not modified.
    k : int, optional
        Forwarded to ``nn_movie`` as ``k - 1``.

    Returns
    -------
    Float array of the same shape as ``reviews``; positions where
    ``reviews`` is not > 0 stay at zero.
    """
    reviews = reviews.astype(float)
    rank = k - 1
    n_users, _ = reviews.shape
    estimates = np.zeros_like(reviews)
    for u in range(n_users):
        # Standardise each movie column using every user except ``u``.
        # np.delete returns a new array, so the in-place ops are safe.
        others = np.delete(reviews, u, 0)
        others -= others.mean(0)
        # The small constant guards against zero standard deviation.
        others /= (others.std(0) + 1e-4)
        # Transpose so that each row corresponds to a movie.
        others = others.T.copy()
        rated_movies = np.where(reviews[u] > 0)[0]
        for m in rated_movies:
            estimates[u, m] = nn_movie(others, reviews, u, m, rank)
    return estimates


if __name__ == '__main__':
    # Script entry point: estimate every rated entry and print one summary
    # number, the mean over rated entries of sqrt(squared error).
    reviews = load().toarray()
    estimates = all_estimates(reviews)
    rated = reviews > 0
    error = (estimates - reviews)[rated] ** 2
    print(np.sqrt(error).mean())
Ejemplo n.º 5
0
 def __init__(self):
     """Populate the class-level ``reviews`` attribute on first use.

     ``load()`` is only called while ``AbstractEstimateBase.reviews`` is
     still ``None``; the result is stored on the class itself.
     """
     base = AbstractEstimateBase
     if base.reviews is None:
         base.reviews = load()
Ejemplo n.º 6
0
        if ell == mid:
            continue
        if reviews[uid,ell] > 0:
            pred = reviews[uid,ell]
            if c == k:
                return pred
            c += 1
    return pred

def all_estimates(reviews, k=1):
    """Compute an ``nn_movie`` estimate for each entry > 0 of ``reviews``.

    Parameters
    ----------
    reviews : 2-D array (users x movies)
        Rating matrix; a float copy is made, the input is left untouched.
    k : int, optional
        Passed on to ``nn_movie`` after subtracting one.

    Returns
    -------
    Float array shaped like ``reviews``; entries where ``reviews`` is not
    > 0 remain zero.
    """
    reviews = reviews.astype(float)
    rank = k - 1
    n_users, _ = reviews.shape
    estimates = np.zeros_like(reviews)
    for u in range(n_users):
        # Leave user ``u`` out, then standardise every movie column.
        # np.delete hands back a copy, so updating it in place is safe.
        held_out = np.delete(reviews, u, 0)
        held_out -= held_out.mean(0)
        # 1e-4 avoids division by zero for constant columns.
        held_out /= (held_out.std(0)+1e-4)
        # Rows become movies after the transpose.
        held_out = held_out.T.copy()
        rated_movies = np.where(reviews[u] > 0)[0]
        for m in rated_movies:
            estimates[u,m] = nn_movie(held_out, reviews, u, m, rank)
    return estimates

if __name__ == '__main__':
    # Script entry point: build the estimates, then print the mean over
    # rated entries of sqrt(squared error).
    reviews = load().toarray()
    estimates = all_estimates(reviews)
    rated = reviews > 0
    error = (estimates-reviews)[rated] ** 2
    print(np.sqrt(error).mean())
Ejemplo n.º 7
0
 def __init__(self):
     """Load the shared review data the first time an instance is created.

     The data is stored on ``AbstractEstimateBase`` itself, and ``load()``
     is skipped whenever that attribute is no longer ``None``.
     """
     base = AbstractEstimateBase
     if base.reviews is None:
         base.reviews = load()