import numpy as np
import pandas as pd
import pickle
import matrix_factorization_utilities

# Load the flat list of user ratings.
raw_dataset_df = pd.read_csv('movie_ratings_data_set.csv')

# Pivot the rating list into a table with one row per user_id and one column
# per movie_id, keeping the maximum when a (user, movie) pair repeats.
# The string 'max' selects the same aggregation as np.max without triggering
# the callable-aggfunc FutureWarning that newer pandas releases emit.
ratings_df = pd.pivot_table(raw_dataset_df, index='user_id', columns='movie_id', aggfunc='max')

# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
# normalize_ratings is defined in matrix_factorization_utilities; presumably
# it returns the mean-centered ratings together with the means -- confirm.
normalized_ratings, means = matrix_factorization_utilities.normalize_ratings(ratings_df.to_numpy())

# Factor the normalized ratings into the two matrices U and M.
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(normalized_ratings, num_features=11, regularization_amount=1.1)

# Multiply the factors back together to get a prediction for every cell,
# then add `means` back in to undo the normalization.
predicted_ratings = np.matmul(U, M)
predicted_ratings = predicted_ratings + means

# Persist the results. Each file is opened in a `with` block so the handle is
# flushed and closed deterministically instead of being left to the GC.
with open("user_features.dat", "wb") as out_file:
    pickle.dump(U, out_file)
with open("product_features.dat", "wb") as out_file:
    pickle.dump(M, out_file)
with open("predicted_ratings.dat", "wb") as out_file:
    pickle.dump(predicted_ratings, out_file)
with open("means.dat", "wb") as out_file:
    pickle.dump(means, out_file)
# Example no. 2
# 0
import sys
# Put '../chapter-5' at the front of the module search path so the imports
# below look there first. The path is relative, so it depends on the working
# directory the script is started from.
# NOTE(review): presumably this is where matrix_factorization_utilities
# lives -- confirm.
sys.path.insert(0, '../chapter-5')

import pandas
import numpy
import pickle

import matrix_factorization_utilities


# Load the flat list of user ratings.
users_movie_ratings_list = pandas.read_csv('../chapter-4/movie_ratings_data_set.csv')

# Pivot the list into a table with one row per user_id and one column per
# movie_id, keeping the maximum when a (user, movie) pair repeats.
# The string 'max' selects the same aggregation as numpy.max without the
# callable-aggfunc FutureWarning that newer pandas releases emit.
users_movie_ratings_pivot_table = pandas.pivot_table(users_movie_ratings_list, index='user_id', columns='movie_id',
                                                     aggfunc='max')

# Normalise ratings around their mean.
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
normalise_ratings, means = matrix_factorization_utilities.normalize_ratings(
    users_movie_ratings_pivot_table.to_numpy())

# Factor the normalised ratings into the two matrices U and M.
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(normalise_ratings,
                                                                    num_features=11,
                                                                    regularization_amount=1.1)

# Multiply the factors back together to get a prediction for every cell,
# then add `means` back in to undo the normalisation.
predicted_ratings = numpy.matmul(U, M)
predicted_ratings = predicted_ratings + means

# Persist the results. Each file is opened in a `with` block so the handle is
# flushed and closed deterministically instead of being left to the GC.
with open('user_features.dat', 'wb') as out_file:
    pickle.dump(U, out_file)
with open('product_features.dat', 'wb') as out_file:
    pickle.dump(M, out_file)
with open('predicted_ratings.dat', 'wb') as out_file:
    pickle.dump(predicted_ratings, out_file)
with open('means.dat', 'wb') as out_file:
    pickle.dump(means, out_file)
import numpy as np
import pandas as pd
import pickle
import matrix_factorization_utilities

# Load user ratings
raw_dataset_df = pd.read_csv('../data/csv/movie_ratings_data_set.csv')

# Convert the running list of user ratings into a matrix with one row per
# user_id and one column per movie_id, keeping the maximum when a
# (user, movie) pair repeats. The string 'max' selects the same aggregation
# as np.max without the callable-aggfunc FutureWarning of newer pandas.
ratings_df = pd.pivot_table(raw_dataset_df,
                            index='user_id',
                            columns='movie_id',
                            aggfunc='max')

# Normalize the ratings (center them around their mean)
normalized_ratings, means = matrix_factorization_utilities.normalize_ratings(
    ratings_df.to_numpy())

# Apply matrix factorization to find the latent features
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(
    normalized_ratings, num_features=11, regularization_amount=1.1)

# Find all predicted ratings by multiplying U and M
predicted_ratings = np.matmul(U, M)

# Add back in the mean ratings for each product to de-normalize the predicted results
predicted_ratings = predicted_ratings + means

# Save features and predicted ratings to files for later use. Each file is
# opened in a `with` block so the handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open("../data/dat/user_features.dat", "wb") as out_file:
    pickle.dump(U, out_file)
with open("../data/dat/product_features.dat", "wb") as out_file:
    pickle.dump(M, out_file)
with open("../data/dat/predicted_ratings.dat", "wb") as out_file:
    pickle.dump(predicted_ratings, out_file)