def main():
    """Factorize the training ratings and print training/testing RMSE.

    Reads 'movie_ratings_data_set_training.csv' and
    'movie_ratings_data_set_testing.csv' from the current working directory,
    pivots each into a table indexed by ``user_id`` with one column per
    ``movie_id``, factorizes the training table with
    ``matrix_factorization_utilities.low_rank_matrix_factorization`` and
    scores the product of the two factors against both tables with
    ``matrix_factorization_utilities.RMSE``.

    Returns:
        None. The two scores are written to stdout.
    """
    csv_paths = [
        'movie_ratings_data_set_training.csv',
        'movie_ratings_data_set_testing.csv',
    ]

    # Read both rating logs up front (training first, then testing).
    rating_logs = [pd.read_csv(csv_path) for csv_path in csv_paths]

    # Reshape each log into a user x movie table. Duplicate (user, movie)
    # entries collapse to their maximum via aggfunc=np.max.
    training_table, testing_table = [
        pd.pivot_table(rating_log,
                       index='user_id',
                       columns='movie_id',
                       aggfunc=np.max)
        for rating_log in rating_logs
    ]

    # Latent factors are learned from the training table only.
    factorize = matrix_factorization_utilities.low_rank_matrix_factorization
    user_factors, movie_factors = factorize(training_table.values,
                                            num_features=11,
                                            regularization_amount=1.1)

    # The full grid of predicted ratings is the product of the two factors.
    predictions = np.matmul(user_factors, movie_factors)

    # Score the same predictions against the training and testing tables.
    score = matrix_factorization_utilities.RMSE
    training_error = score(training_table.values, predictions)
    testing_error = score(testing_table.values, predictions)

    report = (
        ("Training RMSE: {}", training_error),
        ("Testing RMSE: {}", testing_error),
    )
    for template, error in report:
        print(template.format(error))
def stratLearn(self):
    """Learn user and product features by low-rank matrix factorization.

    Steps:
      1. Call ``self.setDataFrames()`` and store the result of
         ``self.normalize_quantity()`` on
         ``self.user_product_purch_df_normalized``.
      2. Pivot that frame into a table indexed by ``user_num`` with one
         column per ``product_num`` (duplicates collapse to their maximum).
      3. Factorize the table into two matrices: U (user features) and
         P (product features).
      4. Multiply U and P to obtain the predicted purchases.

    Side effects:
        Writes three pickle files to the current working directory:
        ``user_features.dat`` (U), ``product_features.dat`` (P, transposed
        after the multiplication) and ``predicted_purchases.dat``.

    Returns:
        None.
    """
    self.setDataFrames()

    # Normalize quantity.
    self.user_product_purch_df_normalized = self.normalize_quantity()

    purchases = pd.pivot_table(self.user_product_purch_df_normalized,
                               index='user_num',
                               columns='product_num',
                               aggfunc=np.max)

    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and exists in every pandas version.
    U, P = matrix_factorization_utilities.low_rank_matrix_factorization(
        purchases.values, num_features=15, regularization_amount=3.6)

    predicted_purchases = np.matmul(U, P)

    # P is stored transposed (presumably one row per product -- confirm
    # against the code that loads product_features.dat).
    P = np.transpose(P)

    # Use context managers so every file handle is flushed and closed
    # deterministically instead of being left to the garbage collector.
    artifacts = (
        ("user_features.dat", U),
        ("product_features.dat", P),
        ("predicted_purchases.dat", predicted_purchases),
    )
    for file_name, payload in artifacts:
        with open(file_name, "wb") as out_file:
            pickle.dump(payload, out_file)

    # NOTE(review): rmse is computed but not used within the visible code;
    # kept so the evaluation call still runs exactly as before.
    rmse = matrix_factorization_utilities.RMSE(purchases.values,
                                               predicted_purchases)
# Load user ratings (paths are relative to the current working directory)
raw_training_dataset_df = pd.read_csv(
    "data/movie_ratings_data_set_training.csv")
raw_testing_dataset_df = pd.read_csv(
    "data/movie_ratings_data_set_testing.csv")

# Convert the running list of user ratings into a matrix: one row per
# user_id, one column per movie_id; duplicate entries collapse to their max
ratings_training_df = pd.pivot_table(raw_training_dataset_df,
                                     index='user_id',
                                     columns='movie_id',
                                     aggfunc=np.max)
ratings_testing_df = pd.pivot_table(raw_testing_dataset_df,
                                    index='user_id',
                                    columns='movie_id',
                                    aggfunc=np.max)

# Apply matrix factorization to find latent features.
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# ndarray and exists in every pandas version.
U, M = mfu.low_rank_matrix_factorization(ratings_training_df.values,
                                         num_features=11,
                                         regularization_amount=1.1)

# Find all predicted ratings by multiplying U and M
predicted_ratings = np.matmul(U, M)

# Measure RMSE of the same predictions against the training and testing data
rmse_training = mfu.RMSE(ratings_training_df.values, predicted_ratings)
rmse_testing = mfu.RMSE(ratings_testing_df.values, predicted_ratings)

print("Training RMSE: {}".format(rmse_training))
print("Testing RMSE: {}".format(rmse_testing))
import numpy as np
import pandas as pd

import matrix_factorization_utilities

# Load user ratings (paths are relative to the current working directory)
raw_training_dataset_df = pd.read_csv('movie_ratings_data_set_training.csv')
raw_testing_dataset_df = pd.read_csv('movie_ratings_data_set_testing.csv')

# Convert the running list of user ratings into a matrix: one row per
# user_id, one column per movie_id; duplicate entries collapse to their max
ratings_training_df = pd.pivot_table(raw_training_dataset_df,
                                     index='user_id',
                                     columns='movie_id',
                                     aggfunc=np.max)
ratings_testing_df = pd.pivot_table(raw_testing_dataset_df,
                                    index='user_id',
                                    columns='movie_id',
                                    aggfunc=np.max)

# Apply matrix factorization to find the latent features.
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# ndarray and exists in every pandas version.
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(
    ratings_training_df.values,
    num_features=11,
    regularization_amount=1.0)

# Find all predicted ratings by multiplying U and M
predicted_ratings = np.matmul(U, M)

# Measure RMSE of the same predictions against the training and testing data
rmse_training = matrix_factorization_utilities.RMSE(
    ratings_training_df.values, predicted_ratings)
rmse_testing = matrix_factorization_utilities.RMSE(
    ratings_testing_df.values, predicted_ratings)

print("Training RMSE: {}".format(rmse_training))
print("Testing RMSE: {}".format(rmse_testing))
'movie_ratings_data_set_training.csv') users_movie_ratings_testing = pandas.read_csv( 'movie_ratings_data_set_testing.csv') users_movie_ratings_training_pivot_table = pandas.pivot_table( users_movie_ratings_training, index='user_id', columns='movie_id', aggfunc=numpy.max) users_movie_ratings_testing_pivot_table = pandas.pivot_table( users_movie_ratings_testing, index='user_id', columns='movie_id', aggfunc=numpy.max) U, M = matrix_factorization_utilities.low_rank_matrix_factorization( users_movie_ratings_training_pivot_table.as_matrix(), num_features=11, regularization_amount=1.1) predicted_ratings = numpy.matmul(U, M) # Measure RMSE rmse_training = matrix_factorization_utilities.RMSE( users_movie_ratings_training_pivot_table.as_matrix(), predicted_ratings) rmse_testing = matrix_factorization_utilities.RMSE( users_movie_ratings_testing_pivot_table.as_matrix(), predicted_ratings) print('Training RMSE: {}'.format(rmse_training)) print('Testing RMSE: {}'.format(rmse_testing))