def train_and_save(data, save_to='db'):
    """Fit an MF model on ``data`` and persist the reconstructed matrix.

    Args:
        data: Matrix handed to ``MF``; its ``shape`` is printed before
            training starts.
        save_to: ``'pickle'`` stores the result through ``np.save`` under
            the name ``'NMF'``; ``'db'`` (the default) hands it to
            ``savetodb``. Any other value trains the model but stores
            nothing.

    Returns:
        Always ``0``.
    """
    started_at = time.time()
    print("> Training the NMF model over", data.shape, "items")

    model = MF(data, K=20, alpha=0.001, beta=0.01, iterations=800)
    model.train()
    saved_model = model.full_matrix()

    # Only the training itself is timed; persisting happens afterwards.
    print("> Elapsed Time to Train = ", time.time() - started_at)

    if save_to == 'pickle':
        # Despite the option name, the matrix is written with np.save.
        np.save('NMF', saved_model)
    elif save_to == 'db':
        savetodb(saved_model)

    return 0
def matrix_factorization():
    """Train a matrix-factorization model and write a submission file.

    The training pairs and their labels are loaded from ``Data/``, merged
    into (user, movie, rating) triplets and turned into a rating matrix by
    ``base.build_rating_matrix``. The reconstructed matrix is dumped to
    ``predicted_matrix.txt`` with five decimals and read back from that
    file, so the submitted ratings carry exactly that precision.
    Predictions above 5 are capped at 5; there is no lower cap.
    """
    prefix = 'Data/'

    # ------------------------------ Learning ------------------------------ #
    pairs_path = os.path.join(prefix, 'data_train.csv')
    labels_path = os.path.join(prefix, 'output_train.csv')
    training_user_movie_pairs = base.load_from_csv(pairs_path)
    training_labels = base.load_from_csv(labels_path)

    # Append the labels as one extra column next to the (user, movie) pairs.
    label_column = training_labels.reshape((-1, 1))
    user_movie_rating_triplets = np.hstack(
        (training_user_movie_pairs, label_column))

    rating_matrix = base.build_rating_matrix(user_movie_rating_triplets)

    model = MF(rating_matrix, K=30, alpha=1e-5, beta=0.02, iterations=2000)
    with base.measure_time('Training'):
        print('Training matrix factorization...')
        model.train()

    # Persist the reconstruction, one text row per matrix row.
    predicted_matrix = np.matrix(model.full_matrix())
    with open('predicted_matrix.txt', 'wb') as out_file:
        for matrix_row in predicted_matrix:
            np.savetxt(out_file, matrix_row, fmt='%.5f')

    # ------------- Submission: run the model on the test set -------------- #
    df = pd.read_csv("Data/data_test.csv")
    R = pd.read_csv('predicted_matrix.txt', sep=" ", header=None).values

    users = df['user_id'].values
    movies = df['movie_id'].values

    # Ids are shifted down by one to index into the matrix.
    predictions = (R[u - 1][m - 1] for u, m in zip(users, movies))
    ratings = [5.00 if value > 5.00 else value for value in predictions]

    fname = base.make_submission(
        ratings, df.values.squeeze(), 'MatrixFactorization')
    print('Submission file "{}" successfully written'.format(fname))
import numpy as np

from mf import MF

# Toy rating matrix: 5 users (rows) rating 4 items (columns).
# A zero stands for a rating that is not known.
known_ratings = [
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
]
R = np.array(known_ratings)

# Fit the factorization, then show the two factor matrices followed by
# the full reconstructed matrix.
hyper_params = {"K": 2, "alpha": 0.1, "beta": 0.01, "iterations": 20}
mf = MF(R, **hyper_params)
training_process = mf.train()

print(mf.P)
print(mf.Q)
print(mf.full_matrix())
[1, 0, 0, 4], [0, 1, 5, 4], ]) """ R = np.array(matrix) # Perform training and obtain the user and item matrices mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20) training_process = mf.train() print("proceso de entrenamiento:", training_process) print(mf.P) print(mf.Q) print("=================") #print(mf[0]) print(mf.full_matrix()) fullMatrix = mf.full_matrix() mayor = -1; menor = -1; newMayor = 5; for vec in fullMatrix: for x in vec: if(mayor == -1 or mayor < x): mayor = x if(menor == -1 or menor > x): menor = x print(fullMatrix) print(mayor)
# Number of entries to blank out: 20% of the matrix size.
prop = int(R.size * 0.2)

# Draw `prop` row indices, then `prop` column indices. Every draw is
# independent, so the same position can come up more than once.
row_choices = range(R.shape[0])
col_choices = range(R.shape[1])
i = [np.random.choice(row_choices) for _ in range(prop)]
j = [np.random.choice(col_choices) for _ in range(prop)]

# Blank out the paired (i[k], j[k]) positions.
R[i, j] = 0
print("Original:\n", R1)
print("Test Set:\n", R)
R = np.rint(R)

from sklearn.metrics import mean_squared_error

# Error between the blanked-out matrix and the original one.
mse = mean_squared_error(R, R1)
print("RMSE=", mse**0.5)

print("\nTraining ...\n")
mf = MF(R, K=2, alpha=0.01, beta=0.01, iterations=100)
training_process = mf.train()
L = np.rint(mf.full_matrix())
print("\nDone\n")

# Keep every 10th record of the training history for plotting.
# Presumably each record is an (iteration, mean squared error) pair --
# TODO confirm against MF.train.
x = [step for step, _ in training_process][::10]
y = [error for _, error in training_process][::10]

plt.figure(figsize=(16, 4))
plt.plot(x, np.sqrt(y))
plt.xticks(x, x)
print("Minimizing Error on Training Set:\n")
plt.xlabel("Iterations")
plt.ylabel("Root Mean Square Error")
plt.grid(axis="y")

print("Learnt=\n", mf.full_matrix())
print("\nRating predictions=\n", L)
print()
[1, 1, 0, 5], [1, 0, 0, 4], [0, 1, 5, 4], ]) #Set the number of values to replace. For example 20%: prop = int(R.size * 0.2) #Randomly choose indices of the numpy array: i = [np.random.choice(range(R.shape[0])) for _ in range(prop)] j = [np.random.choice(range(R.shape[1])) for _ in range(prop)] #Change values with 0 R[i, j] = 0 print("Original:\n", R1) print("Test Set:\n", R) R = np.rint(R) from sklearn.metrics import mean_squared_error mse = mean_squared_error(R, R1) print("MSE=", mse**0.5) print("\nTraining ...\n") mf = MF(R, K=10000, alpha=0.01, beta=0.01, iterations=10000) training_process = mf.train() L = np.rint(mf.full_matrix()) print("Learnt=\n", L) print("\nFinding Error on test set...\n") msef = 0.0 for i1 in range(len(i)): for i2 in range(len(j)): if R1.item(i[i1], j[i2]) != 0: msef = msef + (R1.item((i[i1], j[i2])) - (L).item( (i[i1], j[i2])))**2 msef = (msef / (len(j) * len(i))) print("RMSE f=", msef**0.5)
import pandas as pd
import numpy as np

from mf import MF

# Only the first 10000 rows of the training file are used.
df_train = pd.read_csv('all/train.csv')[0:10000]

# One row per User, one column per Track; missing ratings become 0.
ratings_table = df_train.pivot(index='User', columns='Track', values='Rating')
R = np.array(ratings_table.fillna(0))

hyper_params = {"K": 20, "alpha": 0.001, "beta": 0.01, "iterations": 100}
d_mf = MF(R, **hyper_params)
training_process = d_mf.train()

# Show the reconstructed matrix between two blank lines.
print()
print("P x Q:")
print(d_mf.full_matrix())
print()