Example #1
0
def train_and_save(data, save_to='db'):
    """Fit an ``MF`` model on ``data`` and persist its reconstructed matrix.

    The model is created with fixed hyper-parameters (``K=20``,
    ``alpha=0.001``, ``beta=0.01``, ``iterations=800``), trained, and the
    value returned by ``full_matrix()`` is stored as selected by ``save_to``.
    The wall-clock time spent building, training and reconstructing is
    printed.

    Args:
        data: Input handed to ``MF``. It must expose ``.shape``, which is
            printed in the progress message.
        save_to: ``'pickle'`` stores the matrix with ``np.save('NMF', ...)``
            (NumPy's own binary format, despite the option name);
            ``'db'`` passes it to ``savetodb``. Any other value skips the
            persistence step.

    Returns:
        Always ``0``.
    """
    started_at = time.time()
    print("> Training the NMF model over", data.shape, "items")

    model = MF(data, K=20, alpha=0.001, beta=0.01, iterations=800)
    model.train()
    reconstructed = model.full_matrix()

    print("> Elapsed Time to Train = ", time.time() - started_at)

    # The two targets are mutually exclusive string values.
    if save_to == 'pickle':
        np.save('NMF', reconstructed)
    elif save_to == 'db':
        savetodb(reconstructed)
    return 0
Example #2
0
def matrix_factorization():
    """Train a matrix-factorization model and write a submission file.

    Steps performed:
      1. Load the training (user, movie) pairs and their labels from the
         ``Data/`` directory and build a rating matrix with
         ``base.build_rating_matrix``.
      2. Train ``MF`` with ``K=30``, ``alpha=1e-5``, ``beta=0.02`` and
         ``iterations=2000``.
      3. Write the predicted matrix to ``predicted_matrix.txt`` (one matrix
         row per line, space separated, 5 decimals).
      4. Read that file back, look up a prediction for every
         ``(user_id, movie_id)`` row of the test set, cap it at 5.00 and
         pass the result to ``base.make_submission``.

    Returns:
        None. The outputs are the two files written and the console
        messages.
    """
    prefix = 'Data/'

    # ------------------------------- Learning ------------------------------- #
    # Load training data
    training_user_movie_pairs = base.load_from_csv(
        os.path.join(prefix, 'data_train.csv'))
    training_labels = base.load_from_csv(
        os.path.join(prefix, 'output_train.csv'))

    # Append the labels as a third column: (user, movie, rating)
    user_movie_rating_triplets = np.hstack(
        (training_user_movie_pairs, training_labels.reshape((-1, 1))))

    # Build the learning matrix
    rating_matrix = base.build_rating_matrix(user_movie_rating_triplets)

    # Build and train the model
    model = MF(rating_matrix, K=30, alpha=1e-5, beta=0.02, iterations=2000)
    with base.measure_time('Training'):
        print('Training matrix factorization...')
        model.train()

    # Save the predicted matrix. A plain 2-D ndarray replaces the discouraged
    # np.matrix; one savetxt call writes the same "one row per line" layout
    # as the former row-by-row loop.
    predicted_matrix = np.atleast_2d(np.asarray(model.full_matrix()))
    with open('predicted_matrix.txt', 'wb') as f:
        np.savetxt(f, predicted_matrix, fmt='%.5f')

    # --------------- Submission: running model on provided test set --------------- #
    # Use the same data directory as the training files.
    df = pd.read_csv(os.path.join(prefix, 'data_test.csv'))
    # Predictions are re-read from disk, so the submission uses the values
    # rounded to 5 decimals exactly as stored in predicted_matrix.txt.
    R = pd.read_csv('predicted_matrix.txt', sep=" ", header=None).values
    users = df['user_id'].values
    movies = df['movie_id'].values

    # The "- 1" offset presumably maps 1-based ids to matrix positions
    # (TODO confirm against base.build_rating_matrix).
    # min(value, 5.00) yields 5.00 only when value > 5.00, i.e. an upper cap.
    ratings = [min(R[u - 1, m - 1], 5.00) for u, m in zip(users, movies)]

    fname = base.make_submission(ratings, df.values.squeeze(),
                                 'MatrixFactorization')
    print('Submission file "{}" successfully written'.format(fname))
Example #3
0
import numpy as np
from mf import MF

# Toy rating matrix: one row per user (5 users), one column per item
# (4 items). A 0 stands for a rating that is not known.
R = np.array(
    [[5, 3, 0, 1],
     [4, 0, 0, 1],
     [1, 1, 0, 5],
     [1, 0, 0, 4],
     [0, 1, 5, 4]]
)

# Fit the factorization; whatever train() returns is kept in
# training_process.
mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20)
training_process = mf.train()

# Show the learned P and Q attributes, then the reconstructed matrix.
print(mf.P, mf.Q, sep="\n")
print(mf.full_matrix())
Example #4
0
    [1, 0, 0, 4],
    [0, 1, 5, 4],
])
"""


R = np.array(matrix)
# Perform training and obtain the user and item matrices
mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20)
training_process = mf.train()
print("proceso de entrenamiento:", training_process)
print(mf.P)
print(mf.Q)
print("=================")
print(mf.full_matrix())

fullMatrix = mf.full_matrix()
newMayor = 5

# Largest (mayor) and smallest (menor) predicted value.
# The previous element-by-element scan used -1 as a "not set yet" marker,
# so a genuine extreme of exactly -1 was overwritten by the next element.
# np.max / np.min have no such sentinel; -1 is kept only as the fallback
# for an empty matrix, matching the old initial values.
if np.size(fullMatrix):
    mayor = np.max(fullMatrix)
    menor = np.min(fullMatrix)
else:
    mayor = -1
    menor = -1

print(fullMatrix)
print(mayor)
Example #5
0
# Number of cells to blank out: 20% of all entries.
prop = int(R.size * 0.2)
# One independent row draw and one independent column draw per cell.
# Draws can repeat, so fewer than `prop` distinct cells may end up blanked.
i = [np.random.choice(range(R.shape[0])) for _ in range(prop)]
j = [np.random.choice(range(R.shape[1])) for _ in range(prop)]
# Zero the element-wise pairs (i[k], j[k]).
R[i, j] = 0

print("Original:\n", R1)
print("Test Set:\n", R)

# Round to the nearest integer, then measure how far the blanked matrix
# is from R1.
R = np.rint(R)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(R, R1)
print("RMSE=", mse ** 0.5)

print("\nTraining ...\n")
mf = MF(R, K=2, alpha=0.01, beta=0.01, iterations=100)
training_process = mf.train()
L = np.rint(mf.full_matrix())
print("\nDone\n")

# training_process is unpacked as 2-tuples; presumably (iteration, error)
# pairs, judging by the axis labels below. Keep every 10th entry.
x = [first for first, _ in training_process][::10]
y = [second for _, second in training_process][::10]

plt.figure(figsize=(16, 4))
plt.plot(x, np.sqrt(y))
plt.xticks(x, x)
print("Minimizing Error on Training Set:\n")
plt.xlabel("Iterations")
plt.ylabel("Root Mean Square Error")
plt.grid(axis="y")

print("Learnt=\n", mf.full_matrix())
print("\nRating predictions=\n", L)
print()
Example #6
0
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
])
# Set the number of values to replace. For example 20%:
prop = int(R.size * 0.2)
# Randomly choose indices of the numpy array (draws may repeat):
i = [np.random.choice(range(R.shape[0])) for _ in range(prop)]
j = [np.random.choice(range(R.shape[1])) for _ in range(prop)]
# Blank the element-wise pairs (i[k], j[k]); these form the held-out set.
R[i, j] = 0
print("Original:\n", R1)
print("Test Set:\n", R)
R = np.rint(R)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(R, R1)
# The printed value is the square root of the MSE, so label it RMSE.
print("RMSE=", mse**0.5)
print("\nTraining ...\n")
# NOTE(review): K=10000 and iterations=10000 look very large for the small
# matrix this example appears to use - confirm they are intentional.
mf = MF(R, K=10000, alpha=0.01, beta=0.01, iterations=10000)
training_process = mf.train()
L = np.rint(mf.full_matrix())
print("Learnt=\n", L)
print("\nFinding Error on test set...\n")
# Score only the cells that were actually blanked above. R[i, j] = 0 zeroes
# the pairs (i[k], j[k]); the former nested loop walked every combination
# of i and j, scoring cells that were never held out, and divided by
# len(i) * len(j) even though cells with R1 == 0 were skipped.
# Duplicated draws are counted once; cells whose R1 value is 0 carry no
# rating to compare against and are left out, as before.
held_out = sorted({(r, c) for r, c in zip(i, j) if R1.item(r, c) != 0})
msef = sum((R1.item(r, c) - L.item(r, c)) ** 2 for r, c in held_out)
# Mean over the cells actually scored; 0.0 when nothing was held out.
msef = msef / len(held_out) if held_out else 0.0
print("RMSE f=", msef**0.5)
Example #7
0
import pandas as pd
import numpy as np
from mf import MF

# Read the training ratings and keep only the first 10,000 rows.
df_train = pd.read_csv('all/train.csv').iloc[:10000]

# Reshape to one row per User and one column per Track holding the Rating;
# (User, Track) combinations without a rating become 0.
R = np.array(
    df_train
    .pivot(index='User', columns='Track', values='Rating')
    .fillna(0)
)

# Fit the factorization; whatever train() returns is kept in
# training_process.
d_mf = MF(R, K=20, alpha=0.001, beta=0.01, iterations=100)
training_process = d_mf.train()

# Print the reconstructed matrix, framed by blank lines.
print()
print("P x Q:")
print(d_mf.full_matrix())
print()