Beispiel #1
0
def train_and_save(data, save_to='db'):
    """Fit an NMF model on `data` and persist the reconstructed matrix.

    save_to: 'db' stores via savetodb(); 'pickle' writes an .npy file
    named 'NMF' (np.save appends the extension). Returns 0.
    """
    started = time.time()
    print("> Training the NMF model over", data.shape, "items")
    model = MF(data, K=20, alpha=0.001, beta=0.01, iterations=800)
    model.train()
    reconstructed = model.full_matrix()
    finished = time.time()
    print("> Elapsed Time to Train = ", finished - started)
    if save_to == 'pickle':
        np.save('NMF', reconstructed)
    if save_to == 'db':
        savetodb(reconstructed)
    return 0
Beispiel #2
0
def worker(fold, n_users, n_items, dataset_dir):
    """Run one cross-validation fold: load train/test ratings, fit MF,
    print per-metric scores and return them."""

    def load_split(suffix):
        # Load a sparse rating matrix and flatten its nonzeros into
        # (user, item, rating) triads.
        path = dataset_dir + 'ratings__' + str(fold + 1) + suffix
        sparse_ratings = loadSparseR(n_users, n_items, path)
        triads = np.array([(u, i, sparse_ratings[u, i])
                           for u, i in np.asarray(sparse_ratings.nonzero()).T
                           ])
        return sparse_ratings, triads

    trasR, tra_tuple = load_split('_tra.txt')
    print(
        dataset_dir.split('/')[-2] + ':', trasR.shape, trasR.nnz,
        '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    _, tst_tuple = load_split('_tst.txt')

    # negRatio=0 disables negative sampling; batch_size, eval_metrics,
    # range_of_ratings, reg and n_factors come from module-level globals.
    sampler = Sampler(trasR=trasR, negRatio=.0, batch_size=batch_size)
    model = MF(n_users, n_items, eval_metrics, range_of_ratings, reg, n_factors,
               batch_size)
    scores = model.train(fold + 1, tra_tuple, tst_tuple, sampler)

    metric_names = ','.join('%s' % metric for metric in eval_metrics)
    score_values = ','.join('%.6f' % value for value in scores)
    print('fold=%d:' % fold, metric_names, '=', score_values)

    return scores
Beispiel #3
0
def matrix_factorization():
    """Train an MF model on the training CSVs, dump the dense prediction
    matrix to 'predicted_matrix.txt', then score the provided test pairs
    and write a submission file."""
    prefix = 'Data/'

    # ------------------------------- Learning ------------------------------- #
    pairs = base.load_from_csv(os.path.join(prefix, 'data_train.csv'))
    labels = base.load_from_csv(os.path.join(prefix, 'output_train.csv'))

    # (user, movie, rating) triplets
    triplets = np.hstack((pairs, labels.reshape((-1, 1))))

    # Build the learning matrix and fit the model.
    rating_matrix = base.build_rating_matrix(triplets)
    model = MF(rating_matrix, K=30, alpha=1e-5, beta=0.02, iterations=2000)
    with base.measure_time('Training'):
        print('Training matrix factorization...')
        model.train()

    # Persist the dense reconstruction row by row.
    predicted = np.matrix(model.full_matrix())
    with open('predicted_matrix.txt', 'wb') as f:
        for row in predicted:
            np.savetxt(f, row, fmt='%.5f')

    # -----------------------Submission: Running model on provided test_set---------------------------- #
    df = pd.read_csv("Data/data_test.csv")
    R = pd.read_csv('predicted_matrix.txt', sep=" ", header=None).values
    users = df['user_id'].values
    movies = df['movie_id'].values
    # Ids in the CSV are 1-based; predictions are capped at the 5.0 ceiling.
    ratings = [min(R[u - 1][m - 1], 5.00) for u, m in zip(users, movies)]

    fname = base.make_submission(ratings, df.values.squeeze(),
                                 'MatrixFactorization')
    print('Submission file "{}" successfully written'.format(fname))
def doTrain(K, alpha, beta, gamma, iterations, maxError):
    """Fit an MF model on the MovieLens training subset and pickle it.

    Hyper-parameters are passed straight through to MF; the fitted model
    is serialized to 'trainedModel.pkl'.
    """
    print('>>> K=%s, alpha=%s, beta=%s, gamma=%s, iterations=%s, maxError=%s'
          % (K, alpha, beta, gamma, iterations, maxError))
    ratings_csv = 'ml-latest-small/ratings.csv'
    train_csv = 'ml-latest-small/trainRatings.csv'
    model_path = 'trainedModel.pkl'

    all_ratings = readCsv(ratings_csv)
    train_subset = readCsv(train_csv)
    # Dimension the matrix from the full ratings file so ids that only
    # appear outside the training subset still fit.
    maxUserId, maxMovieId = getMaxIds(all_ratings)
    R = getRatingsMatrix(train_subset, maxUserId, maxMovieId)
    mf = MF(R, K, alpha, beta, gamma, iterations, maxError)

    print('Training...')
    mf.train()
    print('Done. Mse = ' + str(mf.get_mse()))

    print('Serializing model to ' + model_path)
    with open(model_path, 'wb') as output:
        pickle.dump(mf, output, pickle.HIGHEST_PROTOCOL)
    print('Done serializing model to ' + model_path)
Beispiel #5
0
import numpy as np
from mf import MF

# Ratings from 5 users on 4 items; a 0 marks an unknown rating.
rows = [
    (5, 3, 0, 1),
    (4, 0, 0, 1),
    (1, 1, 0, 5),
    (1, 0, 0, 4),
    (0, 1, 5, 4),
]
R = np.array(rows)

# Factorize R into user factors P and item factors Q, then show the
# learned matrices and the dense reconstruction.
mf = MF(R, K=2, alpha=0.1, beta=0.01, iterations=20)
training_process = mf.train()
for result in (mf.P, mf.Q, mf.full_matrix()):
    print(result)
Beispiel #6
0
index2userid = {y: x for x, y in userid2index.items()}
index2itemid = {y: x for x, y in itemid2index.items()}

nonzero_row, nonzero_col = rating_matrix.nonzero()
inds = zip(nonzero_row.tolist(), nonzero_col.tolist())

import sys

sys.path.append('../tpmrec/')

from mf import MF

mf = MF(rating_matrix, inds, 10, 0.0001, 0.01)

mf.train(10)

for userindex in range(1000):
    userid = index2userid[userindex]
    if len(userid2itemindexes[userid]) > 20:
        continue
    pr = mf.predict()
    user_predict = pr[userindex, :]
    top_item_indexes = np.argsort(user_predict)[::-1][:10]
    print "userid = ", userid
    for itemid in userid2itemindexes[userid]:
        print itemid, itemid2name[itemid]
    print "recommend item"
    for itemindex in top_item_indexes:
        itemid = index2itemid[itemindex]
        print itemid, itemid2name[itemid]
Beispiel #7
0
        self.P = np.vstack([self.P, new_lf])

        print('in MF')
        print(len(self.P))

    def remove_new_user(self, index=168):
        """Delete one user's latent-factor row from P.

        index: row to remove; defaults to 168, the hard-coded slot the
        original code used for the temporary "new user" row (presumably
        the one appended by the add path above — TODO confirm).
        """
        # np.delete returns a copy, so P is rebound rather than edited in place.
        self.P = np.delete(self.P, index, 0)

    def predict(self):
        """Return the dense score matrix P @ Q.T (users x items)."""
        return self.P @ self.Q.T


# --- Model bootstrap (runs once at module import) ---

# Saved rating matrix produced offline.
rating_data = np.load('rating_data.npy')
# NOTE(review): MF receives the rating matrix twice here — confirm the
# second argument's meaning against mf.MF's signature.
mf = MF(rating_data, rating_data)
_, _ = mf.train(epoch=20, verbose=False)

# Number of restaurants available for random sampling in the routes below.
num_total_rest = 80

# Fix: close the pickle file deterministically instead of leaking the
# handle left open by pickle.load(open(...)).
with open('restaurants.dict', 'rb') as fh:
    restaurants = pickle.load(fh)

@app.route('/get_restaurants')
def get_restaurants():
    """Pick 5 distinct random restaurant indices in [0, num_total_rest).

    NOTE(review): the visible body ends without building or returning a
    response — the function appears truncated at the snippet boundary.
    """
    num_sample_rest = 5  # intended sample size (the loop guard hard-codes 5 too)
    rand_ints = []
    # Rejection-sample until 5 unique indices have been collected.
    while len(rand_ints) < 5:
        rand_int = randrange(num_total_rest)
        if not rand_int in rand_ints:
            rand_ints.append(rand_int)
Beispiel #8
0
import pickle

from mf import MF
from recommender.utils.dataprocess import ratings, meanRatings

# userId looks 1-based (max == count) while movieIndex is 0-based, hence
# the +1 — TODO confirm against the dataprocess module.
userCount = ratings['userId'].max()
movieCount = ratings['movieIndex'].max() + 1

mf = MF(movieCount,
        userCount,
        meanRatings,
        alpha=0.01,
        reg=0.01,
        iterations=20,
        K=20)
mf.train(ratings)

# Fix: close the output file deterministically instead of leaking the
# handle left open by pickle.dump(mf, open(...)).
with open('pkl/mfModel.pkl', 'wb') as f:
    pickle.dump(mf, f)
Beispiel #9
0
import pandas as pd
import numpy as np
from mf import MF

# Demo on the first 10k training rows only.
df_train = pd.read_csv('all/train.csv')[0:10000]

# Pivot to a dense User x Track matrix; unrated cells become 0.
pivot_table = df_train.pivot(index='User', columns='Track', values='Rating')
R = np.array(pivot_table.fillna(0))

# Fit and print the dense reconstruction P x Q.
d_mf = MF(R, K=20, alpha=0.001, beta=0.01, iterations=100)
training_process = d_mf.train()
print()
print("P x Q:")
print(d_mf.full_matrix())
print()