Esempio n. 1
0
    def __init__(self):
        """Load the persisted model and data, then build two row subsets.

        Everything is read from hard-coded paths under ``objects/`` and
        ``data_movilens1m/``. Two subsets are then prepared (the original
        comments say they are meant to feed a PCA):

        * ``_subModel`` / ``_subMovieMean``: the rows of ``self._model.u``,
          and the ``_movieMean`` entries at the same index, whose index is
          contained in ``_mostRated``.
        * ``_subModel2`` / ``_subMovieMean2``: the rows of ``self._model.v``,
          and the ``_userMean`` entries at the same index, whose index is
          contained in ``_mostRating``.
        """
        # Persisted artefacts and side information.
        self._model = RSVD.load("objects/")
        self._original_movieIDs = np.load("objects/original_movieIDs")
        self._ratings = np.load("objects/ratings")
        self._moviesInformations = functions.loadMoviesInformations('data_movilens1m/movies.dat')
        self._usersInformations = functions.loadUsersInformations('data_movilens1m/users.dat')
        self._dissim = functions.getSimilaritiesFromModel(self._model.u)
        self._movieMean = np.load("objects/movieMean")
        self._userMean = np.load("objects/userMean")

        # Subset for the most-rated movies (2000 requested).
        self._mostRated = functions.getNMostRatedMovies(self._ratings, 2000)
        keptMovies = self._mostRated

        # The sub-model must have the same type as the model (author's note).
        # NOTE(review): np.empty does not initialise memory, so any row that
        # the loop below does not fill keeps arbitrary values - confirm that
        # every index in _mostRated is a valid, unique row index of model.u.
        self._subModel = np.empty((len(keptMovies), self._model.u.shape[1]))
        self._subMovieMean = np.empty((len(keptMovies), 2))
        slot = 0
        for movieIndex, movieFactors in enumerate(self._model.u):
            if movieIndex not in keptMovies:
                continue
            self._subMovieMean[slot] = self._movieMean[movieIndex]
            self._subModel[slot] = movieFactors
            slot += 1

        # Same selection for the users returned by getNMostRatingUsers.
        self._mostRating = functions.getNMostRatingUsers(self._ratings)
        keptUsers = self._mostRating

        # Same caveat as above about rows left unfilled by np.empty.
        self._subModel2 = np.empty((len(keptUsers), self._model.v.shape[1]))
        self._subMovieMean2 = np.empty((len(keptUsers), 2))
        slot = 0
        for userIndex, userFactors in enumerate(self._model.v):
            if userIndex not in keptUsers:
                continue
            self._subMovieMean2[slot] = self._userMean[userIndex]
            self._subModel2[slot] = userFactors
            slot += 1
Esempio n. 2
0
def makeModel(itemIdMap, userIdMap, ratings, factors=20, learnRate=0.001, regularization=0.011):
    """
    Makes a RSVD model from ratings
    """
    ratings = np.array(ratings, rating_t)
    np.random.shuffle(ratings)

    n = int(ratings.shape[0] * 0.8)
    # train = ratings[:n]
    test = ratings[n:]
    # v = int(train.shape[0] * 0.9)
    # val = train[v:]
    # train = train[:v]

    # Increasing training data
    v = int(ratings.shape[0] * 0.9)
    val = ratings[v:]
    train = ratings[:v]

    dims = (len(itemIdMap), len(userIdMap))

    model = RSVD.train(factors, train, dims, probeArray=val,
                learnRate=learnRate, regularization=regularization,
                maxEpochs=1000)

    sqerr=0.0
    for itemID,userID,rating in test:
        err = rating - model(itemID,userID)
        sqerr += err * err
    sqerr /= test.shape[0]
    print "Test RMSE: ", np.sqrt(sqerr)

    return model
Esempio n. 3
0
def findFactorsAndErrors(ratingsDataset):
    """Train one RSVD model per factor count and record its test RMSE.

    The array returned by ``ratingsDataset.ratings()`` is split positionally
    (it is not shuffled here): the last 20% of the rows is the test set, the
    last 10% of the remaining rows is the validation (probe) set, and the
    rest is the training set. A model is trained for every factor count in
    ``range(1, 100)`` with ``maxEpochs=1000`` and ``regularization=0.011``.

    Args:
        ratingsDataset: object exposing ``ratings()``, ``movieIDs()`` and
            ``userIDs()``; rating rows are iterated as
            ``(movieID, userID, rating)``.

    Returns:
        dict with the keys:
            ``'factors'``: the factor counts tried, in order;
            ``'errors'``: the test RMSE for each entry of ``'factors'``;
            ``'best_factor'``: the factor count with the lowest test RMSE.
    """
    ratings = ratingsDataset.ratings()

    # Create train, validation and test sets.
    n = int(ratings.shape[0] * 0.8)
    train = ratings[:n]
    test = ratings[n:]
    v = int(train.shape[0] * 0.9)
    val = train[v:]
    train = train[:v]

    dims = (ratingsDataset.movieIDs().shape[0], ratingsDataset.userIDs().shape[0])

    factors = []
    errors = []

    # The remaining RSVD.train arguments are left at the library defaults.
    for factor in range(1, 100):
        model = RSVD.train(factor, train, dims, probeArray=val, maxEpochs=1000, regularization=0.011)

        sqerr = 0.0
        for movieID, userID, rating in test:
            err = rating - model(movieID, userID)
            sqerr += err * err
        sqerr /= test.shape[0]

        factors.append(factor)
        errors.append(np.sqrt(sqerr))

    # The original comment told callers to read result['best_factor'], but
    # that key was never set; provide it alongside the two existing keys.
    best_factor = factors[int(np.argmin(errors))]

    return {'factors': factors, 'errors': errors, 'best_factor': best_factor}
Esempio n. 4
0
# Load the MovieLens ratings file and take its ratings array.
dataset = MovieLensDataset.loadDat("data/movielense/ratings.dat")
ratings = dataset.ratings()

# make sure that the ratings are properly shuffled (in place), since the
# split below is purely positional
np.random.shuffle(ratings)

# create train, validation and test sets: the last 20% of the rows is the
# test set; the last 10% of the remaining rows is the validation set and the
# rest is the training set.
n = int(ratings.shape[0] * 0.8)
train = ratings[:n]
test = ratings[n:]
v = int(train.shape[0] * 0.9)
val = train[v:]
train = train[:v]

from rsvd import RSVD

# Model dimensions: (number of movie IDs, number of user IDs).
dims = (dataset.movieIDs().shape[0], dataset.userIDs().shape[0])

# Train with 20 factors, using the validation set as the probe array.
model = RSVD.train(20, train, dims, probeArray=val, maxEpochs=100, learnRate=0.0005, regularization=0.005)

# Root-mean-square error of the model's predictions on the test set.
sqerr = 0.0
for movieID, userID, rating in test:
    err = rating - model(movieID, userID)
    sqerr += err * err
sqerr /= test.shape[0]
print "Test RMSE: ", np.sqrt(sqerr)

import IPython

# Drop into an interactive shell; the names defined above (model, train,
# val, test, ...) are what the session gets to inspect.
IPython.embed()
Esempio n. 5
0
# Shuffle the ratings in place, since the split below is purely positional.
np.random.shuffle(ratings)

# create train, validation and test sets: the last 20% of the rows is the
# test set; the last 10% of the remaining rows is the validation set and the
# rest is the training set.
n = int(ratings.shape[0] * 0.8)
train = ratings[:n]
test = ratings[n:]
v = int(train.shape[0] * 0.9)
val = train[v:]
train = train[:v]

from rsvd import RSVD
# Model dimensions: (number of movie IDs, number of user IDs).
dims = (dataset.movieIDs().shape[0], dataset.userIDs().shape[0])

# Train with 20 factors, using the validation set as the probe array.
model = RSVD.train(20,
                   train,
                   dims,
                   probeArray=val,
                   maxEpochs=100,
                   learnRate=0.0005,
                   regularization=0.005)

# Root-mean-square error of the model's predictions on the test set.
sqerr = 0.0
for movieID, userID, rating in test:
    err = rating - model(movieID, userID)
    sqerr += err * err
sqerr /= test.shape[0]
print "Test RMSE: ", np.sqrt(sqerr)

import IPython
# Drop into an interactive shell; the names defined above (model, train,
# val, test, ...) are what the session gets to inspect.
IPython.embed()
Esempio n. 6
0
"""
Test
----
This is a test
"""

#log# Automatic Logger file. *** THIS MUST BE THE FIRST LINE ***
#log# DO NOT CHANGE THIS LINE OR THE TWO BELOW
#log# opts = Struct({'__allownew': True, 'logfile': 'ipython_log.py'})
#log# args = []
#log# It is safe to make manual edits below here.
#log#-----------------------------------------------------------------------
import numpy as np
from rsvd import RSVD
# Load the training ratings and the probe ratings from disk.
print 'load ratings'
ratings = np.load('data/ratings_float.arr')

probeRatings = np.load('data/probe_ratings_float.arr')

# Train a 20-factor model with dimensions (17770, 480189).
# NOTE(review): probeRatings and 100 are passed positionally - presumably
# the probe array and the maximum number of epochs; confirm against the
# signature of RSVD.train.
model = RSVD.train(20,
                   ratings, (17770, 480189),
                   probeRatings,
                   100,
                   randomize=False)
print "model trained..."

# Persist the trained model under models/.
model.save("models/t_20_001_011_100")
Esempio n. 7
0
train = ratings[:n]
test = ratings[n:]
v = int(train.shape[0]*0.9)
val = train[v:]
train = train[:v]


dims = (ratingsDataset.movieIDs().shape[0], ratingsDataset.userIDs().shape[0])
factor = 40

lambdas = []
errors = []
# lambda_f ne doit pas depasser 1
# maxEpochs = 1000
for lambda_f in np.arange(0.0, 0.05, 0.0005): 
	model = RSVD.train(factor, train, dims, probeArray=val, maxEpochs = 1000, regularization=lambda_f)

	sqerr=0.0
	for movieID,userID,rating in test:
   		 err = rating - model(movieID,userID)
   		 sqerr += err * err
	sqerr /= test.shape[0]


	print "-------------------------------------------------"
	print "Pour lambda = ",lambda_f, " Test RMSE: ", np.sqrt(sqerr)
	print "-------------------------------------------------"
	lambdas.append(lambda_f)
	errors.append(np.sqrt(sqerr))

# print the lambdas and errors vectors
		print "ok"
		with open(ARTISTS_LUT_PATH, 'w') as outfile:
  			json.dump(ARTISTS_LUT, outfile)
  		with open(INV_ARTISTS_LUT_PATH, 'w') as outfile:
  			json.dump(INV_ARTISTS_LUT, outfile)
	else:
		with open(ARTISTS_LUT_PATH,'r') as jsonStream:
			ARTISTS_LUT = json.load(jsonStream)
		with open(INV_ARTISTS_LUT_PATH,'r') as jsonStream:
			INV_ARTISTS_LUT = json.load(jsonStream)

	f_corpus = formatData(CORPUS,LEXICON,PLAYLISTS_LUT,ARTISTS_LUT)
	CORPUS.clear() #no need to keep corpus in memory 
	print "ok"

	print "\nNumber of playlists:",len(PLAYLISTS_LUT),"\nNumber of artists:",len(ARTISTS_LUT)

	### Split the dataset
	print "\nGenerating the training/validation and test set ...",
	sys.stdout.flush()
	trainSet,validSet,testSet = splitDataset(f_corpus)
	print "ok"

	### train our MF model
	"""prototype: train(factors,ratingsArray,dims,probeArray=None,\
                  maxEpochs=100,minImprovement=0.000001,\
                  learnRate=0.001,regularization=0.011,\
                  randomize=False, randomNoise=0.005)"""
	model = RSVD.train(25,trainSet,(len(ARTISTS_LUT),len(PLAYLISTS_LUT)),validSet,learnRate=0.001,regularization=0.00000,minImprovement=0.0000001,maxEpochs=700)
	model.save("./")
Esempio n. 9
0
    f_testing = open(TESTING_FILENAME, 'r')
    f_out = open(OUTPUT_FILENAME + fn, 'w')
    print "Making %d predictions..." % NUM_TESTING
    start_time = time.time()
    i = 0
    j = 0
    for line in f_testing:
        user, movie, date = line.strip().split()
        f_out.write(str(model(int(user), int(movie))) + '\n')
        i += 1
        if i % (NUM_TESTING / INCR) == 0:
            j += 100.0 / INCR
            sys.stdout.write("\r%.1f%% done (elapsed time: %f s)." %
                             (j, time.time() - start_time))
            sys.stdout.flush()
    f_testing.close()
    print "Predictions complete (elapsed time: %f s)." % (time.time() -
                                                          start_time)
    f_out.close()


# Script entry point: load the training and probe ratings, train a model and
# write the predictions with predict().
if __name__ == '__main__':
    print "Importing ratings..."
    # Both files are read as flat binary arrays of rating_t records.
    ratings = np.fromfile(TRAINING_FILENAME, dtype=rating_t)
    print "Importing probes..."
    probeRatings = np.fromfile(PROBE_FILENAME, dtype=rating_t)
    print "running svd..."
    # 10 factors, dimensions (17770, 458292), probeRatings as the fourth
    # positional argument.
    model = RSVD.train(10, ratings, (17770, 458292), probeRatings)
    print "predicting..."
    predict(model)
Esempio n. 10
0
def predict(model, fn=""):
    f_testing = open(TESTING_FILENAME, "r")
    f_out = open(OUTPUT_FILENAME + fn, "w")
    print "Making %d predictions..." % NUM_TESTING
    start_time = time.time()
    i = 0
    j = 0
    for line in f_testing:
        user, movie, date = line.strip().split()
        f_out.write(str(model(int(user), int(movie))) + "\n")
        i += 1
        if i % (NUM_TESTING / INCR) == 0:
            j += 100.0 / INCR
            sys.stdout.write("\r%.1f%% done (elapsed time: %f s)." % (j, time.time() - start_time))
            sys.stdout.flush()
    f_testing.close()
    print "Predictions complete (elapsed time: %f s)." % (time.time() - start_time)
    f_out.close()


# Script entry point: load the training and probe ratings, train a model and
# write the predictions with predict().
if __name__ == "__main__":
    print "Importing ratings..."
    # Both files are read as flat binary arrays of rating_t records.
    ratings = np.fromfile(TRAINING_FILENAME, dtype=rating_t)
    print "Importing probes..."
    probeRatings = np.fromfile(PROBE_FILENAME, dtype=rating_t)
    print "running svd..."
    # 10 factors, dimensions (17770, 458292), probeRatings as the fourth
    # positional argument.
    model = RSVD.train(10, ratings, (17770, 458292), probeRatings)
    print "predicting..."
    predict(model)