#!/usr/bin/python
"""MovieLens tutorial script.

Loads the ratings file, shuffles it, carves out train / validation / test
subsets and hands the training subset to ``RSVD.train``.
See doc/tutorial.rst for more information.
"""
import numpy as np

from rsvd import MovieLensDataset, RSVD

dataset = MovieLensDataset.loadDat("data/movielense/ratings.dat")
ratings = dataset.ratings()

# Shuffle in place so that the positional slices taken below do not depend
# on the order of the ratings in the input file.
np.random.shuffle(ratings)

# The first 80% of the shuffled ratings are used for fitting, the remaining
# 20% are held out as the test set.
n = int(ratings.shape[0] * 0.8)
train, test = ratings[:n], ratings[n:]

# Of the fitting portion, the first 90% stay in the training set and the
# last 10% become the validation set.
v = int(train.shape[0] * 0.9)
train, val = train[:v], train[v:]

# Model dimensions: (number of movie IDs, number of user IDs).
num_movies = dataset.movieIDs().shape[0]
num_users = dataset.userIDs().shape[0]
dims = (num_movies, num_users)

# NOTE(review): the leading 20 is presumably the number of latent factors --
# confirm against the rsvd documentation. The validation split is passed as
# the probe array.
model = RSVD.train(
    20,
    train,
    dims,
    probeArray=val,
    maxEpochs=100,
    learnRate=0.0005,
    regularization=0.005,
)

sqerr = 0.0
# NOTE(review): the snippet is cut off at this point. Only the loop header
# below is visible, so it is kept verbatim as a comment instead of guessing
# at its body.
# for movieID, userID, rating in test:
#!/usr/bin/python
"""MovieLens tutorial script.

Loads the ratings file, shuffles it and carves out train / validation /
test subsets ahead of a call to ``RSVD.train``.
See doc/tutorial.rst for more information.
"""
import numpy as np

from rsvd import MovieLensDataset, RSVD

dataset = MovieLensDataset.loadDat('data/movielense/ratings.dat')
ratings = dataset.ratings()

# Shuffle in place so that the positional slices taken below do not depend
# on the order of the ratings in the input file.
np.random.shuffle(ratings)

# The first 80% of the shuffled ratings are used for fitting, the remaining
# 20% are held out as the test set.
n = int(ratings.shape[0] * 0.8)
train, test = ratings[:n], ratings[n:]

# Of the fitting portion, the first 90% stay in the training set and the
# last 10% become the validation set.
v = int(train.shape[0] * 0.9)
train, val = train[:v], train[v:]

# Model dimensions: (number of movie IDs, number of user IDs).
num_movies = dataset.movieIDs().shape[0]
num_users = dataset.userIDs().shape[0]
dims = (num_movies, num_users)

# NOTE(review): the snippet is cut off in the middle of the call below. The
# visible part is kept verbatim as a comment rather than completed by
# guesswork.
# model = RSVD.train(20, train, dims, probeArray=val, maxEpochs=100, learnRate=0.0005,
#!/usr/bin/python2.7
# Prepares shuffled train / validation / test splits of a MovieLens ratings
# file and the bookkeeping lists used by a loop over ``lambda_f`` values.
# NOTE(review): the loop body is not visible here; this looks like a sweep
# over regularization strengths -- confirm against the full script.
import numpy as np
import matplotlib.pyplot as plt

from rsvd import RSVD, rating_t, MovieLensDataset

ratingsDataset = MovieLensDataset.loadDat('data_movilens1m/ratings.dat')
ratings = ratingsDataset.ratings()

# Shuffle in place so that the positional slices taken below do not depend
# on the order of the ratings in the input file.
np.random.shuffle(ratings)

# The first 80% of the shuffled ratings are used for fitting, the remaining
# 20% are held out as the test set.
n = int(ratings.shape[0] * 0.8)
train, test = ratings[:n], ratings[n:]

# Of the fitting portion, the first 90% stay in the training set and the
# last 10% become the validation set.
v = int(train.shape[0] * 0.9)
train, val = train[:v], train[v:]

# Model dimensions: (number of movie IDs, number of user IDs).
num_movies = ratingsDataset.movieIDs().shape[0]
num_users = ratingsDataset.userIDs().shape[0]
dims = (num_movies, num_users)

factor = 40
lambdas = []
errors = []

# lambda_f must not exceed 1
# maxEpochs = 1000
# NOTE(review): the snippet is cut off at this point. Only the loop header
# below is visible, so it is kept verbatim as a comment instead of guessing
# at its body.
# for lambda_f in np.arange(0.0, 0.05, 0.0005):
# Shell preprocessing required before running this script.
#
# 1. Drop the first line of the raw CSV:
# tail -n +2 "data/20150701094451-Behavior_training.csv" > data/behavior.csv
#
# 2. Convert to the "::"-separated layout: strip "TV" from field 3, strip "-"
#    and "T" from field 1, then emit fields 2, 3 and 5:
# awk -F',' '{gsub("TV", "", $3) ; gsub("-", "", $1) ; gsub("T", "", $1) ; print $2"::"$3"::"$5}' data/behavior.csv > data/behavior-ml.csv
#
# NOTE(review): a shebang only takes effect on the first line of a file.
#!/usr/bin/python
"""Train an RSVD model on the converted behavior data.

Adapted from the MovieLens tutorial; see doc/tutorial.rst for more
information.
"""
import numpy as np

from rsvd import MovieLensDataset, RSVD

dataset = MovieLensDataset.loadDat('./data/behavior-ml.csv')
ratings = dataset.ratings()

# Shuffle in place so that the positional slices taken below do not depend
# on the order of the ratings in the input file.
np.random.shuffle(ratings)

# The first 80% of the shuffled ratings are used for fitting, the remaining
# 20% are held out as the test set.
n = int(ratings.shape[0] * 0.8)
train, test = ratings[:n], ratings[n:]

# Of the fitting portion, the first 90% stay in the training set and the
# last 10% become the validation set.
v = int(train.shape[0] * 0.9)
train, val = train[:v], train[v:]

# Model dimensions: (number of movie IDs, number of user IDs).
num_movies = dataset.movieIDs().shape[0]
num_users = dataset.userIDs().shape[0]
dims = (num_movies, num_users)

# NOTE(review): the leading 20 is presumably the number of latent factors --
# confirm against the rsvd documentation. The validation split is passed as
# the probe array.
model = RSVD.train(
    20,
    train,
    dims,
    probeArray=val,
    learnRate=0.0005,
    regularization=0.005,
)
# define rating array (itemID,userID,rating) ratings=np.empty((n,),dtype=rating_t) for i,row in enumerate(rows): ratings[i]=(row[1],row[0]-1,row[2]) movieIDs=np.unique(ratings['f0']) userIDs=np.unique(ratings['f1']) movieIDs.sort() userIDs.sort() #map movieIDs for i,rec in enumerate(ratings): ratings[i]['f0']=movieIDs.searchsorted(rec['f0'])+1 # correspondance entre les ids du fichier movies.dat et les ids utilisés dans l'objet ratings original_movieIDs=movieIDs movieIDs=np.unique(ratings['f0']) movieIDs.sort() ratingsDataset = MovieLensDataset(movieIDs,userIDs,ratings) finally: f.close() ratings=ratingsDataset.ratings() # make sure that the ratings a properly shuffled np.random.shuffle(ratings) # create train, validation and test sets. n = int(ratings.shape[0]*0.8) train = ratings[:n]