Example #1
0
 def compute_svd(self):
     """Load the ratings file, split it and fit an SVDNeighbourhood model.

     Fills ``self.data``, ``self.train``, ``self.test`` and ``self.svd``.
     The split uses ``percent=85`` and the factorization uses ``k=100``.
     """
     # File columns: 0 -> matrix column ids, 1 -> matrix row ids,
     # 2 -> rating value; ids are parsed as floats.
     ratings_format = {'col': 0, 'row': 1, 'value': 2, 'ids': float}

     self.data = dataset = Data()
     dataset.load(self.ratings_file, sep=',', format=ratings_format)
     self.train, self.test = dataset.split_train_test(percent=85)

     self.svd = model = SVDNeighbourhood()
     model.set_data(self.train)
     model.compute(
         k=100,
         min_values=1,
         pre_normalize=None,
         mean_center=False,
         post_normalize=True,
     )
class KNNPlusSVD_lib:
    """Rating predictor wrapping a python-recsys ``SVDNeighbourhood`` model."""

    def __init__(self, filename, K):
        """Load tab-separated ratings from *filename*.

        filename -- path of the ratings file handed to ``load_data``.
        K        -- number of factors passed to ``compute``.
        """
        self.svd = SVDNeighbourhood()
        self.K = K
        # File column 0 feeds the matrix columns, column 1 the matrix rows
        # and column 2 the values; ids are integers.
        self.svd.load_data(filename, sep='\t',
                           format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    def predict(self, userId, itemId):
        """Return the predicted rating of *itemId* by *userId*, clipped to [1.0, 5.0].

        The factorization is computed on the first call and reused afterwards;
        it is recomputed only if ``self.K`` has changed since the last call.
        """
        # Factorize lazily instead of on every single prediction.
        if getattr(self, '_computed_k', None) != self.K:
            self.svd.compute(self.K, min_values=5, pre_normalize='all',
                             mean_center=True, post_normalize=None)
            self._computed_k = self.K
        # predict() takes (row id, column id).  With the format used in
        # __init__ rows come from file column 1 and columns from file
        # column 0 -- presumably items and users respectively (ml-100k
        # layout); verify against the data file.
        return self.svd.predict(itemId, userId, weighted=True,
                                MIN_VALUE=1.0, MAX_VALUE=5.0)
def compute_SVDNeighbourhood():
    """Fit an SVDNeighbourhood model on ``load_data()`` and save it.

    The model is written to the file 'ratings_neigh' inside the directory
    returned by ``utils.get_add_dir()``.
    """
    model = SVDNeighbourhood()
    model.set_data(load_data())
    model.compute(
        k=100,
        min_values=10,
        pre_normalize=None,
        mean_center=True,
        post_normalize=True,
        savefile=None,
    )
    target = os.path.join(utils.get_add_dir(), 'ratings_neigh')
    model.save_model(target)
def recommend_users(probID, SVDNeighbourhood=False):
    """Return ``recommend(probID)`` from a previously saved model.

    probID           -- id handed to the model's ``recommend`` method.
    SVDNeighbourhood -- when true, load the 'ratings_neigh' model with the
                        SVDNeighbourhood class; otherwise load the plain SVD
                        model saved as 'ratings'.
    """
    if SVDNeighbourhood:
        # The boolean parameter shadows the class of the same name, so the
        # class is imported under an alias; calling the flag itself raised
        # a TypeError.
        from recsys.algorithm.factorize import SVDNeighbourhood as _NeighbourhoodModel
        model = _NeighbourhoodModel()
        model_name = 'ratings_neigh'
    else:
        model = SVD()
        model_name = 'ratings'
    model.load_model(os.path.join(utils.get_add_dir(), model_name))
    return model.recommend(probID)
def predict_rating(probID, userID, MIN_RATING, MAX_RATING, SVDNeighbourhood=False):
    """Return the predicted rating for (probID, userID) from a saved model.

    probID, userID         -- ids handed to the model's ``predict`` method.
    MIN_RATING, MAX_RATING -- bounds passed as ``MIN_VALUE`` / ``MAX_VALUE``.
    SVDNeighbourhood       -- when true, load the 'ratings_neigh' model with
                              the SVDNeighbourhood class; otherwise load the
                              plain SVD model saved as 'ratings'.
    """
    if SVDNeighbourhood:
        # The boolean parameter shadows the class of the same name, so the
        # class is imported under an alias; calling the flag itself raised
        # a TypeError.
        from recsys.algorithm.factorize import SVDNeighbourhood as _NeighbourhoodModel
        model = _NeighbourhoodModel()
        model_name = 'ratings_neigh'
    else:
        model = SVD()
        model_name = 'ratings'
    model.load_model(os.path.join(utils.get_add_dir(), model_name))
    # Bounds are passed by keyword: SVDNeighbourhood.predict takes a
    # ``weighted`` argument before MIN_VALUE (as its other call sites show),
    # so positional bounds would land on the wrong parameters.
    return model.predict(probID, userID,
                         MIN_VALUE=MIN_RATING, MAX_VALUE=MAX_RATING)
def get_similar_problems(probID, SVDNeighbourhood=False):
    """Return ``similar(probID)`` from a previously saved model.

    probID           -- id handed to the model's ``similar`` method.
    SVDNeighbourhood -- when true, load the 'ratings_neigh' model with the
                        SVDNeighbourhood class; otherwise load the plain SVD
                        model saved as 'ratings'.
    """
    if SVDNeighbourhood:
        # The boolean parameter shadows the class of the same name, so the
        # class is imported under an alias; calling the flag itself raised
        # a TypeError.
        from recsys.algorithm.factorize import SVDNeighbourhood as _NeighbourhoodModel
        model = _NeighbourhoodModel()
        model_name = 'ratings_neigh'
    else:
        model = SVD()
        model_name = 'ratings'
    model.load_model(os.path.join(utils.get_add_dir(), model_name))
    return model.similar(probID)
def recommend_problems(userID, SVDNeighbourhood=False):
    """Return up to 20 recommendations for *userID*, minus filtered entries.

    userID           -- id handed to the model's ``recommend`` method.
    SVDNeighbourhood -- when true, load the 'ratings_neigh' model with the
                        SVDNeighbourhood class; otherwise load the plain SVD
                        model saved as 'ratings'.

    A recommendation is dropped when ``load_data()`` contains a tuple ``t``
    with ``t[1]`` equal to the recommended id and ``t[2] == 45``.
    """
    if SVDNeighbourhood:
        # The boolean parameter shadows the class of the same name, so the
        # class is imported under an alias; calling the flag itself raised
        # a TypeError.
        from recsys.algorithm.factorize import SVDNeighbourhood as _NeighbourhoodModel
        model = _NeighbourhoodModel()
        model_name = 'ratings_neigh'
    else:
        model = SVD()
        model_name = 'ratings'
    model.load_model(os.path.join(utils.get_add_dir(), model_name))

    candidates = model.recommend(userID, n=20, only_unknowns=False, is_row=False)
    kept = []
    ratings = load_data()
    for candidate in candidates:
        # NOTE(review): 45 looks like a hard-coded id that was probably meant
        # to be ``userID`` -- confirm before changing.
        already_present = any(
            t[1] == candidate[0] and t[2] == 45 for t in ratings
        )
        if not already_present:
            kept.append(candidate)
    return kept
    # recommender = Recommender()
    # recommender.load_web_data('dataset',
    #                           [{'Запах женщины': 9, 'The Usual Suspects': 8, 'The Departed': 8,
    #                             'Тутси': 7, 'Выпускник': 10, 'Залечь на дно в Брюгге': 4, 'Евротур': 7,
    #                             'Goodfellas': 6, 'Донни Браско': 8, 'Амели': 3, 'Идиократия': 7}],
    #                           100, 0, 10, 10)

    # recommender.load_local_data('dataset', K=100, min_values=0)
    # m = recommender.matrix.get_rating_matrix()
    #
    # m1 = recommender.get_predictions_for_all_users()

    # Smoke test: fit a neighbourhood SVD on 'test_dataset' and print one
    # prediction.
    from recsys.algorithm.factorize import SVDNeighbourhood

    svd = SVDNeighbourhood()
    # Space-separated file: column 0 holds the row ids, column 1 the column
    # ids and column 2 the values; ids are parsed as integers.
    svd.load_data('test_dataset',
                  sep=' ',
                  format={
                      'col': 1,
                      'row': 0,
                      'value': 2,
                      'ids': int
                  })
    # Positional arguments -- presumably k=100 and min_values=0; confirm
    # against the recsys compute() signature.
    svd.compute(100, 0)
    # Python 2 print statement: prediction for row id 108, column id 698.
    print svd.predict(108, 698)

    # svd.load_data(filename=sys.argv[1], sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
    # K=100
    # svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)
Example #9
0
import sys
from numpy import nan, mean

# To show some messages:
import recsys.algorithm

recsys.algorithm.VERBOSE = True

from recsys.algorithm.factorize import SVD, SVDNeighbourhood
from recsys.datamodel.data import Data
from recsys.evaluation.prediction import RMSE, MAE

# Models (K is the number of factors used by the script)
K = 100
svd, svd_neig = SVD(), SVDNeighbourhood()

# Dataset: argv[1] is the '::'-separated ratings file, argv[2] the value
# later passed as ``percent`` to split_train_test.
PERCENT_TRAIN = int(sys.argv[2])
data = Data()
data.load(
    sys.argv[1],
    sep="::",
    format={"col": 0, "row": 1, "value": 2, "ids": int},
)

# Per-run metric accumulators
rmse_svd_all, mae_svd_all = [], []
rmse_svd_neig_all, mae_svd_neig_all = [], []

RUNS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
for run in RUNS:
    # Single-argument print(...) behaves the same as the print statement.
    print("RUN(%d)" % run)
    # Train & Test data
    # Meaning of the ``format`` parameter used when loading the data:
    #   'row': 1 -> matrix rows come from column 1 of the ratings.dat file
    #   'col': 0 -> matrix columns come from column 0 of the ratings.dat file
    #   'value': 2 -> matrix values (Mij) come from column 2 of the file
    #   'ids': int -> row and column ids are integers (not strings)

# Sweep the number of factors (50, 52, ..., 78); 10 random splits per value.
list = []
for j in range(50, 80, 2):
    sum_value = 0.0
    for i in range(1, 11):
        # Fresh train/test split for this repetition
        train, test = data.split_train_test(percent=PERCENT_TRAIN)

        K = j
        svd = SVDNeighbourhood()
        svd.set_data(train)
        svd.compute(
            k=K,
            min_values=5,
            pre_normalize=None,
            mean_center=True,
            post_normalize=True,
        )

        # Prediction-based metrics over the held-out ratings
        rmse, mae = RMSE(), MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(item_id, user_id)
                rmse.add(rating, pred_rating)
                mae.add(rating, pred_rating)
            except KeyError:
                # A KeyError here just skips this rating
                pass

        print('RMSE=%s' % rmse.compute())
 def __init__(self, filename, K):
     """Store *K* and load tab-separated ratings from *filename* into a new model."""
     self.K = K
     self.svd = SVDNeighbourhood()
     # File column 0 -> matrix columns, 1 -> matrix rows, 2 -> values.
     layout = {'col': 0, 'row': 1, 'value': 2, 'ids': int}
     self.svd.load_data(filename, sep='\t', format=layout)

if __name__ == "__main__":
    # Disabled experiments, kept verbatim for reference (the titles are
    # dataset keys and are intentionally left untranslated):
    # recommender = Recommender()
    # recommender.load_web_data('dataset',
    #                           [{'Запах женщины': 9, 'The Usual Suspects': 8, 'The Departed': 8,
    #                             'Тутси': 7, 'Выпускник': 10, 'Залечь на дно в Брюгге': 4, 'Евротур': 7,
    #                             'Goodfellas': 6, 'Донни Браско': 8, 'Амели': 3, 'Идиократия': 7}],
    #                           100, 0, 10, 10)

    # recommender.load_local_data('dataset', K=100, min_values=0)
    # m = recommender.matrix.get_rating_matrix()
    #
    # m1 = recommender.get_predictions_for_all_users()

    from recsys.algorithm.factorize import SVDNeighbourhood

    # Smoke test: fit on the space-separated 'test_dataset' and print one
    # prediction.
    svd = SVDNeighbourhood()
    svd.load_data(
        'test_dataset',
        sep=' ',
        format={'col': 1, 'row': 0, 'value': 2, 'ids': int},
    )
    svd.compute(100, 0)
    print(svd.predict(108, 698))

    # svd.load_data(filename=sys.argv[1], sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
    # K=100
    # svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)



Example #13
0
import sys
from numpy import nan, mean

#To show some messages:
import recsys.algorithm
recsys.algorithm.VERBOSE = True

from recsys.algorithm.factorize import SVD, SVDNeighbourhood
from recsys.datamodel.data import Data
from recsys.evaluation.prediction import RMSE, MAE

# Models (K is the number of factors used by the script)
K = 100
svd, svd_neig = SVD(), SVDNeighbourhood()

# Dataset: argv[1] is the '::'-separated ratings file, argv[2] the value
# later passed as ``percent`` to split_train_test.
PERCENT_TRAIN = int(sys.argv[2])
data = Data()
data.load(
    sys.argv[1],
    sep='::',
    format={'col': 0, 'row': 1, 'value': 2, 'ids': int},
)

# Per-run metric accumulators
rmse_svd_all, mae_svd_all = [], []
rmse_svd_neig_all = []
Example #14
0
class Collaborative_filtering(object):
    """Movie recommender built on a python-recsys ``SVDNeighbourhood`` model.

    Call ``compute_svd()`` once to fit the model before using the
    recommendation and similarity methods.
    """

    def __init__(self, ratings_file, movies):
        """Store the inputs; no file is read until ``compute_svd()``.

        ratings_file -- path of the comma-separated ratings file.
        movies       -- table with 'movie_id' and 'title' columns; it is
                        indexed with boolean masks, so a pandas DataFrame
                        is expected (supplied by views.py).
        """
        self.movies = movies
        self.K = 100  # number of factors used by compute_svd()
        self.PERCENT_TRAIN = 85  # ``percent`` passed to split_train_test()
        self.ratings_file = ratings_file
        self.data = None
        self.svd = None
        self.recommend_movies_list = None
        self.recommend_movies_ids = None
        self.similar_movies_list = None
        self.similar_movies_ids = None

        self.movie_id = None
        self.train = None
        self.test = None

    def compute_svd(self):
        """Load the ratings, split them and fit the neighbourhood SVD model.

        Fills ``self.data``, ``self.train``, ``self.test`` and ``self.svd``
        using ``self.PERCENT_TRAIN`` and ``self.K`` (85 and 100 by default,
        the values that used to be hard-coded here).
        """
        self.data = Data()
        # File column 0 -> matrix columns, 1 -> matrix rows, 2 -> values;
        # ids are parsed as floats.
        self.data.load(self.ratings_file,
                       sep=',',
                       format={
                           'col': 0,
                           'row': 1,
                           'value': 2,
                           'ids': float
                       })
        self.train, self.test = self.data.split_train_test(
            percent=self.PERCENT_TRAIN)
        self.svd = SVDNeighbourhood()
        self.svd.set_data(self.train)
        self.svd.compute(k=self.K,
                         min_values=1,
                         pre_normalize=None,
                         mean_center=False,
                         post_normalize=True)

    def _movie_rows(self, movie_id):
        """Return the rows of ``self.movies`` whose 'movie_id' equals *movie_id*."""
        return self.movies.loc[self.movies['movie_id'] == movie_id]

    def similarity_measure(self, movie1, movie2):
        """Return the model similarity of two movies, rounded to 4 decimals."""
        return round(self.svd.similarity(movie1, movie2), 4)

    def recommend_movies(self, user_id):
        """Return ``(titles, ids)`` of up to 10 recommendations for *user_id*.

        Recommendations whose id has no row in ``self.movies`` are skipped,
        so both lists always have the same length.
        """
        recommendations = self.svd.recommend(user_id, n=10,
                                             only_unknowns=True, is_row=False)
        self.recommend_movies_list = []
        self.recommend_movies_ids = []
        for recommendation in recommendations:
            rows = self._movie_rows(recommendation[0])
            if rows.empty:
                continue
            # Read the title cell directly instead of parsing str(Series).
            self.recommend_movies_list.append(rows['title'].iloc[0])
            # First column of the first matching row, as before.
            first_two = rows.iloc[:, 0:2].values.tolist()
            self.recommend_movies_ids.append(first_two[0][0])
        return self.recommend_movies_list, self.recommend_movies_ids

    def get_similar_movies(self, movie1):
        """Return ``(titles, ids)`` of the movies most similar to *movie1*.

        *movie1* is converted with ``int()``.  The first entry returned by
        the model is dropped.  A movie without a row in ``self.movies``
        gets an empty title.
        """
        movie1 = int(movie1)
        neighbours = self.svd.similar(movie1)[1:]
        self.similar_movies_list = []
        self.similar_movies_ids = []

        for neighbour in neighbours:
            titles = self._movie_rows(neighbour[0])['title']
            self.similar_movies_list.append(
                titles.iloc[0] if len(titles) else '')
            self.similar_movies_ids.append(neighbour[0])

        return self.similar_movies_list, self.similar_movies_ids
__author__ = 'ponomarevandrew'


from recsys.algorithm.factorize import SVDNeighbourhood

# Fit a neighbourhood SVD on the tab-separated ml-100k training split and
# print one clipped prediction.
svd = SVDNeighbourhood()
svd.load_data(
    filename='ml-100k/u1.base',
    sep='\t',
    format={'col': 0, 'row': 1, 'value': 2, 'ids': int},
)
K = 30

svd.compute(
    k=K,
    min_values=5,
    pre_normalize='all',
    mean_center=True,
    post_normalize=None,
)

prediction = svd.predict(11, 33, weighted=True, MIN_VALUE=1.0, MAX_VALUE=5.0)
print(prediction)
    # About format parameter:
    #   'row': 1 -> Rows in matrix come from column 1 in ratings.dat file
    #   'col': 0 -> Cols in matrix come from column 0 in ratings.dat file
    #   'value': 2 -> Values (Mij) in matrix come from column 2 in ratings.dat file
    #   'ids': int -> Ids (row and col ids) are integers (not strings)

# Sweep the number of factors (0, 50, ..., 250); 10 random splits per value.
# NOTE(review): the first iteration uses K=0 -- confirm that is intended.
sum_value = 0.0
list = []
for j in range(0, 300, 50):
    sum_value = 0.0
    for i in range(1, 11):
        # Model for this repetition
        K = j
        train, test = data.split_train_test(percent=PERCENT_TRAIN)
        svd = SVDNeighbourhood()
        svd.set_data(train)
        svd.compute(
            k=K,
            min_values=20,
            pre_normalize=None,
            mean_center=True,
            post_normalize=True,
        )

        # Prediction-based metrics over the held-out ratings
        rmse, mae = RMSE(), MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(item_id, user_id, weighted=True,
                                          MIN_VALUE=0.0, MAX_VALUE=5.0)
                rmse.add(rating, pred_rating)
                mae.add(rating, pred_rating)
            except KeyError:
                # A KeyError here just skips this rating
                pass
        print('RMSE=%s' % rmse.compute())
        sum_value += rmse.compute()