def compute_svd(self):
    """Load the ratings file, split it and fit a neighbourhood SVD model.

    The ratings are read from ``self.ratings_file`` as comma-separated
    values: field 0 supplies the matrix columns, field 1 the rows and
    field 2 the values, with ids parsed as floats.  85 percent is passed
    to ``split_train_test`` and the model is computed with ``k=100``.

    Results are stored on the instance as ``self.data``, ``self.train``,
    ``self.test`` and ``self.svd``; nothing is returned.

    An earlier variant (previously kept here as a dead-code string) used a
    plain ``SVD`` with ``mean_center=True`` and took ``k`` and the split
    percentage from ``self.K`` / ``self.PERCENT_TRAIN``.
    """
    ratings_format = {'col': 0, 'row': 1, 'value': 2, 'ids': float}

    self.data = dataset = Data()
    dataset.load(self.ratings_file, sep=',', format=ratings_format)

    split = dataset.split_train_test(percent=85)
    self.train, self.test = split

    self.svd = model = SVDNeighbourhood()
    model.set_data(self.train)
    model.compute(
        k=100,
        min_values=1,
        pre_normalize=None,
        mean_center=False,
        post_normalize=True,
    )
class KNNPlusSVD_lib:
    """Rating predictor backed by a python-recsys ``SVDNeighbourhood`` model.

    The ratings file is loaded once in ``__init__``; the factorisation is
    computed lazily on the first ``predict`` call and reused afterwards.
    """

    # Sentinel meaning "no factorisation has been computed yet".
    _NOT_COMPUTED = object()
    # Value of ``self.K`` used for the most recent ``compute`` call.
    _computed_k = _NOT_COMPUTED

    def __init__(self, filename, K):
        """Load space-separated ratings from ``filename``.

        :param filename: path of the ratings file; field 0 supplies the
            matrix columns, field 1 the rows, field 2 the values, and ids
            are parsed as ints.
        :param K: number of factors passed to ``compute``.
        """
        self.svd = SVDNeighbourhood()
        self.K = K
        self.svd.load_data(filename, sep=' ',
                           format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    def predict(self, userId, itemId):
        """Return the predicted rating of ``itemId`` by ``userId``.

        The original implementation ignored both arguments and always
        predicted for the hard-coded pair (11, 33).  It also recomputed the
        whole factorisation on every call; that work is now done only when
        no model exists yet or ``self.K`` has changed since the last call.

        :param userId: id from field 0 of the ratings file (matrix column).
        :param itemId: id from field 1 of the ratings file (matrix row).
        :returns: whatever ``SVDNeighbourhood.predict`` returns, with the
            bounds 1.0 and 5.0 passed as MIN_VALUE / MAX_VALUE.
        """
        stale = (self._computed_k is self._NOT_COMPUTED
                 or self._computed_k != self.K)
        if stale:
            self.svd.compute(self.K, min_values=5, pre_normalize='all',
                             mean_center=True, post_normalize=None)
            self._computed_k = self.K
        # recsys addresses a cell as (row, col).  With the format used in
        # __init__ rows are field 1 and columns are field 0, so the item id
        # goes first.  NOTE(review): assumes field 0 holds user ids and
        # field 1 item ids, as the parameter names suggest - confirm.
        return self.svd.predict(itemId, userId, weighted=True,
                                MIN_VALUE=1.0, MAX_VALUE=5.0)
def compute_SVDNeighbourhood():
    """Fit a neighbourhood SVD on the loaded ratings and save the model.

    The data comes from ``load_data()``; the model is computed with 100
    factors and ``min_values=10`` and written to the file ``ratings_neigh``
    inside the directory returned by ``utils.get_add_dir()``.
    """
    factors = 100
    model = SVDNeighbourhood()
    ratings = load_data()
    model.set_data(ratings)
    model.compute(
        k=factors,
        min_values=10,
        pre_normalize=None,
        mean_center=True,
        post_normalize=True,
        savefile=None,
    )
    # The target path is resolved only after the factorisation has finished.
    target = os.path.join(utils.get_add_dir(), 'ratings_neigh')
    model.save_model(target)
def recommend_users(probID, SVDNeighbourhood=False):
    """Return the recommendations of a saved model for ``probID``.

    :param probID: id passed unchanged to the model's ``recommend``.
    :param SVDNeighbourhood: truthy to load the neighbourhood model saved
        as ``ratings_neigh``; otherwise the plain SVD model saved as
        ``ratings`` is used.  Both live in ``utils.get_add_dir()``.
    :returns: the result of ``recommend(probID)`` on the loaded model.
    """
    if SVDNeighbourhood:
        # The flag parameter shadows the class of the same name inside this
        # function, so ``SVDNeighbourhood()`` would call the flag itself
        # (TypeError for ``True``).  Import the class under an alias; the
        # parameter name is kept so keyword callers keep working.
        from recsys.algorithm.factorize import (
            SVDNeighbourhood as _NeighbourhoodSVD)
        svd2 = _NeighbourhoodSVD()
        svd2.load_model(os.path.join(utils.get_add_dir(), 'ratings_neigh'))
    else:
        svd2 = SVD()
        svd2.load_model(os.path.join(utils.get_add_dir(), 'ratings'))
    return svd2.recommend(probID)
def predict_rating(probID, userID, MIN_RATING, MAX_RATING, SVDNeighbourhood=False):
    """Predict a rating for the (``probID``, ``userID``) pair from a saved model.

    :param probID: first id passed to the model's ``predict``.
    :param userID: second id passed to the model's ``predict``.
    :param MIN_RATING: lower bound, forwarded as ``MIN_VALUE``.
    :param MAX_RATING: upper bound, forwarded as ``MAX_VALUE``.
    :param SVDNeighbourhood: truthy to load the neighbourhood model saved
        as ``ratings_neigh``; otherwise the plain SVD model saved as
        ``ratings`` is used.  Both live in ``utils.get_add_dir()``.
    :returns: the result of the loaded model's ``predict``.
    """
    if SVDNeighbourhood:
        # The flag parameter shadows the class of the same name inside this
        # function, so ``SVDNeighbourhood()`` would call the flag itself
        # (TypeError for ``True``).  Import the class under an alias.
        from recsys.algorithm.factorize import (
            SVDNeighbourhood as _NeighbourhoodSVD)
        svd2 = _NeighbourhoodSVD()
        svd2.load_model(os.path.join(utils.get_add_dir(), 'ratings_neigh'))
    else:
        svd2 = SVD()
        svd2.load_model(os.path.join(utils.get_add_dir(), 'ratings'))
    # Bounds are passed by keyword so they cannot land on a different
    # positional parameter.  NOTE(review): python-recsys appears to declare
    # SVDNeighbourhood.predict(i, j, Sk, weighted, MIN_VALUE, MAX_VALUE), in
    # which case the old positional call fed the bounds into Sk/weighted -
    # confirm against the installed version.
    return svd2.predict(probID, userID,
                        MIN_VALUE=MIN_RATING, MAX_VALUE=MAX_RATING)
def get_similar_problems(probID, SVDNeighbourhood=False):
    """Return the entries a saved model considers similar to ``probID``.

    :param probID: id passed unchanged to the model's ``similar``.
    :param SVDNeighbourhood: truthy to load the neighbourhood model saved
        as ``ratings_neigh``; otherwise the plain SVD model saved as
        ``ratings`` is used.  Both live in ``utils.get_add_dir()``.
    :returns: the result of ``similar(probID)`` on the loaded model.
    """
    if SVDNeighbourhood:
        # The flag parameter shadows the class of the same name inside this
        # function, so ``SVDNeighbourhood()`` would call the flag itself
        # (TypeError for ``True``).  Import the class under an alias.
        from recsys.algorithm.factorize import (
            SVDNeighbourhood as _NeighbourhoodSVD)
        svd2 = _NeighbourhoodSVD()
        svd2.load_model(os.path.join(utils.get_add_dir(), 'ratings_neigh'))
    else:
        svd2 = SVD()
        svd2.load_model(os.path.join(utils.get_add_dir(), 'ratings'))
    return svd2.similar(probID)
def recommend_problems(userID, SVDNeighbourhood=False):
    """Recommend up to 20 problems for ``userID``, minus an exclusion set.

    The loaded model is asked for 20 recommendations
    (``only_unknowns=False, is_row=False``).  A recommendation is dropped
    when ``load_data()`` contains a tuple whose element 1 equals the
    recommended id and whose element 2 equals 45.

    :param userID: id passed to the model's ``recommend``.
    :param SVDNeighbourhood: truthy to load the neighbourhood model saved
        as ``ratings_neigh``; otherwise the plain SVD model saved as
        ``ratings`` is used.  Both live in ``utils.get_add_dir()``.
    :returns: list of the surviving recommendation entries, in the order
        the model returned them.
    """
    if SVDNeighbourhood:
        # The flag parameter shadows the class of the same name inside this
        # function, so ``SVDNeighbourhood()`` would call the flag itself
        # (TypeError for ``True``).  Import the class under an alias.
        from recsys.algorithm.factorize import (
            SVDNeighbourhood as _NeighbourhoodSVD)
        svd2 = _NeighbourhoodSVD()
        svd2.load_model(os.path.join(utils.get_add_dir(), 'ratings_neigh'))
    else:
        svd2 = SVD()
        svd2.load_model(os.path.join(utils.get_add_dir(), 'ratings'))
    problems = svd2.recommend(userID, n=20, only_unknowns=False, is_row=False)

    # One pass over the data instead of one scan per recommendation.
    # NOTE(review): 45 is hard-coded; element 2 looks like the same kind of
    # id as ``userID``, so this may have been meant to be ``userID`` -
    # confirm before changing.  Assumes element 1 is hashable.
    excluded = set(t[1] for t in load_data() if t[2] == 45)
    return [problem for problem in problems if problem[0] not in excluded]
# Disabled experiment with the Recommender wrapper, kept for reference.
# The movie titles are input data and are therefore left verbatim.
# recommender = Recommender()
# recommender.load_web_data('dataset',
#     [{'Запах женщины': 9, 'The Usual Suspects': 8, 'The Departed': 8,
#       'Тутси': 7, 'Выпускник': 10, 'Залечь на дно в Брюгге': 4, 'Евротур': 7,
#       'Goodfellas': 6, 'Донни Браско': 8, 'Амели': 3, 'Идиократия': 7}],
#     100, 0, 10, 10)
# recommender.load_local_data('dataset', K=100, min_values=0)
# m = recommender.matrix.get_rating_matrix()
#
# m1 = recommender.get_predictions_for_all_users()

from recsys.algorithm.factorize import SVDNeighbourhood

# Fit a neighbourhood SVD on the space-separated 'test_dataset' file
# (field 0 -> rows, field 1 -> columns, field 2 -> values, integer ids)
# and print one prediction.
svd = SVDNeighbourhood()
svd.load_data(
    'test_dataset',
    sep=' ',
    format={'col': 1, 'row': 0, 'value': 2, 'ids': int},
)
svd.compute(100, 0)
print(svd.predict(108, 698))

# Alternative invocation taking the ratings file from the command line:
# svd.load_data(filename=sys.argv[1], sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
# K=100
# svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)
import sys
from numpy import nan, mean

# Turn on recsys progress messages before the algorithms are imported.
import recsys.algorithm
recsys.algorithm.VERBOSE = True

from recsys.algorithm.factorize import SVD, SVDNeighbourhood
from recsys.datamodel.data import Data
from recsys.evaluation.prediction import RMSE, MAE

# The two models under comparison, both meant to use K factors.
K = 100
svd = SVD()
svd_neig = SVDNeighbourhood()

# Command line: argv[1] is the '::'-separated ratings file,
# argv[2] the training percentage.
PERCENT_TRAIN = int(sys.argv[2])
data = Data()
data.load(
    sys.argv[1],
    sep="::",
    format={"col": 0, "row": 1, "value": 2, "ids": int},
)

# Per-run error metrics for each model.
rmse_svd_all, mae_svd_all = [], []
rmse_svd_neig_all, mae_svd_neig_all = [], []

RUNS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
for run in RUNS:
    print("RUN(%d)" % run)
    # Train & Test data
# Meaning of the ``format`` argument used when the ratings were loaded:
#   'row': 1    -> matrix rows come from column 1 of the ratings.dat file
#   'col': 0    -> matrix columns come from column 0 of the ratings.dat file
#   'value': 2  -> matrix values (Mij) come from column 2 of the ratings.dat file
#   'ids': int  -> row and column ids are integers (not strings)

# Sweep the number of factors K over 50, 52, ..., 78; ten random
# train/test splits are evaluated for every K.
list = []
for j in range(50, 80, 2):
    sum_value = 0.0
    for i in range(1, 11):
        # Fresh random split and a fresh model for every run.
        train, test = data.split_train_test(percent=PERCENT_TRAIN)
        K = j
        svd = SVDNeighbourhood()
        svd.set_data(train)
        svd.compute(
            k=K,
            min_values=5,
            pre_normalize=None,
            mean_center=True,
            post_normalize=True,
        )

        # Prediction-based evaluation on the held-out ratings.
        rmse = RMSE()
        mae = MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(item_id, user_id)
                for metric in (rmse, mae):
                    metric.add(rating, pred_rating)
            except KeyError:
                # Skip held-out pairs that raise KeyError.
                continue

        print('RMSE=%s' % rmse.compute())
def __init__(self, filename, K):
    """Create the neighbourhood SVD model and load the ratings file.

    :param filename: path of a space-separated ratings file; field 0
        supplies the matrix columns, field 1 the rows, field 2 the
        values, and ids are parsed as ints.
    :param K: stored on the instance as ``self.K``.
    """
    ratings_format = {'col': 0, 'row': 1, 'value': 2, 'ids': int}
    self.svd = model = SVDNeighbourhood()
    self.K = K
    model.load_data(filename, sep=' ', format=ratings_format)
if __name__ == "__main__":
    # Disabled experiment with the Recommender wrapper, kept for reference.
    # The movie titles are input data and are therefore left verbatim.
    # recommender = Recommender()
    # recommender.load_web_data('dataset',
    #     [{'Запах женщины': 9, 'The Usual Suspects': 8, 'The Departed': 8,
    #       'Тутси': 7, 'Выпускник': 10, 'Залечь на дно в Брюгге': 4, 'Евротур': 7,
    #       'Goodfellas': 6, 'Донни Браско': 8, 'Амели': 3, 'Идиократия': 7}],
    #     100, 0, 10, 10)
    # recommender.load_local_data('dataset', K=100, min_values=0)
    # m = recommender.matrix.get_rating_matrix()
    #
    # m1 = recommender.get_predictions_for_all_users()

    from recsys.algorithm.factorize import SVDNeighbourhood

    # Fit a neighbourhood SVD on the space-separated 'test_dataset' file
    # (field 0 -> rows, field 1 -> columns, field 2 -> values, integer ids)
    # and print one prediction.
    svd = SVDNeighbourhood()
    svd.load_data(
        'test_dataset',
        sep=' ',
        format={'col': 1, 'row': 0, 'value': 2, 'ids': int},
    )
    svd.compute(100, 0)
    print(svd.predict(108, 698))

    # Alternative invocation taking the ratings file from the command line:
    # svd.load_data(filename=sys.argv[1], sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
    # K=100
    # svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)
import sys
from numpy import nan, mean

# Turn on recsys progress messages before the algorithms are imported.
import recsys.algorithm
recsys.algorithm.VERBOSE = True

from recsys.algorithm.factorize import SVD, SVDNeighbourhood
from recsys.datamodel.data import Data
from recsys.evaluation.prediction import RMSE, MAE

# The two models under comparison, both meant to use K factors.
K = 100
svd = SVD()
svd_neig = SVDNeighbourhood()

# Command line: argv[1] is the '::'-separated ratings file,
# argv[2] the training percentage.
PERCENT_TRAIN = int(sys.argv[2])
data = Data()
data.load(
    sys.argv[1],
    sep='::',
    format={'col': 0, 'row': 1, 'value': 2, 'ids': int},
)

# Per-run error metrics.
rmse_svd_all, mae_svd_all = [], []
rmse_svd_neig_all = []
class Collaborative_filtering(object):
    """Movie recommender built on a python-recsys neighbourhood SVD model.

    ``movies`` is expected to be a pandas DataFrame with at least the
    columns ``movie_id`` and ``title``; ``recommend_movies`` additionally
    reads the first column of the matching row as the movie id.
    """

    def __init__(self, ratings_file, movies):
        """Store the inputs; no file is read until ``compute_svd`` runs.

        :param ratings_file: path of the comma-separated ratings file.
        :param movies: DataFrame used to translate movie ids to titles.
        """
        # Users are not passed in; they are provided by views.py.
        self.movies = movies
        self.K = 100              # number of factors for the SVD
        self.PERCENT_TRAIN = 85   # share of ratings used for training
        # A ratings.csv produced once up front should be supplied here
        # rather than regenerated on every load.
        self.ratings_file = ratings_file
        self.data = None
        self.svd = None
        self.recommend_movies_list = None
        self.recommend_movies_ids = None
        self.similar_movies_list = None
        self.similar_movies_ids = None
        self.movie_id = None
        self.train = None
        self.test = None

    def compute_svd(self):
        """Load the ratings, split them and fit the neighbourhood SVD.

        Field 0 of the file supplies the matrix columns, field 1 the rows
        and field 2 the values; ids are parsed as floats.  The split
        percentage and number of factors come from ``self.PERCENT_TRAIN``
        and ``self.K`` (previously the same values were hard-coded here).
        """
        self.data = Data()
        self.data.load(self.ratings_file, sep=',',
                       format={'col': 0, 'row': 1, 'value': 2, 'ids': float})
        self.train, self.test = self.data.split_train_test(
            percent=self.PERCENT_TRAIN)
        self.svd = SVDNeighbourhood()
        self.svd.set_data(self.train)
        self.svd.compute(k=self.K, min_values=1, pre_normalize=None,
                         mean_center=False, post_normalize=True)

    def similarity_measure(self, movie1, movie2):
        """Return the model's similarity of two movies, rounded to 4 places."""
        return round(self.svd.similarity(movie1, movie2), 4)

    def _movie_rows(self, movie_id):
        """Return the rows of ``self.movies`` whose movie_id equals ``movie_id``."""
        return self.movies.loc[self.movies['movie_id'] == movie_id]

    def _movie_title(self, movie_id):
        """Return the title of the first matching movie, or '' if none matches."""
        titles = self._movie_rows(movie_id)['title']
        if titles.empty:
            return ''
        return str(titles.iloc[0])

    def recommend_movies(self, user_id):
        """Return ``(titles, ids)`` of the 10 movies recommended to ``user_id``.

        The result lists are also stored on ``self.recommend_movies_list``
        and ``self.recommend_movies_ids``.  The titles used to be looked up
        in a global ``movies`` name via the removed pandas ``.ix`` /
        ``.as_matrix`` API and by parsing the printed Series; the lookup
        now uses ``self.movies`` directly.  A recommended id missing from
        the table yields an empty title and the raw id instead of an
        IndexError.
        """
        recommendations = self.svd.recommend(user_id, n=10,
                                             only_unknowns=True, is_row=False)
        self.recommend_movies_list = []
        self.recommend_movies_ids = []
        for entry in recommendations:
            movie_id = entry[0]
            self.recommend_movies_list.append(self._movie_title(movie_id))
            # The reported id is the first column of the first matching row;
            # .values.tolist() keeps it a plain Python value as before.
            first_cell = self._movie_rows(movie_id).iloc[:1, :1].values.tolist()
            self.recommend_movies_ids.append(
                first_cell[0][0] if first_cell else movie_id)
        return self.recommend_movies_list, self.recommend_movies_ids

    def get_similar_movies(self, movie1):
        """Return ``(titles, ids)`` of the movies similar to ``movie1``.

        ``movie1`` is converted with ``int``.  The first entry returned by
        the model is dropped before the lists are built.  The result lists
        are also stored on ``self.similar_movies_list`` and
        ``self.similar_movies_ids``.
        """
        movie1 = int(movie1)
        neighbours = self.svd.similar(movie1)[1:]
        self.similar_movies_list = []
        self.similar_movies_ids = []
        for entry in neighbours:
            self.similar_movies_list.append(self._movie_title(entry[0]))
            self.similar_movies_ids.append(entry[0])
        return self.similar_movies_list, self.similar_movies_ids
__author__ = 'ponomarevandrew'

from recsys.algorithm.factorize import SVDNeighbourhood

# Fit a neighbourhood SVD with K factors on the space-separated
# 'ml-100k/u1.base' file (field 0 -> columns, field 1 -> rows,
# field 2 -> values, integer ids) and print one prediction with
# bounds 1.0 and 5.0.
svd = SVDNeighbourhood()
svd.load_data(
    filename='ml-100k/u1.base',
    sep=' ',
    format={'col': 0, 'row': 1, 'value': 2, 'ids': int},
)

K = 30
svd.compute(
    k=K,
    min_values=5,
    pre_normalize='all',
    mean_center=True,
    post_normalize=None,
)

prediction = svd.predict(11, 33, weighted=True, MIN_VALUE=1.0, MAX_VALUE=5.0)
print(prediction)
# Meaning of the ``format`` argument used when the ratings were loaded:
#   'row': 1    -> matrix rows come from column 1 of the ratings.dat file
#   'col': 0    -> matrix columns come from column 0 of the ratings.dat file
#   'value': 2  -> matrix values (Mij) come from column 2 of the ratings.dat file
#   'ids': int  -> row and column ids are integers (not strings)

# Sweep the number of factors K over 0, 50, ..., 250; ten random
# train/test splits are evaluated for every K and their RMSE values are
# accumulated in sum_value.
# NOTE(review): the sweep starts at K = 0 - confirm that compute() accepts it.
sum_value = 0.0
list = []
for j in range(0, 300, 50):
    sum_value = 0.0
    for i in range(1, 11):
        # Fresh random split and a fresh model for every run.
        K = j
        train, test = data.split_train_test(percent=PERCENT_TRAIN)
        svd = SVDNeighbourhood()
        svd.set_data(train)
        svd.compute(
            k=K,
            min_values=20,
            pre_normalize=None,
            mean_center=True,
            post_normalize=True,
        )

        # Prediction-based evaluation on the held-out ratings.
        rmse = RMSE()
        mae = MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(
                    item_id, user_id,
                    weighted=True, MIN_VALUE=0.0, MAX_VALUE=5.0,
                )
                for metric in (rmse, mae):
                    metric.add(rating, pred_rating)
            except KeyError:
                # Skip held-out pairs that raise KeyError.
                continue

        print('RMSE=%s' % rmse.compute())
        sum_value += rmse.compute()