def predict_rating(probID, userID, MIN_RATING, MAX_RATING, SVDNeighbourhood=False):
    """Load a saved factorization model and predict a single rating.

    Args:
        probID: First identifier forwarded to the model's ``predict``.
        userID: Second identifier forwarded to the model's ``predict``.
        MIN_RATING: Lower bound forwarded to ``predict``.
        MAX_RATING: Upper bound forwarded to ``predict``.
        SVDNeighbourhood: When truthy, use the neighbourhood model saved as
            ``ratings_neigh``; otherwise use the plain ``SVD`` model saved
            as ``ratings``.

    Returns:
        Whatever the loaded model's ``predict`` returns.
    """
    if SVDNeighbourhood:
        # The flag parameter shadows the module-level SVDNeighbourhood class,
        # so calling the bare name here would call the flag itself
        # (``True()``) and raise TypeError.  The parameter name is part of
        # the public signature, so look the class up in module scope instead.
        model_class = globals()['SVDNeighbourhood']
        model_name = 'ratings_neigh'
    else:
        model_class = SVD
        model_name = 'ratings'

    svd2 = model_class()
    svd2.load_model(os.path.join(utils.get_add_dir(), model_name))
    return svd2.predict(probID, userID, MIN_RATING, MAX_RATING)
class KNNPlusSVD_lib:
    """Rating predictor backed by a neighbourhood SVD model.

    The ratings file is loaded with file column 0 mapped to the matrix
    'col', column 1 to the matrix 'row' and column 2 to the value.
    """

    def __init__(self, filename, K):
        """Load the ratings in ``filename``; ``K`` is passed to ``compute``."""
        self.svd = SVDNeighbourhood()
        self.K = K
        self.svd.load_data(filename, sep=' ',
                           format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
        # K used for the most recent factorization; None means "not computed".
        self._computed_k = None

    def predict(self, userId, itemId):
        """Return the predicted rating of ``itemId`` by ``userId``.

        The prediction is requested with ``weighted=True`` and clamped with
        ``MIN_VALUE=1.0`` / ``MAX_VALUE=5.0``.
        """
        # Factorize only when needed: on first use, or after self.K changed.
        # Previously the whole model was recomputed for every prediction.
        if getattr(self, '_computed_k', None) != self.K:
            self.svd.compute(self.K, min_values=5, pre_normalize='all',
                             mean_center=True, post_normalize=None)
            self._computed_k = self.K

        # Previously the ids were ignored in favour of the hard-coded pair
        # (11, 33).
        # NOTE(review): assumes the file is laid out as "user item rating",
        # so users are matrix columns and items are rows, and that predict()
        # takes (row, col) -- confirm against the data file.
        return self.svd.predict(itemId, userId, weighted=True,
                                MIN_VALUE=1.0, MAX_VALUE=5.0)
# Disabled experiment that used the Recommender wrapper:
# recommender = Recommender()
# recommender.load_web_data('dataset',
#     [{'Запах женщины': 9, 'The Usual Suspects': 8, 'The Departed': 8,
#       'Тутси': 7, 'Выпускник': 10, 'Залечь на дно в Брюгге': 4, 'Евротур': 7,
#       'Goodfellas': 6, 'Донни Браско': 8, 'Амели': 3, 'Идиократия': 7}],
#     100, 0, 10, 10)
# recommender.load_local_data('dataset', K=100, min_values=0)
# m = recommender.matrix.get_rating_matrix()
# # m1 = recommender.get_predictions_for_all_users()

from recsys.algorithm.factorize import SVDNeighbourhood

# Load the ratings (file column 0 -> row, 1 -> col, 2 -> value), factorize
# and print one prediction.
svd = SVDNeighbourhood()
svd.load_data('test_dataset', sep=' ',
              format={'col': 1, 'row': 0, 'value': 2, 'ids': int})
svd.compute(100, 0)
# Call form prints the same text on Python 2 and also parses on Python 3.
print(svd.predict(108, 698))

# Disabled alternative configuration:
# svd.load_data(filename=sys.argv[1], sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
# K=100
# svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)
# Evaluate: score the plain SVD model and the neighbourhood model on the same
# test ratings, then append this run's RMSE/MAE to the *_all result lists.
import math

rmse_svd = RMSE()
mae_svd = MAE()
rmse_svd_neig = RMSE()
mae_svd_neig = MAE()

i = 1  # number of test ratings processed so far (progress display only)
total = len(test.get())
print("Total Test ratings: %s" % total)
for rating, item_id, user_id in test:
    try:
        pred_rating_svd = svd.predict(item_id, user_id)
        rmse_svd.add(rating, pred_rating_svd)
        mae_svd.add(rating, pred_rating_svd)

        pred_rating_svd_neig = svd_neig.predict(item_id, user_id)  # Koren & co.
        # `is not nan` only recognised the one `nan` object by identity; a
        # NaN produced by arithmetic would slip through and poison the
        # metrics.  isnan() rejects every NaN value.
        if not math.isnan(pred_rating_svd_neig):
            rmse_svd_neig.add(rating, pred_rating_svd_neig)
            mae_svd_neig.add(rating, pred_rating_svd_neig)

        # Rewrite the same console line; "\r" returns to the line start.
        sys.stdout.write("\rProcessed test rating %d" % i)
        sys.stdout.flush()

        i += 1
    except KeyError:
        # A lookup failed for this rating's ids in one of the models; skip it.
        continue

rmse_svd_all.append(rmse_svd.compute())
mae_svd_all.append(mae_svd.compute())
rmse_svd_neig_all.append(rmse_svd_neig.compute())
mae_svd_neig_all.append(mae_svd_neig.compute())
# Train and evaluate on several fresh train/test splits for the current k
# (`j`, set by code outside this block) and record the mean RMSE.
num_runs = 10
sum_value = 0.0
for i in range(1, num_runs + 1):
    # Train & Test data
    train, test = data.split_train_test(percent=PERCENT_TRAIN)
    K = j
    svd = SVDNeighbourhood()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)

    # Evaluation using prediction-based metrics
    rmse = RMSE()
    mae = MAE()  # accumulated alongside RMSE but never reported here
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            # A lookup failed for this rating's ids; skip it.
            continue

    # Compute once per run; it was previously evaluated twice.
    run_rmse = rmse.compute()
    print('RMSE=%s' % run_rmse)
    sum_value = sum_value + run_rmse

# Average before reporting: "Final RMSE" used to print the raw sum of the
# per-run values, and the divisor was a literal 10 detached from the loop.
sum_value = sum_value / num_runs
print('-------')
print('the k value is %s' % j)
print('Final RMSE=%s' % sum_value)
print('-------')
# NOTE(review): `list` is a results container defined outside this block and
# shadows the builtin; rename it at its definition.
list.append(sum_value)
print('i value is')
if __name__ == "__main__":
    # Disabled experiment that used the Recommender wrapper:
    # recommender = Recommender()
    # recommender.load_web_data('dataset',
    #     [{'Запах женщины': 9, 'The Usual Suspects': 8, 'The Departed': 8,
    #       'Тутси': 7, 'Выпускник': 10, 'Залечь на дно в Брюгге': 4, 'Евротур': 7,
    #       'Goodfellas': 6, 'Донни Браско': 8, 'Амели': 3, 'Идиократия': 7}],
    #     100, 0, 10, 10)
    # recommender.load_local_data('dataset', K=100, min_values=0)
    # m = recommender.matrix.get_rating_matrix()
    # # m1 = recommender.get_predictions_for_all_users()

    from recsys.algorithm.factorize import SVDNeighbourhood

    # Load the ratings (file column 0 -> row, 1 -> col, 2 -> value),
    # factorize and print one prediction.
    svd = SVDNeighbourhood()
    svd.load_data('test_dataset', sep=' ',
                  format={'col': 1, 'row': 0, 'value': 2, 'ids': int})
    svd.compute(100, 0)
    # Call form prints the same text on Python 2 and also parses on Python 3.
    print(svd.predict(108, 698))

    # Disabled alternative configuration:
    # svd.load_data(filename=sys.argv[1], sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
    # K=100
    # svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)
# Evaluate: score the plain SVD model and the neighbourhood model on the same
# test ratings, then append this run's RMSE/MAE to the *_all result lists.
import math

rmse_svd = RMSE()
mae_svd = MAE()
rmse_svd_neig = RMSE()
mae_svd_neig = MAE()

i = 1  # number of test ratings processed so far (progress display only)
total = len(test.get())
print('Total Test ratings: %s' % total)
for rating, item_id, user_id in test:
    try:
        pred_rating_svd = svd.predict(item_id, user_id)
        rmse_svd.add(rating, pred_rating_svd)
        mae_svd.add(rating, pred_rating_svd)

        pred_rating_svd_neig = svd_neig.predict(item_id, user_id)  # Koren & co.
        # `is not nan` only recognised the one `nan` object by identity; a
        # NaN produced by arithmetic would slip through and poison the
        # metrics.  isnan() rejects every NaN value.
        if not math.isnan(pred_rating_svd_neig):
            rmse_svd_neig.add(rating, pred_rating_svd_neig)
            mae_svd_neig.add(rating, pred_rating_svd_neig)

        # Rewrite the same console line; "\r" returns to the line start.
        sys.stdout.write("\rProcessed test rating %d" % i)
        sys.stdout.flush()

        i += 1
    except KeyError:
        # A lookup failed for this rating's ids in one of the models; skip it.
        continue

rmse_svd_all.append(rmse_svd.compute())
mae_svd_all.append(mae_svd.compute())
rmse_svd_neig_all.append(rmse_svd_neig.compute())
mae_svd_neig_all.append(mae_svd_neig.compute())
__author__ = 'ponomarevandrew'

from recsys.algorithm.factorize import SVDNeighbourhood

# Build a neighbourhood SVD model from the ratings file and print a single
# weighted prediction clamped to the 1.0-5.0 range.
svd = SVDNeighbourhood()
# File column 0 -> matrix col, column 1 -> matrix row, column 2 -> value.
# NOTE(review): the separator is a single space here; confirm it matches the
# delimiter actually used in ml-100k/u1.base.
svd.load_data(
    filename='ml-100k/u1.base',
    sep=' ',
    format={'col': 0, 'row': 1, 'value': 2, 'ids': int}
)

K = 30  # value passed as `k` to compute()
svd.compute(
    k=K,
    min_values=5,
    pre_normalize='all',
    mean_center=True,
    post_normalize=None
)

prediction = svd.predict(11, 33, weighted=True, MIN_VALUE=1.0, MAX_VALUE=5.0)
print(prediction)
for j in range(0,300,50): sum_value = 0.0 for i in range(1,11): #Create SVD K= j train, test = data.split_train_test(percent=PERCENT_TRAIN) svd = SVDNeighbourhood() svd.set_data(train) svd.compute(k=K, min_values=20, pre_normalize=None, mean_center=True, post_normalize=True) #Evaluation using prediction-based metrics rmse = RMSE() mae = MAE() for rating, item_id, user_id in test.get(): try: pred_rating = svd.predict(item_id, user_id, weighted=True, MIN_VALUE=0.0, MAX_VALUE=5.0) rmse.add(rating, pred_rating) mae.add(rating, pred_rating) except KeyError: continue print 'RMSE=%s' % rmse.compute() sum_value = sum_value + rmse.compute() print '-------' print 'the k value is %s' %j print 'Final RMSE=%s' % sum_value print '-------' sum_value = sum_value/10 list.append(sum_value) print 'i value is' for i in range(0,300,50):