def knnz_running_time(data):
    '''
    Calculates the running times for training and predictions for KNN with Z-score

    The hyper-parameters are tuned once with a grid search on ``data[3]`` and
    the best RMSE combination is then reused for every dataset in ``data``.

    Args:
        data(Dataset): a list of datasets with different numbers of users.
            Must hold at least four entries because ``data[3]`` is used for
            tuning (presumably the full dataset -- TODO confirm with caller).

    Returns:
        elapsedtime_KnnZtrain: list of training running times in seconds,
            one entry per dataset, in the order of ``data``
        elapsedtime_KnnZtest: list of prediction running times in seconds,
            one entry per dataset, in the order of ``data``
    '''
    elapsedtime_KnnZtrain = []
    elapsedtime_KnnZtest = []

    # tune the parameters on the entire data
    param_grid = {
        'k': [5, 10, 20],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson'],
            'min_support': [1, 5],
            'user_based': [False]
        }
    }
    grid_search = GridSearch(KNNWithZScore, param_grid,
                             measures=['RMSE'], verbose=False)
    grid_search.evaluate(data[3])
    param = grid_search.best_params['RMSE']
    k = param['k']
    # The similarity settings must be handed over as one `sim_options` dict;
    # passed as separate keyword arguments they are ignored and the defaults
    # are used instead of the tuned values.
    sim_options = {
        'name': param['sim_options']['name'],
        'min_support': param['sim_options']['min_support'],
        'user_based': param['sim_options']['user_based'],
    }

    # using the tuned parameters calculate running times
    for dataset in data:
        # training running time
        # NOTE: the timed window also covers building the full trainset and
        # the anti-testset, exactly as in the original measurement.
        training_start = time.time()
        training = dataset.build_full_trainset()
        testing = training.build_anti_testset()
        knnz = KNNWithZScore(k=k, sim_options=sim_options)
        knnz.train(training)
        elapsedtime_KnnZtrain.append(time.time() - training_start)

        # prediction running time
        test_start = time.time()
        knnz.test(testing)
        elapsedtime_KnnZtest.append(time.time() - test_start)

    return elapsedtime_KnnZtrain, elapsedtime_KnnZtest
def knn_z(data, training, testing):
    '''
    Tune KNN with Z-score parameters, then fit the tuned model and evaluate
    it on the test data

    Args:
        data(Dataset): the whole dataset divided into 5 folds
        training(Dataset): training dataset
        testing(Dataset): test dataset

    Returns:
        rmse: RMSE of KNN with Z-score with optimized parameters
        top_n: result of ``get_top_n(predictions, n=5)`` for the test
            predictions
    '''
    # candidate parameters
    knn_param_grid = {
        'k': [5, 10, 20],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson'],
            'min_support': [1, 5],
            'user_based': [False]
        }
    }

    # optimize parameters
    knnz_grid_search = GridSearch(KNNWithZScore, knn_param_grid,
                                  measures=['RMSE'], verbose=False)
    knnz_grid_search.evaluate(data)
    param = knnz_grid_search.best_params['RMSE']
    print('KNNWithZScore:', param)

    # fit model using the optimized parameters
    # The similarity settings have to be passed as a single `sim_options`
    # dict; as separate keyword arguments they are ignored and the model
    # would be fitted with the default similarity configuration.
    # A copy is passed so the grid-search result is not shared with the model.
    knnz = KNNWithZScore(k=param['k'],
                         sim_options=dict(param['sim_options']))
    knnz.train(training)

    # evaluate the model using test data
    predictions = knnz.test(testing)
    rmse = accuracy.rmse(predictions, verbose=True)
    top_n = get_top_n(predictions, n=5)

    return rmse, top_n
def CFZ(self):
    """Cross-validate a user-based cosine KNNWithZScore and print metrics.

    Splits ``self.data`` into 5 folds. For every fold the model is fitted on
    the training part, the test part is predicted, and the mean precision,
    mean recall and their F1 score are printed. Nothing is returned.
    """
    kf = KFold(n_splits=5)
    sim_options = {'name': 'cosine', 'user_based': True}
    algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
    for trainset, testset in kf.split(self.data):
        algo.fit(trainset)
        predictions = algo.test(testset)
        # Both results are mappings; only their values are averaged here.
        precisions, recalls = self.precision_recall_at_k(predictions)
        # Guard the averages and F1 against empty results / all-zero scores,
        # which would otherwise raise ZeroDivisionError.
        P = sum(precisions.values()) / len(precisions) if precisions else 0.0
        R = sum(recalls.values()) / len(recalls) if recalls else 0.0
        F1 = 2 * P * R / (P + R) if (P + R) else 0.0
        print("Precision : ", P)
        print("Recall : ", R)
        print("F1 : ", F1)
class KNN_Normalized(BaseSurpriseSTLEstimator):
    """Estimator wrapper around Surprise's ``KNNWithZScore``.

    The wrapped model is stored in ``self.model`` and is built with the
    neighbourhood size ``self.k``.
    """

    def __init__(self, k, name='KNN_Normalized'):
        """Create the estimator.

        Args:
            k: value passed as ``k`` to ``KNNWithZScore``.
            name: estimator name forwarded to the base class.
        """
        super().__init__(name, 'non_feature_based')
        self.k = k
        self.model = KNNWithZScore(k=self.k, verbose=False)

    def _fit(self, x):
        """Fit the wrapped model on ``x``."""
        self.model.fit(x)

    def _predict(self, x):
        """Return the wrapped model's predictions for ``x``."""
        return self.model.test(x)

    def get_hyper_params(self):
        """Return the hyper-parameter description for ``k``.

        NOTE(review): ``'values': [2, 13]`` looks like a lower/upper bound
        for an integer search -- confirm against the consumer of this dict.
        """
        hparams = {'k': {'type': 'integer', 'values': [2, 13]}}
        return hparams

    def set_hyper_params(self, **kwargs):
        """Apply a new ``k`` and rebuild the wrapped model.

        Rebuilding is required: the model captured ``k`` at construction
        time, so updating ``self.k`` alone would leave it using the old
        value. The new model is unfitted.

        Raises:
            KeyError: if ``k`` is not supplied.
        """
        self.k = kwargs['k']
        self.model = KNNWithZScore(k=self.k, verbose=False)

    def similarity_matrix(self):
        """Return the result of the wrapped model's ``compute_similarities()``."""
        return self.model.compute_similarities()
# Sweep over shrinkage, k and min_k: for every combination a KNNWithZScore
# model is trained on `trainset`, evaluated on `testset`, and its RMSE, FCP
# and MAE are printed.
# NOTE(review): `o`, `p`, `count`, `shrinkage`, `trainset` and `testset` are
# not defined in this fragment; presumably `o` and `p` are bound by enclosing
# loops over `name` and `user_based` -- confirm against the full file.
for q in shrinkage:
    for n1 in k:
        for n2 in min_k:
            print("================================================")
            sim_options = {'name': o, 'user_based': p, 'shrinkage': q}
            algo = KNNWithZScore(k=n1, min_k=n2, sim_options=sim_options)
            algo.train(trainset)
            # `count` is the running number of the combination being evaluated.
            print("This is the #" + str(count) + " parameter combination")
            predictions = algo.test(testset)
            print("name=" + str(o) + ", user_based=" + str(p) + ", shrinkage=" + str(q) + ", k=" + str(n1) + ", min_k=" + str(n2))
            # Each call prints its own metric because verbose=True.
            accuracy.rmse(predictions, verbose=True)
            accuracy.fcp(predictions, verbose=True)
            accuracy.mae(predictions, verbose=True)
            count = count + 1

# Candidate parameter values (these rebind `k` and `min_k` used by the loops
# above; presumably they feed a later sweep -- confirm against the full file).
name = ['cosine', 'pearson', 'msd']  # where default = 'msd'
user_based = [False]  # user or item based
k = [20, 40]  # maximum neighbors where default = 40
min_k = [1, 5]  # minimum neighbors where default = 1
We are setting minimum number of neighbous (min_k) 1 and maximum number of neighbours (k) = 40 We train the model on train set ''' algo2 = KNNBasic(sim_options=sim_options, k=40, min_k=1) algo2.fit(trainset) predictions2 = algo2.test(testset) print("RMSE for KNNBasic:", accuracy.rmse(predictions2, verbose=True)) # In[ ]: ''' We build the model by making use of KNNBasic which is collaborative filtering based algorithm. We are setting minimum number of neighbous (min_k) 1 and maximum number of neighbours (k) = 40 We train the model on train set ''' algo3 = KNNBaseline(sim_options=sim_options, k=40, min_k=1) algo3.fit(trainset) predictions3 = algo3.test(testset) print("RMSE for KNNBaseline:", accuracy.rmse(predictions3, verbose=True)) # In[ ]: ''' We build the model by making use of KNNBasic which is collaborative filtering based algorithm. We are setting minimum number of neighbous (min_k) 1 and maximum number of neighbours (k) = 40 We train the model on train set ''' algo4 = KNNWithZScore(sim_options=sim_options, k=40, min_k=1) algo4.fit(trainset) predictions4 = algo4.test(testset) print("RMSE for KNNBasic:", accuracy.rmse(predictions4, verbose=True))
@author: lishuang @description: 使用邻域的协同过滤对movie lens进行预测,并采用K折交叉验证 """ from surprise import KNNWithZScore, Reader, Dataset from surprise import accuracy from surprise.model_selection import KFold # 加载数据 reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1) data = Dataset.load_from_file('data/ratings.csv', reader) # ItemCF 计算得分 # 取最相思的用户计算时,只取最相思的k个 algo = KNNWithZScore(k=40, sim_options={ 'user_based': False, 'verbose': 'True' }) kf = KFold(n_splits=3) for train_set, test_set in kf.split(data): algo.fit(train_set) pred = algo.test(test_set) rmse = accuracy.rmse(pred, verbose=True) accuracy.mae(pred, verbose=True) print(rmse)