def CFZ(self):
    """Fit a user-based cosine KNNWithZScore model and score known ratings.

    The model (k=40, min_k=1) is fitted on ``self.trainset``. For every user
    id in ``self.list`` the rows of ``self.data`` belonging to that user are
    selected and a prediction is made for each of them except the last one.

    Side effects:
        self.df_est: DataFrame with columns ``uid``, ``Iid``, ``r_ui`` and
            ``est``, one row per prediction.
        self.arr: the distinct user ids present in ``self.df_est``.
        self.CFWZ_ndcg_: whatever ``self.Calculate_NDCG()`` returns.
    """
    sim_options = {'name': 'cosine', 'user_based': True}
    algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
    algo.fit(self.trainset)

    # Collect into plain lists and convert once at the end; growing NumPy
    # arrays with np.append inside the loop copies the whole array each time.
    user_ids = []
    item_ids = []
    true_ratings = []
    estimates = []

    for uid in self.list:
        # One filter per user is enough (it used to be evaluated twice).
        user_rows = self.data[self.data.uid == uid]
        # NOTE(review): the last row of every user is skipped, exactly as the
        # previous ``range(1, len(rows))`` loop did - confirm this is intended.
        items = user_rows.lid.values[:-1]
        rates = user_rows.rate.values[:-1]
        for lid, r_ui in zip(items, rates):
            pred = algo.predict(uid, lid, r_ui, verbose=True)
            user_ids.append(int(pred.uid))
            item_ids.append(int(pred.iid))
            true_ratings.append(pred.r_ui)
            estimates.append(pred.est)

    self.df_est = pd.DataFrame({
        'uid': user_ids,
        'Iid': item_ids,
        # Explicit float dtype keeps the columns float64 even when empty,
        # matching the arrays that np.append used to produce.
        'r_ui': np.array(true_ratings, dtype=float),
        'est': np.array(estimates, dtype=float),
    })
    self.arr = self.df_est['uid'].unique()
    self.CFWZ_ndcg_ = self.Calculate_NDCG()
def collab_recommender(train_data, test_data, user_based=True,
                       normalization=False, k=100, sim='cosine'):
    """Predict ratings with a mean- or z-score-normalised KNN model.

    Input:
    - train_data: dataframe, n*3, columns are ['userid', 'movieid', 'rating']
    - test_data: dataframe, n*2, columns are ['userid', 'movieid']
    - user_based: boolean, use user-based knn algorithm if True, use
      item-based knn algorithm if False
    - normalization: boolean, use KNNWithZScore if True, KNNWithMeans if False
    - k: int, number of nearest neighbors
    - sim: string, similarity measure from
      ['cosine', 'pearson', 'msd', 'pearson_baseline']

    Output:
    - pred_rating: dataframe, n*2, columns are ['movieid', 'rating'], one row
      per row of test_data in the same order. None is returned when an
      unexpected (non-ValueError) exception occurred; that exception is logged.

    Raises:
    - ValueError: re-raised unchanged (after a warning is logged) when one is
      raised while loading the data, fitting or predicting.
    """
    try:
        function_log.trace('Start collaborative recommendation function')
        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(train_data, reader)
        sim_options = {'name': sim, 'user_based': user_based}
        algo_cls = KNNWithZScore if normalization else KNNWithMeans
        algo = algo_cls(k=k, sim_options=sim_options, verbose=False)
        train_set = data.build_full_trainset()
        algo.fit(train_set)

        movie_ids = []
        ratings = []
        # Walk the two columns positionally: label-based ``.loc[idx, col]``
        # returns a Series instead of a scalar when index labels repeat.
        user_col = test_data['userid'].values
        movie_col = test_data['movieid'].values
        for user_id, movie_id in zip(user_col, movie_col):
            movie_ids.append(movie_id)
            ratings.append(algo.predict(user_id, movie_id).est)

        function_log.trace('Finish collaborative recommendation function')
        return pd.DataFrame({'movieid': movie_ids, 'rating': ratings})
    except ValueError:
        function_log.warn("Training and test data cannot be none.")
        # Bare raise keeps the original message and traceback.
        raise
    except Exception as x:
        function_log.exception(
            f'collaborative recommendation function failed {x}')
        # Best-effort contract kept from the original: log and return None.
        return None
def CFZ(self):
    """Fit a user-based cosine KNNWithZScore model and predict known ratings.

    The model (k=40, min_k=1) is fitted on ``self.trainset``. For every user
    id in ``self.list`` a prediction is made (and printed, because
    ``verbose=True``) for each of that user's rows in ``self.data`` except
    the last one.

    Returns:
        The last prediction that was made, or None when no prediction was
        made at all (empty ``self.list`` or fewer than two rows per user).
    """
    sim_options = {'name': 'cosine', 'user_based': True}
    algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
    algo.fit(self.trainset)

    # Bound up front so the final return cannot hit an unbound local when
    # the loops never execute.
    pred = None
    for uid in self.list:
        # One filter per user is enough (it used to be evaluated twice).
        user_rows = self.data[self.data.uid == uid]
        # NOTE(review): the last row of every user is skipped, exactly as the
        # previous ``range(1, len(rows))`` loop did - confirm this is intended.
        items = user_rows.lid.values[:-1]
        rates = user_rows.rate.values[:-1]
        for lid, r_ui in zip(items, rates):
            pred = algo.predict(uid, lid, r_ui, verbose=True)
    return pred
def computeKNNZScoreMovie(data, test_np):
    """Predict test ratings with an item-based k-NN z-score model.

    The model is trained on all of the training data with these settings:
        - Similarity function: Pearson baseline, item based
        - Number of closest neighbors: 108

    data : data frame which represents the train set
    test_np : data frame for which the predictions are computed

    return : the test frame produced by ``dataTrainSurprise`` with an extra
             prediction column named 'knnzscore_item_rating'
    """
    trainset, test = dataTrainSurprise(data, test_np)

    model = KNNWithZScore(
        k=108,
        sim_options={'name': 'pearson_baseline', 'user_based': False},
    )
    # Keep using the object returned by fit(), as the chained call did.
    fitted = model.fit(trainset)

    def estimate(row):
        # ``est`` is the field at position 3 of the prediction tuple.
        return fitted.predict(row['user_id'], row['movie_id']).est

    id_columns = test[['user_id', 'movie_id']]
    test['knnzscore_item_rating'] = id_columns.apply(estimate, axis=1)
    return test
# True ratings passed along with the four (uid, iid) pairs below.
r_ui1, r_ui2, r_ui3, r_ui4 = 4, 4, 1, 3
verboseFlag = True

# Predict the same four user/item pairs with each fitted algorithm, in order.
print("KNNBaseLine:")
predBaseLine1, predBaseLine2, predBaseLine3, predBaseLine4 = [
    algoBaseLine.predict(user, item, r_ui=truth, verbose=verboseFlag)
    for user, item, truth in zip(
        (uid1, uid2, uid3, uid4),
        (iid1, iid2, iid3, iid4),
        (r_ui1, r_ui2, r_ui3, r_ui4),
    )
]

print("\nKNNBasic:")
predBasic1, predBasic2, predBasic3, predBasic4 = [
    algoBasic.predict(user, item, r_ui=truth, verbose=verboseFlag)
    for user, item, truth in zip(
        (uid1, uid2, uid3, uid4),
        (iid1, iid2, iid3, iid4),
        (r_ui1, r_ui2, r_ui3, r_ui4),
    )
]

print("\nKNNWithMeans:")
predWithMeans1, predWithMeans2, predWithMeans3, predWithMeans4 = [
    algoWithMeans.predict(user, item, r_ui=truth, verbose=verboseFlag)
    for user, item, truth in zip(
        (uid1, uid2, uid3, uid4),
        (iid1, iid2, iid3, iid4),
        (r_ui1, r_ui2, r_ui3, r_ui4),
    )
]

print("\nKNNWithZScore:")
predWithZScore1, predWithZScore2, predWithZScore3, predWithZScore4 = [
    algoWithZScore.predict(user, item, r_ui=truth, verbose=verboseFlag)
    for user, item, truth in zip(
        (uid1, uid2, uid3, uid4),
        (iid1, iid2, iid3, iid4),
        (r_ui1, r_ui2, r_ui3, r_ui4),
    )
]
# Remaining KNNWithMeans predictions (pairs 2 to 4), made in order.
predWithMeans2, predWithMeans3, predWithMeans4 = [
    algoWithMeans.predict(user, item, r_ui=truth, verbose=verboseFlag)
    for user, item, truth in zip(
        (uid2, uid3, uid4),
        (iid2, iid3, iid4),
        (r_ui2, r_ui3, r_ui4),
    )
]

# All four pairs again, this time with the z-score variant.
print("\nKNNWithZScore:")
predWithZScore1, predWithZScore2, predWithZScore3, predWithZScore4 = [
    algoWithZScore.predict(user, item, r_ui=truth, verbose=verboseFlag)
    for user, item, truth in zip(
        (uid1, uid2, uid3, uid4),
        (iid1, iid2, iid3, iid4),
        (r_ui1, r_ui2, r_ui3, r_ui4),
    )
]
# Third model: KNNWithMeans with k=40 and min_k=1; its similarity settings
# come from sim_options2, which is defined outside this fragment.
algo3 = KNNWithMeans(k=40, min_k=1, sim_options=sim_options2, verbose=True)
# Fit on the full training set built from data_train.
algo3.fit(data_train.build_full_trainset())
# For model N, predN collects the rounded integer predictions and pred_fN the
# unrounded values.
pred1 = []
pred_f1 = []
pred2 = []
pred_f2 = []
pred3 = []
pred_f3 = []
# Each line of the test file is split on commas; its first two fields are
# passed, as strings, to the three models (algo1 and algo2 are created outside
# this fragment).
with open("./data/testing.dat", "r", encoding='utf-8') as f:
    for line in f.readlines():
        line_data = line.strip().split(",")
        # NOTE(review): the positional None, True, True presumably map to
        # r_ui, clip and verbose of Surprise's predict(), and index 3 of the
        # result to the estimated rating - confirm against the library docs.
        a = algo1.predict(str(line_data[0]), str(line_data[1]), None, True, True)[3]
        b = algo2.predict(str(line_data[0]), str(line_data[1]), None, True, True)[3]
        c = algo3.predict(str(line_data[0]), str(line_data[1]), None, True, True)[3]
        pred1.append(int(round(a)))
        pred_f1.append(a)
        pred2.append(int(round(b)))
        pred_f2.append(b)
        pred3.append(int(round(c)))
        pred_f3.append(c)
# Write the rounded predictions of the first model, one per line.
with open("./雷雨轩_PB18111791_4.txt", "w") as f:
    for ratings in pred1:
        f.write(str(ratings) + "\n")
# Iterate over the unrounded predictions of the first model; the loop body
# lies beyond the end of this fragment.
with open("./4_float.txt", "w") as f:
    for ratings in pred_f1:
# Prediction from the first model, which is created outside this fragment.
pred1 = algo1.predict(uid, iid, verbose=True)

# KNNWithMeans: item-based, cosine similarity.
algo2 = KNNWithMeans(
    k=30,
    sim_options=dict(name='cosine', user_based=False),
    verbose=True,
)
algo2.fit(trainset)
pred2 = algo2.predict(uid, iid, verbose=True)

# KNNWithZScore: user-based, MSD similarity.
algo3 = KNNWithZScore(
    k=30,
    sim_options=dict(name='MSD', user_based=True),
    verbose=True,
)
algo3.fit(trainset)
pred3 = algo3.predict(uid, iid, verbose=True)

# KNNBaseline: user-based, MSD similarity.
algo4 = KNNBaseline(
    k=30,
    sim_options=dict(name='MSD', user_based=True),
    verbose=True,
)
algo4.fit(trainset)
pred4 = algo4.predict(uid, iid, verbose=True)
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1) data = Dataset.load_from_file('./ratings.csv', reader=reader) train_set = data.build_full_trainset() from surprise import KNNWithZScore algo = KNNWithZScore(k=50, sim_options={ 'user_based': False, 'verbose': 'True' }) algo.fit(train_set) uid = str(196) iid = str(332) pred = algo.predict(uid, iid, r_ui=4, verbose=True) kf = KFold(n_splits=3) for trainset, testset in kf.split(data): algo.fit(trainset) predictions = algo.test(testset) #计算RMSE,AME accuracy.rmse(predictions, verbose=True) accuracy.mae(predictions, verbose=True) ### 使用协同过滤正态分布 User based from surprise import KNNWithZScore algo = KNNWithZScore(k=50, sim_options={ 'user_based': False, 'verbose': 'True'