def cal_KNNWithZScore(trainset, df):
    # KNN with z-score normalization, user-based cosine similarity
    sim_options = {'name': 'cosine', 'user_based': True}
    algo_knnz = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
    algo_knnz.fit(trainset)

    users = []
    items = []
    real = []
    estimate = []
    for i in range(len(df)):
        uid = df[i:i + 1].user.values[0]
        users.append(uid)
        iid = df[i:i + 1].store.values[0]
        items.append(iid)
        r_ui = df[i:i + 1].stars.values[0]
        real.append(r_ui)
        pred = algo_knnz.predict(uid, iid, r_ui, verbose=True)
        estimate.append(pred.est)  # keep only the estimated rating
    print("end")

    df5 = pd.DataFrame(columns=['user', 'item', 'r_ui', 'est'])
    df5['user'] = users
    df5['item'] = items
    df5['r_ui'] = real
    df5['est'] = estimate
    df5['err'] = abs(df5.est - df5.r_ui)
    df5.to_csv(save_file2)
def CFZ(self):
    u_id = []
    i_id = []
    r_ui_ = np.array([])
    _est = np.array([])
    sim_options = {'name': 'cosine', 'user_based': True}
    algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
    algo.fit(self.trainset)
    for uid in self.list:
        lids = self.data[self.data.uid == uid]
        for i in range(len(lids)):  # iterate over every rating of this user
            lid = lids[i:i + 1].lid.values[0]
            r_ui = lids[i:i + 1].rate.values[0]
            pred = algo.predict(uid, lid, r_ui, verbose=True)
            u_id.append(int(pred.uid))
            i_id.append(int(pred.iid))
            r_ui_ = np.append(r_ui_, pred.r_ui)
            _est = np.append(_est, pred.est)
    self.df_est = pd.DataFrame({
        'uid': u_id,
        'Iid': i_id,
        'r_ui': r_ui_,
        'est': _est
    })
    self.arr = self.df_est['uid'].unique()
    self.CFWZ_ndcg_ = self.Calculate_NDCG()
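# Calculate_NDCG is not shown in this excerpt. A hypothetical sketch of what
# it might compute, assuming per-user NDCG over self.df_est with items ranked
# by estimated rating and true ratings used as relevance (the real helper may
# well differ):
import numpy as np

def Calculate_NDCG(self):
    ndcgs = []
    for uid in self.arr:
        user_df = self.df_est[self.df_est.uid == uid]
        # relevance in predicted order vs. ideal (descending true) order
        est_order = user_df.sort_values('est', ascending=False).r_ui.values
        ideal_order = np.sort(user_df.r_ui.values)[::-1]
        discounts = np.log2(np.arange(2, len(est_order) + 2))
        dcg = np.sum(est_order / discounts)
        idcg = np.sum(ideal_order / discounts)
        ndcgs.append(dcg / idcg if idcg > 0 else 0.0)
    return float(np.mean(ndcgs))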
def collab_recommender(train_data, test_data, user_based=True,
                       normalization=False, k=100, sim='cosine'):
    """
    Input:
    - train_data: dataframe, n*3, columns are ['userid', 'movieid', 'rating']
    - test_data: dataframe, n*2, columns are ['userid', 'movieid']
    - user_based: boolean, use the user-based KNN algorithm if True,
      the item-based KNN algorithm if False
    - normalization: boolean, apply z-score normalization to the user/item
      ratings if True
    - k: int, number of nearest neighbors
    - sim: string, similarity measure, one of
      ['cosine', 'pearson', 'msd', 'pearson_baseline']

    Output:
    - pred_rating: dataframe, n*2, columns are ['movieid', 'rating']
    """
    try:
        function_log.trace('Start collaborative recommendation function')
        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(train_data, reader)
        sim_options = {'name': sim, 'user_based': user_based}
        if normalization:
            algo = KNNWithZScore(k=k, sim_options=sim_options, verbose=False)
        else:
            algo = KNNWithMeans(k=k, sim_options=sim_options, verbose=False)
        train_set = data.build_full_trainset()
        algo.fit(train_set)
        pred_rating = {'movieid': [], 'rating': []}
        for idx in test_data.index:
            pred_rating['movieid'].append(test_data.loc[idx, 'movieid'])
            pred = algo.predict(test_data.loc[idx, 'userid'],
                                test_data.loc[idx, 'movieid'])
            pred_rating['rating'].append(pred.est)
        function_log.trace('Finish collaborative recommendation function')
        return pd.DataFrame(pred_rating)
    except ValueError:
        function_log.warn("Training and test data cannot be None.")
        raise  # re-raise the original ValueError rather than a bare new one
    except Exception as x:
        function_log.exception(
            f'collaborative recommendation function failed {x}')
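# A minimal usage sketch for collab_recommender. The dataframes and ratings
# below are made up for illustration, and the call assumes the surrounding
# module's function_log is configured:
train_df = pd.DataFrame({'userid': [1, 1, 2, 2, 3],
                         'movieid': [10, 20, 10, 30, 20],
                         'rating': [4, 3, 5, 2, 4]})
test_df = pd.DataFrame({'userid': [3], 'movieid': [10]})
preds = collab_recommender(train_df, test_df, user_based=True,
                           normalization=True, k=2, sim='cosine')
print(preds)  # one estimated rating per test row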
def CFZ(self):
    sim_options = {'name': 'cosine', 'user_based': True}
    algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
    algo.fit(self.trainset)
    for uid in self.list:
        lids = self.data[self.data.uid == uid]
        for i in range(len(lids)):  # iterate over every rating of this user
            lid = lids[i:i + 1].lid.values[0]
            r_ui = lids[i:i + 1].rate.values[0]
            pred = algo.predict(uid, lid, r_ui, verbose=True)
    # Note: only the prediction for the final (user, item) pair survives the loops
    return pred
def CFZ(self):
    kf = KFold(n_splits=5)
    sim_options = {'name': 'cosine', 'user_based': True}
    algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
    for trainset, testset in kf.split(self.data):
        algo.fit(trainset)
        predictions = algo.test(testset)
        precisions, recalls = self.precision_recall_at_k(predictions)
        # average the per-user precision/recall over all users in this fold
        P = sum(prec for prec in precisions.values()) / len(precisions)
        R = sum(rec for rec in recalls.values()) / len(recalls)
        F1 = 2 * P * R / (P + R)
        print("Precision : ", P)
        print("Recall : ", R)
        print("F1 : ", F1)
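# precision_recall_at_k is not defined in this excerpt. A common
# implementation, adapted from the Surprise FAQ, looks like the sketch below;
# the helper actually used here may differ in its k/threshold defaults.
from collections import defaultdict

def precision_recall_at_k(predictions, k=10, threshold=3.5):
    # Group the (estimated, true) rating pairs by user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions, recalls = {}, {}
    for uid, user_ratings in user_est_true.items():
        # Rank this user's items by estimated rating, best first.
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        n_rel = sum(true_r >= threshold for (_, true_r) in user_ratings)
        n_rec_k = sum(est >= threshold for (est, _) in user_ratings[:k])
        n_rel_and_rec_k = sum((true_r >= threshold) and (est >= threshold)
                              for (est, true_r) in user_ratings[:k])
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k else 0
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel else 0
    return precisions, recalls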
class KNN_Normalized(BaseSurpriseSTLEstimator):
    def __init__(self, k, name='KNN_Normalized'):
        super().__init__(name, 'non_feature_based')
        self.k = k
        self.model = KNNWithZScore(k=self.k, verbose=False)

    def _fit(self, x):
        self.model.fit(x)

    def _predict(self, x):
        return self.model.test(x)

    def get_hyper_params(self):
        hparams = {'k': {'type': 'integer', 'values': [2, 13]}}
        return hparams

    def set_hyper_params(self, **kwargs):
        self.k = kwargs['k']
        # rebuild the underlying model so the new k actually takes effect
        self.model = KNNWithZScore(k=self.k, verbose=False)

    def similarity_matrix(self):
        return self.model.compute_similarities()
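# Hypothetical usage, assuming BaseSurpriseSTLEstimator (not shown in this
# excerpt) dispatches its public fit/predict to _fit/_predict:
knn = KNN_Normalized(k=10)
print(knn.get_hyper_params())  # {'k': {'type': 'integer', 'values': [2, 13]}}
knn.set_hyper_params(k=5)      # rebuilds the underlying KNNWithZScore with k=5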
data = Dataset.load_builtin('ml-100k')

# Retrieve the trainset.
trainset = data.build_full_trainset()

# Create instances of the 4 KNN-based algorithms
algoBaseLine = KNNBaseline()
algoBasic = KNNBasic()
algoWithMeans = KNNWithMeans()
algoWithZScore = KNNWithZScore()

# Train the 4 algorithms on the training set
algoBaseLine.fit(trainset)
algoBasic.fit(trainset)
algoWithMeans.fit(trainset)
algoWithZScore.fit(trainset)

uid1 = str(196)  # raw user ids (as in the ratings file). They are **strings**!
uid2 = str(73)
uid3 = str(423)
uid4 = str(504)
iid1 = str(306)  # raw item ids (as in the ratings file). They are **strings**!
iid2 = str(514)
iid3 = str(977)
iid4 = str(370)
r_ui1 = 4
r_ui2 = 4
r_ui3 = 1
r_ui4 = 3
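# The ids and true ratings above are never consumed in this excerpt; a minimal
# sketch of how they would typically feed the trained algorithms:
pred1 = algoBaseLine.predict(uid1, iid1, r_ui=r_ui1, verbose=True)
pred2 = algoBasic.predict(uid2, iid2, r_ui=r_ui2, verbose=True)
pred3 = algoWithMeans.predict(uid3, iid3, r_ui=r_ui3, verbose=True)
pred4 = algoWithZScore.predict(uid4, iid4, r_ui=r_ui4, verbose=True)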
}
bsl_options_SGD = {'method': 'sgd', 'learning_rate': .00005, 'n_epochs': 20}
bsl_options_ALS = {'method': 'als', 'n_epochs': 5, 'reg_u': 12, 'reg_i': 5}

#algo = KNNWithMeans(k=40, min_k=1, sim_options=sim_options, verbose=True)
algo1 = KNNBaseline(k=40, min_k=1, sim_options=sim_options1,
                    bsl_options=bsl_options_ALS, verbose=True)
algo1.fit(data_train.build_full_trainset())
algo2 = KNNWithZScore(k=40, min_k=1, sim_options=sim_options1, verbose=True)
algo2.fit(data_train.build_full_trainset())
algo3 = KNNWithMeans(k=40, min_k=1, sim_options=sim_options2, verbose=True)
algo3.fit(data_train.build_full_trainset())

pred1 = []
pred_f1 = []
pred2 = []
pred_f2 = []
pred3 = []
pred_f3 = []
with open("./data/testing.dat", "r", encoding='utf-8') as f:
    for line in f.readlines():
        line_data = line.strip().split(",")
        # predict(uid, iid, r_ui=None, clip=True, verbose=True); .est is the
        # estimated rating (equivalent to indexing the Prediction tuple at [3])
        a = algo1.predict(str(line_data[0]), str(line_data[1]),
                          None, True, True).est
ratings = [i[2] for i in all_ratings]
ratings_dict = {
    "user": user_ids,
    "item": food_ids,
    "rating": ratings,
}
df = pd.DataFrame(ratings_dict)
reader = Reader(rating_scale=(1, 5))

# Loads Pandas dataframes and fits the user- and item-based models
training_user_data = Dataset.load_from_df(df[["user", "item", "rating"]], reader)
trainingSet_user = training_user_data.build_full_trainset()
user_based.fit(trainingSet_user)

training_item_data = Dataset.load_from_df(df[["item", "user", "rating"]], reader)
trainingSet_item = training_item_data.build_full_trainset()
item_based.fit(trainingSet_item)

# ------------------------ USER_BASED FOOD_ITEMS ------------------------

# Get all the food items based on the user's specified filters
def get_data_to_recommend(parameters):
    if parameters[5] == "true":
        available_query = """AND F.AVAILABLE = %s
                             AND NOW() BETWEEN RES.OPENING_TIME AND RES.CLOSING_TIME"""
sep=",", rating_scale=(1, 5), skip_lines=1) # * loading the csv data = Dataset.load_from_file( file_path="../../ML_Dataset/ml-latest-small/ratings.csv", reader=reader) # * dividing in train and test sets trainset, testset = train_test_split(data, test_size=0.25) # * define a cross-validation iterator kf = KFold(n_splits=5) # * Choosing KNN with Z-Score as algorithm algo = KNNWithZScore() # * Train the algorithm on the trainset, and predict ratings for the testset for trainset, testset in kf.split(data): predictions = algo.fit(trainset).test(testset) precisions, recalls = precision_recall_at_k(predictions, k=5, threshold=4) accuracy.rmse(predictions) accuracy.mae(predictions) accuracy.mse(predictions) accuracy.fcp(predictions) print("Precision: ", sum(prec for prec in precisions.values()) / len(precisions)) print("Recall: ", sum(rec for rec in recalls.values()) / len(recalls)) df = pd.DataFrame(predictions, columns=["uid", "iid", "rui", "est", "details"]) df["err"] = abs(df.est - df.rui) df.to_csv("predictions_KNNZScore.csv")
pred1 = algo1.predict(uid, iid, verbose=True)

# KNNWithMeans
algo2 = KNNWithMeans(k=30, sim_options={'name': 'cosine',
                                        'user_based': False}, verbose=True)
algo2.fit(trainset)
pred2 = algo2.predict(uid, iid, verbose=True)

# KNNWithZScore
algo3 = KNNWithZScore(k=30, sim_options={'name': 'msd',
                                         'user_based': True}, verbose=True)
algo3.fit(trainset)
pred3 = algo3.predict(uid, iid, verbose=True)

# KNNBaseline
algo4 = KNNBaseline(k=30, sim_options={'name': 'msd',
                                       'user_based': True}, verbose=True)
algo4.fit(trainset)
pred4 = algo4.predict(uid, iid, verbose=True)
print(news.head())

# Read the ratings data
reader = Reader(line_format='user item rating timestamp', sep=',',
                skip_lines=1)
data = Dataset.load_from_file('./ratings.csv', reader=reader)
train_set = data.build_full_trainset()

from surprise import KNNWithZScore
# verbose is a constructor argument, not a similarity option
algo = KNNWithZScore(k=50, sim_options={'user_based': False}, verbose=True)
algo.fit(train_set)

uid = str(196)
iid = str(332)
pred = algo.predict(uid, iid, r_ui=4, verbose=True)

kf = KFold(n_splits=3)
for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    # Compute RMSE and MAE
    accuracy.rmse(predictions, verbose=True)
    accuracy.mae(predictions, verbose=True)

### Collaborative filtering with z-score normalization, user-based
from surprise import KNNWithZScore
algo = KNNWithZScore(k=50,
We set the minimum number of neighbors (min_k) to 1 and the maximum
number of neighbors (k) to 40.
We train the model on the train set.
'''
algo2 = KNNBasic(sim_options=sim_options, k=40, min_k=1)
algo2.fit(trainset)
predictions2 = algo2.test(testset)
print("RMSE for KNNBasic:", accuracy.rmse(predictions2, verbose=True))

# In[ ]:

'''
We build the model using KNNBaseline, a collaborative filtering based algorithm.
We set the minimum number of neighbors (min_k) to 1 and the maximum
number of neighbors (k) to 40.
We train the model on the train set.
'''
algo3 = KNNBaseline(sim_options=sim_options, k=40, min_k=1)
algo3.fit(trainset)
predictions3 = algo3.test(testset)
print("RMSE for KNNBaseline:", accuracy.rmse(predictions3, verbose=True))

# In[ ]:

'''
We build the model using KNNWithZScore, a collaborative filtering based algorithm.
We set the minimum number of neighbors (min_k) to 1 and the maximum
number of neighbors (k) to 40.
We train the model on the train set.
'''
algo4 = KNNWithZScore(sim_options=sim_options, k=40, min_k=1)
algo4.fit(trainset)
predictions4 = algo4.test(testset)
print("RMSE for KNNWithZScore:", accuracy.rmse(predictions4, verbose=True))
ratings = [i[2] for i in all_ratings]
ratings_dict = {
    "user": user_ids,
    "item": food_ids,
    "rating": ratings,
}
df = pd.DataFrame(ratings_dict)
reader = Reader(rating_scale=(1, 5))

# Loads Pandas dataframe
training_data = Dataset.load_from_df(df[["user", "item", "rating"]], reader)
trainingSet = training_data.build_full_trainset()
algo.fit(trainingSet)

# -------------------------- GROUP RECOMMENDER --------------------------

# Function to retrieve the information for all the restaurants from the database
def get_all_restaurants():
    sql = """SELECT RESTAURANT_ID, NAME, EMAIL, NUMBER, ADDRESS, RATING,
                    OPENING_TIME, CLOSING_TIME FROM RESTAURANT"""
    mycursor.execute(sql)
    return mycursor.fetchall()

# Function to retrieve all the food items for a specific restaurant
def get_restaurant_foods(restaurant_id):
    parameters = (restaurant_id,)