def SlopeOne_train(self):
    '''
    Train one SlopeOne model on the overall rating and one per criterion.
    seed: int - 3, random seed for splitting the training and test sets
    k: int - 40, maximum number of neighbours
    options: dict - {'name': 'pearson', 'user_based': False}, algorithm options;
             defaults to Pearson similarity with an item-based approach
    '''
    self.algos = []
    df = self.trainDatas
    names = locals()
    r = Reader(rating_scale=(1, 5))

    # Load and split the data; train the predictor on the overall rating
    total = Dataset.load_from_df(df[['uid', 'iid', 'total']], reader=r)
    total_train = total.build_full_trainset()
    total_algo = SlopeOne()
    total_algo.fit(total_train)
    self.algos.append(total_algo)

    # Train one SlopeOne model per criterion column c1..cN
    for i in range(1, self.no_of_criteria + 1):
        names['c' + str(i)] = Dataset.load_from_df(
            df[['uid', 'iid', 'c' + str(i)]], reader=r)
        names['c' + str(i) + '_train'] = names.get('c' + str(i)).build_full_trainset()
        names['algo_c' + str(i)] = SlopeOne()
        names.get('algo_c' + str(i)).fit(names.get('c' + str(i) + '_train'))
        self.algos.append(names.get('algo_c' + str(i)))
def slope_one(test, train, all):
    start = time.time()
    reader = Reader(rating_scale=(0.5, 5))
    data = Dataset.load_from_df(train[['userId', 'movieId', 'rating']], reader)
    test_data = Dataset.load_from_df(test[['userId', 'movieId', 'rating']], reader)
    trainset = data.build_full_trainset()
    testset = test_data.build_full_trainset().build_testset()

    algo = SlopeOne()
    algo.fit(trainset)
    fit = time.time()
    fit_time = fit - start

    predictions = algo.test(testset)
    uid = []
    mid = []
    rate = []
    for i in range(len(predictions)):
        uid.append(predictions[i].uid)
        mid.append(predictions[i].iid)
        rate.append(predictions[i].est)
    out = {'userId': uid, 'movieId': mid, 'rating': rate}
    out = pd.DataFrame.from_dict(out)

    predict_time = time.time() - fit
    overall = predict_time + fit - start
    return out, [fit_time, predict_time, overall]
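# A minimal usage sketch for slope_one() above, assuming a local 'ratings.csv'
# with userId/movieId/rating columns and an 80/20 random split (the file name
# and the split are assumptions, not part of the original snippet).
import pandas as pd

ratings = pd.read_csv('ratings.csv')
train_df = ratings.sample(frac=0.8, random_state=42)
test_df = ratings.drop(train_df.index)
out, (fit_time, predict_time, overall) = slope_one(test_df, train_df, ratings)
print(out.head())
print(fit_time, predict_time, overall)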
def surprise_slopeOne(train_file, test_file):
    """
    SlopeOne with the Surprise library.
    Compute the predictions on a test set after training on a train set,
    using the SlopeOne method from Surprise.
    Args:
        train_file (string): path to the created train file
        test_file (string): path to the created test file
    Hyperparameters:
        -
    Returns:
        numpy array: predictions
    """
    print("slopeone")
    algo = SlopeOne()

    fold = [(train_file, test_file)]
    reader = Reader(line_format='user item rating', sep=',')
    data = Dataset.load_from_folds(fold, reader=reader)
    pkf = PredefinedKFold()

    for trainset, testset in pkf.split(data):
        # Train
        algo.fit(trainset)
        # Predict
        predictions = algo.test(testset)

    pred = np.zeros(len(predictions))
    for i in range(len(predictions)):
        val = predictions[i].est
        pred[i] = val

    return pred
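# A usage sketch for surprise_slopeOne(), assuming 'train.csv' and 'test.csv'
# are comma-separated user,item,rating files without a header (both paths are
# assumptions for illustration).
preds = surprise_slopeOne('train.csv', 'test.csv')
print(preds[:10])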
def SlopeOne_alg():
    print('Using SlopeOne')
    alg = SlopeOne()
    print(alg)
    alg.fit(trainset)
    predictions = alg.test(testset)
    print(accuracy.rmse(predictions))
def get(self, user_id):
    # SQL query
    conn = mysql.connect()
    cursor = conn.cursor()
    df = pd.read_sql_query("SELECT * FROM story_reviews", conn)

    # Data and Model
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(df[['user_id', 'story_id', 'star']], reader)
    model = SlopeOne()

    # Training
    training_set = data.build_full_trainset()
    model.fit(training_set)

    # Prediction
    anti_training_set = training_set.build_anti_testset()
    prediction_set = [x for x in anti_training_set if x[0] == user_id]
    predictions = model.test(prediction_set)

    # Return Top N Recommendations
    n = 10
    predictions.sort(key=lambda x: x.est, reverse=True)
    top_n_predictions = predictions[:n]

    story_recommendations = []
    for predictionItem in top_n_predictions:
        story_recommendations.append(predictionItem.iid)

    return jsonify(recommendations=story_recommendations)
def slopeOne(trainset, testset):
    # Slope One
    print("\n" + "-" * 5 + " SlopeOne algorithm using surprise package " + "-" * 5)
    algo = SlopeOne()
    algo.fit(trainset)
    predictions = algo.test(testset)
    rmse = accuracy.rmse(predictions)
    mae = accuracy.mae(predictions)
    return rmse, mae, predictions
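# A usage sketch for slopeOne(), assuming the built-in ml-100k dataset and a
# 25% hold-out split (the dataset choice and split size are assumptions).
from surprise import Dataset
from surprise.model_selection import train_test_split

data = Dataset.load_builtin('ml-100k')
trainset, testset = train_test_split(data, test_size=0.25)
rmse, mae, predictions = slopeOne(trainset, testset)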
def SlopeOne(self, namefile, uid, iid, rati, value_uid, value_iid):
    test_data = pd.read_csv('./container/' + namefile)
    dt = pd.DataFrame(test_data)

    # Retrieve the trainset.
    reader = Reader(rating_scale=(0, 100))
    data = Dataset.load_from_df(dt[[uid, iid, rati]], reader)
    trainset = data.build_full_trainset()

    algo = SlopeOne()
    algo.fit(trainset)

    pred = algo.predict(float(value_uid), float(value_iid), r_ui=1, verbose=True)
    # var_rmse = accuracy.rmse(pred)

    # return result to json
    jsondata = {}
    jsondata["uid"] = pred.uid
    jsondata["idd"] = pred.iid
    jsondata["rati"] = round(pred.est, 2)
    return jsondata
def SlopeOne_from_to(self, namefile, uid, iid, rati, from_uid, to_uid, from_iid, to_iid):
    test_data = pd.read_csv('./container/' + namefile)
    dt = pd.DataFrame(test_data)

    # Retrieve the trainset.
    reader = Reader(rating_scale=(0, 100))
    data = Dataset.load_from_df(dt[[uid, iid, rati]], reader)
    trainset = data.build_full_trainset()

    algo = SlopeOne()
    algo.fit(trainset)

    arr = []
    for value_uid in range(from_uid, to_uid):
        for value_iid in range(from_iid, to_iid):
            pred = algo.predict(value_uid, value_iid, r_ui=1, verbose=True)
            tempdata = []
            tempdata.append(pred.uid)
            tempdata.append(pred.iid)
            tempdata.append(round(pred.est, 2))
            arr.append(tempdata)

    # return result to json
    return arr
def slopeone(train, test, ids, Xtest, Xids):
    """
    Item-based algorithm, reduces overfitting.
    Arguments:
        train, the trainset
        test, the testset
        ids, unknown ratings
        Xtest, predicted ratings for the testset, to be used for final blending
        Xids, predicted ratings for the unknown ratings, to be used for final blending
    """
    print('SlopeOne')
    algo = SlopeOne()

    # Train algorithm on training set
    algo.fit(train)

    # Predict on train and compute RMSE
    predictions = algo.test(train.build_testset())
    print('  Training RMSE: ', accuracy.rmse(predictions, verbose=False))

    # Predict on test and compute RMSE
    predictions = algo.test(test)
    rmse = accuracy.rmse(predictions, verbose=False)
    print('  Test RMSE: ', rmse)

    preds_test = np.zeros(len(predictions))
    for j, pred in enumerate(predictions):
        preds_test[j] = pred.est

    # Predict unknown ratings
    preds_ids = []
    for i in range(len(ids[0])):
        pred = algo.predict(str(ids[0][i]), str(ids[1][i]))
        preds_ids.append(pred.est)

    Xtest.append(preds_test)
    Xids.append(preds_ids)
    return rmse, Xtest, Xids, preds_test, preds_ids
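# A usage sketch for slopeone(), assuming train/test come from a Surprise split,
# ids is a pair of arrays (user ids, item ids) whose ratings are unknown, and
# Xtest/Xids are the blending accumulators (all of these are assumptions).
import numpy as np
from surprise import Dataset
from surprise.model_selection import train_test_split

data = Dataset.load_builtin('ml-100k')
train, testset = train_test_split(data, test_size=0.1)
ids = (np.array(['1', '2']), np.array(['50', '100']))  # hypothetical unknown pairs
Xtest, Xids = [], []
rmse, Xtest, Xids, preds_test, preds_ids = slopeone(train, testset, ids, Xtest, Xids)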
def slope_one(self):
    """
    SlopeOne reflects how much more one item is liked than another.
    Returns:
        predictions_df: The predictions of the model on the test data as a Pandas DataFrame
    """
    algorithm = SlopeOne()
    predictions = algorithm.fit(self.train_data).test(self.test_data)
    predictions_df = self.data.test_df.copy()
    predictions_df['Rating'] = [x.est for x in predictions]
    if self.test_purpose:
        self.evalueate_model(predictions_df['Rating'], 'Surprise slope_one')
    return predictions_df
def fit_slopeone(trainset):
    logging.info(f'Recommendation System: Fit model - SlopeOne')
    model = SlopeOne()
    model_fitted = model.fit(trainset)
    return model_fitted
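# A usage sketch for fit_slopeone(), assuming the built-in ml-100k dataset;
# the dataset and the example user/item ids are assumptions.
import logging
from surprise import Dataset

logging.basicConfig(level=logging.INFO)
trainset = Dataset.load_builtin('ml-100k').build_full_trainset()
model = fit_slopeone(trainset)
print(model.predict('196', '302').est)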
from surprise import Dataset, Reader, SlopeOne, accuracy
from surprise.model_selection import KFold
import io
import pandas as pd


# Read item (movie) title information
def read_item_names():
    file_name = ('./movies.csv')
    data = pd.read_csv(file_name)
    rid_to_name = {}
    name_to_rid = {}
    for i in range(len(data['movieId'])):
        rid_to_name[data['movieId'][i]] = data['title'][i]
        name_to_rid[data['title'][i]] = data['movieId'][i]
    return rid_to_name, name_to_rid


# Load the ratings data
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
data = Dataset.load_from_file('./ratings.csv', reader=reader)
train_set = data.build_full_trainset()

# Use the SlopeOne algorithm
algo = SlopeOne()
algo.fit(train_set)

# Predict the rating for a specified user and item
uid = str(196)
iid = str(302)
pred = algo.predict(uid, iid, r_ui=4, verbose=True)
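# A small follow-up sketch: map the predicted item id back to its title via
# read_item_names(); casting pred.iid to int assumes movieId is stored as an
# integer column in movies.csv.
rid_to_name, name_to_rid = read_item_names()
print(rid_to_name.get(int(pred.iid), 'unknown movie'), round(pred.est, 2))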
# #### Run cross-validation with the best algo (in the specific case SlopeOne)

# In[14]:

# SlopeOne algorithm gave us the best rmse, therefore, we will train and predict with SlopeOne
print('Executing SlopeOne')
algo = SlopeOne()
cross_validate(algo, data, measures=['RMSE'], cv=3, verbose=False)


# #### Train and test the chosen algorithm

# In[15]:

trainset, testset = s_train_test_split(data, test_size=0.25)
predictions = algo.fit(trainset).test(testset)
accuracy.rmse(predictions)


# #### Get detailed results for predictions/recommendations

# In[16]:

# inspect our predictions in details
def get_Iu(uid):
    """
    return the number of items clicked by given user
    args:
        uid: the id of the user
    returns:
        the number of items clicked by the user
from surprise import Dataset
from surprise import accuracy
from surprise import SlopeOne
from surprise.model_selection import train_test_split

# Load the MovieLens 1M dataset: UserID::MovieID::Rating::Timestamp
data = Dataset.load_builtin('ml-1m')
trainset, testset = train_test_split(data, test_size=.15)

# Configure the algorithm. (K = number of neighbours, Name = similarity measure and
# user_based = user- vs item-based filtering apply to the KNN algorithms;
# SlopeOne takes no hyperparameters.)
print("Using the SlopeOne algorithm")
algoritmo = SlopeOne()
algoritmo.fit(trainset)

# Select the user and the movie to be analysed
# User 49: between 18 and 24 years old, a programmer living in Houston, Texas
uid = str(49)
# Movie watched and rated: Negotiator, The (1998)::Action|Thriller. Rating: 4
iid = str(2058)  # raw item id

# Get a prediction for specific users and items.
print("Rating prediction: ")
pred = algoritmo.predict(uid, iid, r_ui=4, verbose=True)

# Run the trained model against the testset
test_pred = algoritmo.test(testset)

# Evaluate RMSE
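# The snippet above stops at the RMSE comment; a likely completion, using the
# accuracy module already imported in that script (an assumption on my part):
accuracy.rmse(test_pred, verbose=True)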
start = time.time()
alg_SVD.fit(data_train.build_full_trainset())
end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)

# %% Best Hyper-parameters Training - Slope One
alg_SL1 = SlopeOne()
start = time.time()
alg_SL1.fit(data_train.build_full_trainset())
end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)

# %% Best Hyper-parameters Training - KNN
sim_options = {
    'name': 'msd',
    'user_based': True  # compute similarities between users
}
alg_KNN = KNNBasic(sim_options=sim_options)
start = time.time()
listOfRMSE.append(accuracy.rmse(predictionscoClustering))
models.append('CoClustering')


# ## SlopeOne Implementation

# In[89]:

from surprise import SlopeOne

slopeOne = SlopeOne()

# Train the algorithm on the trainset, and predict ratings for the testset
trainsetslopeOne = dataCoClustering.build_full_trainset()
slopeOne.fit(trainsetslopeOne)
testslopeOne = trainsetslopeOne.build_anti_testset()
predictionsslopeOne = slopeOne.test(testslopeOne)
accuracy.rmse(predictionsslopeOne)

listOfRMSE.append(accuracy.rmse(predictionsslopeOne))
models.append('SlopeOne')


# In[91]:

models
for i in range(5):
    movieRecc = topMovies[i]
    movieRawID = movieRecc[0]
    movieName = movie[movieRawID]
    print(str(i+1) + '. ' + movieName)

# ################ predictions using Slope-One
print('')
print('Making more recommendations...')
algo2 = SlopeOne()
algo2.fit(trainset)
predictions2 = algo2.test(testset)
dictMovies2 = get_top_n(predictions2)
topMovies2 = dictMovies2.get(672)

print('')
print('Here are the top 5 recommendations based on Slope-One! ')
for i in range(5):
    movieRecc2 = topMovies2[i]
    movieRawID2 = movieRecc2[0]
    movieName2 = movie[movieRawID2]
    print(str(i+1) + '. ' + movieName2)
# coding=utf-8
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise import NMF
from surprise import SlopeOne
from surprise.model_selection import train_test_split
from surprise import dump

filePath = r'E:\library\Final_lib\collaboration\coll2.csv'
reader = Reader(line_format='user item rating', sep=',', rating_scale=(1, 5))
data = Dataset.load_from_file(filePath, reader=reader)
trainset, testset = train_test_split(data, test_size=0.2)

# algo = BaselineOnly()
# algo.fit(trainset)

algo = SlopeOne()
algo.fit(trainset)
predictions = algo.test(testset)
accuracy.mae(predictions)
accuracy.rmse(predictions)
accuracy.fcp(predictions)

dump.dump(r'E:\library\Final_lib\collaboration\ungrouped_algo\SlopeOne.txt',
          predictions=predictions, algo=algo)
from surprise import accuracy
from surprise import Dataset
from surprise import SlopeOne
from surprise.model_selection import train_test_split

# Load the movielens-100k dataset
data = Dataset.load_builtin('ml-100k')

# Split into training and test sets
train, test = train_test_split(data, test_size=.15)

# SlopeOne algorithm
slope = SlopeOne()
slope.fit(train)

# Predict user 222's rating for movie 750
uid = str(222)
iid = str(750)
pred = slope.predict(uid, iid, r_ui=5, verbose=True)
# ###### Output ######
# user: 222
# item: 750
# r_ui = 5.00
# est = 3.97
# {'was_impossible': False}
# The predicted rating of user 222 for movie 750 is 3.97

test_pred = slope.test(test)
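# A likely next step (not part of the original snippet): score the hold-out
# predictions with the accuracy module imported above.
accuracy.rmse(test_pred, verbose=True)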