def slope_one(trainset, testset, predset):
    """Train SlopeOne, report train/test RMSE and save the estimated ratings.

    Does nothing when ``is_already_predicted('slopeone')`` is truthy.

    Args:
        trainset: Surprise trainset the model is fitted on.
        testset: rating tuples used to compute the test RMSE; the estimates
            are passed to ``save_predictions`` with the ``'test'`` tag.
        predset: rating tuples to estimate; the estimates are passed to
            ``save_predictions`` without a tag.

    Returns:
        None.
    """
    modelname = 'slopeone'
    # Check if predictions already exist
    if is_already_predicted(modelname):
        return

    algo = SlopeOne()
    print('SlopeOne Model')
    # fit() is the supported training entry point; train() was deprecated in
    # Surprise 1.0.5.
    algo.fit(trainset)

    # Error on the data the model was fitted on.
    predictions = algo.test(trainset.build_testset())
    print(' RMSE on Train: ', accuracy.rmse(predictions, verbose=False))

    # Error on the held-out data.
    predictions = algo.test(testset)
    rmse = accuracy.rmse(predictions, verbose=False)
    print(' RMSE on Test: ', rmse)

    preds = np.array([pred.est for pred in predictions], dtype=float)
    save_predictions(modelname, rmse, preds, 'test')

    print(' Evaluate predicted ratings...')
    predictions = algo.test(predset)
    preds = np.array([pred.est for pred in predictions], dtype=float)
    # The test RMSE is stored alongside these estimates as well.
    save_predictions(modelname, rmse, preds)
def get(self, user_id):
    """Return the top-10 story recommendations for ``user_id`` as JSON.

    Loads every row of ``story_reviews``, fits SlopeOne on all of them and
    ranks the stories the user has not reviewed by estimated rating.

    Args:
        user_id: id of the user to recommend for.
            NOTE(review): it is compared with ``==`` against the raw ids
            coming from the ``user_id`` column, so it must have the same
            type as that column — confirm against the route definition.

    Returns:
        The ``jsonify`` response with a ``recommendations`` list of story ids,
        best estimate first.
    """
    # SQL query. The connection is only needed for this read, so release it
    # even when the query fails.
    conn = mysql.connect()
    try:
        df = pd.read_sql_query("SELECT * FROM story_reviews", conn)
    finally:
        conn.close()

    # Data and model
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(df[['user_id', 'story_id', 'star']], reader)
    model = SlopeOne()

    # Training
    training_set = data.build_full_trainset()
    model.fit(training_set)

    # Prediction: keep only the (user, story) pairs of the requested user.
    anti_training_set = training_set.build_anti_testset()
    prediction_set = [
        entry for entry in anti_training_set if entry[0] == user_id
    ]
    predictions = model.test(prediction_set)

    # Return top-N recommendations
    n = 10
    top_n_predictions = sorted(
        predictions, key=lambda p: p.est, reverse=True
    )[:n]
    story_recommendations = [p.iid for p in top_n_predictions]

    return jsonify(recommendations=story_recommendations)
def surprise_slopeOne(train_file, test_file):
    """
    SlopeOne with Surprise library.

    Compute the predictions on a test_set after training on a train_set
    using the method SlopeOne from Surprise.

    Args:
        train_file (string): path to created train file
        test_file (string): path to created test file

    Hyperparameters:
        -

    Returns:
        numpy array: predictions
    """
    print("slopeone")
    algo = SlopeOne()

    # A single predefined (train, test) fold, read as "user,item,rating" lines.
    fold = [(train_file, test_file)]
    reader = Reader(line_format='user item rating', sep=',')
    data = Dataset.load_from_folds(fold, reader=reader)
    pkf = PredefinedKFold()

    for trainset, testset in pkf.split(data):
        # Train
        algo.fit(trainset)

        # Predict
        predictions = algo.test(testset)
        pred = np.array([p.est for p in predictions], dtype=float)

    return pred
def slope_one(test, train, all):
    """Fit SlopeOne on ``train`` and estimate the ratings listed in ``test``.

    Args:
        test: DataFrame with ``userId``, ``movieId`` and ``rating`` columns;
            its (user, movie) pairs are the ones predicted.
        train: DataFrame with the same columns, used for fitting.
        all: not used by this function.

    Returns:
        tuple: ``(out, [fit_time, predict_time, overall])`` where ``out`` is a
        DataFrame of ``userId`` / ``movieId`` / estimated ``rating`` and the
        times are in seconds. ``fit_time`` also covers building the datasets.
    """
    t_start = time.time()

    reader = Reader(rating_scale=(0.5, 5))
    columns = ['userId', 'movieId', 'rating']
    trainset = Dataset.load_from_df(train[columns], reader).build_full_trainset()
    # Round-trip through a trainset to obtain (uid, iid, rating) tuples.
    testset = (
        Dataset.load_from_df(test[columns], reader)
        .build_full_trainset()
        .build_testset()
    )

    model = SlopeOne()
    model.fit(trainset)
    t_fit = time.time()
    fit_time = t_fit - t_start

    predictions = model.test(testset)
    out = pd.DataFrame.from_dict({
        'userId': [p.uid for p in predictions],
        'movieId': [p.iid for p in predictions],
        'rating': [p.est for p in predictions],
    })

    predict_time = time.time() - t_fit
    # Equivalent to (now - t_start): total wall time since the call began.
    overall = predict_time + t_fit - t_start

    return out, [fit_time, predict_time, overall]
def SlopeOne_alg():
    """Fit SlopeOne on ``trainset`` and print its RMSE on ``testset``.

    Both ``trainset`` and ``testset`` are read from the enclosing scope.
    """
    print('Using SlopeOne')
    model = SlopeOne()
    print(model)

    model.fit(trainset)
    test_predictions = model.test(testset)

    # accuracy.rmse reports the score itself (default verbosity); the returned
    # value is then printed as well.
    score = accuracy.rmse(test_predictions)
    print(score)
def slopeOne(trainset, testset):
    """Fit SlopeOne on ``trainset`` and score it on ``testset``.

    Returns:
        tuple: ``(rmse, mae, predictions)`` for the test set.
    """
    # Slope One
    rule = "-" * 5
    print("\n" + rule + " SlopeOne algorithm using surprise package " + rule)

    model = SlopeOne()
    model.fit(trainset)
    predictions = model.test(testset)

    # RMSE is computed (and reported) before MAE.
    rmse = accuracy.rmse(predictions)
    mae = accuracy.mae(predictions)
    return rmse, mae, predictions
def slopeone(train, test, ids, Xtest, Xids):
    """
    Fit SlopeOne (item-based algorithm) and collect its predictions for blending.

    Argument : train, the trainset
               test, the testset
               ids, unknown ratings: ids[0] holds the user ids, ids[1] the item ids
               Xtest, collection the testset predictions are appended to (final blending)
               Xids, collection the unknown-rating predictions are appended to (final blending)

    Returns  : rmse on the testset, Xtest, Xids, and the two prediction
               sequences that were appended (preds_test, preds_ids)
    """
    print('SlopeOne')
    model = SlopeOne()

    # Train algorithm on training set
    model.fit(train)

    # Predict on train and compute RMSE
    train_predictions = model.test(train.build_testset())
    print(' Training RMSE: ', accuracy.rmse(train_predictions, verbose=False))

    # Predict on test and compute RMSE
    test_predictions = model.test(test)
    rmse = accuracy.rmse(test_predictions, verbose=False)
    print(' Test RMSE: ', rmse)
    preds_test = np.array([p.est for p in test_predictions], dtype=float)

    # Predict unknown ratings; raw ids are passed to the model as strings.
    preds_ids = [
        model.predict(str(ids[0][k]), str(ids[1][k])).est
        for k in range(len(ids[0]))
    ]

    Xtest.append(preds_test)
    Xids.append(preds_ids)
    return rmse, Xtest, Xids, preds_test, preds_ids
# A reader is still needed but only the rating_scale param is required.
reader = Reader(rating_scale=(0.5, 5))

# The columns must correspond to user id, item id and ratings (in that order).
rating_train2 = Dataset.load_from_df(
    rating_train[['userID', 'itemID', 'rating']], reader)
rating_test2 = Dataset.load_from_df(
    rating_test[['userID', 'itemID', 'rating']], reader)
trainset = rating_train2.build_full_trainset()
# Round-trip through a trainset to obtain (uid, iid, rating) test tuples.
testset = rating_test2.build_full_trainset().build_testset()

# SlopeOne model
# `count` is kept as module-level state in case later code reads it.
count = 1

start = dt.datetime.today()
print("================================================")
algo = SlopeOne()

# fit() is the supported training entry point; train() was deprecated in
# Surprise 1.0.5.
algo.fit(trainset)

predictions = algo.test(testset)

# Each metric reports its own value because verbose=True.
accuracy.rmse(predictions, verbose=True)
accuracy.fcp(predictions, verbose=True)
accuracy.mae(predictions, verbose=True)
count = count + 1
end = dt.datetime.today()
print("Runtime: " + str(end - start))
# Example: fit SlopeOne on the full ml-100k dataset, score it on the ratings
# it was trained on, then repeat the evaluation with 3-fold cross-validation.
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from surprise import Dataset
from surprise import SlopeOne
from surprise import accuracy
from surprise.model_selection import KFold

data = Dataset.load_builtin('ml-100k')

algo = SlopeOne()

# fit() is the supported training entry point; train() was deprecated in
# Surprise 1.0.5.
trainset = data.build_full_trainset()
algo.fit(trainset)

testset = trainset.build_testset()
predictions = algo.test(testset)
# RMSE should be low as we are biased (scored on the training ratings).
# The original note quotes ~0.68; that figure is not re-verified here.
accuracy.rmse(predictions, verbose=True)

# We can also do this during a cross-validation procedure!
print('CV procedure:')

# KFold replaces the deprecated data.split(3) / data.folds() pair.
kf = KFold(n_splits=3)
for i, (trainset_cv, testset_cv) in enumerate(kf.split(data)):
    print('fold number', i + 1)
    algo.fit(trainset_cv)

    print('On testset,', end=' ')
    predictions = algo.test(testset_cv)
    accuracy.rmse(predictions, verbose=True)
# Load the MovieLens 1M dataset (raw format: UserID::MovieID::Rating::Timestamp)
data = Dataset.load_builtin('ml-1m')
# Hold out 15% of the ratings for evaluation.
trainset, testset = train_test_split(data, test_size=0.15)

# Set up the algorithm; SlopeOne is constructed with its defaults here.
print("Usando o algoritmo SlopeOne")
algoritmo = SlopeOne()
algoritmo.fit(trainset)

# Pick the user and the movie to inspect (raw ids are strings).
# Original author's note: user 49 is 18-24 years old, a programmer living in
# Houston, Texas.
uid = "49"
# Original author's note: movie watched and rated is
# "Negotiator, The (1998)" (Action|Thriller), rated 4.
iid = "2058"  # raw item id

# Get a prediction for this specific user and item.
print("Predição de avaliação: ")
pred = algoritmo.predict(uid=uid, iid=iid, r_ui=4, verbose=True)

# Run the trained model against the test set.
test_pred = algoritmo.test(testset)

# Evaluate RMSE
print("Avaliação RMSE: ")
accuracy.rmse(test_pred, verbose=True)

# Evaluate MAE
print("Avaliação MAE: ")
accuracy.mae(test_pred, verbose=True)
movieRecc = topMovies[i] movieRawID = movieRecc[0] movieName = movie[movieRawID] print(str(i+1) + '. ' + movieName ) #################predictions using Slope-One print('') print('Making more recommendations...') algo2 = SlopeOne() algo2.fit(trainset) predictions2 = algo2.test(testset) dictMovies2 = get_top_n(predictions2) topMovies2 = dictMovies2.get(672) print('') print('Here are the top 5 recommendations based on Slope-One! ') for i in range(5): movieRecc2 = topMovies2[i] movieRawID2 = movieRecc2[0] movieName2 = movie[movieRawID2] print(str(i+1) + '. ' + movieName2 ) #############predictions using Co-Clustering
# ## SlopeOne Implementation

# In[89]:

from surprise import SlopeOne

slopeOne = SlopeOne()

# Train the algorithm on every rating in the dataset.
trainsetslopeOne = dataCoClustering.build_full_trainset()
slopeOne.fit(trainsetslopeOne)

# NOTE(review): build_anti_testset() yields the (user, item) pairs that are
# NOT in the trainset, with a fill value in place of a real rating, so the
# RMSE below is measured against placeholders rather than held-out true
# ratings — confirm this is the intended evaluation.
testslopeOne = trainsetslopeOne.build_anti_testset()
predictionsslopeOne = slopeOne.test(testslopeOne)

# Compute (and report) the RMSE once; the original evaluated it twice over
# the same predictions.
listOfRMSE.append(accuracy.rmse(predictionsslopeOne))
models.append('SlopeOne')

# In[91]:

models

# In[105]:
data = Dataset.load_builtin('ml-100k')
# Split into training and test sets (15% held out).
train, test = train_test_split(data, test_size=0.15)

# SlopeOne algorithm
slope = SlopeOne()
slope.fit(train)

# Predict the rating of user 222 for movie 750 (raw ids are strings).
uid = "222"
iid = "750"
pred = slope.predict(uid=uid, iid=iid, r_ui=5, verbose=True)
# ###### Output recorded by the original author ######
# user: 222
# item: 750
# r_ui = 5.00
# est = 3.97
# {'was_impossible': False}
# i.e. the predicted rating of user 222 for movie 750 is 3.97

test_pred = slope.test(test)

# RMSE and MAE (each metric also reports itself because verbose=True)
print("RMSE: %s" % accuracy.rmse(test_pred, verbose=True))
print("MAE: %s" % accuracy.mae(test_pred, verbose=True))
# ###### Output recorded by the original author ######
# RMSE: 0.9517
# MAE: 0.7460