def computeSlopeOne(data, test_np):
    """Predict ratings on the test set with the Slope One algorithm.

    The method has no tunable parameter.
    data : DataFrame used as the training set.
    test_np : DataFrame the predictions are computed for.
    return : test_np extended with a prediction column named 'slopeone_rating'.
    """
    trainset, test = dataTrainSurprise(data, test_np)
    model = SlopeOne().fit(trainset)

    def _estimate(row):
        # The prediction tuple is (uid, iid, r_ui, est, details); index 3 is
        # the estimated rating.
        return model.predict(row['user_id'], row['movie_id'])[3]

    test['slopeone_rating'] = test[['user_id', 'movie_id']].apply(_estimate, axis=1)
    return test
def SlopeOne(self, namefile, uid, iid, rati, value_uid, value_iid):
    """Fit Slope One on a CSV ratings file and predict one (user, item) rating.

    namefile : CSV file name under ./container/ holding the ratings.
    uid, iid, rati : column names for user id, item id and rating.
    value_uid, value_iid : the user/item pair to predict for.
    return : dict with the predicted user id, item id and rounded estimate.
    """
    frame = pd.DataFrame(pd.read_csv('./container/' + namefile))

    # Build the full trainset from the three rating columns.
    reader = Reader(rating_scale=(0, 100))
    dataset = Dataset.load_from_df(frame[[uid, iid, rati]], reader)
    full_trainset = dataset.build_full_trainset()

    # Inside the method body the bare name SlopeOne resolves to the surprise
    # class (module scope), not to this method.
    algo = SlopeOne()
    algo.fit(full_trainset)
    prediction = algo.predict(float(value_uid), float(value_iid), r_ui=1, verbose=True)

    # Package the result for a JSON response.
    jsondata = {
        "uid": prediction.uid,
        "idd": prediction.iid,
        "rati": round(prediction.est, 2),
    }
    return jsondata
def SlopeOne_from_to(self, namefile, uid, iid, rati, from_uid, to_uid, from_iid, to_iid):
    """Fit Slope One on a CSV ratings file and predict every (user, item) pair
    in the ranges [from_uid, to_uid) x [from_iid, to_iid) — upper bounds are
    exclusive, as with ``range``.

    return : list of [uid, iid, rounded estimate] triples.
    """
    frame = pd.DataFrame(pd.read_csv('./container/' + namefile))

    # Build the full trainset from the three rating columns.
    reader = Reader(rating_scale=(0, 100))
    dataset = Dataset.load_from_df(frame[[uid, iid, rati]], reader)
    algo = SlopeOne()
    algo.fit(dataset.build_full_trainset())

    results = []
    for user in range(from_uid, to_uid):
        for item in range(from_iid, to_iid):
            prediction = algo.predict(user, item, r_ui=1, verbose=True)
            results.append([prediction.uid,
                            prediction.iid,
                            round(prediction.est, 2)])
    return results
def slopeone(train, test, ids, Xtest, Xids):
    """
    Item based algorithm, reduces overfitting

    Argument : train, the trainset
               test, the testset
               ids, unknown ratings
               Xtest, predicted ratings for testset, to be used for final blending
               Xids, predicted ratings for unknown ratings, to be used for final blending
    """
    print('SlopeOne')
    algo = SlopeOne()

    # Fit on the training set.
    algo.fit(train)

    # RMSE on the training set itself.
    train_predictions = algo.test(train.build_testset())
    print(' Training RMSE: ', accuracy.rmse(train_predictions, verbose=False))

    # RMSE on the held-out test set.
    test_predictions = algo.test(test)
    rmse = accuracy.rmse(test_predictions, verbose=False)
    print(' Test RMSE: ', rmse)

    preds_test = np.array([p.est for p in test_predictions])

    # Predict the unknown ratings; ids[0] holds the user ids and ids[1] the
    # item ids, element-wise.
    preds_ids = [algo.predict(str(u), str(i)).est
                 for u, i in zip(ids[0], ids[1])]

    Xtest.append(preds_test)
    Xids.append(preds_ids)
    return rmse, Xtest, Xids, preds_test, preds_ids
from surprise.model_selection import KFold
import io

import pandas as pd


def read_item_names(file_name='./movies.csv'):
    """Read the movie metadata CSV and build id <-> title lookup dicts.

    file_name : path to a CSV file with 'movieId' and 'title' columns
        (defaults to './movies.csv', the original hard-coded path).
    return : (rid_to_name, name_to_rid) mapping dicts.
    """
    data = pd.read_csv(file_name)
    rid_to_name = {}
    name_to_rid = {}
    # Build both directions in a single pass over the rows.
    for movie_id, title in zip(data['movieId'], data['title']):
        rid_to_name[movie_id] = title
        name_to_rid[title] = movie_id
    return rid_to_name, name_to_rid


# Load the ratings file and build the full trainset.
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
data = Dataset.load_from_file('./ratings.csv', reader=reader)
train_set = data.build_full_trainset()

# Fit the SlopeOne algorithm.
algo = SlopeOne()
algo.fit(train_set)

# Predict the rating of a given user for a given movie.
uid = str(196)
iid = str(302)
pred = algo.predict(uid, iid, r_ui=4, verbose=True)
# Collect per-model predictions for every rating in the test data.
# NOTE(review): Pred_Test_SVD / Pred_Test_NMF / Pred_Test_SL1 and the fitted
# alg_* models, as well as data_test, are defined earlier in the file (not
# visible in this chunk).
Pred_Test_KNN = []
Pred_Test_BSL = []
start = time.time()
for line in data_test:
    # line[1] appears to be the user id and line[0] the item id — TODO confirm
    # against how data_test is built.  clip=False keeps raw, unclipped
    # estimates so the blender sees the models' actual outputs.
    Pred_Test_KNN.append(
        alg_KNN.predict(str(line[1]), str(line[0]), clip=False).est)
    Pred_Test_SVD.append(
        alg_SVD.predict(str(line[1]), str(line[0]), clip=False).est)
    Pred_Test_NMF.append(
        alg_NMF.predict(str(line[1]), str(line[0]), clip=False).est)
    Pred_Test_SL1.append(
        alg_SL1.predict(str(line[1]), str(line[0]), clip=False).est)
    Pred_Test_BSL.append(
        alg_BSL.predict(str(line[1]), str(line[0]), clip=False).est)
end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)
# Stack the per-model prediction lists into a feature matrix: after the
# transpose, one row per test rating and one column per base model.
X_Test = np.matrix([
    Pred_Test_SVD, Pred_Test_NMF, Pred_Test_SL1, Pred_Test_KNN, Pred_Test_BSL
])
X_Test = X_Test.T

# %% Prior Based
def compute_recommendations(user_id, prediction_table, numeric_prediction_table):
    """Train SlopeOne on all known ratings and persist recommendations.

    Appends the user's top-10 recommended item ids to ``prediction_table``,
    the 20 best numeric predictions to 'numeric_predictions', and the top-10
    numeric predictions to ``numeric_prediction_table``.

    user_id : id of the user recommendations are computed for.
    prediction_table : SQL table name for the top-10 item ids.
    numeric_prediction_table : SQL table name for the top-10 predicted ratings.
    """
    algo = 'SlopeOne'
    algorithm = SlopeOne()

    engine = create_engine(config.DB_URI, echo=True)
    session = scoped_session(
        sessionmaker(bind=engine, autocommit=False, autoflush=False))

    # Ratings stored in the database ...
    df_ratings = pd.read_sql('SELECT * FROM ratings;', con=engine)
    df_ratings = df_ratings[['user_id', 'item_id', 'rating']]
    df_ratings = df_ratings.dropna()
    df_ratings = df_ratings.drop_duplicates()

    # ... merged with the ratings shipped as a CSV file.
    df_ratings2 = pd.read_csv('data/ratings.csv', low_memory=False)
    df_ratings2 = df_ratings2.rename(columns={'movie_id': 'item_id'})
    df_ratings2 = df_ratings2[['user_id', 'item_id', 'rating']]
    df_ratings2 = df_ratings2.dropna()
    df_ratings2 = df_ratings2.drop_duplicates()
    df_ratings = pd.concat([df_ratings, df_ratings2], axis=0)

    reader = Reader(line_format='user item rating', sep=',',
                    rating_scale=(1, 10))
    data = Dataset.load_from_df(df_ratings, reader=reader)
    trainset = data.build_full_trainset()

    # Fix: fit() replaces train(), which was removed from surprise (the rest
    # of this file already calls fit()).
    algorithm.fit(trainset)

    items = pd.read_sql('SELECT distinct id FROM items;', con=engine)

    # Candidate items are those the user has not rated yet.
    df_user_items = df_ratings.loc[df_ratings['user_id'] == user_id]
    total_items = items.id.unique()
    user_items = df_user_items.item_id.unique()
    prediction_items = [x for x in total_items if x not in user_items]

    predictions = pd.DataFrame(columns=['user_id', 'item_id', 'prediction'])
    predicted_ratings = []
    for item in prediction_items:
        # The prediction tuple is (uid, iid, r_ui, est, details); index 3 is
        # the estimated rating.
        est = algorithm.predict(user_id, item)
        predicted_ratings.append(est[3])

    predictions['item_id'] = prediction_items
    predictions['user_id'] = pd.Series(
        [user_id for x in range(len(predictions.index))],
        index=predictions.index)
    predictions['prediction'] = predicted_ratings
    predictions = predictions.sort_values('prediction', ascending=False)
    test_prediction = predictions
    predictions = predictions.head(n=10)

    # Persist the top-10 recommended item ids, one wide row per user.
    cols = [
        'pred_1', 'pred_2', 'pred_3', 'pred_4', 'pred_5', 'pred_6', 'pred_7',
        'pred_8', 'pred_9', 'pred_10'
    ]
    df_pred = predictions[['item_id']].T
    df_pred.columns = cols
    df_pred['id'] = user_id
    df_pred = df_pred[[
        'id', 'pred_1', 'pred_2', 'pred_3', 'pred_4', 'pred_5', 'pred_6',
        'pred_7', 'pred_8', 'pred_9', 'pred_10'
    ]]
    df_pred['id'] = df_pred['id'].astype(int)
    df_pred.to_sql(prediction_table, engine, if_exists='append', index=False)
    session.commit()

    # Persist the 20 best numeric predictions tagged with the algorithm name.
    df_num_ratings = test_prediction
    df_num_ratings = df_num_ratings.head(n=20)
    df_num_ratings['algorithm'] = algo
    df_num_ratings.rename(columns={'prediction': 'predicted_rating'},
                          inplace=True)
    df_num_ratings.to_sql('numeric_predictions', engine, if_exists='append',
                          index=False)
    session.commit()

    # Persist the top-10 numeric predictions, one wide row per user.
    predcols = [
        'num_1', 'num_2', 'num_3', 'num_4', 'num_5', 'num_6', 'num_7',
        'num_8', 'num_9', 'num_10'
    ]
    df_num_ratings_transpose = predictions[['prediction']].T
    df_num_ratings_transpose.columns = predcols
    df_num_ratings_transpose['id'] = user_id
    df_num_ratings_transpose = df_num_ratings_transpose[[
        'id', 'num_1', 'num_2', 'num_3', 'num_4',
        'num_5', 'num_6', 'num_7', 'num_8', 'num_9', 'num_10'
    ]]
    df_num_ratings_transpose['id'] = df_num_ratings_transpose['id'].astype(int)
    df_num_ratings_transpose.to_sql(numeric_prediction_table, engine,
                                    if_exists='append', index=False)
    session.commit()
# NOTE: 'evaluate' was removed from the import list — it was unused and no
# longer exists in modern surprise releases (use cross_validate instead).
from surprise import SlopeOne, BaselineOnly, Reader, Dataset
import os
from surprise.model_selection import cross_validate

# Train SlopeOne on the probe training data
# (tab-separated: user item timestamp rating).
file_path = os.path.expanduser('um/separated/probe_training_data.dta')
reader = Reader(line_format='user item timestamp rating', sep='\t')
data = Dataset.load_from_file(file_path, reader=reader)

algo = SlopeOne()
trainset = data.build_full_trainset()
# Fix: fit() replaces train(), which was removed from surprise.
algo.fit(trainset)

# Predict every (user, item) pair and write one prediction per line.
# The with-statement guarantees the file is closed even if predict raises.
with open("um/output/slopeone.dta", "w") as output:
    for u in range(1, 458294):
        for i in range(1, 17771):
            pred = algo.predict(uid=u, iid=i, verbose=2)
            output.write(str(pred) + "\n")
from surprise.model_selection import train_test_split

# Load the built-in movielens-100k dataset.
data = Dataset.load_builtin('ml-100k')

# Split into a training set and a 15% test set.
train, test = train_test_split(data, test_size=.15)

# Fit the SlopeOne algorithm on the training set.
slope = SlopeOne()
slope.fit(train)

# Predict the rating of user 222 for movie 750.
uid = str(222)
iid = str(750)
pred = slope.predict(uid, iid, r_ui=5, verbose=True)
# Example output:
#   user: 222   item: 750   r_ui = 5.00   est = 3.97   {'was_impossible': False}
# i.e. user 222 is predicted to rate movie 750 at 3.97.

# Evaluate RMSE and MAE on the held-out test set.
test_pred = slope.test(test)
print("RMSE: " + str(accuracy.rmse(test_pred, verbose=True)))
print("MAE: " + str(accuracy.mae(test_pred, verbose=True)))