예제 #1
0
 def SlopeOne_train(self):
     '''
     Fit one SlopeOne model per rating column and keep them in ``self.algos``.

     Reads ``self.trainDatas`` (indexed with the columns 'uid', 'iid',
     'total' and 'c1' .. 'c<no_of_criteria>') and ``self.no_of_criteria``.
     Every model is trained on the full data with a 1-5 rating scale.
     After the call ``self.algos[0]`` is the model for the 'total' column
     and ``self.algos[i]`` is the model for column 'c<i>'.
     '''
     self.algos = []
     df = self.trainDatas
     r = Reader(rating_scale=(1, 5))
     # 'total' comes first so that the list index equals the criterion number.
     columns = ['total'] + [
         'c' + str(i) for i in range(1, self.no_of_criteria + 1)]
     for column in columns:
         # Load the (user, item, rating) triples and train on all of them.
         data = Dataset.load_from_df(df[['uid', 'iid', column]], reader=r)
         algo = SlopeOne()
         algo.fit(data.build_full_trainset())
         self.algos.append(algo)
예제 #2
0
def slope_one(test, train, all):
    """Fit SlopeOne on ``train`` and estimate the ratings of ``test``.

    Args:
        test: frame indexed with 'userId', 'movieId' and 'rating'; its rows
            are the (user, item) pairs that get an estimate.
        train: frame with the same three columns, used for fitting.
        all: not used by this function.

    Returns:
        tuple: ``(out, [fit_time, predict_time, overall])`` where ``out`` is
        a DataFrame with columns 'userId', 'movieId', 'rating' (the
        estimates) and the times are in seconds. ``fit_time`` also covers
        loading the data; ``overall`` spans from start to the end of
        prediction.
    """
    t_begin = time.time()
    columns = ['userId', 'movieId', 'rating']
    rating_reader = Reader(rating_scale=(0.5, 5))
    train_data = Dataset.load_from_df(train[columns], rating_reader)
    held_out = Dataset.load_from_df(test[columns], rating_reader)
    trainset = train_data.build_full_trainset()
    # The test frame is turned into a trainset first only to reuse
    # build_testset() for producing (uid, iid, rating) tuples.
    testset = held_out.build_full_trainset().build_testset()

    model = SlopeOne()
    model.fit(trainset)
    t_fitted = time.time()
    fit_time = t_fitted - t_begin

    predictions = model.test(testset)
    out = pd.DataFrame.from_dict({
        'userId': [p.uid for p in predictions],
        'movieId': [p.iid for p in predictions],
        'rating': [p.est for p in predictions],
    })
    predict_time = time.time() - t_fitted
    overall = predict_time + t_fitted - t_begin
    return out, [fit_time, predict_time, overall]
예제 #3
0
def surprise_slopeOne(train_file, test_file):
    """
    SlopeOne with Surprise library.
    Compute the predictions on a test_set after training on a train_set using the method SlopeOne from Surprise.
    Args:
        train_file (string): path to created train file
        test_file (string): path to created test file
    Hyperparameters:
        -
    Returns:
        numpy array: estimated rating of every test-set entry, in the order
        returned by ``algo.test``
    """
    print("slopeone")
    algo = SlopeOne()
    # A single predefined fold: train on train_file, evaluate on test_file.
    fold = [(train_file, test_file)]
    reader = Reader(line_format='user item rating', sep=',')
    data = Dataset.load_from_folds(fold, reader=reader)
    pkf = PredefinedKFold()
    # Bound up front so the function returns an empty array instead of
    # raising NameError should the split ever yield no fold.
    predictions = []
    for trainset, testset in pkf.split(data):
        # Train
        algo.fit(trainset)

        # Predict
        predictions = algo.test(testset)
    pred = np.zeros(len(predictions))
    for i, prediction in enumerate(predictions):
        pred[i] = prediction.est
    return pred
def SlopeOne_alg():
    """Fit SlopeOne on the module-level ``trainset`` and print its RMSE.

    Uses the globals ``trainset`` and ``testset``; returns nothing.
    """
    print('Using SlopeOne')
    model = SlopeOne()
    print(model)
    model.fit(trainset)
    test_rmse = accuracy.rmse(model.test(testset))
    print(test_rmse)
예제 #5
0
	def get(self, user_id):
		"""Return the top 10 story recommendations for ``user_id`` as JSON.

		Loads every row of ``story_reviews``, fits a SlopeOne model on the
		(user_id, story_id, star) triples with a 1-5 rating scale, estimates
		a rating for the (user, story) pairs missing from the training data
		and responds with the story ids of the 10 highest estimates under
		the ``recommendations`` key.

		Args:
			user_id: raw user id. It is compared with ``==`` against the ids
				stored in the ``user_id`` column, so it must have the same
				type; an id that never matches yields an empty list.
		"""
		# SQL query
		conn = mysql.connect()
		try:
			df = pd.read_sql_query("SELECT * FROM story_reviews", conn)
		finally:
			# Release the connection even when the query fails.
			conn.close()

		# Data and Model
		reader = Reader(rating_scale=(1, 5))
		data = Dataset.load_from_df(df[['user_id', 'story_id', 'star']], reader)
		model = SlopeOne()

		# Training
		training_set = data.build_full_trainset()
		model.fit(training_set)

		# Prediction: keep only the unseen pairs that belong to this user
		anti_training_set = training_set.build_anti_testset()
		prediction_set = [x for x in anti_training_set if x[0] == user_id]
		predictions = model.test(prediction_set)

		# Return Top N Recommendations
		n = 10
		predictions.sort(key=lambda x: x.est, reverse=True)
		story_recommendations = [p.iid for p in predictions[:n]]

		return jsonify(recommendations=story_recommendations)
예제 #6
0
def slopeOne(trainset, testset):
    """Fit SlopeOne on ``trainset`` and score it on ``testset``.

    Returns:
        tuple: ``(rmse, mae, predictions)`` as produced by
        ``accuracy.rmse``, ``accuracy.mae`` and ``algo.test``.
    """
    rule = "-" * 5
    print("\n" + rule + " SlopeOne algorithm using surprise package " + rule)
    model = SlopeOne()
    model.fit(trainset)
    predictions = model.test(testset)
    # Tuple elements are evaluated left to right: RMSE first, then MAE.
    return accuracy.rmse(predictions), accuracy.mae(predictions), predictions
예제 #7
0
 def SlopeOne(self, namefile, uid, iid, rati, value_uid, value_iid):
     """Predict one rating with SlopeOne trained on a CSV in ./container/.

     Args:
         namefile: file name appended to './container/'.
         uid, iid, rati: names of the user, item and rating columns.
         value_uid, value_iid: user and item to predict for; both are
             converted with ``float()`` before being passed to ``predict``.

     Returns:
         dict: keys "uid", "idd" (item id) and "rati" (estimate rounded to
         two decimals).
     """
     frame = pd.DataFrame(pd.read_csv('./container/' + namefile))
     # Train on every row of the file, ratings on a 0-100 scale.
     scale = Reader(rating_scale=(0, 100))
     ratings = Dataset.load_from_df(frame[[uid, iid, rati]], scale)
     model = SlopeOne()
     model.fit(ratings.build_full_trainset())
     # verbose=True makes predict() print the prediction as well.
     pred = model.predict(float(value_uid), float(value_iid),
                          r_ui=1, verbose=True)
     return {
         "uid": pred.uid,
         "idd": pred.iid,
         "rati": round(pred.est, 2),
     }
예제 #8
0
    def SlopeOne_from_to(self, namefile, uid, iid, rati, from_uid, to_uid,
                         from_iid, to_iid):
        """Predict ratings for a grid of user and item ids with SlopeOne.

        Args:
            namefile: file name appended to './container/'.
            uid, iid, rati: names of the user, item and rating columns.
            from_uid, to_uid: user ids ``range(from_uid, to_uid)`` (end
                excluded).
            from_iid, to_iid: item ids ``range(from_iid, to_iid)`` (end
                excluded).

        Returns:
            list: one ``[uid, iid, estimate rounded to 2 decimals]`` entry
            per (user, item) pair, users in the outer loop.
        """
        frame = pd.DataFrame(pd.read_csv('./container/' + namefile))
        # Train on every row of the file, ratings on a 0-100 scale.
        scale = Reader(rating_scale=(0, 100))
        ratings = Dataset.load_from_df(frame[[uid, iid, rati]], scale)
        model = SlopeOne()
        model.fit(ratings.build_full_trainset())

        rows = []
        for user in range(from_uid, to_uid):
            for item in range(from_iid, to_iid):
                # verbose=True makes predict() print every prediction.
                pred = model.predict(user, item, r_ui=1, verbose=True)
                rows.append([pred.uid, pred.iid, round(pred.est, 2)])
        return rows
def slopeone(train, test, ids, Xtest, Xids):
    """
    Fit SlopeOne, report train/test RMSE and collect its predictions.

    Argument : train, the trainset the model is fitted on
               test, the testset passed to ``algo.test``
               ids, pair of sequences: ids[0][i] is the user and ids[1][i]
                    the item of the i-th unknown rating (both passed through
                    ``str()`` before predicting)
               Xtest, list that receives the test-set estimates (appended in place)
               Xids, list that receives the estimates for ``ids`` (appended in place)
    Returns  : (rmse, Xtest, Xids, preds_test, preds_ids) where rmse is the
               test RMSE
    """

    print('SlopeOne')
    model = SlopeOne()

    # Fit on the training data.
    model.fit(train)

    # RMSE on the data the model was trained on.
    train_predictions = model.test(train.build_testset())
    print('   Training RMSE: ', accuracy.rmse(train_predictions, verbose=False))

    # RMSE on the held-out data.
    test_predictions = model.test(test)
    rmse = accuracy.rmse(test_predictions, verbose=False)
    print('   Test RMSE: ', rmse)

    preds_test = np.zeros(len(test_predictions))
    for k in range(len(test_predictions)):
        preds_test[k] = test_predictions[k].est

    # Estimates for the unknown ratings.
    preds_ids = [
        model.predict(str(ids[0][k]), str(ids[1][k])).est
        for k in range(len(ids[0]))
    ]

    Xtest.append(preds_test)
    Xids.append(preds_ids)
    return rmse, Xtest, Xids, preds_test, preds_ids
예제 #10
0
 def slope_one(self):
     """
     Fit SlopeOne on ``self.train_data`` and predict ``self.test_data``.

     When ``self.test_purpose`` is truthy the estimates are also passed to
     ``self.evalueate_model`` under the label 'Surprise slope_one'.

     Returns:
         predictions_df: copy of ``self.data.test_df`` whose 'Rating'
             column holds the estimated ratings
     """
     fitted = SlopeOne().fit(self.train_data)
     predictions = fitted.test(self.test_data)

     estimates = []
     for prediction in predictions:
         estimates.append(prediction.est)

     predictions_df = self.data.test_df.copy()
     predictions_df['Rating'] = estimates
     if not self.test_purpose:
         return predictions_df
     self.evalueate_model(predictions_df['Rating'], 'Surprise slope_one')
     return predictions_df
예제 #11
0
def fit_slopeone(trainset):
    """Log the step, fit a new SlopeOne model on ``trainset`` and return
    whatever ``fit`` returns."""
    logging.info('Recommendation System: Fit model - SlopeOne')
    return SlopeOne().fit(trainset)
from surprise import accuracy
from surprise.model_selection import KFold
import pandas as pd
import io
import pandas as pd

# Read the item (movie) name information
def read_item_names():
    """Build id <-> title lookup tables from ``./movies.csv``.

    The file must provide the columns 'movieId' and 'title'.

    Returns:
        tuple: ``(rid_to_name, name_to_rid)`` - a dict mapping each movie id
        to its title and a dict mapping each title back to its movie id.
        When an id or a title occurs more than once the last row wins.
    """
    file_name = './movies.csv'
    data = pd.read_csv(file_name)
    rid_to_name = {}
    name_to_rid = {}
    # Walk both columns in lockstep instead of indexing row by row.
    for movie_id, title in zip(data['movieId'], data['title']):
        rid_to_name[movie_id] = title
        name_to_rid[title] = movie_id

    return rid_to_name, name_to_rid

# Load the data: comma-separated "user item rating timestamp" lines,
# skipping the first line of the file
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
data = Dataset.load_from_file('./ratings.csv', reader=reader)
# Train on every rating in the file (no hold-out split)
train_set = data.build_full_trainset()


# Use the SlopeOne algorithm
algo = SlopeOne()
algo.fit(train_set)
# Predict the rating of a given user for a given item
# (the ids are passed as strings)
uid = str(196) 
iid = str(302) 
# r_ui=4 is supplied as the true rating; verbose=True prints the prediction
pred = algo.predict(uid, iid, r_ui=4, verbose=True)
예제 #13
0
# #### Run cross-validation with the best algo (in the specific case SlopeOne)

# In[14]:

# SlopeOne algorithm gave us the best rmse, therefore, we will train and predict with SlopeOne

print('Executing SlopeOne')
algo = SlopeOne()
# 3-fold cross-validation measuring RMSE. The returned results are not
# stored in a variable here.
cross_validate(algo, data, measures=['RMSE'], cv=3, verbose=False)

# #### Train and test the chosen algorithm

# In[15]:

# NOTE(review): s_train_test_split is defined outside this excerpt -
# presumably Surprise's train_test_split under an alias; confirm.
trainset, testset = s_train_test_split(data, test_size=0.25)
# Re-fit the same algo object on the new train split and predict the test split
predictions = algo.fit(trainset).test(testset)
accuracy.rmse(predictions)

# #### Get detailed results for predictions/recommendations

# In[16]:

#  inspect our predictions in details


def get_Iu(uid):
    """ return the number of items clicked by given user
    args: 
      uid: the id of the user
    returns: 
      the number of items clicked by the user
예제 #14
0
from surprise import Dataset
from surprise import accuracy
from surprise import SlopeOne
from surprise.model_selection import train_test_split

# Load the built-in MovieLens 1M dataset ('ml-1m'); per the original note its
# rows are UserID::MovieID::Rating::Timestamp
data = Dataset.load_builtin('ml-1m')
# Hold out 15% of the ratings as the test set
trainset, testset = train_test_split(data, test_size=.15)

# Configure the algorithm. SlopeOne() is created without any options here.

print("Usando o algoritmo SlopeOne")
algoritmo = SlopeOne()

algoritmo.fit(trainset)

# Pick the user and the movie to analyse.
# Original author's note: user 49 is 18-24 years old, a programmer living in
# Houston, Texas.
uid = str(49)  
# Original author's note: movie seen and rated -
# Negotiator, The (1998)::Action|Thriller, rated 4.
iid = str(2058)  # raw item id

# get a prediction for specific users and items.
print("Predição de avaliação: ")
pred = algoritmo.predict(uid, iid, r_ui=4, verbose=True)

# run the trained model against the testset
test_pred = algoritmo.test(testset)

# Avalia RMSE
# Time the fit of alg_SVD (created outside this excerpt). The measured span
# also includes building the full trainset from data_train.
start = time.time()

alg_SVD.fit(data_train.build_full_trainset())

end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)

# %% Best Hyper-parameters Training - Slope One
# SlopeOne is created with no arguments here.
alg_SL1 = SlopeOne()

# Same timing pattern as above: trainset construction plus fit.
start = time.time()

alg_SL1.fit(data_train.build_full_trainset())

end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)

# %% Best Hyper-parameters Training - KNN
sim_options = {
    'name': 'msd',
    'user_based': True  # compute  similarities between users
}
alg_KNN = KNNBasic(sim_options=sim_options)

# Start of the KNN timing; the rest of this cell is outside this excerpt.
start = time.time()
# Record the CoClustering score (its predictions are computed outside this excerpt)
listOfRMSE.append(accuracy.rmse(predictionscoClustering))
models.append('CoClustering')


# ## SlopeOne Implementation

# In[89]:


from surprise import SlopeOne
slopeOne = SlopeOne()

# Train the algorithm on the full dataset (no hold-out split)
trainsetslopeOne  = dataCoClustering.build_full_trainset()

slopeOne.fit(trainsetslopeOne)

# Predict every (user, item) pair that is NOT in the trainset.
# NOTE(review): an anti-testset carries a fill value rather than real ratings
# (the global mean by default, per the Surprise docs), so the RMSE below
# compares estimates with that fill value - confirm this is intended.
testslopeOne = trainsetslopeOne.build_anti_testset()
predictionsslopeOne = slopeOne.test(testslopeOne)

# NOTE(review): the RMSE is computed here and again in the append below.
accuracy.rmse(predictionsslopeOne)

listOfRMSE.append(accuracy.rmse(predictionsslopeOne))
models.append('SlopeOne')


# In[91]:


# Bare expression, kept from the notebook export
models
예제 #17
0
# Print up to five of the recommendations in topMovies (computed outside this
# excerpt; assumed to be a sequence of (raw movie id, ...) entries).
# Slicing instead of indexing 0..4 avoids an IndexError when fewer than five
# entries are available.
for i, movieRecc in enumerate(topMovies[:5]):
    movieRawID = movieRecc[0]
    movieName = movie[movieRawID]
    print(str(i+1) + '. ' + movieName)



#################predictions using Slope-One
print('')
print('Making more recommendations...')


algo2 = SlopeOne()
algo2.fit(trainset)

predictions2 = algo2.test(testset)
dictMovies2 = get_top_n(predictions2)
# Fall back to an empty list when user 672 has no entry, so the loop below
# prints nothing instead of failing on None.
topMovies2 = dictMovies2.get(672, [])

print('')
print('Here are the top 5 recommendations based on Slope-One! ')

for i, movieRecc2 in enumerate(topMovies2[:5]):
    movieRawID2 = movieRecc2[0]
    movieName2 = movie[movieRawID2]
    print(str(i+1) + '. ' + movieName2)
# coding=utf-8
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise import NMF
from surprise import SlopeOne
from surprise.model_selection import train_test_split
from surprise import dump

# Ratings file: comma-separated "user item rating" lines on a 1-5 scale
filePath = r'E:\library\Final_lib\collaboration\coll2.csv'
reader = Reader(line_format='user item rating', sep=',', rating_scale=(1, 5))
data = Dataset.load_from_file(filePath, reader=reader)

# Hold out 20% of the ratings as the test set
trainset, testset = train_test_split(data, test_size=0.2)
# algo = BaselineOnly()
# algo.fit(trainset)
algo = SlopeOne()
algo.fit(trainset)

# Evaluate on the held-out ratings with three metrics; the return values are
# not stored
predictions = algo.test(testset)
accuracy.mae(predictions)
accuracy.rmse(predictions)
accuracy.fcp(predictions)

# Persist both the predictions and the fitted algorithm with surprise.dump
dump.dump(r'E:\library\Final_lib\collaboration\ungrouped_algo\SlopeOne.txt',
          predictions=predictions,
          algo=algo)
예제 #19
0
'''

from surprise import accuracy
from surprise import Dataset
from surprise import SlopeOne
from surprise.model_selection import train_test_split

# Load the movielens-100k dataset
data = Dataset.load_builtin('ml-100k')

# Split into training and test sets (15% held out for testing)
train, test = train_test_split(data, test_size=.15)

# SlopeOne algorithm
slope = SlopeOne()
slope.fit(train)

# Predict the rating of user 222 for movie 750
uid = str(222)
iid = str(750)
pred = slope.predict(uid, iid, r_ui=5, verbose=True)
# ###### Output recorded by the original author ######
# user: 222
# item: 750
# r_ui = 5.00
# est = 3.97
# {'was_impossible': False}

# i.e. the predicted rating of user 222 for movie 750 was 3.97 in that run

# Run the fitted model against the whole test set
test_pred = slope.test(test)