Ejemplo n.º 1
0
def slope_one(trainset, testset, predset):
    """Train a SlopeOne model, report its RMSE and save its predictions.

    Returns immediately when ``is_already_predicted('slopeone')`` is true.
    Otherwise the model is fitted on ``trainset``, its RMSE is printed for
    the training ratings and for ``testset``, and the estimated ratings for
    ``testset`` and ``predset`` are passed to ``save_predictions``.

    Args:
        trainset: Surprise trainset the model is fitted on.
        testset: Ratings accepted by ``algo.test``; used for the reported
            test RMSE and saved under the ``'test'`` tag.
        predset: Ratings accepted by ``algo.test`` whose estimates are saved
            as the final predictions.

    Returns:
        None.
    """
    modelname = 'slopeone'
    # Check if predictions already exist
    if is_already_predicted(modelname):
        return

    algo = SlopeOne()
    print('SlopeOne Model')
    # ``fit`` replaces the deprecated (and later removed) ``train`` method.
    algo.fit(trainset)

    predictions = algo.test(trainset.build_testset())
    print('   RMSE on Train: ', accuracy.rmse(predictions, verbose=False))

    predictions = algo.test(testset)
    rmse = accuracy.rmse(predictions, verbose=False)
    print('   RMSE on Test: ', rmse)
    preds = np.array([pred.est for pred in predictions], dtype=float)
    save_predictions(modelname, rmse, preds, 'test')

    print('  Evaluate predicted ratings...')
    predictions = algo.test(predset)
    preds = np.array([pred.est for pred in predictions], dtype=float)
    # The test RMSE is passed along with the final predictions as well.
    save_predictions(modelname, rmse, preds)
Ejemplo n.º 2
0
	def get(self, user_id):
		"""Return the top 10 story recommendations for ``user_id`` as JSON.

		Loads every row of ``story_reviews``, fits a SlopeOne model on the
		``user_id`` / ``story_id`` / ``star`` columns, scores the stories the
		user has not reviewed yet and returns the ids of the ten highest
		estimates.

		Args:
			user_id: Id compared with ``==`` against the ``user_id`` values
				of the review table.
				NOTE(review): presumably the same type as that column;
				verify against the route definition.

		Returns:
			The ``jsonify`` response with a ``recommendations`` list of
			story ids, best estimate first.
		"""
		# SQL query
		conn = mysql.connect()
		try:
			df = pd.read_sql_query("SELECT * FROM story_reviews", conn)
		finally:
			# Release the connection even when the query fails.
			conn.close()

		# Data and Model
		reader = Reader(rating_scale=(1, 5))
		data = Dataset.load_from_df(df[['user_id', 'story_id', 'star']], reader)
		model = SlopeOne()

		# Training
		training_set = data.build_full_trainset()
		model.fit(training_set)

		# Prediction: keep only the (user, item) pairs of the requested user
		anti_training_set = training_set.build_anti_testset()
		prediction_set = [x for x in anti_training_set if x[0] == user_id]
		predictions = model.test(prediction_set)

		# Return Top N Recommendations
		n = 10
		predictions.sort(key=lambda x: x.est, reverse=True)
		story_recommendations = [p.iid for p in predictions[:n]]

		return jsonify(recommendations=story_recommendations)
Ejemplo n.º 3
0
def surprise_slopeOne(train_file, test_file):
    """
    SlopeOne with Surprise library.
    Compute the predictions on a test_set after training on a train_set using the method SlopeOne from Surprise.
    Args:
        train_file (string): path to created train file
        test_file (string): path to created test file
    Hyperparameters:
        -
    Returns:
        numpy array: estimated ratings, one per test rating (empty if the
        fold iterator yields nothing)
    """
    print("slopeone")
    algo = SlopeOne()
    fold = [(train_file, test_file)]
    reader = Reader(line_format='user item rating', sep=',')
    data = Dataset.load_from_folds(fold, reader=reader)
    pkf = PredefinedKFold()

    # Bound up front so the return below never hits an undefined name.
    predictions = []
    for trainset, testset in pkf.split(data):
        # Train
        algo.fit(trainset)

        # Predict
        predictions = algo.test(testset)

    return np.array([p.est for p in predictions], dtype=float)
Ejemplo n.º 4
0
def slope_one(test, train, all):
    """Fit SlopeOne on ``train`` and predict the ratings listed in ``test``.

    Args:
        test: DataFrame with ``userId``, ``movieId`` and ``rating`` columns;
            its (user, movie) pairs are the ones that get predicted.
        train: DataFrame with the same three columns, used for fitting.
        all: Not used by this function.

    Returns:
        A tuple ``(out, timings)`` where ``out`` is a DataFrame with the
        columns ``userId``, ``movieId`` and ``rating`` (the estimate), and
        ``timings`` is ``[fit_time, predict_time, overall]`` in seconds.
        ``fit_time`` also covers building the datasets.
    """
    t_start = time.time()
    rating_columns = ['userId', 'movieId', 'rating']
    reader = Reader(rating_scale=(0.5, 5))
    train_data = Dataset.load_from_df(train[rating_columns], reader)
    test_data = Dataset.load_from_df(test[rating_columns], reader)
    trainset = train_data.build_full_trainset()
    # Round-trip through a trainset to obtain (uid, iid, rating) tuples.
    testset = test_data.build_full_trainset().build_testset()

    model = SlopeOne()
    model.fit(trainset)
    t_fit = time.time()
    fit_time = t_fit - t_start

    predictions = model.test(testset)
    out = pd.DataFrame.from_dict({
        'userId': [p.uid for p in predictions],
        'movieId': [p.iid for p in predictions],
        'rating': [p.est for p in predictions],
    })
    predict_time = time.time() - t_fit
    # Equals the wall-clock time since ``t_start``.
    overall = predict_time + t_fit - t_start
    return out, [fit_time, predict_time, overall]
def SlopeOne_alg():
    """Fit SlopeOne on the module-level ``trainset`` and print its RMSE.

    Reads the globals ``trainset`` and ``testset``; returns None.
    """
    print('Using SlopeOne')
    model = SlopeOne()
    print(model)
    model.fit(trainset)
    rmse = accuracy.rmse(model.test(testset))
    print(rmse)
Ejemplo n.º 6
0
def slopeOne(trainset, testset):
    """Fit SlopeOne on ``trainset`` and evaluate it on ``testset``.

    Returns:
        A tuple ``(rmse, mae, predictions)`` where ``predictions`` is the
        list returned by ``algo.test(testset)``.
    """
    rule = "-" * 5
    print("\n" + rule + " SlopeOne algorithm using surprise package " + rule)

    model = SlopeOne()
    model.fit(trainset)
    predictions = model.test(testset)

    # RMSE is evaluated before MAE, as both calls print their score.
    return accuracy.rmse(predictions), accuracy.mae(predictions), predictions
def slopeone(train, test, ids, Xtest, Xids):
    """Fit SlopeOne, report train/test RMSE and predict the unknown ratings.

    Args:
        train: Trainset the model is fitted on.
        test: Testset used for the reported test RMSE.
        ids: Indexable pair; ``ids[0][i]`` and ``ids[1][i]`` are converted
            to ``str`` and passed to ``algo.predict`` as user and item.
        Xtest: List that receives the test-set estimates (appended in place).
        Xids: List that receives the estimates for ``ids`` (appended in
            place).

    Returns:
        ``(rmse, Xtest, Xids, preds_test, preds_ids)`` where ``rmse`` is the
        test RMSE, ``preds_test`` a numpy array and ``preds_ids`` a list.
    """
    print('SlopeOne')
    model = SlopeOne()

    # Fit on the training ratings
    model.fit(train)

    # RMSE on the ratings the model was trained on
    train_predictions = model.test(train.build_testset())
    print('   Training RMSE: ',
          accuracy.rmse(train_predictions, verbose=False))

    # RMSE on the held-out ratings
    test_predictions = model.test(test)
    rmse = accuracy.rmse(test_predictions, verbose=False)
    print('   Test RMSE: ', rmse)

    preds_test = np.fromiter(
        (p.est for p in test_predictions),
        dtype=float,
        count=len(test_predictions),
    )

    # Estimates for the unknown ratings, looked up by position
    n_unknown = len(ids[0])
    preds_ids = [
        model.predict(str(ids[0][k]), str(ids[1][k])).est
        for k in range(n_unknown)
    ]

    Xtest.append(preds_test)
    Xids.append(preds_ids)
    return rmse, Xtest, Xids, preds_test, preds_ids
# A reader is still needed but only the rating_scale param is required.
reader = Reader(rating_scale=(0.5, 5))

# The columns must correspond to user id, item id and ratings (in that order).
rating_train2 = Dataset.load_from_df(
    rating_train[['userID', 'itemID', 'rating']], reader)
rating_test2 = Dataset.load_from_df(
    rating_test[['userID', 'itemID', 'rating']], reader)

trainset = rating_train2.build_full_trainset()
# Round-trip through a trainset to get (uid, iid, rating) tuples for testing.
testset = rating_test2.build_full_trainset().build_testset()

# SlopeOne Model
count = 1

start = dt.datetime.today()
print("================================================")
algo = SlopeOne()

# ``fit`` replaces the deprecated (and later removed) ``train`` method.
algo.fit(trainset)
predictions = algo.test(testset)

# Each call prints its own score because verbose=True.
accuracy.rmse(predictions, verbose=True)
accuracy.fcp(predictions, verbose=True)
accuracy.mae(predictions, verbose=True)
count = count + 1
end = dt.datetime.today()
print("Runtime: " + str(end - start))
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from surprise import Dataset
from surprise import SlopeOne
from surprise import accuracy

from surprise.model_selection import KFold

data = Dataset.load_builtin('ml-100k')

algo = SlopeOne()

trainset = data.build_full_trainset()
# ``fit`` replaces the deprecated (and later removed) ``train`` method.
algo.fit(trainset)

# Evaluate on the very ratings the model was trained on.
testset = trainset.build_testset()
predictions = algo.test(testset)
# RMSE should be low as we are biased
accuracy.rmse(predictions, verbose=True)  # ~ 0.68 (which is low)

# We can also do this during a cross-validation procedure!
print('CV procedure:')

# ``Dataset.split`` / ``Dataset.folds`` were superseded by the
# ``surprise.model_selection`` iterators; KFold yields (trainset, testset)
# pairs directly.
kf = KFold(n_splits=3)
for i, (trainset_cv, testset_cv) in enumerate(kf.split(data)):
    print('fold number', i + 1)
    algo.fit(trainset_cv)

    print('On testset,', end='  ')
    predictions = algo.test(testset_cv)
    accuracy.rmse(predictions, verbose=True)
Ejemplo n.º 10
0
# Load the built-in MovieLens 'ml-1m' dataset (UserID::MovieID::Rating::Timestamp)
# and hold out 15% of the ratings for testing.
data = Dataset.load_builtin('ml-1m')
trainset, testset = train_test_split(data, test_size=.15)

# Set up the algorithm. SlopeOne is created with no arguments here.

print("Usando o algoritmo SlopeOne")
algoritmo = SlopeOne()

algoritmo.fit(trainset)

# Select the user and the movie to analyse.
# User 49 (per the original author's note: aged 18-24, programmer, lives in Houston, Texas)
uid = str(49)  
# Movie seen and rated (per the original author's note): Negotiator, The (1998)::Action|Thriller. Rating 4
iid = str(2058)  # raw item id

# Get a prediction for this specific user and item; r_ui is the known rating.
print("Predição de avaliação: ")
pred = algoritmo.predict(uid, iid, r_ui=4, verbose=True)

# Run the trained model against the testset
test_pred = algoritmo.test(testset)

# Evaluate RMSE (printed by the call itself because verbose=True)
print("Avaliação RMSE: ")
accuracy.rmse(test_pred, verbose=True)

# Evaluate MAE (printed by the call itself because verbose=True)
print("Avaliação MAE: ")
accuracy.mae(test_pred, verbose=True)
Ejemplo n.º 11
0
    movieRecc = topMovies[i]
    movieRawID = movieRecc[0]
    movieName = movie[movieRawID]
    print(str(i+1) + '. ' + movieName )



#################predictions using Slope-One
print('')
print('Making more recommendations...')


algo2 = SlopeOne()
algo2.fit(trainset)

predictions2 = algo2.test(testset)
dictMovies2 = get_top_n(predictions2)
# Fall back to an empty list so a user without any prediction in the testset
# does not make the loop below fail on ``None``.
topMovies2 = dictMovies2.get(672, [])

print('')
print('Here are the top 5 recommendations based on Slope-One! ')

# Slice rather than index with range(5): the user may have fewer than 5 entries.
for i, movieRecc2 in enumerate(topMovies2[:5]):
    # The first element of each entry is used as the key into ``movie``.
    movieRawID2 = movieRecc2[0]
    movieName2 = movie[movieRawID2]
    print(str(i+1) + '. ' + movieName2 )


#############predictions using Co-Clustering
# ## SlopeOne Implementation

# In[89]:


from surprise import SlopeOne
slopeOne = SlopeOne()

# Train the algorithm on the full trainset, then predict every (user, item)
# pair that is absent from it
trainsetslopeOne  = dataCoClustering.build_full_trainset()

slopeOne.fit(trainsetslopeOne)

testslopeOne = trainsetslopeOne.build_anti_testset()
predictionsslopeOne = slopeOne.test(testslopeOne)

# Evaluate once: accuracy.rmse prints the score and returns it, so a separate
# bare call would only repeat the computation over the whole anti-testset.
# NOTE(review): the anti-testset carries no observed ratings (Surprise fills
# them with the global mean by default), so this RMSE measures the distance
# to that fill value, not to real ratings -- confirm this is the intended metric.
listOfRMSE.append(accuracy.rmse(predictionsslopeOne))
models.append('SlopeOne')


# In[91]:


models


# In[105]:
Ejemplo n.º 13
0
# Load the built-in MovieLens 'ml-100k' dataset
data = Dataset.load_builtin('ml-100k')

# Split into train and test sets (15% of the ratings held out for testing)
train, test = train_test_split(data, test_size=.15)

# SlopeOne algorithm
slope = SlopeOne()
slope.fit(train)

# Predict the rating of user 222 for movie 750 (r_ui is the known rating)
uid = str(222)
iid = str(750)
pred = slope.predict(uid, iid, r_ui=5, verbose=True)
# ###### Output (as recorded by the original author) ######
# user: 222
# item: 750
# r_ui = 5.00
# est = 3.97
# {'was_impossible': False}

# The predicted rating of user 222 for movie 750 is 3.97

test_pred = slope.test(test)

# RMSE and MAE (each accuracy call also prints its score because verbose=True)
print("RMSE: " + str(accuracy.rmse(test_pred, verbose=True)))
print("MAE: " + str(accuracy.mae(test_pred, verbose=True)))

# ###### Output (as recorded by the original author) ######
# RMSE: 0.9517
# MAE: 0.7460