Ejemplo n.º 1
0
def slope_one(trainset, testset, predset):
    """Train a SlopeOne model, report its RMSE and store its predictions.

    Returns immediately when ``is_already_predicted`` reports that
    predictions for this model already exist. Otherwise the model is trained
    on ``trainset``, its RMSE is printed for the training data and for
    ``testset``, and the estimated ratings for ``testset`` and ``predset``
    are passed to ``save_predictions``.

    Args:
        trainset: Trainset the model is trained on; it is also converted to
            a testset to measure the training error.
        testset: Testset used for the reported (and saved) RMSE.
        predset: Testset whose estimated ratings are saved as the final
            predictions.
    """
    modelname = 'slopeone'

    # Nothing to do when this model's predictions were produced earlier.
    if is_already_predicted(modelname):
        return

    model = SlopeOne()
    print('SlopeOne Model')
    model.train(trainset)

    # Error on the data the model has already seen.
    train_predictions = model.test(trainset.build_testset())
    print('   RMSE on Train: ',
          accuracy.rmse(train_predictions, verbose=False))

    # Error on held-out data; this RMSE is stored with both prediction sets.
    test_predictions = model.test(testset)
    rmse = accuracy.rmse(test_predictions, verbose=False)
    print('   RMSE on Test: ', rmse)
    test_estimates = np.array([p.est for p in test_predictions], dtype=float)
    save_predictions(modelname, rmse, test_estimates, 'test')

    print('  Evaluate predicted ratings...')
    final_estimates = np.array(
        [p.est for p in model.test(predset)], dtype=float)
    save_predictions(modelname, rmse, final_estimates)
# Loading from a DataFrame still needs a Reader, but only its rating_scale
# parameter is required.
reader = Reader(rating_scale=(0.5, 5))

# Column order matters: user id, item id, rating.
rating_train2 = Dataset.load_from_df(
    rating_train[['userID', 'itemID', 'rating']],
    reader,
)
rating_test2 = Dataset.load_from_df(
    rating_test[['userID', 'itemID', 'rating']],
    reader,
)

# The test ratings are first wrapped in a full trainset and then converted
# to the testset form that algo.test() is given below.
trainset = rating_train2.build_full_trainset()
testset = rating_test2.build_full_trainset().build_testset()

# SlopeOne model.
# NOTE(review): `count` is only initialised and incremented in this section;
# it looks like a leftover from a parameter sweep -- confirm before removing.
count = 1

start = dt.datetime.today()
print("================================================")
algo = SlopeOne()

algo.train(trainset)
predictions = algo.test(testset)

# Report RMSE, FCP and MAE for the test predictions.
accuracy.rmse(predictions, verbose=True)
accuracy.fcp(predictions, verbose=True)
accuracy.mae(predictions, verbose=True)
count += 1
end = dt.datetime.today()
print("Runtime: %s" % (end - start))
trainset.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from surprise import Dataset
from surprise import SlopeOne
from surprise import accuracy

# Load the dataset registered in Surprise under the name 'ml-100k'.
data = Dataset.load_builtin('ml-100k')

algo = SlopeOne()

# Train on every available rating ...
trainset = data.build_full_trainset()
algo.train(trainset)

# ... and evaluate on those very same ratings.
testset = trainset.build_testset()
predictions = algo.test(testset)
# The RMSE should be low because the model is evaluated on its training data.
accuracy.rmse(predictions, verbose=True)  # ~ 0.68 (which is low)

# The same can be done inside a cross-validation procedure.
print('CV procedure:')

# NOTE(review): presumably splits the data into 3 folds -- confirm against
# the installed Surprise version.
data.split(3)
for i, (trainset_cv, testset_cv) in enumerate(data.folds()):
    print('fold number', i + 1)
    algo.train(trainset_cv)

    # NOTE(review): the loop body appears to be cut off after this print;
    # testset_cv is never used in the visible code.
    print('On testset,', end='  ')
Ejemplo n.º 4
0
def _top_n_row(user_id, values, prefix, n=10):
    """Build a one-row frame with columns ``id, <prefix>_1 .. <prefix>_n``.

    ``values`` is cut to its first ``n`` entries and right-padded with
    ``None`` when it is shorter, so a user with fewer than ``n`` candidates
    still yields a well-formed row instead of a column-length mismatch.
    """
    values = list(values)[:n]
    values += [None] * (n - len(values))
    columns = ['{}_{}'.format(prefix, rank) for rank in range(1, n + 1)]
    row = pd.DataFrame([values], columns=columns)
    row.insert(0, 'id', int(user_id))
    return row


def compute_recommendations(user_id, prediction_table,
                            numeric_prediction_table):
    """Train SlopeOne on all known ratings and store one user's predictions.

    Ratings are read from the ``ratings`` database table and from
    ``data/ratings.csv`` (whose ``movie_id`` column is renamed to
    ``item_id``), concatenated and used to train a SlopeOne model. A rating
    is then estimated for every id in the ``items`` table that ``user_id``
    has no rating for, and the estimates are sorted from highest to lowest.

    Three appends are made to the database:

    * ``prediction_table``: one row ``id, pred_1 .. pred_10`` holding the
      item ids of the ten highest estimates.
    * ``numeric_predictions``: up to twenty rows ``user_id, item_id,
      predicted_rating, algorithm`` for the highest estimates.
    * ``numeric_prediction_table``: one row ``id, num_1 .. num_10`` holding
      the ten highest estimated ratings.

    When fewer than ten items are available, the missing ``pred_*`` /
    ``num_*`` cells are written as NULL.

    Args:
        user_id: User to predict for; must be convertible with ``int()``.
        prediction_table: Name of the table receiving the top-10 item ids.
        numeric_prediction_table: Name of the table receiving the top-10
            estimated ratings.
    """
    algo = 'SlopeOne'
    algorithm = SlopeOne()

    engine = create_engine(config.DB_URI, echo=True)
    session = scoped_session(
        sessionmaker(bind=engine, autocommit=False, autoflush=False))

    try:
        rating_columns = ['user_id', 'item_id', 'rating']

        # Ratings stored in the database.
        df_db = pd.read_sql('SELECT * FROM ratings;', con=engine)
        df_db = df_db[rating_columns].dropna().drop_duplicates()

        # Ratings shipped as a CSV file; it names the item column 'movie_id'.
        df_csv = pd.read_csv('data/ratings.csv', low_memory=False)
        df_csv = df_csv.rename(columns={'movie_id': 'item_id'})
        df_csv = df_csv[rating_columns].dropna().drop_duplicates()

        df_ratings = pd.concat([df_db, df_csv], axis=0)

        reader = Reader(line_format='user item rating',
                        sep=',',
                        rating_scale=(1, 10))
        data = Dataset.load_from_df(df_ratings, reader=reader)

        # NOTE(review): newer Surprise releases expose fit() in place of
        # train() -- confirm against the installed version.
        algorithm.train(data.build_full_trainset())

        # Candidates are all known items the user has no rating for. A set
        # keeps the membership test constant-time per item.
        items = pd.read_sql('SELECT distinct id FROM items;', con=engine)
        rated_items = set(
            df_ratings.loc[df_ratings['user_id'] == user_id, 'item_id'])
        prediction_items = [
            item for item in items.id.unique() if item not in rated_items
        ]

        predicted_ratings = [
            algorithm.predict(user_id, item).est for item in prediction_items
        ]

        ranked = pd.DataFrame({
            'item_id': prediction_items,
            'prediction': predicted_ratings,
        })
        ranked.insert(0, 'user_id', user_id)
        ranked = ranked.sort_values('prediction', ascending=False)
        top_ten = ranked.head(n=10)

        # NOTE(review): to_sql is handed the engine, not this session, so
        # the commits below look redundant -- confirm before removing them.

        # Wide row with the ids of the ten best items.
        df_pred = _top_n_row(user_id, top_ten['item_id'], 'pred')
        df_pred.to_sql(prediction_table, engine, if_exists='append',
                       index=False)
        session.commit()

        # Long format: the twenty best estimates tagged with the algorithm.
        # rename()/assign() return new frames, so the head() slice is never
        # modified in place.
        df_num_ratings = (
            ranked.head(n=20)
            .rename(columns={'prediction': 'predicted_rating'})
            .assign(algorithm=algo)
        )
        df_num_ratings.to_sql('numeric_predictions',
                              engine,
                              if_exists='append',
                              index=False)
        session.commit()

        # Wide row with the ten best estimated ratings.
        df_num_ratings_transpose = _top_n_row(
            user_id, top_ten['prediction'], 'num')
        df_num_ratings_transpose.to_sql(numeric_prediction_table,
                                        engine,
                                        if_exists='append',
                                        index=False)
        session.commit()
    finally:
        # Release the scoped session and the engine's connections even
        # when reading, training or writing fails.
        session.remove()
        engine.dispose()