Example No. 1
def computeSlopeOne(data, test_np):
    """Compute the slope one method and return the predictions on the test
     The method has no parameter.
         
         data : data frame which represent the train set
         test_np : data frame on which the prediction will be returned
         
         return : test_np with a column of prediction named 'slopeone_rating'"""
    
    trainset, test = dataTrainSurprise(data, test_np)
    
    slopeone = SlopeOne().fit(trainset)
    
    # predict() returns a Prediction namedtuple; index 3 is the estimated rating (.est)
    test['slopeone_rating'] = test[['user_id', 'movie_id']] \
        .apply(lambda row: slopeone.predict(row['user_id'], row['movie_id'])[3], axis=1)
    
    return test
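A minimal usage sketch for computeSlopeOne (not part of the original source): it assumes that dataTrainSurprise turns the train DataFrame into a Surprise trainset/testset pair and that the frames use the 'user_id', 'movie_id' and 'rating' columns referenced above.

import pandas as pd

# Hypothetical input frames; the column names follow those used in computeSlopeOne.
train_df = pd.DataFrame({
    'user_id':  [1, 1, 2, 2, 3],
    'movie_id': [10, 20, 10, 30, 20],
    'rating':   [4.0, 3.0, 5.0, 2.0, 4.5],
})
test_df = pd.DataFrame({'user_id': [3], 'movie_id': [10]})

predicted = computeSlopeOne(train_df, test_df)
print(predicted[['user_id', 'movie_id', 'slopeone_rating']])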
Example No. 2
    def SlopeOne(self, namefile, uid, iid, rati, value_uid, value_iid):
        test_data = pd.read_csv('./container/' + namefile)
        dt = pd.DataFrame(test_data)
        # Retrieve the trainset.
        reader = Reader(rating_scale=(0, 100))
        data = Dataset.load_from_df(dt[[uid, iid, rati]], reader)
        trainset = data.build_full_trainset()
        algo = SlopeOne()
        algo.fit(trainset)
        pred = algo.predict(float(value_uid),
                            float(value_iid),
                            r_ui=1,
                            verbose=True)
        #var_rmse = accuracy.rmse(pred)
        # Return the result as a JSON-serializable dict
        jsondata = {}
        jsondata["uid"] = pred.uid
        jsondata["iid"] = pred.iid
        jsondata["rati"] = round(pred.est, 2)
        return jsondata
Example No. 3
    def SlopeOne_from_to(self, namefile, uid, iid, rati, from_uid, to_uid,
                         from_iid, to_iid):
        test_data = pd.read_csv('./container/' + namefile)
        dt = pd.DataFrame(test_data)
        # Retrieve the trainset.
        reader = Reader(rating_scale=(0, 100))
        data = Dataset.load_from_df(dt[[uid, iid, rati]], reader)
        trainset = data.build_full_trainset()
        algo = SlopeOne()
        algo.fit(trainset)

        arr = []
        for value_uid in range(from_uid, to_uid):
            for value_iid in range(from_iid, to_iid):
                pred = algo.predict(value_uid, value_iid, r_ui=1, verbose=True)
                tempdata = []
                tempdata.append(pred.uid)
                tempdata.append(pred.iid)
                tempdata.append(round(pred.est, 2))
                arr.append(tempdata)
        # Return the results as a list of [uid, iid, estimated rating] rows
        return arr
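A hedged sketch of how the two service methods above might be called; the wrapping class name (RecommenderService) and the CSV column names are illustrative assumptions, not part of the original code. uid/iid/rati name the rating columns inside the CSV, while value_uid/value_iid and the from/to bounds are the raw ids to predict for.

# Hypothetical call site; class name, file name and column names are assumptions.
svc = RecommenderService()

single = svc.SlopeOne(namefile='ratings.csv',
                      uid='userId', iid='movieId', rati='rating',
                      value_uid=196, value_iid=302)
print(single)   # {'uid': ..., 'iid': ..., 'rati': ...}

batch = svc.SlopeOne_from_to(namefile='ratings.csv',
                             uid='userId', iid='movieId', rati='rating',
                             from_uid=1, to_uid=4,
                             from_iid=1, to_iid=4)
print(batch)    # list of [uid, iid, estimated rating] rows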
def slopeone(train, test, ids, Xtest, Xids):
    """
    Item based algorithm, reduces overfitting
    Argument : train, the trainset
               test, the testset
               ids, unknown ratings
               Xtest, predicted ratings for testset, to be used for final blending
               Xids, predicted ratings for unknown ratings, to be used for final blending
    """

    print('SlopeOne')
    algo = SlopeOne()

    #Train algorithm on training set
    algo.fit(train)

    #Predict on train and compute RMSE
    predictions = algo.test(train.build_testset())
    print('   Training RMSE: ', accuracy.rmse(predictions, verbose=False))

    #Predict on test and compute RMSE
    predictions = algo.test(test)
    rmse = accuracy.rmse(predictions, verbose=False)
    print('   Test RMSE: ', rmse)

    preds_test = np.zeros(len(predictions))
    for j, pred in enumerate(predictions):
        preds_test[j] = pred.est

    #Predict unknown ratings
    preds_ids = []
    for i in range(len(ids[0])):
        pred = algo.predict(str(ids[0][i]), str(ids[1][i]))
        preds_ids.append(pred.est)

    Xtest.append(preds_test)
    Xids.append(preds_ids)
    return rmse, Xtest, Xids, preds_test, preds_ids
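Xtest and Xids accumulate one prediction vector per model so that several algorithms can be blended afterwards. A minimal blending sketch under that assumption, using least-squares weights; blend, y_test and the other names are illustrative and not part of the original code.

import numpy as np

def blend(Xtest, y_test, Xids):
    """Hypothetical blending step: fit least-squares weights over the per-model
    prediction vectors collected in Xtest, then apply them to the Xids predictions."""
    X = np.column_stack(Xtest)                 # shape (n_test_ratings, n_models)
    weights, *_ = np.linalg.lstsq(X, y_test, rcond=None)
    return np.column_stack(Xids) @ weights     # blended predictions for the unknown ratings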
from surprise import Dataset, Reader, SlopeOne
from surprise.model_selection import KFold
import io
import pandas as pd

# Read the item (movie) title information
def read_item_names():
    file_name = './movies.csv'
    data = pd.read_csv(file_name)
    rid_to_name = {}
    name_to_rid = {}
    for i in range(len(data['movieId'])):
        rid_to_name[data['movieId'][i]] = data['title'][i]
        name_to_rid[data['title'][i]] = data['movieId'][i]

    return rid_to_name, name_to_rid 

# Load the ratings data
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
data = Dataset.load_from_file('./ratings.csv', reader=reader)
train_set = data.build_full_trainset()


# Use the SlopeOne algorithm
algo = SlopeOne()
algo.fit(train_set)
# Predict the rating of a given user for a given item
uid = str(196) 
iid = str(302) 
pred = algo.predict(uid, iid, r_ui=4, verbose=True)
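read_item_names is defined above but never called in this snippet; a small follow-up sketch (assuming movies.csv stores integer movieId values) shows how its mapping could turn the predicted raw item id back into a title.

# Look up the movie title for the predicted item; raw ids are strings here,
# so the id is cast back to int before the dictionary lookup.
rid_to_name, name_to_rid = read_item_names()
print('Predicted rating %.2f for "%s"' % (pred.est, rid_to_name[int(iid)]))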
# Per-model prediction lists for the test data
Pred_Test_KNN = []
Pred_Test_SVD = []
Pred_Test_NMF = []
Pred_Test_SL1 = []
Pred_Test_BSL = []

start = time.time()
for line in data_test:
    Pred_Test_KNN.append(
        alg_KNN.predict(str(line[1]), str(line[0]), clip=False).est)

    Pred_Test_SVD.append(
        alg_SVD.predict(str(line[1]), str(line[0]), clip=False).est)

    Pred_Test_NMF.append(
        alg_NMF.predict(str(line[1]), str(line[0]), clip=False).est)

    Pred_Test_SL1.append(
        alg_SL1.predict(str(line[1]), str(line[0]), clip=False).est)

    Pred_Test_BSL.append(
        alg_BSL.predict(str(line[1]), str(line[0]), clip=False).est)

end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)

X_Test = np.matrix([
    Pred_Test_SVD, Pred_Test_NMF, Pred_Test_SL1, Pred_Test_KNN, Pred_Test_BSL
])
X_Test = X_Test.T

# %% Prior Based
Example No. 7
def compute_recommendations(user_id, prediction_table,
                            numeric_prediction_table):

    algo = 'SlopeOne'

    algorithm = SlopeOne()

    # add_pageview(user_id=user_id, item_id=None, page="Model Predictions", activity_type="Initialize Predictions - " + algo, rating=None) #pageview

    engine = create_engine(config.DB_URI, echo=True)
    session = scoped_session(
        sessionmaker(bind=engine, autocommit=False, autoflush=False))

    #reading in the database

    df_ratings = pd.read_sql('SELECT * FROM ratings;', con=engine)
    df_ratings = df_ratings[['user_id', 'item_id', 'rating']]
    df_ratings = df_ratings.dropna()
    df_ratings = df_ratings.drop_duplicates()

    df_ratings2 = pd.read_csv('data/ratings.csv', low_memory=False)
    df_ratings2 = df_ratings2.rename(columns={'movie_id': 'item_id'})
    df_ratings2 = df_ratings2[['user_id', 'item_id', 'rating']]
    df_ratings2 = df_ratings2.dropna()
    df_ratings2 = df_ratings2.drop_duplicates()

    df_ratings = pd.concat([df_ratings, df_ratings2], axis=0)

    reader = Reader(line_format='user item rating',
                    sep=',',
                    rating_scale=(1, 10))
    data = Dataset.load_from_df(df_ratings, reader=reader)

    trainset = data.build_full_trainset()

    #     algorithm = eval(algo + "()")# set the algorithm...............................................

    algorithm.fit(trainset)

    items = pd.read_sql('SELECT distinct id FROM items;', con=engine)
    df_user_items = df_ratings.loc[df_ratings['user_id'] == user_id]
    total_items = items.id.unique()
    user_items = df_user_items.item_id.unique()
    # user_id = str(user_id)
    prediction_items = [x for x in total_items if x not in user_items]

    predictions = pd.DataFrame(columns=['user_id', 'item_id', 'prediction'])

    predicted_ratings = []

    for item_id in prediction_items:
        pred = algorithm.predict(user_id, item_id)
        predicted_ratings.append(pred.est)

    predictions['item_id'] = prediction_items
    predictions['user_id'] = pd.Series(
        [user_id for x in range(len(predictions.index))],
        index=predictions.index)

    predictions['prediction'] = predicted_ratings

    predictions = predictions.sort_values('prediction', ascending=False)
    test_prediction = predictions
    predictions = predictions.head(n=10)

    cols = [
        'pred_1', 'pred_2', 'pred_3', 'pred_4', 'pred_5', 'pred_6', 'pred_7',
        'pred_8', 'pred_9', 'pred_10'
    ]

    df_pred = predictions[['item_id']].T

    df_pred.columns = cols

    df_pred['id'] = user_id

    df_pred = df_pred[[
        'id', 'pred_1', 'pred_2', 'pred_3', 'pred_4', 'pred_5', 'pred_6',
        'pred_7', 'pred_8', 'pred_9', 'pred_10'
    ]]

    df_pred['id'] = df_pred['id'].astype(int)

    df_pred.to_sql(prediction_table, engine, if_exists='append',
                   index=False)  #if_exists='append'
    session.commit()

    df_num_ratings = test_prediction

    df_num_ratings = df_num_ratings.head(n=20)

    df_num_ratings['algorithm'] = algo
    df_num_ratings.rename(columns={'prediction': 'predicted_rating'},
                          inplace=True)

    df_num_ratings.to_sql('numeric_predictions',
                          engine,
                          if_exists='append',
                          index=False)  #if_exists='append'
    session.commit()

    predcols = [
        'num_1', 'num_2', 'num_3', 'num_4', 'num_5', 'num_6', 'num_7', 'num_8',
        'num_9', 'num_10'
    ]

    df_num_ratings_transpose = predictions[['prediction']].T
    df_num_ratings_transpose.columns = predcols

    df_num_ratings_transpose['id'] = user_id

    df_num_ratings_transpose = df_num_ratings_transpose[[
        'id', 'num_1', 'num_2', 'num_3', 'num_4', 'num_5', 'num_6', 'num_7',
        'num_8', 'num_9', 'num_10'
    ]]

    df_num_ratings_transpose['id'] = df_num_ratings_transpose['id'].astype(int)

    df_num_ratings_transpose.to_sql(numeric_prediction_table,
                                    engine,
                                    if_exists='append',
                                    index=False)  #if_exists='append'
    session.commit()
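A hedged example call for compute_recommendations; the user id and the two destination table names below are placeholders, since the function derives everything else from the database and data/ratings.csv.

# Hypothetical invocation; the table names are placeholders for existing SQL tables.
compute_recommendations(user_id=42,
                        prediction_table='predictions',
                        numeric_prediction_table='numeric_predictions_wide')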
Example No. 8
from surprise import SlopeOne, BaselineOnly, Reader, Dataset
import os
from surprise.model_selection import cross_validate

file_path = os.path.expanduser('um/separated/probe_training_data.dta')

reader = Reader(line_format='user item timestamp rating', sep='\t')

data = Dataset.load_from_file(file_path, reader=reader)

algo = SlopeOne()

trainset = data.build_full_trainset()
algo.fit(trainset)

#userid = 1
#itemid = 3912

# Predict every (user, item) pair; raw ids loaded from file are strings,
# so the integer loop indices are cast with str() before predicting.
output = open("um/output/slopeone.dta", "w")
for u in range(1, 458294):
    for i in range(1, 17771):
        pred = algo.predict(uid=str(u), iid=str(i), verbose=True)
        output.write(str(pred) + "\n")
output.close()
Example No. 9
from surprise import Dataset, SlopeOne, accuracy
from surprise.model_selection import train_test_split

# Load the movielens-100k dataset
data = Dataset.load_builtin('ml-100k')

# Split into training and test sets
train, test = train_test_split(data, test_size=.15)

# SlopeOne algorithm
slope = SlopeOne()
slope.fit(train)

# Predict user 222's rating for movie 750
uid = str(222)
iid = str(750)
pred = slope.predict(uid, iid, r_ui=5, verbose=True)
# ###### The output is as follows ######
# user: 222
# item: 750
# r_ui = 5.00
# est = 3.97
# {'was_impossible': False}

# The predicted rating of user 222 for movie 750 is 3.97

test_pred = slope.test(test)

# RMSE and MAE
print("RMSE: " + str(accuracy.rmse(test_pred, verbose=False)))
print("MAE: " + str(accuracy.mae(test_pred, verbose=False)))