Beispiel #1
0
    def CFZ(self):
        """Predict ratings with a user-based KNNWithZScore model and store them.

        Fits ``KNNWithZScore`` (cosine similarity, ``user_based=True``,
        ``k=40``, ``min_k=1``) on ``self.trainset``.  For every uid in
        ``self.list`` it predicts the ``(lid, rate)`` rows of ``self.data``
        that belong to that uid, then stores the results on the instance:

        - ``self.df_est``: DataFrame with columns ``uid``, ``Iid``, ``r_ui``
          and ``est``, one row per prediction.
        - ``self.arr``: the unique uids found in ``self.df_est``.
        - ``self.CFWZ_ndcg_``: the value returned by ``self.Calculate_NDCG()``.

        Every prediction is also printed, because ``predict`` is called with
        ``verbose=True``.
        """
        sim_options = {'name': 'cosine', 'user_based': True}
        algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
        algo.fit(self.trainset)

        # Plain lists are grown here and converted once at the end; calling
        # np.append inside the loop would copy the whole array every time.
        u_id = []
        I_id = []
        true_ratings = []
        estimates = []

        for uid in self.list:
            # Select this user's rows once.
            user_rows = self.data[self.data.uid == uid]

            # NOTE(review): the last row of every user is left out, exactly as
            # the previous ``range(1, len(rows))`` / ``rows[i - 1:i]`` loop
            # did.  Confirm whether that hold-out is intentional.
            lids = user_rows.lid.values[:-1]
            rates = user_rows.rate.values[:-1]

            for lid, r_ui in zip(lids, rates):
                pred = algo.predict(uid, lid, r_ui, verbose=True)
                u_id.append(int(pred.uid))
                I_id.append(int(pred.iid))
                true_ratings.append(pred.r_ui)
                estimates.append(pred.est)

        self.df_est = pd.DataFrame({
            'uid': u_id,
            'Iid': I_id,
            # Float arrays, matching the float64 arrays built before.
            'r_ui': np.array(true_ratings, dtype=float),
            'est': np.array(estimates, dtype=float),
        })
        self.arr = self.df_est['uid'].unique()

        self.CFWZ_ndcg_ = self.Calculate_NDCG()
Beispiel #2
0
def collab_recommender(train_data,
                       test_data,
                       user_based=True,
                       normalization=False,
                       k=100,
                       sim='cosine'):
    """
    Fit a KNN collaborative-filtering model and predict ratings for test pairs.

    Input:
    - train_data: dataframe, n*3, columns are ['userid','movieid','rating']
    - test_data: dataframe, n*2, columns are ['userid', 'movieid']
    - user_based: boolean, use user-based knn algorithm if True, use item-based knn algorithm if False
    - normalization: boolean, use KNNWithZScore if True, KNNWithMeans otherwise
    - k: int, number of nearest neighbors
    - sim: string, define the similarity matrix from ['cosine', 'pearson', 'msd', 'pearson_baseline']

    Output:
    - pred_rating: dataframe, n*2, columns are ['movieid', 'rating'];
      None when an exception other than ValueError was caught and logged.

    Raises:
    - ValueError: logged as a warning and re-raised unchanged.
    """

    try:
        function_log.trace('Start collaborative recommendation function')

        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(train_data, reader)

        sim_options = {'name': sim, 'user_based': user_based}

        algo_cls = KNNWithZScore if normalization else KNNWithMeans
        algo = algo_cls(k=k, sim_options=sim_options, verbose=False)

        train_set = data.build_full_trainset()
        algo.fit(train_set)

        # Walk the two columns positionally.  Label-based ``.loc[idx, col]``
        # returns a Series instead of a scalar when index labels repeat.
        movie_ids = []
        ratings = []
        for user_id, movie_id in zip(test_data['userid'].values,
                                     test_data['movieid'].values):
            movie_ids.append(movie_id)
            ratings.append(algo.predict(user_id, movie_id).est)

        function_log.trace('Finish collaborative recommendation function')
        return pd.DataFrame({'movieid': movie_ids, 'rating': ratings})
    except ValueError:
        function_log.warn("Training and test data cannot be none.")
        # Bare raise keeps the original message and traceback.
        raise
    except Exception as x:
        function_log.exception(
            f'collaborative recommendation function failed {x}')
        # Best-effort path: the failure is logged and None is returned.
        return None
    def CFZ(self):
        """Run user-based KNNWithZScore predictions and return the last one.

        Fits ``KNNWithZScore`` (cosine similarity, ``user_based=True``,
        ``k=40``, ``min_k=1``) on ``self.trainset``.  For every uid in
        ``self.list`` it predicts the ``(lid, rate)`` rows of ``self.data``
        belonging to that uid; each prediction is printed (``verbose=True``).

        Returns:
            The prediction object of the final ``algo.predict`` call, or
            ``None`` when no prediction was made at all (previously that case
            raised ``UnboundLocalError``).
        """
        sim_options = {'name': 'cosine', 'user_based': True}
        algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
        algo.fit(self.trainset)

        pred = None
        for uid in self.list:
            # Select this user's rows once.
            user_rows = self.data[self.data.uid == uid]

            # NOTE(review): the last row of every user is left out, exactly as
            # the previous ``range(1, len(rows))`` / ``rows[i - 1:i]`` loop
            # did.  Confirm whether that is intentional.
            lids = user_rows.lid.values[:-1]
            rates = user_rows.rate.values[:-1]

            for lid, r_ui in zip(lids, rates):
                pred = algo.predict(uid, lid, r_ui, verbose=True)

        return pred
Beispiel #4
0
def computeKNNZScoreMovie(data, test_np):
    """Predict test ratings with an item-based k-NN z-score model.

    Model settings:
        - Similarity function: Pearson baseline, item based
        - Number of closest neighbours: 108

    data : data frame which represents the train set
    test_np : data frame for which predictions are requested

    return : the test frame produced by ``dataTrainSurprise`` with an added
        prediction column named 'knnzscore_item_rating'
    """
    trainset, test = dataTrainSurprise(data, test_np)

    model = KNNWithZScore(
        k=108,
        sim_options={'name': 'pearson_baseline', 'user_based': False},
    )
    knnz_algo = model.fit(trainset)

    def _estimate(row):
        # ``est`` is the fourth field of the prediction tuple.
        return knnz_algo.predict(row['user_id'], row['movie_id']).est

    id_pairs = test[['user_id', 'movie_id']]
    test['knnzscore_item_rating'] = id_pairs.apply(_estimate, axis=1)

    return test
Beispiel #5
0
# True ratings that go with the (uid, iid) pairs predicted below.
r_ui1, r_ui2, r_ui3, r_ui4 = 4, 4, 1, 3

verboseFlag = True

# The same four (user, item, true rating) queries are sent to every algorithm.
# uid1..uid4, iid1..iid4 and the algo* objects come from outside this excerpt.
_queries = [
    (uid1, iid1, r_ui1),
    (uid2, iid2, r_ui2),
    (uid3, iid3, r_ui3),
    (uid4, iid4, r_ui4),
]

# get a prediction for specific users and items.
print("KNNBaseLine:")
predBaseLine1, predBaseLine2, predBaseLine3, predBaseLine4 = [
    algoBaseLine.predict(u, i, r_ui=r, verbose=verboseFlag)
    for u, i, r in _queries
]

print("\nKNNBasic:")
predBasic1, predBasic2, predBasic3, predBasic4 = [
    algoBasic.predict(u, i, r_ui=r, verbose=verboseFlag)
    for u, i, r in _queries
]

print("\nKNNWithMeans:")
predWithMeans1, predWithMeans2, predWithMeans3, predWithMeans4 = [
    algoWithMeans.predict(u, i, r_ui=r, verbose=verboseFlag)
    for u, i, r in _queries
]

print("\nKNNWithZScore:")
predWithZScore1, predWithZScore2, predWithZScore3, predWithZScore4 = [
    algoWithZScore.predict(u, i, r_ui=r, verbose=verboseFlag)
    for u, i, r in _queries
]
Beispiel #6
0
# Remaining KNNWithMeans predictions (queries 2-4).  The uid*/iid*/r_ui*
# values, verboseFlag and the algo* objects come from outside this excerpt.
predWithMeans2, predWithMeans3, predWithMeans4 = [
    algoWithMeans.predict(u, i, r_ui=r, verbose=verboseFlag)
    for u, i, r in (
        (uid2, iid2, r_ui2),
        (uid3, iid3, r_ui3),
        (uid4, iid4, r_ui4),
    )
]

print("\nKNNWithZScore:")
# All four queries, in order, against the z-score model.
predWithZScore1, predWithZScore2, predWithZScore3, predWithZScore4 = [
    algoWithZScore.predict(u, i, r_ui=r, verbose=verboseFlag)
    for u, i, r in (
        (uid1, iid1, r_ui1),
        (uid2, iid2, r_ui2),
        (uid3, iid3, r_ui3),
        (uid4, iid4, r_ui4),
    )
]
Beispiel #7
0
algo3 = KNNWithMeans(k=40, min_k=1, sim_options=sim_options2, verbose=True)
algo3.fit(data_train.build_full_trainset())

# For each model: pred* holds the estimate rounded to an int, pred_f* the raw
# float estimate.  One entry is appended per non-empty line of the test file.
pred1 = []
pred_f1 = []
pred2 = []
pred_f2 = []
pred3 = []
pred_f3 = []
_outputs = (
    (algo1, pred1, pred_f1),
    (algo2, pred2, pred_f2),
    (algo3, pred3, pred_f3),
)
with open("./data/testing.dat", "r", encoding='utf-8') as f:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in f:
        line = line.strip()
        if not line:
            # A blank or trailing line has no "user,item" pair; indexing the
            # split result would raise IndexError.
            continue
        fields = line.split(",")
        user_id, item_id = fields[0], fields[1]
        for _algo, _rounded, _raw in _outputs:
            # Same arguments as the positional (None, True, True) form.
            est = _algo.predict(user_id, item_id, r_ui=None, clip=True,
                                verbose=True).est
            _rounded.append(int(round(est)))
            _raw.append(est)

with open("./雷雨轩_PB18111791_4.txt", "w") as f:
    # One rounded rating per line.
    f.writelines(str(ratings) + "\n" for ratings in pred1)

with open("./4_float.txt", "w") as f:
    for ratings in pred_f1:
Beispiel #8
0
# algo1, uid, iid and trainset come from outside this excerpt.
pred1 = algo1.predict(uid, iid, verbose=True)

# Similarity configurations used by the models below.
_item_cosine = {'name': 'cosine', 'user_based': False}
_user_msd = {'name': 'MSD', 'user_based': True}

# KNNWithMeans, item-based cosine similarity
algo2 = KNNWithMeans(k=30, sim_options=_item_cosine, verbose=True)
algo2.fit(trainset)
pred2 = algo2.predict(uid, iid, verbose=True)

# KNNWithZScore, user-based MSD similarity
# (each model gets its own copy of the options dict)
algo3 = KNNWithZScore(k=30, sim_options=dict(_user_msd), verbose=True)
algo3.fit(trainset)
pred3 = algo3.predict(uid, iid, verbose=True)

# KNNBaseline, user-based MSD similarity
algo4 = KNNBaseline(k=30, sim_options=dict(_user_msd), verbose=True)
algo4.fit(trainset)
pred4 = algo4.predict(uid, iid, verbose=True)
Beispiel #9
0
# Load "user,item,rating,timestamp" rows from ./ratings.csv, skipping the
# first line of the file.
reader = Reader(line_format='user item rating timestamp',
                sep=',',
                skip_lines=1)
data = Dataset.load_from_file('./ratings.csv', reader=reader)
train_set = data.build_full_trainset()

from surprise import KNNWithZScore
# ``verbose`` is an argument of the algorithm itself, not a similarity option,
# so it is passed to the constructor as a real boolean instead of living in
# ``sim_options`` as the string 'True'.
algo = KNNWithZScore(k=50,
                     sim_options={'user_based': False},
                     verbose=True)
algo.fit(train_set)

# Single prediction for one (user, item) pair, given as raw string ids.
uid = str(196)
iid = str(332)
pred = algo.predict(uid, iid, r_ui=4, verbose=True)

# 3-fold cross-validation: refit on each training fold and score its test fold.
kf = KFold(n_splits=3)
for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    # Compute RMSE and MAE
    accuracy.rmse(predictions, verbose=True)
    accuracy.mae(predictions, verbose=True)

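### Collaborative filtering with z-score normalisation (KNNWithZScore); note that 'user_based': False below selects item-based rather than user-based similarity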
from surprise import KNNWithZScore
algo = KNNWithZScore(k=50,
                     sim_options={
                         'user_based': False,
                         'verbose': 'True'