def cal_KNNWithZScore(trainset, df):
    """Fit a user-based KNNWithZScore model and predict every (user, store)
    rating found in `df`.

    Parameters
    ----------
    trainset : surprise.Trainset
        Training data the algorithm is fitted on.
    df : pandas.DataFrame
        Must contain 'user', 'store' and 'stars' columns.

    Side effect: writes the predictions to the path held by the module-level
    name `save_file2` (defined elsewhere — confirm it is in scope).
    """
    # BUG FIX: surprise expects the key 'user_based' (underscore); the
    # original 'user-based' key was silently ignored, so the default
    # (user_based=True) happened to apply by accident.
    sim_options = {'name': 'cosine', 'user_based': True}
    algo_knnz = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
    algo_knnz.fit(trainset)

    users, items, real, estimate = [], [], [], []
    for _, row in df.iterrows():
        uid = row.user
        iid = row.store
        r_ui = row.stars
        users.append(uid)
        items.append(iid)
        real.append(r_ui)
        # BUG FIX: the original called the undefined/global name `algo`
        # here instead of the model fitted above.
        pred = algo_knnz.predict(uid, iid, r_ui, verbose=True)
        estimate.append(pred)
    print("end")

    df5 = pd.DataFrame({'user': users, 'item': items,
                        'r_ui': real, 'est': estimate})
    # A surprise Prediction is (uid, iid, r_ui, est, details), so [-2]
    # extracts the estimated rating.
    df5['est'] = df5['est'].apply(lambda x: x[-2])
    df5['err'] = abs(df5.est - df5.r_ui)
    df5.to_csv(save_file2)
Ejemplo n.º 2
0
    def CFZ(self):
        """Collaborative filtering with z-score normalised KNN.

        Fits a user-based KNNWithZScore model on self.trainset, predicts
        every consecutive (uid, lid) pair of self.data for the users in
        self.list, stores the predictions in self.df_est, the distinct
        predicted uids in self.arr, and the resulting NDCG in
        self.CFWZ_ndcg_.
        """
        u_id = []
        I_id = []
        r_ui_ = np.array([])
        _est = np.array([])

        sim_options = {'name': 'cosine', 'user_based': True}
        algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
        algo.fit(self.trainset)

        for uid in self.list:
            # BUG FIX: the original filtered self.data twice into two
            # identical frames (`lids` and `a`); one filter suffices.
            lids = self.data[self.data.uid == uid]

            # NOTE(review): range(1, len(lids)) visits rows 0..len-2, i.e.
            # the last row of each user is never predicted — preserved
            # as-is, but confirm this is intentional.
            for i in range(1, len(lids)):
                lid = lids[i - 1:i].lid.values[0]
                r_ui = lids[i - 1:i].rate.values[0]
                pred = algo.predict(uid, lid, r_ui, verbose=True)
                u_id.append(int(pred.uid))
                I_id.append(int(pred.iid))
                r_ui_ = np.append(r_ui_, pred.r_ui)
                _est = np.append(_est, pred.est)

        self.df_est = pd.DataFrame({
            'uid': u_id,
            'Iid': I_id,
            'r_ui': r_ui_,
            'est': _est
        })
        self.arr = self.df_est['uid'].unique()

        self.CFWZ_ndcg_ = self.Calculate_NDCG()
Ejemplo n.º 3
0
def collab_recommender(train_data,
                       test_data,
                       user_based=True,
                       normalization=False,
                       k=100,
                       sim='cosine'):
    """
    Input:
    - train_data: dataframe, n*3, columns are ['userid','movieid','rating']
    - test_data: dataframe, n*2, columns are ['userid', 'movieid']
    - user_based: boolean, use user-based knn algorithm if True, use item-based knn algorithm if False
    - normalization: boolean, conduct z-score normalization on user/item matrix if True
    - k: int, number of nearest neighbors
    - sim: string, define the similarity matrix from ['cosine', 'pearson', 'msd', 'pearson_baseline']

    Output:
    - pred_rating: dataframe, n*2, columns are ['movieid', 'rating']
    """

    try:
        function_log.trace('Start collaborative recommendation function')

        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(train_data, reader)

        sim_options = {'name': sim, 'user_based': user_based}

        # Z-score normalisation -> KNNWithZScore; otherwise mean-centred KNN.
        if normalization:
            algo = KNNWithZScore(k=k, sim_options=sim_options, verbose=False)
        else:
            algo = KNNWithMeans(k=k, sim_options=sim_options, verbose=False)

        train_set = data.build_full_trainset()
        algo.fit(train_set)

        # Predict a rating for every (userid, movieid) row of test_data.
        pred_rating = {'movieid': [], 'rating': []}
        for idx in test_data.index:
            pred_rating['movieid'].append(test_data.loc[idx, 'movieid'])
            pred = algo.predict(test_data.loc[idx, 'userid'],
                                test_data.loc[idx, 'movieid'])
            pred_rating['rating'].append(pred.est)
        function_log.trace('Finish collaborative recommendation function')
        return pd.DataFrame(pred_rating)
    except ValueError:
        function_log.warn("Training and test data cannot be none.")
        # BUG FIX: re-raise the caught exception, preserving its message and
        # traceback; the original raised the bare ValueError class.
        raise
    except Exception as x:
        # NOTE(review): all other errors are logged and swallowed, so the
        # function implicitly returns None — preserved, but confirm callers
        # handle a None result.
        function_log.exception(
            f'collaborative recommendation function failed {x}')
    def CFZ(self):
        """Fit a user-based KNNWithZScore model on self.trainset and run a
        prediction for every consecutive (uid, lid) pair of self.data for
        the users in self.list.

        Returns the last Prediction produced (or None if self.list is
        empty); earlier predictions are only printed via verbose=True.
        NOTE(review): returning only the final prediction looks suspicious —
        confirm callers expect this.
        """
        sim_options = {'name': 'cosine', 'user_based': True}
        algo = KNNWithZScore(k=40, min_k=1, sim_options=sim_options)
        algo.fit(self.trainset)

        pred = None  # robustness: defined even when self.list is empty
        for uid in self.list:
            # BUG FIX: the original filtered self.data twice into two
            # identical frames; one filter suffices.
            lids = self.data[self.data.uid == uid]

            for i in range(1, len(lids)):
                lid = lids[i - 1:i].lid.values[0]
                r_ui = lids[i - 1:i].rate.values[0]
                pred = algo.predict(uid, lid, r_ui, verbose=True)

        return pred
Ejemplo n.º 5
0
    def CFZ(self):
        """Cross-validated z-score KNN evaluation.

        Runs 5-fold cross-validation of a user-based KNNWithZScore model on
        self.data and prints precision, recall and F1 (computed via
        self.precision_recall_at_k) for each fold.
        """
        splitter = KFold(n_splits=5)
        algo = KNNWithZScore(
            k=40, min_k=1,
            sim_options={'name': 'cosine', 'user_based': True})

        for fold_train, fold_test in splitter.split(self.data):
            algo.fit(fold_train)
            fold_predictions = algo.test(fold_test)
            precisions, recalls = self.precision_recall_at_k(fold_predictions)

            P = sum(precisions.values()) / len(precisions)
            R = sum(recalls.values()) / len(recalls)
            F1 = 2 * P * R / (P + R)

            print("Precision : ", P)
            print("Recall    : ", R)
            print("F1        : ", F1)
Ejemplo n.º 6
0
class KNN_Normalized(BaseSurpriseSTLEstimator):
    """Surprise KNNWithZScore (z-score normalised KNN) wrapped in the
    BaseSurpriseSTLEstimator interface."""

    def __init__(self, k, name='KNN_Normalized'):
        super().__init__(name, 'non_feature_based')
        self.k = k  # number of nearest neighbours
        self.model = KNNWithZScore(k=self.k, verbose=False)

    def _fit(self, x):
        # x: a surprise Trainset (presumably — confirm against base class)
        self.model.fit(x)

    def _predict(self, x):
        # x: a surprise testset; returns a list of Prediction objects
        return self.model.test(x)

    def get_hyper_params(self):
        """Hyper-parameter search space: integer k in [2, 13]."""
        hparams = {'k': {'type': 'integer', 'values': [2, 13]}}
        return hparams

    def set_hyper_params(self, **kwargs):
        self.k = kwargs['k']
        # BUG FIX: the original only stored the new k; the already-built
        # model kept its old neighbourhood size. Rebuild so k takes effect.
        self.model = KNNWithZScore(k=self.k, verbose=False)

    def similarity_matrix(self):
        """Return the model's item/user similarity matrix."""
        return self.model.compute_similarities()
Ejemplo n.º 7
0
data = Dataset.load_builtin('ml-100k')

# Build the full training set from the ml-100k ratings.
trainset = data.build_full_trainset()

# Instantiate the four KNN-family algorithms with default settings.
algoBaseLine = KNNBaseline()
algoBasic = KNNBasic()
algoWithMeans = KNNWithMeans()
algoWithZScore = KNNWithZScore()

# Fit each algorithm on the same training set.
for _algo in (algoBaseLine, algoBasic, algoWithMeans, algoWithZScore):
    _algo.fit(trainset)

# Raw user ids (as in the ratings file) — they are **strings**!
uid1, uid2, uid3, uid4 = str(196), str(73), str(423), str(504)

# Raw item ids (as in the ratings file) — strings as well.
iid1, iid2, iid3, iid4 = str(306), str(514), str(977), str(370)

# Known true ratings for the four (user, item) pairs above.
r_ui1, r_ui2, r_ui3, r_ui4 = 4, 4, 1, 3
Ejemplo n.º 8
0
}

# Two baseline-estimation configurations for KNNBaseline: stochastic
# gradient descent and alternating least squares.
bsl_options_SGD = {'method': 'sgd', 'learning_rate': .00005, 'n_epochs': 20}

bsl_options_ALS = {'method': 'als', 'n_epochs': 5, 'reg_u': 12, 'reg_i': 5}

#algo = KNNWithMeans(k=40, min_k=1, sim_options=sim_options, verbose=True)
# KNNBaseline using the ALS baseline options; `sim_options1`,
# `sim_options2` and `data_train` are defined elsewhere in the file.
algo1 = KNNBaseline(k=40,
                    min_k=1,
                    sim_options=sim_options1,
                    bsl_options=bsl_options_ALS,
                    verbose=True)
algo1.fit(data_train.build_full_trainset())

# Z-score normalised KNN with the same similarity configuration.
algo2 = KNNWithZScore(k=40, min_k=1, sim_options=sim_options1, verbose=True)
algo2.fit(data_train.build_full_trainset())

# Mean-centred KNN with the alternative similarity configuration.
algo3 = KNNWithMeans(k=40, min_k=1, sim_options=sim_options2, verbose=True)
algo3.fit(data_train.build_full_trainset())

# Accumulators for raw and final predictions of each algorithm
# (the loop body visible here only fills `a`; it continues beyond this
# fragment).
pred1 = []
pred_f1 = []
pred2 = []
pred_f2 = []
pred3 = []
pred_f3 = []
with open("./data/testing.dat", "r", encoding='utf-8') as f:
    for line in f.readlines():
        line_data = line.strip().split(",")
        # A Prediction is (uid, iid, r_ui, est, details); index 3 is `est`.
        a = algo1.predict(str(line_data[0]), str(line_data[1]), None, True,
                          True)[3]
# The rating is the third field of each entry of `all_ratings`.
# NOTE(review): assumes each entry is indexable with at least 3 fields.
ratings = [entry[2] for entry in all_ratings]

ratings_dict = {"user": user_ids, "item": food_ids, "rating": ratings}

df = pd.DataFrame(ratings_dict)
reader = Reader(rating_scale=(1, 5))

# Build the user-based training set from (user, item, rating) and fit the
# externally-defined `user_based` model on it.
training_user_data = Dataset.load_from_df(
    df[["user", "item", "rating"]], reader)
trainingSet_user = training_user_data.build_full_trainset()
user_based.fit(trainingSet_user)

# Same ratings with user/item swapped, for the `item_based` model.
training_item_data = Dataset.load_from_df(
    df[["item", "user", "rating"]], reader)
trainingSet_item = training_item_data.build_full_trainset()
item_based.fit(trainingSet_item)
"""""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """
"                     USER_BASED FOOD_ITEMS                         "
""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """"""


# Get all the food items based on the user's specified filters
def get_data_to_recommend(parameters):
    if parameters[5] == "true":
        available_query = """AND F.AVAILABLE = %s
                         AND NOW() BETWEEN RES.OPENING_TIME AND RES.CLOSING_TIME"""
Ejemplo n.º 10
0
                sep=",",
                rating_scale=(1, 5),
                skip_lines=1)
# * loading the csv
data = Dataset.load_from_file(
    file_path="../../ML_Dataset/ml-latest-small/ratings.csv", reader=reader)

# * define a cross-validation iterator.
# BUG FIX: the original also computed a single train_test_split here whose
# result was never used (immediately shadowed by the KFold loop) — removed.
kf = KFold(n_splits=5)

# * Choosing KNN with Z-Score as algorithm
algo = KNNWithZScore()

# * Train the algorithm on each fold's trainset and evaluate on its testset
for trainset, testset in kf.split(data):
    predictions = algo.fit(trainset).test(testset)
    precisions, recalls = precision_recall_at_k(predictions, k=5, threshold=4)
    accuracy.rmse(predictions)
    accuracy.mae(predictions)
    accuracy.mse(predictions)
    accuracy.fcp(predictions)
    print("Precision: ",
          sum(prec for prec in precisions.values()) / len(precisions))
    print("Recall: ", sum(rec for rec in recalls.values()) / len(recalls))

# Export the last fold's predictions with their absolute error.
df = pd.DataFrame(predictions, columns=["uid", "iid", "rui", "est", "details"])
df["err"] = abs(df.est - df.rui)
df.to_csv("predictions_KNNZScore.csv")
Ejemplo n.º 11
0
pred1 = algo1.predict(uid, iid, verbose=True)

# KNNWithMeans: item-based, cosine similarity.
cosine_item_opts = {'name': 'cosine', 'user_based': False}
algo2 = KNNWithMeans(k=30, sim_options=cosine_item_opts, verbose=True)
algo2.fit(trainset)
pred2 = algo2.predict(uid, iid, verbose=True)

# KNNWithZScore: user-based, MSD similarity.
algo3 = KNNWithZScore(k=30,
                      sim_options={'name': 'MSD', 'user_based': True},
                      verbose=True)
algo3.fit(trainset)
pred3 = algo3.predict(uid, iid, verbose=True)

# KNNBaseline: user-based, MSD similarity.
algo4 = KNNBaseline(k=30,
                    sim_options={'name': 'MSD', 'user_based': True},
                    verbose=True)
algo4.fit(trainset)
pred4 = algo4.predict(uid, iid, verbose=True)
Ejemplo n.º 12
0
# NOTE(review): this prints the bound method object, not the data;
# news.head() was likely intended — confirm before changing.
print(news.head)

# Read the ratings data
reader = Reader(line_format='user item rating timestamp',
                sep=',',
                skip_lines=1)
data = Dataset.load_from_file('./ratings.csv', reader=reader)
train_set = data.build_full_trainset()

from surprise import KNNWithZScore
# BUG FIX: `verbose` is a constructor argument, not a similarity option;
# the original put the *string* 'True' inside sim_options where it was
# ignored. Pass it explicitly to the constructor instead.
algo = KNNWithZScore(k=50,
                     sim_options={'user_based': False},
                     verbose=True)
algo.fit(train_set)
uid = str(196)
iid = str(332)
pred = algo.predict(uid, iid, r_ui=4, verbose=True)

# 3-fold cross-validation of the same model.
kf = KFold(n_splits=3)
for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    # Compute RMSE and MAE for this fold
    accuracy.rmse(predictions, verbose=True)
    accuracy.mae(predictions, verbose=True)
### 使用协同过滤正态分布 User based
from surprise import KNNWithZScore
algo = KNNWithZScore(k=50,
Ejemplo n.º 13
0
    We are setting minimum number of neighbous (min_k) 1 and maximum number of neighbours (k) = 40  
    We train the model on train set '''

algo2 = KNNBasic(sim_options=sim_options, k=40, min_k=1)
algo2.fit(trainset)

predictions2 = algo2.test(testset)
print("RMSE for KNNBasic:", accuracy.rmse(predictions2, verbose=True))

# In[ ]:
''' We build the model by making use of KNNBaseline, a collaborative
    filtering algorithm that also takes rating baselines into account.
    We set the minimum number of neighbours (min_k) to 1 and the maximum
    number of neighbours (k) to 40, and train the model on the train set. '''

algo3 = KNNBaseline(sim_options=sim_options, k=40, min_k=1)
algo3.fit(trainset)

predictions3 = algo3.test(testset)
print("RMSE for KNNBaseline:", accuracy.rmse(predictions3, verbose=True))

# In[ ]:
''' We build the model by making use of KNNWithZScore, a collaborative
    filtering algorithm that z-score normalises each user's ratings.
    We set the minimum number of neighbours (min_k) to 1 and the maximum
    number of neighbours (k) to 40, and train the model on the train set. '''

algo4 = KNNWithZScore(sim_options=sim_options, k=40, min_k=1)
algo4.fit(trainset)

predictions4 = algo4.test(testset)
# BUG FIX: the original label said "KNNBasic" for the KNNWithZScore model.
print("RMSE for KNNWithZScore:", accuracy.rmse(predictions4, verbose=True))
# The rating is the third field of each entry of `all_ratings`.
ratings = [entry[2] for entry in all_ratings]

ratings_dict = {
    "user": user_ids,
    "item": food_ids,
    "rating": ratings,
}

df = pd.DataFrame(ratings_dict)
reader = Reader(rating_scale=(1, 5))

# Build a surprise Dataset from the ratings dataframe and fit the
# externally-defined `algo` model on the full training set.
training_data = Dataset.load_from_df(df[["user", "item", "rating"]], reader)

trainingSet = training_data.build_full_trainset()
algo.fit(trainingSet)


"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
"                       GROUP RECOMMENDER                           "
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

# Function to retrieve the information for all the restaurants from the database
def get_all_restaurants():
    """Fetch id, contact, rating and opening-hour columns for every
    restaurant via the module-level `mycursor` connection cursor."""
    query = """SELECT  RESTAURANT_ID, NAME, EMAIL, NUMBER, ADDRESS, RATING, OPENING_TIME, CLOSING_TIME FROM RESTAURANT"""
    mycursor.execute(query)
    return mycursor.fetchall()

# Function to retrieve all the food items for a specific restaurant
def get_restaurant_foods(restaurant_id):
    parameters = (restaurant_id,)