Example #1
0
def step5_surprise2():
    data = surprise.Dataset.load_builtin('ml-100k')

    # columns: user, item, rate, id



    # print(data)
    # print(data.raw_ratings)

    # All the columns of the raw ratings must be loaded.
    # df = pd.DataFrame(data.raw_ratings, columns=['user', 'item', 'rate', 'id'])
    option1 = {'name': 'msd'}
    option2 = {'name': 'cosine'}
    option3 = {'name': 'pearson'}

    # Create the object that will build the recommendation list.
    # Don't blur the essentials; being vague is the worst mistake.
    # Build the training data.
    trainset = data.build_full_trainset()
    algo = surprise.KNNBasic(sim_options=option3)
    print('training started')
    algo.fit(trainset)

    # Fetch the neighbor list.
    result = algo.get_neighbors(196, k=3)  # neighbors of inner user id 196 -- similar users, not movies, since user_based defaults to True
    print('result type:', type(result))
    for r1 in result:
        print(r1)
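get_neighbors works on the trainset's inner ids, not the raw ids in the ratings file (ml-100k raw ids are strings). A minimal sketch of the round trip, continuing from the function above; the k value here is arbitrary:

# Hedged sketch: map a raw ml-100k user id to its inner id and back.
inner_uid = trainset.to_inner_uid('196')
neighbors = algo.get_neighbors(inner_uid, k=3)
raw_neighbors = [trainset.to_raw_uid(inner_id) for inner_id in neighbors]
print(raw_neighbors)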
def step5_surprise2():
    # movie data set to use
    data = surprise.Dataset.load_builtin('ml-100k')
    # print(data)
    # print(data.raw_ratings)
    # df = pd.DataFrame(data.raw_ratings, columns=['user', 'item', 'rate', 'id'])
    # del df['id']
    # print(df)

    # choose the similarity measure
    # option1 = {'name': 'msd'}
    # option2 = {'name': 'cosine'}
    option3 = {'name': 'pearson'}

    # Create the object that will build the recommendation list.
    algo = surprise.KNNBasic(sim_options=option3)
    # Train.
    trainset = data.build_full_trainset()
    algo.fit(trainset)

    # Fetch the neighbor list.
    # k == number of neighbors to fetch
    result = algo.get_neighbors(trainset.to_inner_uid('196'), k=5)  # get_neighbors needs the inner id, not the raw string id

    for r1 in result:
        print(r1)
Example #3
0
def learn(id):
    print(id)
    dataset = getData()
    # Build the data set.
    df = pd.DataFrame(dataset)
    # Reader converts the data into the form surprise expects;
    # rating_scale=(min, max) sets the rating bounds.
    reader = sp.Reader(rating_scale=(0.0, 5))
    # Column names held in the dictionary. When the dataset is built,
    # the first name is taken as the user id, the second as the item id,
    # and the third as the rating.
    col_list = ['user_id', 'wine_id', 'points']

    data = sp.Dataset.load_from_df(df[col_list], reader)

    # 학습할 모델
    model = sp.KNNBasic(sim_options={'name': 'pearson'})
    # 학습한다.
    trainset = data.build_full_trainset()
    model.fit(trainset)
    result = model.get_neighbors(id, k=5)  # note: get_neighbors expects a trainset inner id; this assumes the raw id coincides with it
    print(result)
    rec_list = list()
    for r in result:
        rec_list.append(str(dataset['wine_id'][r]))
        print(dataset['wine_id'][r])
    winelist = ','.join(rec_list)
    return winelist
def step5_surprise():
    name_list, movie_list, rating_dic = data_to_dic()
    # print(rating_dic)
    # Build the data set.
    df = pd.DataFrame(rating_dic)
    # rating_scale: the range of ratings in the data
    reader = surprise.Reader(rating_scale=(0.0, 5.0))
    
    # names of the lists held in the dictionary
    col_list = ['user_id', 'item_id', 'rating']
    data = surprise.Dataset.load_from_df(df[col_list], reader)

    trainset = data.build_full_trainset()

    # Train.
    # Choose the similarity measure.
    # option1 = {'name': 'msd'}
    # option2 = {'name': 'cosine'}
    option3 = {'name': 'pearson'}

    # 추천 목록을 만들기 위한 객체 생성
    algo = surprise.KNNBasic(sim_options=option3)
    algo.fit(trainset)

    # Recommend movies for the user '소이현'.
    index = name_list.index('소이현')
    result = algo.get_neighbors(index, k=3)

    for r1 in result:
        # r1 starts from 1, so subtract 1
        print(movie_list[r1-1])
Example #5
0
def surprise_basicKNN(trainset, finalset):
    "Basic K Nearest Neighbours model"
    algo = spr.KNNBasic()

    algo.fit(trainset)
    predictions_final = algo.test(finalset)

    return spr_estimate_to_vect(predictions_final)
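spr_estimate_to_vect is a project helper not shown here. A plausible way to produce the two set arguments, assuming spr is the surprise module and df is a hypothetical ratings DataFrame with user/item/rating columns on a 1-5 scale:

from surprise.model_selection import train_test_split

reader = spr.Reader(rating_scale=(1, 5))
data = spr.Dataset.load_from_df(df[['user', 'item', 'rating']], reader)
trainset, finalset = train_test_split(data, test_size=0.2)
pred_vect = surprise_basicKNN(trainset, finalset)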
Example #6
0
def train(where, k):
    # df_to_dict = recur_dictify(pd.read_pickle('../../../data/over_10review_stores.pkl'))
    # store_list = []  # list to hold the stores
    # user_set = set()  # set to hold the users
    #
    # # repeat once per store
    # for store_key in df_to_dict:
    #     store_list.append(store_key)
    #
    #     for user_key in df_to_dict[store_key]:
    #         user_set.add(user_key)
    #
    # user_list = list(user_set)

    df = pd.read_pickle("../../data/dic_to_train_stores.pkl")
    reader = surprise.Reader(rating_scale=(1, 5))

    col_list = ['store_id', 'user_id', 'score']
    data = surprise.Dataset.load_from_df(df[col_list], reader)
    # Train
    trainset = data.build_full_trainset()
    option = {'name': 'pearson'}
    algo = surprise.KNNBasic(sim_options=option)

    algo.fit(trainset)

    # Recommend restaurants similar to the given one.
    # where = input('store id : ')
    print("\n")

    user_list = pd.read_pickle(
        "../../data/Item_based_user_list.pkl")[0].tolist()
    store_list = pd.read_pickle(
        "../../data/Item_based_store_list.pkl")[0].tolist()
    # user_list = dff.user.unique().tolist()
    # store_list = dff.store.unique().tolist()

    index = store_list.index(int(where))
    print('store_idx : ', index)
    print("\n")

    result = algo.get_neighbors(index, k=k)  # k=10
    print("Restaurants similar to", where, ":")
    print(result)
    print("\n")

    # Recommend users for the restaurant.
    print("Friends recommended to you, who rated", where, ":", "\n")
    recommend_user_list = []
    for r1 in result:
        # r1 is a trainset inner id; the lookup assumes it matches the store_id values
        max_rating = data.df[data.df["store_id"] == r1]["score"].max()
        user_id = data.df[(data.df["score"] == max_rating)
                          & (data.df["store_id"] == r1)]["user_id"].values

        for user in user_id:
            recommend_user_list.append(user_list[user])
            # print(user_list[user])
    return recommend_user_list
Example #7
0
def train(dataframe, k):
    # df_to_dict = recur_dictify(pd.read_pickle('../data/over_10review_peoples.pkl'))
    # name_list = []  # list to hold the users
    # store_set = set()  # set to hold the restaurants
    #
    # # repeat once per user
    # for user_key in df_to_dict:
    #     name_list.append(user_key)
    #
    #     for sto_key in df_to_dict[user_key]:
    #         store_set.add(sto_key)
    #
    # store_list = list(store_set)

    df = dataframe
    reader = surprise.Reader(rating_scale=(1, 5))

    col_list = ['user_id', 'store_id', 'score']
    data = surprise.Dataset.load_from_df(df[col_list], reader)
    # Train
    trainset = data.build_full_trainset()
    option = {'name': 'pearson'}
    algo = surprise.KNNBasic(sim_options=option)

    algo.fit(trainset)
    user_id = input('user id: ')
    # Recommend restaurants for the user.
    who = user_id
    print("\n")

    name_list = pd.read_pickle("../data/user_based_name_list.pkl")[0].tolist()
    store_list = pd.read_pickle(
        "../data/user_based_store_list.pkl")[0].tolist()
    # name_list = dff.user.unique().tolist()
    # store_list = dff.store.unique().tolist()

    index = name_list.index(int(who))
    print('user_idx : ', index)
    print("\n")

    result = algo.get_neighbors(index, k=k)  # k=5
    print("Users similar to", who, ":")
    print(result)
    print("\n")

    # Recommend restaurants based on the similar users.
    print("Restaurants recommended to", who, ":", "\n")

    for r1 in result:
        # r1 is a trainset inner id; the lookup assumes it matches the user_id values
        max_rating = data.df[data.df["user_id"] == r1]["score"].max()
        sto_id = data.df[(data.df["score"] == max_rating)
                         & (data.df["user_id"] == r1)]["store_id"].values

        for sto in sto_id:
            print(store_list[sto])
Example #8
0
    def trainModels(self):
        # when importing from a DF, specify the scale of the ratings to get the best performance
        reader = surprise.Reader(rating_scale=(self.scale_low,
                                               self.scale_high))
        data = surprise.Dataset.load_from_df(self.rawdata, reader)

        trainset = data.build_full_trainset()
        testset = trainset.build_anti_testset()

        self.rmse = []
        self.predictions = []

        print("=== Training with Collaborative KNN ===")
        sim_options = {
            'name': 'cosine',
            'user_based': False
        }  # compute similarities between items
        self.cKNN = surprise.KNNBasic(k=40, sim_options=sim_options)
        self.cKNN.fit(trainset)
        self.predictions.append(self.cKNN.test(testset))
        self.rmse.append(
            surprise.accuracy.rmse(self.predictions[0], verbose=True))
        minR = self.rmse[0]
        self.algoIndex = 0

        print("=== Matrix Factorization ===")
        self.SVD = surprise.prediction_algorithms.matrix_factorization.SVD(
            n_factors=30, n_epochs=10, biased=True)
        self.SVD.fit(trainset)
        self.predictions.append(self.SVD.test(testset))
        self.rmse.append(
            surprise.accuracy.rmse(self.predictions[1], verbose=True))
        if (minR > self.rmse[1]):
            self.algoIndex = 1
            minR = self.rmse[1]

        print("=== Co-clustering ===")
        self.Co = surprise.prediction_algorithms.co_clustering.CoClustering(
            n_cltr_u=4, n_cltr_i=4, n_epochs=25)
        self.Co.fit(trainset)
        self.predictions.append(self.Co.test(testset))
        self.rmse.append(
            surprise.accuracy.rmse(self.predictions[2], verbose=True))
        if (minR > self.rmse[2]):
            self.algoIndex = 2
            minR = self.rmse[2]

        print("=== Slope One Collaborative Filtering ===")
        self.slope = surprise.prediction_algorithms.slope_one.SlopeOne()
        self.slope.fit(trainset)
        self.predictions.append(self.slope.test(testset))
        self.rmse.append(
            surprise.accuracy.rmse(self.predictions[3], verbose=True))
        if (minR > self.rmse[3]):
            self.algoIndex = 3
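After trainModels runs, self.algoIndex marks the model with the lowest RMSE on the anti-testset. A hedged companion method (illustrative, not part of the original class) showing how the winner could serve a single prediction:

    def predict_best(self, uid, iid):
        # Illustrative helper: route one query through the algorithm
        # that scored the lowest RMSE in trainModels.
        best = [self.cKNN, self.SVD, self.Co, self.slope][self.algoIndex]
        return best.predict(uid, iid).est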
Example #9
0
def train(who, k):

    df = pd.read_pickle("../data/dic_to_train.pkl")
    reader = surprise.Reader(rating_scale=(1, 5))
    col_list = ['user_id', 'store_id', 'score']
    data = surprise.Dataset.load_from_df(df[col_list], reader)

    # benchmark = []
    # from surprise import SVD, SVDpp, SlopeOne, NMF, NormalPredictor, KNNBasic, KNNBaseline, KNNWithMeans, KNNWithZScore, BaselineOnly, CoClustering
    # # from sklearn.model_selection import cross_validate  # not scikit-learn's cross_validate
    # from surprise.model_selection import cross_validate

    # benchmark = []
    # # Run every algorithm through the loop.
    # for algorithm in [SVD(), SVDpp(), SlopeOne(), NMF(), NormalPredictor(), KNNBaseline(), KNNBasic(), KNNWithMeans(), KNNWithZScore(), BaselineOnly(), CoClustering()]:

    #     # cross-validation step
    #     results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=False)

    #     # store the result and append the algorithm name
    #     tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    #     tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    #     benchmark.append(tmp)

    # print(pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse') )

    # Train
    trainset = data.build_full_trainset()
    option = {'name': 'pearson'}

    algo = surprise.KNNBasic(sim_options=option)

    algo.fit(trainset)

    name_list = pd.read_pickle("../data/user_based_name_list.pkl")[0].tolist()
    store_list = pd.read_pickle(
        "../data/user_based_store_list.pkl")[0].tolist()
    # name_list = dff.user.unique().tolist()
    # store_list = dff.store.unique().tolist()

    index = name_list.index(int(who))

    neighbors = algo.get_neighbors(index, k=k)  # k=5

    # Recommend store to user
    recommend_store_list = []

    for i in neighbors:
        max_rating = data.df[data.df["user_id"] == i]["score"].max()

        store = data.df[(data.df["score"] == max_rating)
                        & (data.df["user_id"] == i)]["store_id"].values
        for idx in store:
            recommend_store_list.append(store_list[idx])
    return recommend_store_list
def basicKNN(train, test):
    """
    Run the basic KNN model from Surprise library.
    @param train: the training set in the Surprise format.
    @param test: the test set in the Surprise format.
    @return: the predictions in a numpy array.
    """
    algo = spr.KNNBasic()
    algo.fit(train)
    predictions = algo.test(test)
    return get_predictions(predictions)
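get_predictions is not shown. A minimal sketch consistent with the docstring's promise of "predictions in a numpy array", assuming only the estimated ratings are wanted:

import numpy as np

def get_predictions(predictions):
    # Assumed helper: pull the .est field out of each Surprise
    # Prediction namedtuple into a numpy array.
    return np.array([pred.est for pred in predictions])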
Example #11
0
def main(args):

    user_item_based = 'item_based' if args.item_based else 'user_based'
    filename = '_'.join([
        args.exp_name, args.algorithm, args.sim_name, user_item_based,
        str(args.num_rows)
    ]) + '.pkl'

    output_file = Path(filename)
    if output_file.exists():
        print(f'ERROR! Output file {output_file} already exists. Exiting!')
        sys.exit(1)

    print(f'Saving scores in {output_file}\n')

    reader = surprise.Reader(rating_scale=(1, 5))
    df = pq.read_table('all_ratings_with_indices.parquet',
                       columns=['user_idx', 'movie_idx',
                                'rating']).to_pandas()
    df.user_idx = df.user_idx.astype(np.uint32)
    df.movie_idx = df.movie_idx.astype(np.uint16)
    df.rating = df.rating.astype(np.uint8)
    print(df.dtypes)
    data = surprise.Dataset.load_from_df(df[:args.num_rows], reader=reader)
    del df
    sim_options = {
        'name': args.sim_name,
        'user_based': False if args.item_based else True
    }

    if args.algorithm == 'knn':
        algo = surprise.KNNBasic(sim_options=sim_options)
    elif args.algorithm == 'baseline':
        algo = surprise.BaselineOnly()
    elif args.algorithm == 'normal':
        algo = surprise.NormalPredictor()
    elif args.algorithm == 'knn_zscore':
        algo = surprise.KNNWithZScore(sim_options=sim_options)
    elif args.algorithm == 'svd':
        algo = surprise.SVD()
    elif args.algorithm == 'nmf':
        algo = surprise.NMF()
    else:
        print(f'Algorithm {args.algorithm} is not a valid choice.')
        sys.exit(1)

    scores = surprise.model_selection.cross_validate(algo,
                                                     data,
                                                     cv=args.cv_folds,
                                                     verbose=True,
                                                     n_jobs=-1)

    pickle.dump(scores, open(output_file, 'wb'))
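main(args) expects six attributes on args. A hedged argparse setup that would supply them; defaults are guesses, not from the source:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--exp_name', default='exp')
parser.add_argument('--algorithm', default='knn',
                    choices=['knn', 'baseline', 'normal',
                             'knn_zscore', 'svd', 'nmf'])
parser.add_argument('--sim_name', default='cosine')
parser.add_argument('--item_based', action='store_true')
parser.add_argument('--num_rows', type=int, default=100000)
parser.add_argument('--cv_folds', type=int, default=5)
main(parser.parse_args())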
Example #12
0
def algo_tester(data_object):
    '''
    Produces a dataframe displaying the RMSEs and test & train times of the
    different surprise algorithms.

    ---Parameters---
    data_object: created from the read_data_surprise function

    ---Returns---
    a dataframe comparing the performance of the different algorithms
    '''
    benchmark = []
    algos = [
        sp.SVDpp(),
        sp.SVD(),
        sp.SlopeOne(),
        sp.NMF(),
        sp.NormalPredictor(),
        sp.KNNBaseline(),
        sp.KNNBasic(),
        sp.KNNWithMeans(),
        sp.KNNWithZScore(),
        sp.BaselineOnly(),
        sp.CoClustering()
    ]

    # Iterate over all algorithms
    for algorithm in algos:
        # Perform cross validation
        results = cross_validate(algorithm,
                                 data_object,
                                 measures=['RMSE'],
                                 cv=3,
                                 verbose=False)

        # Get results & append algorithm name
        tmp = pd.DataFrame.from_dict(results).mean(axis=0)
        tmp = tmp.append(
            pd.Series([str(algorithm).split(' ')[0].split('.')[-1]],
                      index=['Algorithm']))
        benchmark.append(tmp)

    benchmark = pd.DataFrame(benchmark).set_index('Algorithm').sort_values(
        'test_rmse')
    return benchmark
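A usage sketch, assuming sp is the surprise module and ratings_df is a hypothetical DataFrame with user, item, and rating columns (read_data_surprise itself is not shown):

reader = sp.Reader(rating_scale=(1, 5))
data_object = sp.Dataset.load_from_df(
    ratings_df[['user', 'item', 'rating']], reader)
print(algo_tester(data_object))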
Example #13
0
    def run(self):
        df = pd.read_csv(self.input_csv, delimiter=";")

        lower_rating = df['score'].min()
        upper_rating = df['score'].max()

        print('Score range: {0} to {1}'.format(lower_rating, upper_rating))

        reader = surprise.Reader(rating_scale=(df['score'].min(),
                                               df['score'].max()))
        data = surprise.Dataset.load_from_df(df, reader)
        trainset = data.build_full_trainset()

        chosen_k = math.ceil(math.sqrt(len(df['userId'].unique())) + 1)
        algo = surprise.KNNBasic(k=chosen_k,
                                 sim_options={
                                     'name': 'pearson_baseline',
                                     'user_based': True
                                 })
        algo.fit(trainset)
        testset = trainset.build_anti_testset()
        predictions = algo.test(testset)

        del df
        del reader

        top_n = self.get_top_n(predictions, n=10)

        df_top_rated = pd.DataFrame(columns=['userId', 'itemId', 'est'])

        for uid, user_ratings in top_n.items():
            for iid, est in user_ratings:
                df_top_rated.loc[len(df_top_rated)] = [uid, iid, est]

        df_top_rated.to_csv(self.output_csv,
                            sep=';',
                            encoding='utf-8',
                            index=False)

        return df_top_rated


#validate = surprise.model_selection.cross_validate(alg, data, verbose = True)
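self.get_top_n is not shown above. The Surprise FAQ's standard top-N helper has this shape, which is probably what is meant; a sketch, not confirmed from the source:

from collections import defaultdict

def get_top_n(predictions, n=10):
    # Group estimates by user, then keep each user's n highest-rated items.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n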
Example #14
0
    def __init__(self):

        warnings.filterwarnings('ignore')
        with open('./LogDic.pickle', 'rb') as f:
            df_to_dict = pickle.load(f)
        print("----------Let's dictionary----------", df_to_dict)
        cos_set = set()
        self.name_list = []
        for user_key in df_to_dict:
            self.name_list.append(user_key)

            for cos_key in df_to_dict[user_key]:
                cos_set.add(cos_key)

        self.cos_list = list(cos_set)

        rating_dic = {'Nickname': [], 'ProductIdx': [], 'rating': []}

        for name_key in df_to_dict:
            for cos_key in df_to_dict[name_key]:
                a1 = self.name_list.index(name_key)

                a2 = self.cos_list.index(cos_key)
                a3 = df_to_dict[name_key][cos_key]

                rating_dic['Nickname'].append(a1)
                rating_dic['ProductIdx'].append(a2)
                rating_dic['rating'].append(a3)

        df = pd.DataFrame(rating_dic)

        reader = surprise.Reader(rating_scale=(1, 5))

        col_list = ['Nickname', 'ProductIdx', 'rating']
        self.data = surprise.Dataset.load_from_df(df[col_list], reader)
        # print("----------Let's training----------")
        trainset = self.data.build_full_trainset()
        option = {'name': 'pearson'}
        self.algo = surprise.KNNBasic(sim_options=option)
        self.algo.fit(trainset)
    def model_fit(self):
        '''
        Train the model using the algorithm named by self._algo_choise.
        '''
        self.build_trainset()
        algo = self._algo_choise
        if algo == 'SVD':
            self.algorithm = surprise.SVD()
        elif algo == 'Baseline':
            self.algorithm = surprise.BaselineOnly()
        elif algo == 'SlopeOne':
            self.algorithm = surprise.SlopeOne()
        elif algo == 'CoClustering':
            self.algorithm = surprise.CoClustering()
        else:
            self.algorithm = surprise.KNNBasic()

        print('Training Recommender System using %s...' % algo)

        self.algorithm.fit(self.trainset)
        self.ratings_changed = False
        print('Done')
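Whichever algorithm model_fit selects, querying it afterwards is uniform. A hedged companion method (illustrative, not in the original class):

    def estimate(self, uid, iid):
        # Illustrative: point estimate from the algorithm trained in
        # model_fit; uid/iid must be raw ids known to the trainset.
        return self.algorithm.predict(uid, iid).est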
Example #16
0
def step5_surprise():
    name_list, movie_list, rating_dic = data_to_dic()
    print(rating_dic)
    # Build the data set.
    df = pd.DataFrame(rating_dic)
    # rating_scale: the range of ratings in the data
    reader = surprise.Reader(rating_scale=(0.0, 5.0))  # ratings run from 0 to 5
    # print(rating_dic.keys())
    col_list = [key for key in rating_dic.keys()]
    data = surprise.Dataset.load_from_df(df[col_list],reader)
    print(data)

    trainset = data.build_full_trainset()
    option1 = {'name':'pearson'}
    algo = surprise.KNNBasic(sim_options=option1)
    algo.fit(trainset)
    
    # Recommend movies for the user '소이현'.
    index = name_list.index('소이현')
    result = algo.get_neighbors(index, k=3)  # the id slot takes the target user's index here

    for r1 in result:
        print(movie_list[r1-1])  # r1 apparently starts from 1
def step5_surprise():
    # Fetch the data.
    name_list, movie_list, rating_dic = data_to_dic()
    # print(rating_dic)
    # Build the data set.
    df = pd.DataFrame(rating_dic)
    # rating_scale: the range of ratings in the data
    reader = surprise.Reader(rating_scale=(0.0, 5.0))
    # names of the lists held in the dictionary
    col_list = ['user_id', 'item_id', 'rating']
    data = surprise.Dataset.load_from_df(df[col_list], reader)
    # Train.
    trainset = data.build_full_trainset()
    option1 = {'name': 'pearson'}
    algo = surprise.KNNBasic(sim_options=option1)
    algo.fit(trainset)

    # Recommend movies for the user '소이현'.
    index = name_list.index('소이현')
    result = algo.get_neighbors(index, k=3)

    for r1 in result:
        print(movie_list[r1 - 1])
sim_options = {
    # (leading entries truncated in the source)
    'user_based': False  # compute similarities between items
}
mean_ap = []
precision = []
recall = []
fscore = []
normalized_DCG = []
mean_ap_train = []
precision_train = []
recall_train = []
fscore_train = []
normalized_DCG_train = []

for k_val in ks:
    print(k_val)
    algo = surprise.KNNBasic(k=k_val, sim_options=sim_options)
    pr = 0
    re = 0
    fs = 0
    ap = 0
    nd = 0
    pr_train = 0
    re_train = 0
    fs_train = 0
    ap_train = 0
    nd_train = 0
    for trainset, testset in data.folds():  # legacy (pre-1.0) Surprise API
        algo.train(trainset)  # renamed to fit() in Surprise >= 1.0
        predictions_on_test = algo.test(testset)

        precisions_test, recalls_test = precision_recall_at_k(
Example #19
0
import surprise as sp
from surprise import Dataset
from surprise.model_selection import cross_validate
import NetflixDataLoad

# first 100000 rows for fast processing
reader = sp.Reader(rating_scale=(1, 5))  # load_from_df requires a Reader; Netflix ratings are 1-5
data = Dataset.load_from_df(
    NetflixDataLoad.df_filterd[['Cust_Id', 'Movie_Id', 'Rating']][:100000],
    reader)

n_folds = 5

for algo in [sp.SVD(), sp.SVDpp(), sp.KNNBasic(), sp.KNNWithMeans()]:
    print(
        cross_validate(algo,
                       data,
                       measures=['RMSE', 'MAE'],
                       cv=n_folds,
                       verbose=True))

# Output Example
# Evaluating RMSE, MAE of algorithm SVD on 5 split(s).
#
#             Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std
# RMSE        0.9311  0.9370  0.9320  0.9317  0.9391  0.9342  0.0032
# MAE         0.7350  0.7375  0.7341  0.7342  0.7375  0.7357  0.0015
# Fit time    6.53    7.11    7.23    7.15    3.99    6.40    1.23
# Test time   0.26    0.26    0.25    0.15    0.13    0.21    0.06
    return results


np.random.seed(0)
file_path = 'data/user_artists_log.dat'
reader = Reader(line_format='user item rating', sep='\t')
data = Dataset.load_from_file(file_path, reader=reader)

trainset = data.build_full_trainset()
testset = trainset.build_anti_testset()

# 2 - User-based Recommendation
uid_list = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
# TODO - 2-1-1. KNNBasic, cosine
sim_options = {'name': 'cosine'}
algo = surprise.KNNBasic(sim_options=sim_options)
algo.fit(trainset)
results = get_top_n(algo, testset, uid_list, n=10, user_based=True)
with open('2-1-1_results.txt', 'w') as f:
    for uid, ratings in sorted(results.items(), key=lambda x: int(x[0])):
        f.write('User ID %s top-10 results\n' % uid)
        for iid, score in ratings:
            f.write('Item ID %s\tscore %s\n' % (iid, str(score)))
        f.write('\n')

# TODO - 2-1-2. KNNWithMeans, pearson
sim_options2 = {'name': 'pearson'}
algo = surprise.KNNWithMeans(sim_options=sim_options2)
algo.fit(trainset)
results = get_top_n(algo, testset, uid_list, n=10, user_based=True)
with open('2-1-2_results.txt', 'w') as f:
Example #21
0
def KNN_train(BASE_DIR):
    print('KNN training started: ', str(datetime.now())[10:19])
    conn = pymysql.connect(host=config('HOST'),
                           port=3306,
                           user=config('USER'),
                           password=config('PASSWORD'),
                           db=config('DB'))
    sql = 'SELECT * FROM wouldyouci.accounts_rating'
    data = pd.read_sql_query(sql, conn)
    conn.close()
    df = data[['user_id', 'movie_id', 'score']]

    # movies with at least n reviews
    # here, at least 5 reviews
    n1 = 5
    filter_movies = df['movie_id'].value_counts() >= n1
    filter_movies = filter_movies[filter_movies].index.tolist()

    # users who rated at least n movies
    n2 = 5
    filter_users = df['user_id'].value_counts() >= n2
    filter_users = filter_users[filter_users].index.tolist()

    df_new = df[df['movie_id'].isin(filter_movies)
                & df['user_id'].isin(filter_users)]

    df_to_dict = recur_dictify(df_new)

    user_list = []
    movie_set = set()
    # repeat once per user
    for user in df_to_dict:
        user_list.append(user)

        # collect the movies this user has seen into the set
        for movie in df_to_dict[user]:
            movie_set.add(movie)

    movie_list = list(movie_set)

    # prepare the training data
    rating_dic = {'user_id': [], 'movie_id': [], 'score': []}

    # repeat once per user
    for user in df_to_dict:
        # repeat once per movie this user has seen
        for movie in df_to_dict[user]:
            # extract the user index
            u_index = user_list.index(user)

            # extract the movie index
            m_index = movie_list.index(movie)

            # fetch the rating
            score = df_to_dict[user][movie]

            # store in the dictionary
            rating_dic['user_id'].append(u_index)
            rating_dic['movie_id'].append(m_index)
            rating_dic['score'].append(score)

    # build the dataset
    df = pd.DataFrame(rating_dic)

    # training
    reader = surprise.Reader(rating_scale=(0.5, 5.0))

    # Column names needed to build the surprise dataset:
    # the columns of the dictionary the data was stored in.
    col_list = ['user_id', 'movie_id', 'score']
    data = surprise.Dataset.load_from_df(df[col_list], reader)  # use the index-encoded frame built above; df_new holds raw ids

    # train
    trainset = data.build_full_trainset()
    # use Pearson similarity
    option = {'name': 'pearson'}
    algo = surprise.KNNBasic(sim_options=option)
    algo.fit(trainset)

    recommand_dic = {
        'user_id': [],
        'movie_id': [],
    }

    for user_key in df_new['user_id'].unique():
        index = user_list.index(user_key)
        result = algo.get_neighbors(index, k=5)
        recom_set = set()
        for i in result:
            # i is a neighbor's encoded index (assumed to match the trainset inner id)
            max_rating = data.df[data.df['user_id'] == i]['score'].max()
            recom_movies = data.df[(data.df['score'] == max_rating) & (
                data.df['user_id'] == i)]['movie_id'].values
            for item in recom_movies:
                recom_set.add(movie_list[item])  # map the movie index back to the raw movie id

        # dedented out of the neighbor loop so each movie is stored once
        for item in recom_set:
            recommand_dic['user_id'].append(user_key)
            recommand_dic['movie_id'].append(item)

    result_df = pd.DataFrame(recommand_dic)  # avoid shadowing the pickle module

    path = os.path.join(BASE_DIR, 'KNN.p')
    pd.to_pickle(result_df, path)

    print('finished: ', str(datetime.now())[10:19])
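The cached frame written to KNN.p can then be read back wherever recommendations are served. A minimal sketch under the same BASE_DIR assumption:

import os
import pandas as pd

def load_knn_recommendations(BASE_DIR, user_id):
    # Sketch: load the cached recommendations and filter one user's movies.
    cached = pd.read_pickle(os.path.join(BASE_DIR, 'KNN.p'))
    return cached[cached['user_id'] == user_id]['movie_id'].tolist()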
y = []
y_plot = []
counter = 0
x_plot = []
time_plot = []
mem_plot = []

for i in range(1, 10):


    reader = surprise.Reader(name=None, line_format='user item rating', sep=',', skip_lines=1)
    data = surprise.Dataset.load_from_file('/Users/keyadesai/Desktop/Recommendation Engine/ratings_woheader.csv',
                                           reader=reader)
    # data.split(5)

    algo = surprise.KNNBasic()  # unused here; GridSearchCV below builds its own SVD instances
    param_grid = {'n_epochs': [i], 'lr_all': [0.005],
                  'reg_all': [0.02]}
    
    gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
    gs.fit(data)
    results_df = pd.DataFrame.from_dict(gs.cv_results)

    y.append(results_df.mean_test_rmse)
    y_plot.append(y[counter][0])
    x_plot.append(i)
    counter += 1  # advance so each epoch count's RMSE is recorded



print(y_plot)
plt.plot(x_plot, y_plot, 'ro')
          epochs=2,
          validation_split=0.1,
          shuffle=True)

y_pred = model.predict([df_hybrid_test['User'], df_hybrid_test['Movie'], test_tfidf])
y_true = df_hybrid_test['Rating'].values

rmse = np.sqrt(mean_squared_error(y_pred=y_pred, y_true=y_true))
print('\n\nTesting Result With Keras Hybrid Deep Learning: {:.4f} RMSE'.format(rmse))

# Load dataset into surprise specific data-structure
data = sp.Dataset.load_from_df(df_filterd[['User', 'Movie', 'Rating']].sample(20000), sp.Reader())

benchmark = []
# Iterate over all algorithms
for algorithm in [sp.SVD(), sp.SVDpp(), sp.SlopeOne(), sp.NMF(), sp.NormalPredictor(), sp.KNNBaseline(), sp.KNNBasic(), sp.KNNWithMeans(), sp.KNNWithZScore(), sp.BaselineOnly(), sp.CoClustering()]:
    # Perform cross validation
    results = cross_validate(algorithm, data, measures=['RMSE', 'MAE'], cv=3, verbose=False)
    
    # Get results & append algorithm name
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    
    # Store data
    benchmark.append(tmp)
    
# Collect the results into a ranked table
surprise_results = pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse', ascending=False)

# Get data
data = surprise_results[['test_rmse', 'test_mae']]
Example #24
0
    def get_queryset(self):
        # user_reviews = pd.DataFrame(list(models.Review.objects.all().values()))
        # df = user_reviews[['userid','storeid','bornyear']].head(100)
        # userid = self.request.query_params.get("user", "")
        # if  userid is not None:
        user_reviews = pd.DataFrame(list(models.Review.objects.all().values()))
        df = user_reviews[['userid', 'storeid', 'bornyear']].head(1000)

        df_to_dict = recur_dictify(df)
        print(df_to_dict)
        name_list = []  # list of users (no duplicates)
        cos_set = set()  # set of restaurants (a set deduplicates)

        # user_key is a user id (df is keyed by userid first)

        for user_key in df_to_dict:

            name_list.append(user_key)
            # name_list grows like [1070] -> [1070, 6757] -> [1070, 6757, 8272]
            for cos_key in df_to_dict[user_key]:
                # cos_key is a restaurant id (e.g. 216, 58, 149)
                cos_set.add(cos_key)
                # cos_set grows like {216} -> {216, 58}
                # for user_score in a[score]:
                # user_gender.append[user_score]

        # prepare the training data
        rating_dic = {
            'user': [],
            'store': [],
            'born_year': []
            # 'gender': [],
            # 'bornyear': []
        }
        cos_list = list(cos_set)

        # repeat once per user
        for name_key in df_to_dict:
            # repeat once per restaurant this user has visited
            for cos_key in df_to_dict[name_key]:
                # extract the user index
                a1 = name_list.index(name_key)
                # extract the restaurant index
                a2 = cos_list.index(cos_key)
                # fetch the birth year
                a3 = df_to_dict[name_key][cos_key]

                rating_dic['user'].append(a1)
                rating_dic['store'].append(a2)
                rating_dic['born_year'].append(a3)
                # rating_dic['gender'].append(a4)
                # rating_dic['bornyear'].append(a5)

                # print((rating_dic['user']))
                # print((rating_dic['store_name']))
                # print(len(rating_dic['score']))

        df = pd.DataFrame(rating_dic)

        reader = surprise.Reader(rating_scale=(1, 5))  # note: born_year fills the rating slot here, though it is not on a 1-5 scale

        cos_list2 = ['user', 'store', 'born_year']

        data = surprise.Dataset.load_from_df(df[cos_list2], reader)

        trainset = data.build_full_trainset()
        option = {'name': 'pearson'}
        algo = surprise.KNNBasic(sim_options=option)
        algo.fit(trainset)

        index = name_list.index(1070)

        print('user_index: ', index)
        print("\n")
        result = algo.get_neighbors(index, k=5)
        print("당신과 유사한 사용자? :", result)
        print("\n")

        print("당신에게 추천할만한 맛집 :", "\n")

        a = []
        for r1 in result:

            max_rating = data.df[data.df["user"] == r1]["born_year"].max()
            cos_id = data.df[(data.df["born_year"] == max_rating)
                             & (data.df["user"] == r1)]["store"].values
            for cos_item in cos_id:

                a.append(cos_list[cos_item])

                # item_list = cos_list[a]
        print(a)
        queryset = models.Store.objects.all().filter(id=a[0])
        print(a[0])
        for i in a:
            select = models.Store.objects.all().filter(id=i)
            print(i)
            queryset = queryset | select

        print(queryset)
        return queryset
Example #25
0
'''
rating_matrix = df.pivot_table(index="userId", columns="movieId", values="rating")
sparse_matrix = sparse.csr_matrix(rating_matrix)
'''
movies = pd.read_csv("exportedData/moviesExp.csv", usecols=["title","movieId"])
ratings = pd.read_csv("exportedData/ratingsExp.csv")
evalData = pd.merge(movies, ratings, on="movieId", how="inner")
df = pd.read_table("exportedData/ratingsExp.csv", sep= ',').drop("timestamp", axis=1)
df.head()
recoms = pd.DataFrame()
reader = surprise.Reader(rating_scale=(0.5,5.0))
data = surprise.Dataset.load_from_df(df, reader)

trainset, testset = train_test_split(data, test_size=0.25)

#alg = surprise.SVD()
alg = surprise.KNNBasic()
out = alg.fit(data.build_full_trainset())

def recom(uid, recomms_count):
    movieIds = df["movieId"].unique()
    rated_movies = df.loc[df["userId"] == uid, "movieId"]
    iid_to_pred = np.setdiff1d(movieIds, rated_movies)

    test_data = [[uid, iid, 5.0] for iid in iid_to_pred]
    predictions = alg.test(test_data)

    pred_ratings = np.array([pred.est for pred in predictions])
    indice_max = np.argpartition(pred_ratings, -recomms_count)[-recomms_count:]
    iid = iid_to_pred[indice_max]

    iid_to_title = [i for i in range(0, recomms_count)]
Example #26
0
svd = surprise.SVD()
svd_temp = surprise.model_selection.cross_validate(svd,
                                                   rating_data,
                                                   measures=['RMSE', 'MAE'],
                                                   cv=5,
                                                   verbose=True)
print('SVD--------------')
print(svd_temp)
normalPredictor = surprise.NormalPredictor()
normalPredictor_temp = surprise.model_selection.cross_validate(
    normalPredictor, rating_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
print('normalPredictor--------------')
print(normalPredictor_temp)
baselineOnly = surprise.BaselineOnly()
baselineOnly_temp = surprise.model_selection.cross_validate(
    baselineOnly, rating_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
print('baselineOnly-----------------')
print(baselineOnly_temp)
knnBasic = surprise.KNNBasic()
knnBasic_temp = surprise.model_selection.cross_validate(
    knnBasic, rating_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
print('knnBasic-----------------')
print(knnBasic_temp)
knnWithMeans = surprise.KNNWithMeans()
knnWithMeans_temp = surprise.model_selection.cross_validate(
    knnWithMeans, rating_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
print('knnWithMeans-----------------')
print(knnWithMeans_temp)
knnBaseline = surprise.KNNBaseline()
knnBaseline_temp = surprise.model_selection.cross_validate(
    knnBaseline, rating_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
print('knnBaseline-----------------')
print(knnBaseline_temp)
svdpp = surprise.SVDpp()
Example #27
0
        
        idcgs[uid] = sum(rel_true/discount_true)
        
    dcg = sum(dcgu for (_,dcgu) in dcgs.items())
    idcg = sum(idcgu for (_,idcgu) in idcgs.items())
    return dcg/idcg

data = pd.read_csv('sampled.csv')
print "Users: "+str(len(np.unique(data['User-ID'])))+ " items: "+str(len(np.unique(data['ISBN'])))
print "No. of ratings: "+str(len(data))

sim_options = {'name': 'pearson',
               'user_based': False
               }

algo_knn = surprise.KNNBasic(k=5, sim_options=sim_options)
algo_svd = surprise.SVD(n_factors = 10, lr_all= 0.001, reg_all =1)

#Around 80% train data for each of these splits
sample_sizes = [0.4, 0.2, 0.1, 0.05, 0.01]


time_knn = []
time_svd = []
for s in sample_sizes:
    a = data.sample(frac = s, random_state = 111)
    print "s= "+str(len(a))
    
    print("Removing users with less than 20 ratings....")
    b = a.groupby('User-ID').filter(lambda x: len(x) >= 20)
    densityu = (float(len(b))/(len(np.unique(b['User-ID']))*len(np.unique(b['ISBN']))))*100
sparsityNew
img2 = plt.spy(imgpivotNew, markersize=0.1)

####################################################################################################

data = surprise.Dataset.load_from_df(df_ratings, reader)
user_based = {"name": "cosine", "user_based": True}
item_based = {"name": "cosine", "user_based": False}

#############
# User Based
#############

trainset = data.build_full_trainset()
# Using KNNBasic algorithm
alg = surprise.KNNBasic(sim_options=user_based)
# Training model
alg.fit(trainset)


def recom(uid, recomms_count):
    movieIds = df["movieId"].unique()
    rated_movies = df.loc[df["userId"] == uid, "movieId"]
    iid_to_pred = np.setdiff1d(movieIds, rated_movies)

    test_data = [[uid, iid, 5.0] for iid in iid_to_pred]
    predictions = alg.test(test_data)

    pred_ratings = np.array([pred.est for pred in predictions])
    indice_max = np.argpartition(pred_ratings, -recomms_count)[-recomms_count:]
    iid = iid_to_pred[indice_max]
Example #29
0
def main(train_df, target_df, cache_name="test", force_recompute=[]):
    """Train multiple models on train_df and predicts target_df

    Predictions are cached. If the indices don't match the indices of
    target_df, the cache is discarded.

    By default, if a method was already computed it is not recomputed again
    (except if the method name is listed in force_recompute). cache_name
    is the name to use to read and write the cache.

    Arguments:
        train_df {dataframe} -- Training dataframe
        target_df {dataframe} -- Testing dataframe

    Keyword Arguments:
        cache_name {str} -- Name to use for caching (default: {"test"})
        force_recompute {list} -- Name(s) of methods to recompute, whether or
        not it was already computed. Useful to only recompute single methods
        without discarding the rest. (default: {[]})

    Returns:
        Dataframe -- Dataframe with predictions for each methods as columns,
        IDs as indices
    """
    global algo_in_use
    CACHED_DF_FILENAME = os.path.dirname(
        os.path.abspath(__file__)) +\
        "/cache/cached_predictions_{}.pkl".format(cache_name)
    train_df = preprocess_df(train_df)
    trainset = pandas_to_data(train_df)
    ids_to_predict = target_df["Id"].to_list()

    # try to retrieve backup dataframe
    try:
        print("Retrieving cached predictions")
        all_algos_preds_df = pd.read_pickle(CACHED_DF_FILENAME)
        print("Ensuring cached IDs match given IDs")
        assert sorted(ids_to_predict) == sorted(
            all_algos_preds_df.index.values)
        print("Indices match, continuing")
    except (FileNotFoundError, AssertionError):
        print("No valid cached predictions found")
        all_algos_preds_df = pd.DataFrame(ids_to_predict, columns=["Id"])
        all_algos_preds_df.set_index("Id", inplace=True)

    all_algos = {
        "SVD": spr.SVD(n_factors=200, n_epochs=100),
        "Baseline": spr.BaselineOnly(),
        "NMF": spr.NMF(n_factors=30, n_epochs=100),
        "Slope One": spr.SlopeOne(),
        "KNN Basic": spr.KNNBasic(k=60),
        "KNN Means": spr.KNNWithMeans(k=60),
        "KNN Baseline": spr.KNNBaseline(),
        "KNN Zscore": spr.KNNWithZScore(k=60),
        "SVD ++": spr.SVDpp(n_factors=40, n_epochs=100),
        "Co Clustering": spr.CoClustering()
    }

    for name in all_algos:
        print("##### {} ####".format(name))
        if name in force_recompute and name in all_algos_preds_df.columns:
            all_algos_preds_df.drop(name, axis=1, inplace=True)
        if name in all_algos_preds_df.columns:
            print("Already computed {}, skipping".format(name))
            continue
        algo = all_algos[name]
        time.sleep(1)
        algo.fit(trainset)
        time.sleep(1)
        algo_in_use = algo
        print("Generating predictions...")
        predictions = parallelize_predictions(ids_to_predict, 80)
        print("Done. Merging with previous results")
        this_algo_preds_df = pd.DataFrame(predictions, columns=["Id", name])
        this_algo_preds_df.set_index("Id", inplace=True)
        all_algos_preds_df = pd.merge(all_algos_preds_df,
                                      this_algo_preds_df,
                                      left_index=True,
                                      right_index=True)
        all_algos_preds_df.to_pickle(CACHED_DF_FILENAME)
    print("DONE computing surprize")
    return all_algos_preds_df
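preprocess_df and pandas_to_data are project helpers defined elsewhere. A plausible pandas_to_data, assuming the frame ends up with user/movie/rating columns on a 1-5 scale (names and scale are guesses):

def pandas_to_data(df):
    # Assumed shape: turn the preprocessed ratings frame into a full
    # Surprise trainset (column names here are illustrative).
    reader = spr.Reader(rating_scale=(1, 5))
    data = spr.Dataset.load_from_df(df[['user', 'movie', 'rating']], reader)
    return data.build_full_trainset()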
Example #30
0
def recommend_new(request):
    test = Test()
    test.name = request.POST.get('name')
    test.local = request.POST.get('local')
    test.rating = request.POST.get('rating')
    test.save()
    if request.method == 'POST':
        # q1: transport (1 = '뚜벅' on foot, 2 = '자차' own car)
        # q2: party size (1 = '혼자' alone, 2 = '2인' two, 3 = '3인' three)
        # q3: trip type (1 = '관광' sightseeing, 2 = '휴양' resort)
        transport = {'1': '뚜벅', '2': '자차'}[request.POST.get('q1')]
        party = {'1': '혼자', '2': '2인', '3': '3인'}[request.POST.get('q2')]
        trip = {'1': '관광', '2': '휴양'}[request.POST.get('q3')]
        filename = transport + party + trip + '.csv'

        # Append the new rating, then reload the matching file.
        with open(filename, 'a', newline='', encoding='utf-8') as f:
            csv.writer(f).writerow([test.name, test.local, test.rating])

        warnings.filterwarnings('ignore')
        data = pd.read_csv(filename,
                           encoding="utf-8",
                           sep=",",
                           error_bad_lines=False)
    df = data[['id', '여행지', 'rating']]
    df = df.drop_duplicates(['id', '여행지'], keep="last")

    def recur_dictify(frame):
        if len(frame.columns) == 1:
            if frame.values.size == 1: return frame.values[0][0]
            return frame.values.squeeze()
        grouped = frame.groupby(frame.columns[0])
        d = {k: recur_dictify(g.iloc[:, 1:]) for k, g in grouped}
        return d

    df_to_dict = recur_dictify(df)

    name_list = []
    local_set = set()

    for user_key in df_to_dict:
        name_list.append(user_key)
        for local_key in df_to_dict[user_key]:
            local_set.add(local_key)

    local_list = list(local_set)

    rating_dic = {'id': [], '여행지': [], 'rating': []}

    for name_key in df_to_dict:
        for cos_key in df_to_dict[name_key]:
            a1 = name_list.index(name_key)
            a2 = local_list.index(cos_key)
            a3 = df_to_dict[name_key][cos_key]

            rating_dic['id'].append(a1)
            rating_dic['여행지'].append(a2)
            rating_dic['rating'].append(a3)

    df = pd.DataFrame(rating_dic)

    reader = surprise.Reader(rating_scale=(1, 5))
    data = surprise.Dataset.load_from_df(df[['id', '여행지', 'rating']], reader)

    trainset = data.build_full_trainset()
    option = {'name': 'pearson'}
    algo = surprise.KNNBasic(sim_options=option)

    algo.fit(trainset)

    index = name_list.index(test.name)
    result = algo.get_neighbors(index, k=3)

    def localtest():
        # returns the first recommended place found among the neighbors
        for r1 in result:
            max_rating = data.df[data.df["id"] == r1]["rating"].max()
            local_id = data.df[(data.df["rating"] == max_rating)
                               & (data.df["id"] == r1)]["여행지"].values
            for local_item in local_id:
                return local_list[local_item]

    return render(request, 'recommend_result.html', {'localtest': localtest})