def results():
    names = ['userID', 'itemID', 'rating']
    df = pd.read_csv('~/.surprise_data/ratings.csv', names=names)

    names1 = ['itemID', 'Profession', 'City']
    df1 = pd.read_csv('~/.surprise_data/workers1.csv', names=names1)

    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)

    trainset = data.build_full_trainset()

    sim_options = {'name': 'cosine', 'user_based': False}
    algo = KNNBasic(k=40, min_k=1, sim_options=sim_options)
    algo.fit(trainset)
    testset = trainset.build_anti_testset()

    predictions = algo.test(testset)
    top_n = get_top_n(predictions, n=10)
    myArray = []
    for uid, user_ratings in top_n.items():
        # keep the full (item id, estimated rating) pairs for each user
        myArray.append([uid, list(user_ratings)])

    print(myArray)

    return render_template('secondpage.html', returned={'data': myArray})
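Several examples in this listing call a get_top_n helper that is not shown. The version in the Surprise FAQ looks like this; the sketch assumes predictions is the list returned by algo.test():

from collections import defaultdict

def get_top_n(predictions, n=10):
    """Map each user id to its top-n (item id, estimated rating) pairs."""
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n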
Example #2
class ItemCF():
    def __init__(self):
        file_path = os.path.expanduser('user_item_rate.csv')
        reader = Reader(line_format='user item rating', sep=',')
        surprise_data = Dataset.load_from_file(file_path, reader=reader)
        all_trainset = surprise_data.build_full_trainset()

        # Train the model: item-based similarity
        self.item_algo = KNNBasic(k=10,
                                  min_k=3,
                                  sim_options={'user_based': False})
        # sim_options={'name': 'cosine','user_based': True} cosine/msd/pearson/pearson_baseline
        self.item_algo.fit(all_trainset)

    def get_similar_items(self, top_k, item_id):
        """
        相似项目
        Args:
            top_k(int): 相似项目数量
            item_id(str): 项目id

        Returns:
            list generator
        """
        item_inner_id = self.item_algo.trainset.to_inner_iid(item_id)
        item_neighbors = self.item_algo.get_neighbors(item_inner_id, k=top_k)
        item_neighbor_ids = (self.item_algo.trainset.to_raw_iid(inner_id)
                             for inner_id in item_neighbors)
        return item_neighbor_ids
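A hypothetical call, assuming the raw item ids in user_item_rate.csv are strings such as '42':

cf = ItemCF()
for neighbor_id in cf.get_similar_items(top_k=5, item_id='42'):
    print(neighbor_id)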
Example #3
def train_model():
    histories = requests.get(
        'https://whispering-refuge-67560.herokuapp.com/api/histories')
    history_data = json.loads(histories.content.decode('utf-8'))
    data_train = pd.DataFrame.from_dict(history_data, orient='columns')
    data_train = data_train.drop(
        columns=['booking_id', 'createdAt', 'history_id', 'updatedAt'])
    data_train = data_train[['tid', 'gid', 'rating']]

    sim_options = {'name': 'cosine', 'user_based': False}

    global algo
    algo = KNNBasic(sim_options=sim_options)

    # A reader is still needed but only the rating_scale param is required.
    reader = Reader(rating_scale=(0, 5))

    # The columns must correspond to user id, item id and ratings (in that order).
    data = Dataset.load_from_df(data_train[['tid', 'gid', 'rating']], reader)

    # Train on the full trainset; a random 75/25 split is left as an
    # alternative in the commented line below.
    trainingSet = data.build_full_trainset()
    # trainset, testset = train_test_split(data, test_size=.25)

    # Train the algorithm on the full trainset
    algo.fit(trainingSet)

    global all_guides
    all_guides = []
    get_all_guides()

    return jsonify(status="training in progress")
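Once train_model has populated the global algo, a companion endpoint could serve predictions. This is only a sketch; the route, parameter names, and the Flask app object are assumptions:

@app.route('/predict/<tid>/<gid>')
def predict_rating(tid, gid):
    # ids must match the type/format used in the training DataFrame
    pred = algo.predict(tid, gid)
    return jsonify(tid=pred.uid, gid=pred.iid, rating=pred.est)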
Example #4
    def content(self):
        # content based
        surprise_data = self.prepare_Data()
        if surprise_data == []:
            print("No data provided")
            return

        sim_options = {
            'name': 'cosine',
            'user_based': False  # compute  similarities between items
        }
        algo = KNNBasic(sim_options=sim_options)
        trainset = surprise_data.build_full_trainset()
        algo.fit(trainset)
        testset = trainset.build_testset()
        predictions = algo.test(testset)
        recommendation = self.get_top_n(predictions)
        new_list = []
        for k, (business_id, _) in enumerate(recommendation[self.user_id]):
            new_list.append({'id': k, 'business id': business_id})
        recommend = {item['id']: item for item in new_list}
        return recommend
Example #5
def collaborative_filtering():
    history_list = History.objects.all()
    with open('recommend/dataset_cf.csv', 'w', encoding='utf-8', newline='') as csv_file:
        header = ['history_id', 'user_id', 'alco_name', 'data_joined', 'review']
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        for history in history_list:
            row = []
            row += [history.history_id,
                    history.user_id,
                    history.alco_name,
                    history.data_joined,
                    history.review]
            writer.writerow(row)

    alco = pandas.read_csv("recommend/alcohol_cf.csv", encoding='utf-8')
    alco = alco.set_index('alco_name')

    data = pandas.read_csv("recommend/dataset_cf.csv", encoding='utf-8').fillna(0)
    data = data.drop('history_id', axis=1)
    data = data.drop('data_joined', axis=1)
    alcohol_id_list = []
    for i in range(len(data.index)):
        alcohol_id_list.append(alco.at[data['alco_name'][i], 'alcohol_id'])

    data = data.drop('alco_name', axis=1)
    data['alcohol_id'] = alcohol_id_list
    data = data.loc[:, ["user_id", "alcohol_id", "review"]]
    data.to_csv("recommend/dataset_cf.score", sep=' ', header=None, index=False, encoding='utf-8')

    reader = Reader(line_format='user item rating', sep=' ')
    dataset = Dataset.load_from_file("recommend/dataset_cf.score", reader=reader)
    trainset = dataset.build_full_trainset()
    sim_options = {
        'name': 'pearson',  # similarity measure: cosine / msd / pearson / pearson_baseline
        'user_based': True  # set to False for item-based
    }
    algo = KNNBasic(k=5, min_k=1, sim_options=sim_options)
    algo.fit(trainset)
    # algo = SVD()
    # algo.train(trainset)
    # print(algo.sim)

    alcohol_num = Alcohol.objects.latest('alcohol_id').alcohol_id
    user_num = History.objects.latest('user_id').user_id

    with open('recommend/answer_cf.csv', 'w', encoding='utf-8', newline='') as csv_file:
        header = ['user_id', 'alcohol_id', 'predicted_value']
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        for j in range(1, user_num + 1):
            user_id = j
            for i in range(1, alcohol_num + 1):
                item_id = i
                pred = algo.predict(uid=str(user_id), iid=str(item_id))
                row = []
                row += [pred.uid,
                        pred.iid,
                        pred.est]
                writer.writerow(row)
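A downstream step might keep only each user's best predictions from answer_cf.csv; a minimal sketch using the column names written above:

import pandas

answers = pandas.read_csv('recommend/answer_cf.csv')
# the frame is sorted globally, so head(3) per group yields each user's top 3
top3 = (answers.sort_values('predicted_value', ascending=False)
               .groupby('user_id')
               .head(3))
print(top3)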
Example #6
def collaborative_filter(id, new_words):
    ratings_dict = calc_collaborative_param(new_words, id)

    df = pd.DataFrame(ratings_dict)

    # A reader is still needed but only the rating_scale param is required.
    reader = Reader(rating_scale=(0.0, 5.0))
    # The columns must correspond to user id, item id and ratings (in that order).
    data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)
    # define a cross-validation iterator
    kf = KFold(n_splits=3)

    algo = KNNBasic()

    for trainset, testset in kf.split(data):
        # train and test algorithm.
        algo.fit(trainset)
        kf_predictions = algo.test(testset)
        # Compute and print Root Mean Squared Error
        accuracy.rmse(kf_predictions, verbose=True)

    trainset = data.build_full_trainset()
    algo.fit(trainset)  # refit on the full trainset before predicting unseen pairs

    new_data = trainset.build_anti_testset()
    predictions = algo.test(new_data)

    top_n = get_top_n(predictions, n=3)

    with open('top_n.json', 'w') as fp:
        dump(top_n, fp, indent=4)

    return top_n
Example #7
def run_KNN(x_train, x_test, k):
    reader = Reader(rating_scale=(1, 5))
    data_train_df = Dataset.load_from_df(
        x_train[['userId', 'movieId', 'rating']], reader)
    data_test_df = Dataset.load_from_df(
        x_test[['userId', 'movieId', 'rating']], reader)
    data_train = data_train_df.build_full_trainset()
    data_test = data_test_df.build_full_trainset()
    data_testset = data_test.build_testset()
    algo = KNNBasic()
    algo.fit(data_train)
    pr = algo.test(data_testset)
    rec = format_baselines(pr)
    seen = format_baselines_apk(pr, x_test)
    predicted, actual = format_baselines_third(pr, x_test)
    print(predicted)
    print(actual)
    print(f'Alternative Precision {recommender_precision(predicted, actual)}')
    print(f'Alternative Recall {recommender_recall(predicted, actual)}')
    print(f'APK: {yallah(seen, k)}')
    precisions, recalls = precision_recall_at_k(rec, k)
    print(
        f'|KNN : Precision| = {sum(prec for prec in precisions.values()) / len(precisions)}'
    )
    print(
        f'|KNN : Recall| = {sum(rec for rec in recalls.values()) / len(recalls)}'
    )
Example #8
def user_based_rec_loader(data, ml, userID, no_recs):
    trainSet = data.build_full_trainset()
    sim_options = {'name': 'cosine',
                   'user_based': True
                   }
    model = KNNBasic(sim_options=sim_options)
    model.fit(trainSet)
    similarity_matrix = model.compute_similarities()
    userIDInnerID = trainSet.to_inner_uid(userID)
    similarity_row = similarity_matrix[userIDInnerID]

    # remove the test user from the similarity row
    similarUsers = []
    for innerID, score in enumerate(similarity_row):
        if innerID != userIDInnerID:
            similarUsers.append((innerID, score))

    # find the k users largest similarities
    k = 15
    kNeighbours = heapq.nlargest(k, similarUsers, key=lambda t: t[1])

#     or can tune for ratings > threshold
#     kNeighbours = []
#     for rating in similarUsers:
#        if rating[1] > 4.0:
#            kNeighbours.append(rating)

    results = get_recommendations(ml, no_recs, trainSet, similarity_matrix, kNeighbours, userIDInnerID, rec_type = 'user')
    return results
Example #9
def item_based_rec_loader(data, ml, userID, no_recs):

    trainSet = data.build_full_trainset()
    # note that user_based is False here: we are telling KNN to
    # generate an item-item similarity matrix
    sim_options = {'name': 'cosine',
                   'user_based': False
                   }
    model = KNNBasic(sim_options=sim_options)
    model.fit(trainSet)
    similarity_matrix = model.compute_similarities()
    userIDInnerID = trainSet.to_inner_uid(userID)

    # Get the top K items we rated
    k = 15
    userIDRatings = trainSet.ur[userIDInnerID]
    kNeighbours = heapq.nlargest(k, userIDRatings, key=lambda t: t[1])

    # kNeighbours = []
    # userIDRatings = trainSet.ur[userIDInnerID]
    # for rating in userIDRatings:
    #    if rating[1] > 4.0:
    #        kNeighbours.append(rating)

    results = get_recommendations(ml, no_recs, trainSet, similarity_matrix, kNeighbours, userIDInnerID, rec_type = 'item')
    return results
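Examples #8 and #9 delegate to a get_recommendations helper that is not shown. A minimal sketch consistent with the candidate-scoring loops that appear in Examples #22 and #25 might look like this; the ml.getMovieName call and the (title, score) return format are assumptions:

from collections import defaultdict
from operator import itemgetter

def get_recommendations(ml, no_recs, trainSet, similarity_matrix,
                        kNeighbours, userInnerID, rec_type='user'):
    candidates = defaultdict(float)
    if rec_type == 'item':
        # score items similar to the user's top-rated items, weighted by rating
        for itemID, rating in kNeighbours:
            for innerID, score in enumerate(similarity_matrix[itemID]):
                candidates[innerID] += score * (rating / 5.0)
    else:
        # pool ratings from the most similar users, weighted by similarity
        for neighbourID, simScore in kNeighbours:
            for itemID, rating in trainSet.ur[neighbourID]:
                candidates[itemID] += simScore * (rating / 5.0)

    # skip items the user has already rated
    seen = {itemID for itemID, _ in trainSet.ur[userInnerID]}
    results = []
    for itemID, score in sorted(candidates.items(), key=itemgetter(1), reverse=True):
        if itemID not in seen:
            results.append((ml.getMovieName(int(trainSet.to_raw_iid(itemID))), score))
            if len(results) >= no_recs:
                break
    return results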
Example #10
def use_cosine_similarity():
    start = time.time()
    performance = []

    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()

    print('Using cosine similarity')
    sim_options = {
        'name': 'cosine',
        'user_based': False  # compute  similarities between items
    }
    algo_cosine = KNNBasic(sim_options=sim_options)
    algo_cosine.fit(trainset)

    testset = trainset.build_anti_testset()
    predictions_KNN = algo_cosine.test(testset)

    accuracy_rmse = accuracy.rmse(predictions_KNN)
    accuracy_mae = accuracy.mae(predictions_KNN)
    performance.append(accuracy_rmse)
    performance.append(accuracy_mae)

    end = time.time()
    performance.append(end - start)

    return performance
Example #11
def KNN_Tester(data, trainset, testset, algo):
    param_grid = {
        'k': [50, 100],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson']
        }
    }

    # GridSearchCV expects the algorithm class (e.g. KNNBasic) and the full
    # Dataset object, not a built trainset
    gs = GridSearchCV(algo, param_grid, measures=['rmse'], cv=5)
    gs.fit(data)
    params = gs.best_params['rmse']
    algo = KNNBasic(k=params['k'], sim_options=params['sim_options'])
    algo.fit(trainset)
    predictions = algo.test(testset)
    rmse = accuracy.rmse(predictions)
    precisions, recalls = precision_recall_at_k(predictions, k=10, threshold=4)
    avg_precision = sum(prec for prec in precisions.values()) / len(precisions)
    avg_recall = sum(rec for rec in recalls.values()) / len(recalls)
    metrics = {
        'rmse': rmse,
        'avg_precision': avg_precision,
        'avg_recall': avg_recall,
        'best_parameters': params
    }
    return metrics
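precision_recall_at_k, used here and in Examples #7 and #23, is another helper from the Surprise FAQ; its standard form is:

from collections import defaultdict

def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """Return dicts mapping each user id to precision@k and recall@k."""
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions, recalls = {}, {}
    for uid, user_ratings in user_est_true.items():
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold))
                              for (est, true_r) in user_ratings[:k])
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0
    return precisions, recalls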
Example #12
class KNN_Basic(BaseSurpriseSTLEstimator):
    """
    Args:
        :attr:`k` (int):
            number of neighbors
        :attr:`sim_options` (optional):
            option from surprise for a similarity metric
    
    """
    def __init__(self, k, name='KNN_Basic', sim_options=None):
        super().__init__(name, 'non_feature_based')
        self.k = k
        if sim_options is not None:
            self.model = KNNBasic(k=self.k,
                                  verbose=False,
                                  sim_options=sim_options)
        else:
            self.model = KNNBasic(k=self.k, verbose=False)

    def _fit(self, x):
        self.model.fit(x)

    def _predict(self, x):
        return self.model.test(x)

    def get_hyper_params(self):
        hparams = {'k': {'type': 'integer', 'values': [2, 13]}}
        return hparams

    def set_hyper_params(self, **kwargs):
        self.k = kwargs['k']

    def similarity_matrix(self):
        return self.model.compute_similarities()
Example #13
def train_item_rec_sys():
    """
    Trains KNNBasic Model.

    Yields
    ------
        similar_items_algo.pkl
    """
    item_rec_sys_data = pd.read_csv("datasets/item_rec_sys_data.csv")

    # Creating Data object.
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(df=item_rec_sys_data, reader=reader)
    trainset = data.build_full_trainset()

    # Training Algorithm.
    sim_options = {"name": "cosine", "user_based": False}
    algo = KNNBasic(k=10, sim_options=sim_options, verbose=False)
    algo.fit(trainset)

    # Extract inner id mappings.
    _compute_inner_item_ids(item_rec_sys_data, algo=algo, trainset=trainset)

    # Saving Algorithm.
    file_path = Path.cwd() / "models/similar_items_algo.pkl"
    dump.dump(file_path, algo=algo)
    return
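The pickled model can be restored later with surprise's dump module, which returns a (predictions, algo) tuple:

from surprise import dump

_, algo = dump.load("models/similar_items_algo.pkl")
print(algo.get_neighbors(0, k=10))  # inner ids of the 10 nearest items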
Example #14
def cal_KNNBasic(trainset, df, save_file2):
    # KNNBasic; save_file2 is the output csv path (taken from an enclosing
    # scope in the original snippet)
    sim_options = {'name': 'cosine', 'user_based': True}
    algo_knnb = KNNBasic(k=40, min_k=1, sim_options=sim_options)
    algo_knnb.fit(trainset)
    users = []
    items = []
    real = []
    estimate = []
    for i in range(len(df)):
        uid = df[i:i + 1].user.values[0]
        users.append(uid)
        iid = df[i:i + 1].store.values[0]
        items.append(iid)
        r_ui = df[i:i + 1].stars.values[0]
        real.append(r_ui)
        pred = algo_knnb.predict(uid, iid, r_ui, verbose=True)
        estimate.append(pred)
    print("end")
    # knn basic
    df3 = pd.DataFrame(columns=['user', 'item', 'r_ui', 'est'])
    df3['user'] = users
    df3['item'] = items
    df3['r_ui'] = real
    df3['est'] = estimate
    # df3.head()
    df3['est'] = df3['est'].apply(lambda x: x[-2])  # extract .est from each Prediction
    df3['err'] = abs(df3.est - df3.r_ui)
    df3.to_csv(save_file2)
Example #15
def predict_ratings(data):
    """
	可以简单地将算法适合整个数据集,
	而不是运行交叉验证。
	这可以通过使用build_full_trainset()将创建trainset对象的方法来完成
	可以通过直接调用该predict()方法来预测收视率
	:return:
	"""
    trainset = data.build_full_trainset()

    svd = SVD()
    svd.fit(trainset)
    testset = trainset.build_anti_testset()
    predictions = svd.test(testset)

    algo = KNNBasic()
    algo.fit(trainset)

    # Rating prediction: suppose we are interested in user 196 and item 302
    # (make sure they are in the trainset!) and know the true rating r_ui = 4.
    uid = str(196)
    iid = str(302)

    # algo.predict(uid,iid,r_ui=4,verbose=True)

    return predictions
Example #16
def create_KNNmodel(trainset,
                    k=50,
                    min_k=5,
                    user_based=True,
                    random_state=12345):
    """Train the KNN model given a training set and model parameters
    
    Arguments:
        trainset {surprise.trainset.Trainset} -- training set, output from build_trainset
        k {int} -- number of neighbors, parameter for algorithm (default: {50})
        min_k {int} -- minimum neighbors, parameter for algorithm (default: {5})
        user_based {bool} -- user based or not, parameter for algorithm (default: {True})
        random_state {int} -- random seed; unused here, since KNN is deterministic (default: {12345})

    Returns:
        model {surprise.prediction_algorithms.knns.KNNBasic} -- trained model object
    """
    model = KNNBasic(k=k,
                     min_k=min_k,
                     sim_options={'user_based': user_based})
    # user_based must be passed inside sim_options; KNNBasic silently ignores
    # unknown keyword arguments such as random_state, so it is not passed on
    model.fit(trainset)

    return model
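A minimal call, for example with the built-in MovieLens data (the ids passed to predict are assumptions):

data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()
model = create_KNNmodel(trainset, k=50, min_k=5, user_based=False)
print(model.predict(uid='196', iid='302').est)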
Example #17
    def train(self, df, model_path=''):
        '''
        Train the collaborative-filtering model.
        :param df: DataFrame with three columns -> userid, itemid, rating
        :param k: number of neighbors
        :param min_k: minimum number of neighbors
        :param sim_name: similarity measure; cosine by default
        :param user_based: CF flavor; item-based (False) by default
        :param model_path: path for model persistence; empty by default (no persistence)
        :return: the trained model
        '''
        print('begin to train')
        # convert the DataFrame into the format surprise expects
        data = Dataset.load_from_df(df, self.reader)
        trainset = data.build_full_trainset()

        # train and (optionally) persist the item-based KNN model
        algo_knnbasic = KNNBasic(k=self.k,
                                 min_k=self.min_k,
                                 sim_options={
                                     'name': self.sim_name,
                                     'user_based': self.user_based
                                 },
                                 verbose=True)
        algo_knnbasic.fit(trainset)
        if model_path:
            surprise.dump.dump(model_path, algo=algo_knnbasic, verbose=1)

        return algo_knnbasic
Example #18
    def Basic_CF(self):
        u_id = []
        I_id = []
        r_ui_ = np.array([])
        _est = np.array([])

        sim_options = {'name': 'cosine', 'user_based': True}
        algo = KNNBasic(k=40, min_k=1, sim_options=sim_options)
        algo.fit(self.trainset)

        for uid in self.list:
            lids = self.data[self.data.uid == uid]

            # iterate over every rating this user has given
            for i in range(len(lids)):
                lid = lids[i:i + 1].lid.values[0]
                r_ui = lids[i:i + 1].rate.values[0]
                pred = algo.predict(uid, lid, r_ui, verbose=True)
                u_id.append(int(pred.uid))
                I_id.append(int(pred.iid))
                r_ui_ = np.append(r_ui_, pred.r_ui)
                _est = np.append(_est, pred.est)

        self.df_est = pd.DataFrame({
            'uid': u_id,
            'Iid': I_id,
            'r_ui': r_ui_,
            'est': _est
        })
        self.arr = self.df_est['uid'].unique()

        self.CF_ndcg_ = self.Calculate_NDCG()
Example #19
def use_pearson_baseline():
    start = time.time()
    performance = []

    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()

    print('Using Pearson baseline')
    sim_options = {
        'name': 'pearson_baseline',
        'shrinkage': 0  # no shrinkage
    }
    algo_pearson = KNNBasic(sim_options=sim_options)
    algo_pearson.fit(trainset)

    testset = trainset.build_anti_testset()
    predictions_KNN = algo_pearson.test(testset)

    accuracy_rmse = accuracy.rmse(predictions_KNN)
    accuracy_mae = accuracy.mae(predictions_KNN)
    performance.append(accuracy_rmse)
    performance.append(accuracy_mae)

    end = time.time()
    performance.append(end - start)

    return performance
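Both timing helpers return [rmse, mae, elapsed seconds], so a head-to-head comparison is short:

cosine_perf = use_cosine_similarity()
pearson_perf = use_pearson_baseline()
print('cosine          :', cosine_perf)
print('pearson_baseline:', pearson_perf)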
Example #20
def rodar_modelo(data, teste_tamanho, sim_opcoes, k):
    treina, testa = train_test_split(data, test_size=teste_tamanho)
    knn = KNNBasic(k=k, sim_options=sim_opcoes)
    knn.fit(treina)
    knn_predicoes = knn.test(testa)
    accuracy.rmse(knn_predicoes)
    return knn
Example #21
def detail(request, post_id):
    # predicted-rating algorithm
    file_path = os.path.expanduser('stars.csv')
    reader = Reader(line_format='user item rating', sep=',')
    data = Dataset.load_from_file(file_path, reader=reader)
    trainset = data.build_full_trainset()
    algo = KNNBasic()
    algo.fit(trainset)
    uid = str(request.user.id)  # must be the raw user id as it appears in the ratings file
    iid = str(post_id)  # raw item id (as in the ratings file). They are **strings**!

    pred = algo.predict(uid, iid, r_ui=4, verbose=True)   # predicted rating

    group = Matzip_list.objects.get(id=post_id)
    if not request.user.is_anonymous:
        if request.user.star_set.all().filter(matzip_id=post_id).first():
            my_rate = request.user.star_set.all().filter(matzip_id=post_id).first().rate
            is_rated = 1
        else:
            my_rate = pred.est  # show the estimated rating
            is_rated = 0
    else:
        my_rate = "로그인을 해주세요"
        is_rated = 2
    images = re.sub("]|\[|'", "", group.images_url_preprocess).strip().split(',')


    context = {
        'group': group,
        'images': images,
        'my_rate': my_rate,
        'is_rated': is_rated,
        'pred': pred,
    }
    return render(request, 'posts/detail.html', context)
Example #22
def simpleItemCFGive(id):
    testSubject = str(id)
    k = 10

    ml = MovieLens()
    data = ml.loadMovieLensLatestSmall()

    trainSet = data.build_full_trainset()

    sim_options = {'name': 'cosine', 'user_based': False}

    model = KNNBasic(sim_options=sim_options)
    model.fit(trainSet)
    simsMatrix = model.compute_similarities()

    testUserInnerID = trainSet.to_inner_uid(testSubject)

    # Get the top K items we rated
    testUserRatings = trainSet.ur[testUserInnerID]
    kNeighbors = heapq.nlargest(k, testUserRatings, key=lambda t: t[1])

    # Get similar items to stuff we liked (weighted by rating)
    candidates = defaultdict(float)
    for itemID, rating in kNeighbors:
        similarityRow = simsMatrix[itemID]
        for innerID, score in enumerate(similarityRow):
            candidates[innerID] += score * (rating / 5.0)

    # Build a dictionary of stuff the user has already seen
    watched = {}
    for itemID, rating in trainSet.ur[testUserInnerID]:
        watched[itemID] = 1

    # Get top-rated items from similar users:
    s = "\n" + str(id)
    pos = 0
    for itemID, ratingSum in sorted(candidates.items(),
                                    key=itemgetter(1),
                                    reverse=True):
        if itemID not in watched:
            movieID = trainSet.to_raw_iid(itemID)
            s += "," + ml.getMovieName(int(movieID))
            pos += 1
            if (pos > 10):
                break
    file = open("E:\\Neeraj\\SimpleItemCFBase.txt", "r")
    alld = file.readlines()
    file.close()
    file1 = open("E:\\Neeraj\\SimpleItemCFBase.txt", "w")
    for r1 in alld:
        print(r1)
        u = r1.find(",")
        if (r1[0:u] == str(id)):
            pass
        else:
            file1.write(r1)
    file1.write(s)
    file1.close()
    print("\nDone")
Example #23
def get_accuracy(df,
                 genre,
                 neighbors=30,
                 min_neighbors=5,
                 seed=12345,
                 kfolds=5,
                 k=5,
                 threshold=4):
    """ Gets the precision and accuracy of the model for each genre using cross validation
        
        Args:
            df (pandas.DataFrame): the dataset of actual ratings
            genre (str): the genre for the model
            neighbors (int): the number of neighbors to take into account when training the model
                             Default is 30.
            min_neighbors (int): the number of neighbors a user must have in order to get a prediction.
                                Default is 5.
            seed (int): setting the random state. Default is 12345.
            kfolds (int): the number of folds for cross validation. Default is 5.
            k (int): number of recommendations for each user. default is 5.
            threshold (int): the cutoff rating at which an item will be considered 'enjoyed.'
        Returns:
            prec (float): the average precision across the k-fold cross validation
            rec (float): the average recall across the k-fold cross validation
    """

    data = df[df['genre'] == genre]
    data = data[['user_id', 'book_id', 'rating']]
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(data[['user_id', 'book_id', 'rating']], reader)
    # KNN itself is deterministic; the seed is applied to the KFold split below
    algo_KNNbasic = KNNBasic(k=neighbors,
                             min_k=min_neighbors)

    kf = KFold(n_splits=kfolds, random_state=seed)
    prec_list = []
    recalls_list = []
    for trainset, testset in kf.split(data):
        algo_KNNbasic.fit(trainset)
        predictions = algo_KNNbasic.test(testset)
        precisions, recalls = precision_recall_at_k(predictions,
                                                    k=k,
                                                    threshold=threshold)

        # Precision and recall can then be averaged over all users
        precision = sum(prec for prec in precisions.values()) / len(precisions)
        logger.info("Precision:")
        logger.info(precision)
        recall = sum(rec for rec in recalls.values()) / len(recalls)
        logger.info("Recall")
        logger.info(recall)
        prec_list.append(precision)
        recalls_list.append(recall)

    prec = (sum(prec_list) / len(prec_list))
    rec = (sum(recalls_list) / len(recalls_list))
    return prec, rec
Example #24
class KNN_ensemble:
    def __init__(self, mode=0):
        # self.movie = Movie_KNN_recommender()
        self.user = Personal_KNN_recommender()
        self.index = pd.read_csv('../data/personal/movies.csv')
        self.reader = Reader()
        self.ratings = pd.read_csv('../data/personal/ratings.csv')
        data = Dataset.load_from_df(
            self.ratings[['userId', 'movieId', 'rating']], self.reader)
        trainset = data.build_full_trainset()
        sim_options = {'name': 'pearson_baseline', 'user_based': False}
        if mode == 0:
            self.algo = KNNBaseline(sim_options=sim_options)
        elif mode == 1:
            self.algo = KNNWithMeans(sim_options=sim_options)
        elif mode == 2:
            self.algo = KNNBasic(sim_options=sim_options)
        else:
            exit(0)

        self.algo.fit(trainset)
        self.sim = self.algo.compute_similarities()

    def cal_similarity(self, movieID, waitingID):
        movie_inner_id = self.algo.trainset.to_inner_iid(movieID)
        waiting_inner_id = self.algo.trainset.to_inner_iid(waitingID)
        return self.sim[movie_inner_id, waiting_inner_id]

    def showSeenMovies(self, usrID):
        print("\n\nThe user has seen movies below: ")
        movies = []
        for i in range(len(self.ratings['userId'])):
            if self.ratings['userId'][i] == usrID:
                movies.append(self.index[self.index.movieId ==
                                         self.ratings['movieId'][i]]['title'])
        for i in movies:
            print(i.values[0])

    def showInputMovie(self, movieID):
        print("\n\nThe user's input movie is: ")
        print(self.index[self.index.movieId == movieID]['title'])
        print('\n\n')

    def recommend(self, usrID, movieID, num=10):
        self.showSeenMovies(usrID)
        self.showInputMovie(movieID)
        _, first_ids = self.user.recommend(usrID, 50)

        similarity = {}
        for i in first_ids:
            similarity[i] = self.cal_similarity(movieID, i)
        result = sorted(similarity.items(), key=lambda x: x[1],
                        reverse=True)  # sort by similarity, descending
        result = result[:num]
        movie = []
        for i in result:
            movie.append(self.index[self.index.movieId == i[0]]['title'])
        return movie
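A hypothetical usage of the ensemble (the ids must exist in ratings.csv and movies.csv):

ensemble = KNN_ensemble(mode=2)  # mode 2 selects KNNBasic
for title in ensemble.recommend(usrID=1, movieID=1, num=5):
    print(title.values[0])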
Example #25
def item_based_rec_loader(data, ml, testUser, no_recs):

    trainSet = data.build_full_trainset()

    # note that user_based is False here: we are telling KNN to
    # generate an item-item similarity matrix
    sim_options = {'name': 'cosine', 'user_based': False}

    model = KNNBasic(sim_options=sim_options)
    model.fit(trainSet)

    similarity_matrix = model.compute_similarities()

    testUserInnerID = trainSet.to_inner_uid(testUser)
    # Get the top K items we rated
    #     k = 10
    #     testUserRatings = trainSet.ur[testUserInnerID]
    #     kNeighbors = heapq.nlargest(k, testUserRatings, key=lambda t: t[1])

    #or look for items with rating > threshold
    kNeighbors = []
    testUserRatings = trainSet.ur[testUserInnerID]
    for rating in testUserRatings:
        if rating[1] > 4.0:
            kNeighbors.append(rating)

    # Get similar items to stuff we liked (weighted by rating)
    candidates = defaultdict(float)
    for itemID, rating in kNeighbors:
        similarity_row = similarity_matrix[itemID]
        for innerID, score in enumerate(similarity_row):
            candidates[innerID] += score * (rating / 5.0)

    # Build a dictionary of stuff the user has already seen
    excluded = {}
    for itemID, rating in trainSet.ur[testUserInnerID]:
        excluded[itemID] = 1

    # Build a dictionary for results
    results = {'book': [], 'rating_sum': []}

    # Get top-rated items from similar users:
    print('\n')
    pos = 0
    for itemID, ratingSum in sorted(candidates.items(),
                                    key=itemgetter(1),
                                    reverse=True):
        if itemID not in excluded:
            bookID = trainSet.to_raw_iid(itemID)
            #             print(ml.getItemName(int(bookID)), ratingSum)
            results['book'].append(ml.getItemName(int(bookID)))
            results['rating_sum'].append(ratingSum)
            pos += 1
            if (pos > no_recs - 1):
                break
    return pd.DataFrame(results)
Example #26
def main():
    data = Dataset.load_builtin('ml-100k')
    trainset, testset = train_test_split(data, test_size=.25)
    algo = KNNBasic()
    # Train the algorithm on the trainset, and predict ratings for the testset
    algo.fit(trainset)
    predictions = algo.test(testset)
    # Then compute RMSE
    score = accuracy.rmse(predictions)
    print('rmse: ', score)
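For reference, surprise.model_selection.cross_validate wraps the same split-fit-score loop over several folds:

from surprise.model_selection import cross_validate

data = Dataset.load_builtin('ml-100k')
cross_validate(KNNBasic(), data, measures=['RMSE', 'MAE'], cv=5, verbose=True)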
Example #27
def f_rs_cr_sim_matrix(train_set, xlsx_file, user_base, mtx_measure, df_idx):  
    sim_opt = {'name': mtx_measure, 'user_based': user_base }    
    model = KNNBasic(sim_options=sim_opt,  verbose = False)
    model.fit(train_set)
    simsMatrix = model.compute_similarities()
    Print ("Complete Matrix -",  mtx_measure) 
    df = pd.DataFrame(simsMatrix)
    df.columns = df_idx
    df.index = df_idx
    df.to_excel(xlsx_file)
    return df
Example #28
def train_model():
    file_path = os.path.expanduser('user_item_rate.csv')
    reader = Reader(line_format='user item rating', sep=',')
    surprise_data = Dataset.load_from_file(file_path, reader=reader)

    all_trainset = surprise_data.build_full_trainset()
    algo = KNNBasic(
        k=40, min_k=3, sim_options={'user_based': True}
    )  # sim_options={'name': 'cosine','user_based': True} cosine/msd/pearson/pearson_baseline
    algo.fit(all_trainset)
    return algo
Example #29
    def fit(self):

        self.dl = DataLoader()
        data = self.dl.load_rating_matrix()

        self.train_set = data.build_full_trainset()

        sim_options = {'name': 'cosine', 'user_based': True}
        knn_basic = KNNBasic(sim_options=sim_options)
        knn_basic.fit(self.train_set)

        self.sim_matrix = knn_basic.compute_similarities()
Example #30
def run_collaborative_filtering():
    global top_recommendations
    global knn
    data = Dataset.load_builtin("ml-100k")
    training_set = data.build_full_trainset()
    sim_options = {'name': 'pearson_baseline', 'user_based': True}
    knn = KNNBasic(sim_options=sim_options)
    knn.fit(training_set)
    test_set = training_set.build_anti_testset()
    predictions = knn.test(test_set)
    top_recommendations = get_top_recommendations(predictions)
    return 'OK'
Example #31
def test_nearest_neighbors():
    """Ensure the nearest neighbors are different when using user-user
    similarity vs item-item."""

    reader = Reader(line_format='user item rating', sep=' ',
                    rating_scale=(1, 5), skip_lines=3)

    data_file = os.path.dirname(os.path.realpath(__file__)) + '/custom_train'
    data = Dataset.load_from_file(data_file, reader)
    trainset = data.build_full_trainset()

    algo_ub = KNNBasic(sim_options={'user_based': True})
    algo_ub.fit(trainset)
    algo_ib = KNNBasic(sim_options={'user_based': False})
    algo_ib.fit(trainset)
    assert algo_ub.get_neighbors(0, k=10) != algo_ib.get_neighbors(0, k=10)