Beispiel #1
0
# algo = KNNBaseline(sim_options=sim_options)
algo = KNNBaseline()
# BUGFIX: Surprise renamed AlgoBase.train() to fit() (train() was removed);
# the later example in this file already calls fit(), so use it here too.
algo.fit(trainset)

# BUGFIX: in Python 3, dict.keys() returns a non-subscriptable view;
# materialize it with list() before indexing.
current_playlist = list(name_id_dic.keys())[39]
print("歌单名称", current_playlist)

# Fetch the nearest neighbours of this playlist.
# Map the playlist name to its raw id.
playlist_id = name_id_dic[current_playlist]
print("歌单id", playlist_id)
# Map the raw id to the internal user id (to_inner_uid).
playlist_inner_id = algo.trainset.to_inner_uid(playlist_id)
print("内部id", playlist_inner_id)

playlist_neighbors = algo.get_neighbors(playlist_inner_id, k=10)

# Convert the neighbour ids back into playlist names:
# to_raw_uid maps internal ids back to raw ids.
playlist_neighbors = (algo.trainset.to_raw_uid(inner_id)
                      for inner_id in playlist_neighbors)
playlist_neighbors = (id_name_dic[playlist_id]
                      for playlist_id in playlist_neighbors)

print()
print("和歌单 《", current_playlist, "》 最接近的10个歌单为:\n")
for playlist in playlist_neighbors:
    print(playlist, algo.trainset.to_inner_uid(name_id_dic[playlist]))

# Predict for a user: rebuild the id -> name mapping dictionaries.
# read_item_names() yields both directions: id -> name and name -> id.
rid_to_name, name_to_rid = read_item_names()

# Look up the raw id (from the original data) of the movie "Toy Story".
toy_story_raw_id = name_to_rid['Toy Story (1995)']
print('Toy Story的raw id:', toy_story_raw_id)
# >>> u'1'

# Translate the raw id into the trainset's internal (inner) id.
toy_story_inner_id = algo.trainset.to_inner_iid(toy_story_raw_id)
print('Toy Story的inner id:', toy_story_inner_id)
# >>> 24

# Retrieve the 10 nearest neighbours (returned as inner ids).
toy_story_neighbors = algo.get_neighbors(toy_story_inner_id, k=10)
print(toy_story_neighbors)
# >>> prints 10 inner ids

# Map each neighbour inner id -> raw id -> movie name, lazily in one pass.
toy_story_neighbors = (
    rid_to_name[algo.trainset.to_raw_iid(inner_id)]
    for inner_id in toy_story_neighbors
)

print('The 10 nearest neighbors of Toy Story are:')
for movie in toy_story_neighbors:
    print(movie)
'''
    >>>
Beispiel #3
0
    return rid_to_name, name_to_rid


# First, train the algortihm to compute the similarities between items
trainset = df.build_full_trainset()
sim_options = {'name': 'pearson_baseline', 'user_based': False}
algo = KNNBaseline(sim_options=sim_options)
algo.fit(trainset)

# Read the mappings raw id <-> movie name
rid_to_name, name_to_rid = read_item_names()

# Retrieve inner id of the movie Toy Story
#*********************Movie Recommended for Movie**********
toy_story_raw_id = name_to_rid['Toy Story']
toy_story_inner_id = algo.trainset.to_inner_iid(toy_story_raw_id)

# Retrieve inner ids of the nearest neighbors of Toy Story.
toy_story_neighbors = algo.get_neighbors(toy_story_inner_id, k=10)

# Convert inner ids of the neighbors into names.
toy_story_neighbors = (algo.trainset.to_raw_iid(inner_id)
                       for inner_id in toy_story_neighbors)
toy_story_neighbors = (rid_to_name[rid] for rid in toy_story_neighbors)

print()
print('The 10 nearest neighbors of Toy Story are:')
for movie in toy_story_neighbors:
    print(movie)
Beispiel #4
0
import pandas as pd
from surprise import SVD, Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split

# rating_scale must match the data; line_format only applies to
# load_from_file and is ignored when loading from a DataFrame.
reader = Reader(rating_scale=(1, 5), line_format="user item rating")
# load_from_df expects the columns in (user, item, rating) order.
df_data = pd.read_csv('ratings.csv', usecols=['userId', 'movieId', 'rating'])
data = Dataset.load_from_df(df_data, reader)
trainset, testset = train_test_split(data, test_size=0.2)
model = SVD(n_factors=30)
model.fit(trainset)
print(model.pu.shape)  # user factors: (n_users, n_factors)
print(model.qi.shape)  # item factors: (n_items, n_factors)
# BUGFIX: SVD never computes a similarity matrix, so get_neighbors()
# raises AttributeError ('SVD' object has no attribute 'sim') — disabled,
# like the compute_similarities() call below.
# print(model.get_neighbors(10, 5))
# print(model.compute_similarities())

# n_factors=30
# pu=np.random.RandomState(0).normal(0.1,1,(610,30))
# qi=np.random.RandomState(0).normal(0.1,1,(9743,30))
# n_epochs=1000
# bu=np.random.RandomState(0).normal(0.1,1,(610,30))
# bi=np.random.RandomState(0).normal(0.1,1,(9743,30))
#
# lr_qi=lr_pu=lr_bu=lr_bi=0.1  # learning rate
# reg_qi=reg_pu=reg_bi=reg_bu=0.1  # regularization parameters
#
# # train function
# for current_epoch in range(n_epochs):
#     for u, i, r in trainset.all_ratings():
#         # first term of equation (5)
#         dot = 0  # <q_i, p_u>
#         for f in range(30):
Beispiel #5
0
# Use movielens-100K
data = Dataset.load_builtin('ml-100k')

# NOTE: this SVD-style grid was dead code — it was immediately overwritten
# by the KNN grid below; kept for reference only.
# param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005],
#               'reg_all': [0.4, 0.6]}

# Grid for KNNBasic: k = neighbourhood size, min_k = minimum neighbours.
param_grid = {'k': [40, 100], 'min_k': [5, 10]}

gs = GridSearchCV(KNNBasic, param_grid, measures=['rmse', 'mae'], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

be = gs.best_estimator
print(be)

# BUGFIX: the exploratory lines below all raise at runtime and blocked
# the print(be) above, so they are disabled: unfitted algorithms have no
# .trainset/.sim attribute, SVD.get_neighbors() requires arguments (and a
# similarity matrix SVD never builds), and Trainset()/Dataset.load_from_df()
# require constructor arguments.
# KNNBasic().trainset
# KNNBaseline().sim
# s = SVD()
# s.get_neighbors()
# from surprise.trainset import Trainset
# t = Trainset()
# s.get_neighbors()
# Dataset.load_from_df()