# Find the 10 playlists most similar to one chosen playlist with a k-NN model.
#
# Relies on names defined earlier in the file: `KNNBaseline`, `trainset`,
# and the two lookup dicts `name_id_dic` (playlist name -> raw playlist id)
# and `id_name_dic` (raw playlist id -> playlist name).

# Alternative configuration kept from the original:
# algo = KNNBaseline(sim_options=sim_options)
algo = KNNBaseline()
# fit() replaces the deprecated train() method and matches the other
# Surprise scripts in this file.
algo.fit(trainset)

# Dict views cannot be indexed on Python 3, so materialise the keys first.
# Index 39 is the arbitrary sample playlist picked by the original author.
current_playlist = list(name_id_dic)[39]
print("歌单名称", current_playlist)

# Look up the neighbours.
# Map the playlist name to its raw id.
playlist_id = name_id_dic[current_playlist]
print("歌单id", playlist_id)
# Translate the raw id into the trainset's inner user id (to_inner_uid).
playlist_inner_id = algo.trainset.to_inner_uid(playlist_id)
print("内部id", playlist_inner_id)

playlist_neighbors = algo.get_neighbors(playlist_inner_id, k=10)

# Convert the neighbours back into readable names:
# inner id -> raw id (to_raw_uid), then raw id -> playlist name.
playlist_neighbors = (
    algo.trainset.to_raw_uid(inner_id) for inner_id in playlist_neighbors
)
playlist_neighbors = (id_name_dic[raw_id] for raw_id in playlist_neighbors)

print()
print("和歌单 《", current_playlist, "》 最接近的10个歌单为:\n")
for playlist in playlist_neighbors:
    # Print each neighbour's name next to its inner id.
    print(playlist, algo.trainset.to_inner_uid(name_id_dic[playlist]))

# Next step (code continues beyond this excerpt): start predicting for users.
# Rebuild the song id -> song name mapping dict.
# Walk-through: list the 10 nearest neighbours of "Toy Story (1995)".
# Uses `algo` (an already-trained model) and `read_item_names()` from
# elsewhere in the file.

# Fetch both lookup tables: raw id -> movie name and movie name -> raw id.
rid_to_name, name_to_rid = read_item_names()

# Raw id (the id used in the original data) of Toy Story.
toy_story_raw_id = name_to_rid['Toy Story (1995)']
print('Toy Story的raw id:', toy_story_raw_id)
# Sample output recorded by the author: u'1'

# Inner id (the trainset's own index) of Toy Story.
toy_story_inner_id = algo.trainset.to_inner_iid(toy_story_raw_id)
print('Toy Story的inner id:', toy_story_inner_id)
# Sample output recorded by the author: 24

# The 10 nearest neighbours, still expressed as inner ids.
toy_story_neighbors = algo.get_neighbors(toy_story_inner_id, k=10)
print(toy_story_neighbors)
# Sample output: ten inner ids

# Turn each neighbour into a movie name lazily, one item at a time:
# inner id -> raw id -> movie name.
toy_story_neighbors = (
    rid_to_name[algo.trainset.to_raw_iid(neighbor_iid)]
    for neighbor_iid in toy_story_neighbors
)

print('The 10 nearest neighbors of Toy Story are:')
for title in toy_story_neighbors:
    print(title)
# ''' >>>   (a quoted sample-output block starts here and continues beyond this excerpt)
return rid_to_name, name_to_rid # First, train the algortihm to compute the similarities between items trainset = df.build_full_trainset() sim_options = {'name': 'pearson_baseline', 'user_based': False} algo = KNNBaseline(sim_options=sim_options) algo.fit(trainset) # Read the mappings raw id <-> movie name rid_to_name, name_to_rid = read_item_names() # Retrieve inner id of the movie Toy Story #*********************Movie Recommended for Movie********** toy_story_raw_id = name_to_rid['Toy Story'] toy_story_inner_id = algo.trainset.to_inner_iid(toy_story_raw_id) # Retrieve inner ids of the nearest neighbors of Toy Story. toy_story_neighbors = algo.get_neighbors(toy_story_inner_id, k=10) # Convert inner ids of the neighbors into names. toy_story_neighbors = (algo.trainset.to_raw_iid(inner_id) for inner_id in toy_story_neighbors) toy_story_neighbors = (rid_to_name[rid] for rid in toy_story_neighbors) print() print('The 10 nearest neighbors of Toy Story are:') for movie in toy_story_neighbors: print(movie)
import pandas as pd
from surprise import SVD, Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split

# Exploratory script: fit an SVD model on a ratings CSV, print the shapes of
# the learned factor matrices, then look up nearest neighbours.

# NOTE(review): `line_format` is presumably ignored when loading from a
# DataFrame (it describes file parsing) — kept as in the original call.
# NOTE(review): if ratings.csv contains half-star ratings, the scale should
# be (0.5, 5) — confirm against the data.
reader = Reader(rating_scale=(1, 5), line_format="user item rating")
df_data = pd.read_csv('ratings.csv', usecols=['userId', 'movieId', 'rating'])
data = Dataset.load_from_df(df_data, reader)

# Hold out 20% of the ratings as a test set.
trainset, testset = train_test_split(data, 0.2)

model = SVD(n_factors=30)
model.fit(trainset)
print(model.pu.shape)  # user factor matrix
print(model.qi.shape)  # item factor matrix

# get_neighbors() reads the similarity matrix `model.sim`. Only the k-NN
# algorithms build that matrix during fit(); SVD never sets it, so the
# original call raised AttributeError. Build it explicitly first. With no
# sim_options given this uses the library defaults.
model.sim = model.compute_similarities()
print(model.get_neighbors(10, 5))

# --- Commented-out sketch of a hand-written SGD training loop (unfinished) ---
# print(model.compute_similarities())
# n_factors=30
# pu=np.random.RandomState(0).normal(0.1,1,(610,30))
# qi=np.random.RandomState(0).normal(0.1,1,(9743,30))
# n_epochs=1000
# bu=np.random.RandomState(0).normal(0.1,1,(610,30))
# bi=np.random.RandomState(0).normal(0.1,1,(9743,30))
#
# lr_qi=lr_pu=lr_bu=lr_bi=0.1  # learning rate
# reg_qi=reg_pu=reg_bi=reg_bu=0.1  # regularization parameters
#
# # train function
# for current_epoch in range(n_epochs):
#     for u, i, r in trainset.all_ratings():
#         # compute the first term of (5)
#         dot = 0  # <q_i, p_u>
#         for f in range(30):
# Grid-search KNNBasic hyper-parameters on MovieLens-100K.
#
# Relies on `Dataset`, `GridSearchCV` and `KNNBasic` being imported earlier
# in the file.
#
# Scratch expressions from the original were removed because each one raised
# before or after the search could finish:
#   - KNNBasic().trainset and KNNBaseline().sim read attributes that an
#     unfitted algorithm does not have;
#   - s.get_neighbors(), Trainset() and Dataset.load_from_df() were called
#     without their required arguments.
# An SVD-style grid (n_epochs / lr_all / reg_all) was also assigned to
# `param_grid` and immediately overwritten; only the KNNBasic grid is kept.

# Kept from the original scratch code; no longer used in this block.
from surprise.trainset import Trainset

# Use movielens-100K
data = Dataset.load_builtin('ml-100k')

# Candidate neighbourhood sizes for KNNBasic.
param_grid = {'k': [40, 100], 'min_k': [5, 10]}
gs = GridSearchCV(KNNBasic, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

# NOTE(review): best_estimator is indexed by measure name, like best_score
# and best_params above; this prints the whole mapping — confirm that is
# what is wanted rather than gs.best_estimator['rmse'].
be = gs.best_estimator
print(be)