# --- Fit a weighted item-based KNNBaseline on MovieLens-100k and export
# --- the dense prediction matrix and each item's K nearest neighbours.
K = 10  # number of nearest neighbours exported per item

# Load ml-100k; fit on the FULL trainset, test on an 80/20 split of the same
# data.  NOTE(review): the split's train fold is discarded and the model is
# fit on everything, so test items were seen during training -- confirm this
# leakage is intentional (it inflates accuracy if used for evaluation).
data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()
_, testset = train_test_split(data, test_size=.2)

# Item-based similarity with the default metric.
# (Alternative tried earlier: {'name': 'pearson_baseline', 'user_based': False})
sim_options = {'user_based': False}
algo = KNNBaseline(sim_options=sim_options)

# All-ones weight matrix, one cell per (user, item); +1 because raw ml-100k
# ids are 1-based.  NOTE(review): weightUpdate() is not part of stock
# surprise -- presumably a project-local extension of KNNBaseline; verify.
s = (trainset.n_users + 1, trainset.n_items + 1)
W = np.ones(s)
algo.weightUpdate(W)

# Fit, then predict on the test fold; each prediction tuple is
# (raw_uid, raw_iid, true_rating, estimate, details).
predictions = algo.fit(trainset).test(testset)

# Scatter the estimates into a dense (user x item) matrix and persist it.
# int() works here because ml-100k raw ids are numeric strings.
PredictM = np.zeros(s)
for pred in predictions:
    PredictM[int(pred[0]), int(pred[1])] = pred[3]  # pred[3] == estimate
print(PredictM)
PM = pd.DataFrame(PredictM)
PM.to_csv("PredictionMatrix.csv")

# For every inner item id, record its K nearest neighbours (inner ids).
NeighborM = np.zeros([trainset.n_items, K])
for i, item in enumerate(trainset.all_items()):
    NeighborM[i] = algo.get_neighbors(item, k=K)
NM = pd.DataFrame(NeighborM)
# --- Second model configuration: pearson_baseline item similarity. ---
# (Commented-out experiments from earlier debugging sessions are kept
# below for reference.)
# PM = pd.DataFrame(testset)
# PM.to_csv("TestSet.csv")
sim_options = {'name': 'pearson_baseline', 'user_based': False}
algo = KNNBaseline(sim_options=sim_options)

# BUG FIX: the original read `sizefinder.n_users` / `sizefinder.n_items`,
# but `sizefinder` is never defined anywhere in this file (NameError).
# The full trainset built at the top of the script supplies the same
# (n_users + 1, n_items + 1) dimensions -- TODO confirm that was the intent.
s = (trainset.n_users + 1, trainset.n_items + 1)

# trainM = np.zeros(s)
# for uid, iid, rating in trainset.all_ratings():
#     trainM[uid][iid] = rating
# PM = pd.DataFrame(trainM)
# PM.to_csv("TrainSet.csv")

# Uniform weights, same as the first model (project-local extension).
weight = np.ones(s)
algo.weightUpdate(weight)

# algo.fit(trainset)
# est, details, k_neighbor = [algo.estimate(uid, iid) for (uid, iid, _) in testset[1]]
# NeighborM = (trainset.n_users + 1) * [(trainset.n_items + 1) * [None]]
# for uid, iid, _ in trainset.all_ratings():
#     _, _, K_neighbor = algo.estimate(uid, iid)
#     NeighborM[uid][iid] = np.array(K_neighbor)[:, 0][:10]
# uid = trainset.to_inner_uid(testset[1][0])
# iid = trainset.to_inner_iid(testset[1][1])
# print(len(trainset.ur[uid]))
# print(NeighborM[uid][iid])
# est = algo.estimate(uid, iid)
# print(est, isinstance(est, tuple))