def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
    metrics = {}
    # Compute accuracy
    if (verbose):
        print("Evaluating accuracy...")
    self.algorithm.fit(evaluationData.GetTrainSet())
    predictions = self.algorithm.test(evaluationData.GetTestSet())
    metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
    metrics["MAE"] = RecommenderMetrics.MAE(predictions)
    if (doTopN):
        # Evaluate top-10 with Leave One Out testing
        if (verbose):
            print("Evaluating top-N with leave-one-out...")
        self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
        leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
        # Build predictions for all ratings not in the training set
        allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
        # Compute top 10 recs for each user
        topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
        if (verbose):
            print("Computing hit-rate and rank metrics...")
        # See how often we recommended a movie the user actually rated
        metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
        # See how often we recommended a movie the user actually liked
        metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
        # Compute ARHR
        metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)

        # Evaluate properties of recommendations on the full training set
        if (verbose):
            print("Computing recommendations with full data set...")
        self.algorithm.fit(evaluationData.GetFullTrainSet())
        allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
        topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
        if (verbose):
            print("Analyzing coverage, diversity, and novelty...")
        # Measure user coverage with a minimum predicted rating of 4.0:
        metrics["Coverage"] = RecommenderMetrics.UserCoverage(topNPredicted,
                                                              evaluationData.GetFullTrainSet().n_users,
                                                              ratingThreshold=4.0)
        # Measure diversity of recommendations:
        metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
        # Measure novelty (average popularity rank of recommendations):
        metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted, evaluationData.GetPopularityRankings())

    if (verbose):
        print("Analysis complete.")
    return metrics
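
# --- Hedged usage sketch (not part of the original source) ---
# Shows how Evaluate() above is typically driven. MovieLens, EvaluationData,
# and EvaluatedAlgorithm are assumed to be the companion classes of this
# framework; their constructor signatures are inferred from the calls inside
# Evaluate() and may differ from the real ones.
from surprise import SVD
from MovieLens import MovieLens
from EvaluationData import EvaluationData
from EvaluatedAlgorithm import EvaluatedAlgorithm

ml = MovieLens()
data = ml.loadMovieLensLatestSmall()
rankings = ml.getPopularityRanks()  # popularity ranks, used by the Novelty metric

evalData = EvaluationData(data, rankings)  # builds the train/test, LOOCV, and full splits
evaluatedSVD = EvaluatedAlgorithm(SVD(random_state=10), "SVD")
print(evaluatedSVD.Evaluate(evalData, doTopN=True, n=10))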
def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
    metrics = {}
    # Compute accuracy
    if (verbose):
        print("Evaluating accuracy...")
    self.algorithm.fit(evaluationData.GetTrainSet())
    predictions = self.algorithm.test(evaluationData.GetTestSet())
    metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
    metrics["MAE"] = RecommenderMetrics.MAE(predictions)
    if (doTopN):
        # Evaluate top-10 with Leave One Out testing
        if (verbose):
            print("Computing top-N with leave-one-out...")
        self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
        leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
        # Build predictions for all ratings not in the training set
        allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
        # Compute top 10 recs for each user
        topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
        if (verbose):
            print("Computing hit-rate and rank metrics...")
        # HitRate: how often we recommended a movie the user actually rated
        metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
        # cHitRate: how often we recommended a movie the user actually liked
        metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
        # ARHR
        metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)

        # Evaluate properties of recommendations on the full training set
        if (verbose):
            print("Computing recommendations with the full data set...")
        self.algorithm.fit(evaluationData.GetFullTrainSet())
        allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
        topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
        if (verbose):
            print("Analyzing coverage, diversity, and novelty...")
        # Measure user coverage with a rating threshold of 4.0
        metrics["Coverage"] = RecommenderMetrics.UserCoverage(topNPredicted,
                                                              evaluationData.GetFullTrainSet().n_users,
                                                              ratingThreshold=4.0)
        # Measure diversity of recommendations
        metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
        # Measure novelty (average popularity rank of recommendations)
        metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted, evaluationData.GetPopularityRankings())

    if (verbose):
        print("Analysis complete.")
    return metrics
import heapq
from collections import defaultdict
from operator import itemgetter

from RecommenderMetrics import RecommenderMetrics

def get_hit_rates(trainSet, testSet, simsMatrix, num_nearest_neighbors=10):
    # Build up a dict mapping each user to a list of (int(movieID), predicted rating) pairs
    topN = defaultdict(list)
    k = num_nearest_neighbors
    for uiid in range(trainSet.n_users):
        # Get top N similar users to this one
        similarityRow = simsMatrix[uiid]
        similarUsers = []
        for innerID, score in enumerate(similarityRow):
            if (innerID != uiid):
                similarUsers.append((innerID, score))
        kNeighbors = heapq.nlargest(k, similarUsers, key=lambda t: t[1])

        # Get the stuff they rated, and add up ratings for each item, weighted by user similarity
        candidates = defaultdict(float)
        for similarUser in kNeighbors:
            innerID = similarUser[0]
            userSimilarityScore = similarUser[1]
            theirRatings = trainSet.ur[innerID]
            for rating in theirRatings:
                candidates[rating[0]] += (rating[1] / 5.0) * userSimilarityScore

        # Build a dictionary of stuff the user has already seen
        watched = {}
        for itemID, rating in trainSet.ur[uiid]:
            watched[itemID] = 1

        # Get top-rated items from similar users:
        pos = 0
        for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
            if not itemID in watched:
                movieID = trainSet.to_raw_iid(itemID)
                topN[int(trainSet.to_raw_uid(uiid))].append((int(movieID), 0.0))
                pos += 1
                if (pos > 40):
                    break

    HR = RecommenderMetrics.HitRate(topN, testSet)
    CHR = RecommenderMetrics.CumulativeHitRate(topN, testSet, ratingCutoff=4.0)
    RHR = RecommenderMetrics.RatingHitRate(topN, testSet)
    ARHR = RecommenderMetrics.AverageReciprocalHitRank(topN, testSet)
    return HR, CHR, RHR, ARHR
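
# --- Hedged usage sketch (not part of the original source) ---
# Drives get_hit_rates() with a leave-one-out split and a user-user cosine
# similarity matrix from Surprise. The MovieLens loader and the sim_options
# mirror patterns used elsewhere in this file, but are assumptions here.
from surprise import KNNBasic
from surprise.model_selection import LeaveOneOut
from MovieLens import MovieLens

ml = MovieLens()
data = ml.loadMovieLensLatestSmall()

LOOCV = LeaveOneOut(n_splits=1, random_state=1)
for trainSet, testSet in LOOCV.split(data):
    simAlgo = KNNBasic(sim_options={'name': 'cosine', 'user_based': True})
    simAlgo.fit(trainSet)
    simsMatrix = simAlgo.compute_similarities()
    HR, CHR, RHR, ARHR = get_hit_rates(trainSet, testSet, simsMatrix)
    print("HR:", HR, "cHR:", CHR, "rHR:", RHR, "ARHR:", ARHR)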
def Evaluate(self, evaluationData, doTopN, n=10, verbose=True): metrics = {} # Compute accuracy if (verbose): print("Evaluating: ", self.name) self.algorithm.fit(evaluationData.GetTrainSet()) predictions = self.algorithm.test(evaluationData.GetTestSet()) metrics["RMSE"] = RecommenderMetrics.RMSE(predictions) metrics["MAE"] = RecommenderMetrics.MAE(predictions) if (doTopN): # Evaluate top-10 with Leave One Out testing self.algorithm.fit(evaluationData.GetLOOCVTrainSet()) leftOutPredictions = self.algorithm.test( evaluationData.GetLOOCVTestSet()) # Build predictions for all ratings not in the training set allPredictions = self.algorithm.test( evaluationData.GetLOOCVAntiTestSet()) # Compute top 10 recs for each user topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n) # See how often we recommended a movie the user actually rated metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions) #Evaluate properties of recommendations on full training set self.algorithm.fit(evaluationData.GetFullTrainSet()) allPredictions = self.algorithm.test( evaluationData.GetFullAntiTestSet()) topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n) # Print user coverage with a minimum predicted rating of 6.0: metrics["Coverage"] = RecommenderMetrics.UserCoverage( topNPredicted, evaluationData.GetFullTrainSet().n_users, ratingThreshold=7.0) # Measure novelty (average popularity rank of recommendations): metrics["Novelty"] = RecommenderMetrics.Novelty( topNPredicted, evaluationData.GetPopularityRankings()) return metrics
from surprise import SVD, KNNBaseline
from surprise.model_selection import train_test_split, LeaveOneOut

from RecommenderMetrics import RecommenderMetrics

# (`data` is assumed to be a Surprise Dataset loaded earlier in this script)
fullTrainSet = data.build_full_trainset()
sim_options = {'name': 'pearson_baseline', 'user_based': False}
simsAlgo = KNNBaseline(sim_options=sim_options)
simsAlgo.fit(fullTrainSet)

print("\nBuilding recommendation model...")
trainSet, testSet = train_test_split(data, test_size=.25, random_state=1)
algo = SVD(random_state=10)
algo.fit(trainSet)

print("\nComputing recommendations...")
predictions = algo.test(testSet)

print("\nEvaluating accuracy of model...")
print("RMSE: ", RecommenderMetrics.RMSE(predictions))
print("MAE: ", RecommenderMetrics.MAE(predictions))

print("\nEvaluating top-10 recommendations...")
# Set aside one rating per user for testing
LOOCV = LeaveOneOut(n_splits=1, random_state=1)
for trainSet, testSet in LOOCV.split(data):
    print("Computing recommendations with leave-one-out...")
    # Train model without left-out ratings
    algo.fit(trainSet)
    # Predict ratings for left-out ratings only
    print("Predict ratings for left-out set...")
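    # --- Hedged continuation (not shown in the original snippet) ---
    # The loop body above stops right after the progress message; a likely
    # continuation, mirroring the other snippets in this file, is:
    leftOutPredictions = algo.test(testSet)
    # Build predictions for all ratings not in the training set
    bigTestSet = trainSet.build_anti_testset()
    allPredictions = algo.test(bigTestSet)
    # Compute top-10 recs for each user and check the hit rate
    topNPredicted = RecommenderMetrics.GetTopN(allPredictions, 10)
    print("Hit Rate: ", RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions))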
def Evaluate(self, evaluationData, doTopN, n=10, verbose=True, load=False):
    metrics = {}
    # Compute accuracy
    if (verbose):
        print("Evaluating accuracy...")
    pr = None
    alg = None
    if load:
        file_name = self.name + '_acc'
        pr, alg, _ = MyDump.Load(file_name, 1)
    if pr is None or alg is None:
        self.algorithm.fit(evaluationData.GetTrainSet())  # fit on the 75% training split
        predictions = self.algorithm.test(evaluationData.GetTestSet())
        if load:
            MyDump.Save(file_name, predictions, self.algorithm, None, 1)
    else:
        self.algorithm = alg
        predictions = pr
    metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
    metrics["MAE"] = RecommenderMetrics.MAE(predictions)

    if (doTopN):
        # Evaluate top-10 with Leave One Out testing
        if (verbose):
            print("Evaluating top-N with leave-one-out...")
        pr_top = None
        alg_top = None
        if load:
            file_name = self.name + '_top' + str(n)
            pr_top, alg_top, _ = MyDump.Load(file_name, 1)
        if pr_top is None or alg_top is None:
            self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
            leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
            # Build predictions for all ratings not in the training set
            allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
            # Compute top 10 recs for each user
            topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
            pr_data = {
                'leftOutPredictions': leftOutPredictions,
                'topNPredicted': topNPredicted
            }
            if load:
                MyDump.Save(file_name, pr_data, self.algorithm, None, 1)
        else:
            self.algorithm = alg_top
            leftOutPredictions = pr_top['leftOutPredictions']
            topNPredicted = pr_top['topNPredicted']

        if (verbose):
            print("Computing hit-rate and rank metrics...")
        # See how often we recommended a movie the user actually rated
        metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
        # See how often we recommended a movie the user actually liked
        metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
        # Compute ARHR
        metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)

        # Evaluate properties of recommendations on the full training set
        if (verbose):
            print("Computing recommendations with full data set...")
        pr_top = None
        alg_top = None
        if load:
            file_name = self.name + '_fulldata'
            pr_top, alg_top, _ = MyDump.Load(file_name, 1)
        if pr_top is None or alg_top is None:
            self.algorithm.fit(evaluationData.GetFullTrainSet())
            allPredictions = self.algorithm.test(evaluationData.GetFullAntiTestSet())
            topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
            pr_data = {
                'allPredictions': allPredictions,
                'topNPredicted': topNPredicted
            }
            if load:
                MyDump.Save(file_name, pr_data, self.algorithm, None, 1)
        else:
            self.algorithm = alg_top
            allPredictions = pr_top['allPredictions']
            topNPredicted = pr_top['topNPredicted']

        if (verbose):
            print("Analyzing coverage, diversity, and novelty...")
        # Measure user coverage with a minimum predicted rating of 4.0:
        metrics["Coverage"] = RecommenderMetrics.UserCoverage(topNPredicted,
                                                              evaluationData.GetFullTrainSet().n_users,
                                                              ratingThreshold=4.0)
        # Measure diversity of recommendations:
        metrics["Diversity"] = RecommenderMetrics.Diversity(topNPredicted, evaluationData.GetSimilarities())
        # Measure novelty (average popularity rank of recommendations):
        metrics["Novelty"] = RecommenderMetrics.Novelty(topNPredicted, evaluationData.GetPopularityRankings())

    if (verbose):
        print("Analysis complete.")
    return metrics
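
# --- Hedged sketch (not part of the original source) ---
# MyDump is not shown in this file. Below is a minimal sketch of what a
# compatible pickle-based cache could look like, inferred purely from the
# call sites above (Load(name, verbose) -> (predictions, algorithm, extra);
# Save(name, predictions, algorithm, extra, verbose)). This is an assumption,
# not the author's actual implementation.
import os
import pickle

class MyDump:

    @staticmethod
    def Save(name, predictions, algorithm, extra, verbose=0):
        # Persist the (predictions, algorithm, extra) triple under ./cache/<name>.pkl
        os.makedirs('cache', exist_ok=True)
        with open(os.path.join('cache', name + '.pkl'), 'wb') as f:
            pickle.dump((predictions, algorithm, extra), f)
        if verbose:
            print("Saved", name)

    @staticmethod
    def Load(name, verbose=0):
        # Return (None, None, None) on a cache miss so callers can recompute
        path = os.path.join('cache', name + '.pkl')
        if not os.path.exists(path):
            return None, None, None
        if verbose:
            print("Loading", name)
        with open(path, 'rb') as f:
            return pickle.load(f)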
        kNeighbors = heapq.nlargest(k, similarUsers, key=lambda t: t[1])

        # Get the stuff they rated, and add up ratings for each item, weighted by user similarity
        candidates = defaultdict(float)
        for similarUser in kNeighbors:
            innerID = similarUser[0]
            userSimilarityScore = similarUser[1]
            theirRatings = trainSet.ur[innerID]
            for rating in theirRatings:
                candidates[rating[0]] += (rating[1] / 5.0) * userSimilarityScore

        # Build a dictionary of stuff the user has already seen
        watched = {}
        for itemID, rating in trainSet.ur[uiid]:
            watched[itemID] = 1

        # Get top-rated items from similar users:
        pos = 0
        for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
            if not itemID in watched:
                movieID = trainSet.to_raw_iid(itemID)
                topN[int(trainSet.to_raw_uid(uiid))].append((int(movieID), 0.0))
                pos += 1
                if (pos > 40):
                    break

# Measure
print("HR", RecommenderMetrics.HitRate(topN, leftOutTestSet))
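
# --- Hedged sketch (not part of the original source) ---
# HitRate is called above but defined in RecommenderMetrics elsewhere. A
# minimal sketch consistent with its call sites in this file: topNPredicted
# maps int(userID) -> [(movieID, predictedRating), ...] and leftOutPredictions
# is a list of per-rating tuples starting with (userID, movieID, ...). The
# real implementation may differ.
def HitRate(topNPredicted, leftOutPredictions):
    hits = 0
    total = 0
    # For each left-out rating, check whether that movie made the user's top-N
    for leftOut in leftOutPredictions:
        userID = leftOut[0]
        leftOutMovieID = leftOut[1]
        hit = False
        for movieID, predictedRating in topNPredicted[int(userID)]:
            if (int(leftOutMovieID) == movieID):
                hit = True
                break
        if (hit):
            hits += 1
        total += 1
    return hits / total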
from MovieLens import MovieLens
from RecommenderMetrics import RecommenderMetrics
from surprise import SVD
from surprise.model_selection import train_test_split, LeaveOneOut

ml = MovieLens()
print('\nLoading movie ratings...')
data = ml.loadMovieLensLatestSmall()

print('\nBuilding Recommendation model...')
trainSet, testSet = train_test_split(data, test_size=.25, random_state=1)
algo = SVD(random_state=10)
algo.fit(trainSet)

print('\nComputing Recommendations...')
predictions = algo.test(testSet)

print('\nEvaluating accuracy of the recommendation model...')
print('\n1. RMSE (Root Mean Squared Error): {}'.format(RecommenderMetrics.RMSE(predictions)))
print('2. MAE (Mean Absolute Error): {}'.format(RecommenderMetrics.MAE(predictions)))

print('\nEvaluating top 10 recommendations...')
LOOCV = LeaveOneOut(n_splits=1, random_state=1)
for trainSet, testSet in LOOCV.split(data):
    print('\nComputing Recommendation with LOOCV...')
    algo.fit(trainSet)
    leftOutPredictions = algo.test(testSet)
    bigTestSet = trainSet.build_anti_testset()
    allPredictions = algo.test(bigTestSet)
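
# --- Hedged sketch (not part of the original source) ---
# GetTopN is used throughout these snippets. A minimal sketch consistent with
# how it is called (a list of Surprise Prediction tuples in, a dict of
# {userID: [(movieID, estimatedRating), ...]} out); the real
# RecommenderMetrics implementation may differ.
from collections import defaultdict

def GetTopN(predictions, n=10, minimumRating=4.0):
    topN = defaultdict(list)
    # Keep only predictions above the minimum rating threshold
    for userID, movieID, actualRating, estimatedRating, _ in predictions:
        if (estimatedRating >= minimumRating):
            topN[int(userID)].append((int(movieID), estimatedRating))
    # Sort each user's candidates by estimated rating and keep the top n
    for userID, ratings in topN.items():
        ratings.sort(key=lambda x: x[1], reverse=True)
        topN[int(userID)] = ratings[:n]
    return topN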
        candidates = defaultdict(float)
        for itemID, rating in kNeighbors:
            similarityRow = simsMatrix[itemID]
            for innerID, score in enumerate(similarityRow):
                candidates[innerID] += score * (rating / 5.0)

        # Build a dictionary of stuff the user has already seen
        watched = {}
        for itemID, rating in trainSet.ur[uiid]:
            watched[itemID] = 1

        # Get top-rated items from similar items:
        pos = 0
        for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
            if not itemID in watched:
                productID = trainSet.to_raw_iid(itemID)
                topN[int(trainSet.to_raw_uid(uiid))].append((int(productID), 0.0))
                pos += 1
                if (pos > 40):
                    break

# Measure
print("HR", RecommenderMetrics.HitRate(topN, leftOutTestSet))
print("ARHR", RecommenderMetrics.AverageReciprocalHitRank(topN, leftOutTestSet))
print("Coverage", RecommenderMetrics.UserCoverage(topN, 100))  # 100 = hardcoded user count
#print("Diversity", RecommenderMetrics.Diversity(topN, simsAlgo))
print("Novelty", RecommenderMetrics.Novelty(topN, rankings))
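
# --- Hedged sketch (not part of the original source) ---
# The `rankings` passed to Novelty above are popularity ranks (1 = most rated
# item). A minimal sketch of how such rankings can be derived from a MovieLens
# ratings file; the CSV path and column layout are assumptions.
import csv
from collections import defaultdict

def getPopularityRanks(ratingsPath='ml-latest-small/ratings.csv'):
    ratingCounts = defaultdict(int)
    with open(ratingsPath, newline='') as f:
        reader = csv.reader(f)
        next(reader)  # skip header: userId,movieId,rating,timestamp
        for row in reader:
            ratingCounts[int(row[1])] += 1
    rankings = defaultdict(int)
    # Rank 1 = most frequently rated movie
    for rank, (movieID, _) in enumerate(
            sorted(ratingCounts.items(), key=lambda t: t[1], reverse=True), 1):
        rankings[movieID] = rank
    return rankings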
print(BeginGREEN + "Building recommendation model..." + EndGREEN) trainSet, testSet = train_test_split(data, test_size=.25, random_state=1) algo = SVD(random_state=10) algo.fit(trainSet) print(BeginGREEN + "Computing recommendations..." + EndGREEN) predictions = algo.test(testSet) print(BeginGREEN + "Evaluating accuracy of model...\n" + EndGREEN) print(BeginBgBLUE + "# Root Mean Squared Error. Lower values mean better accuracy. #" + EndBgBLUE) print(BeginBLUE + "RMSE: " + EndBLUE, BeginYELLO + "", RecommenderMetrics.RMSE(predictions), "" + EndYELLO) print(BeginBgBLUE + "# Mean Absolute Error. Lower values mean better accuracy. #" + EndBgBLUE) print(BeginBLUE + "MAE: " + EndBLUE, BeginYELLO + "", RecommenderMetrics.MAE(predictions), "" + EndYELLO) print(BeginGREEN + "\nEvaluating top-10 recommendations..." + EndGREEN) LOOCV = LeaveOneOut(n_splits=1, random_state=1) for trainSet, testSet in LOOCV.split(data): print(BeginGREEN + "Computing recommendations with leave-one-out..." + EndGREEN) # Train model without left-out ratings
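
# --- Hedged sketch (not part of the original source) ---
# The Begin*/End* color markers used above are not defined in this snippet.
# One plausible definition, assuming they are plain ANSI escape sequences
# (the BeginYELLO spelling is kept to match the calls above):
BeginGREEN = "\033[92m"   # bright green foreground
EndGREEN = "\033[0m"      # reset
BeginBLUE = "\033[94m"    # bright blue foreground
EndBLUE = "\033[0m"
BeginBgBLUE = "\033[44m"  # blue background
EndBgBLUE = "\033[0m"
BeginYELLO = "\033[93m"   # bright yellow foreground
EndYELLO = "\033[0m"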
from EvaluationData import EvaluationData
from RecommenderMetrics import RecommenderMetrics

class EvaluatedAlgorithm:

    def __init__(self, algorithm, name):
        self.algorithm = algorithm
        self.name = name

    def Evaluate(self, evaluationData, doTopN, n=10, verbose=True):
        metrics = {}
        # Compute accuracy
        if (verbose):
            print("Evaluating accuracy...")
        self.algorithm.fit(evaluationData.GetTrainSet())
        predictions = self.algorithm.test(evaluationData.GetTestSet())
        metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
        metrics["MAE"] = RecommenderMetrics.MAE(predictions)
        if (doTopN):
            # Evaluate top-10 with Leave One Out testing
            if (verbose):
                print("Evaluating top-N with leave-one-out...")
            self.algorithm.fit(evaluationData.GetLOOCVTrainSet())
            leftOutPredictions = self.algorithm.test(evaluationData.GetLOOCVTestSet())
            # Build predictions for all ratings not in the training set
            allPredictions = self.algorithm.test(evaluationData.GetLOOCVAntiTestSet())
            # Compute top 10 recs for each user
            topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
            if (verbose):
                print("Computing hit-rate and rank metrics...")
            # See how often we recommended a movie the user actually rated
            metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)
        # Get the stuff they rated, and add up ratings for each item, weighted by user similarity
        candidates = defaultdict(float)
        for similarUser in kNeighbors:
            innerID = similarUser[0]
            userSimilarityScore = similarUser[1]
            theirRatings = trainSet.ur[innerID]
            for rating in theirRatings:
                candidates[rating[0]] += (rating[1] / 5.0) * userSimilarityScore

        # Build a dictionary of stuff the user has already seen
        watched = {}
        for itemID, rating in trainSet.ur[uiid]:
            watched[itemID] = 1

        # Get top-rated items from similar users:
        pos = 0
        for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
            if not itemID in watched:
                productID = trainSet.to_raw_iid(itemID)
                topN[int(trainSet.to_raw_uid(uiid))].append((int(productID), 0.0))
                pos += 1
                if (pos > 40):
                    break

# Measure
print("HR", RecommenderMetrics.HitRate(topN, leftOutTestSet))
print("ARHR", RecommenderMetrics.AverageReciprocalHitRank(topN, leftOutTestSet))
def Evaluate(self, evaluationData, doTopN, n=10, verbose=True, sample_topN_for_userIDs=[]):
    sample_topN = {}
    self.evaluated_metrics = {}
    self.N = n
    output = {}
    self.recommender_data = evaluationData

    # sample_TopN_all = {}
    # sample_TopN_user = []
    # # creating a dictionary like: userID -> algorithms -> top N
    # for userID in sample_topN_for_userIDs:
    #     algos = {}
    #     for algorithm, name in self.algo_comparison_set:
    #         algos.update({name: []})
    #     sample_TopN_all[userID] = algos

    # Use the train/test split dataset for RMSE and MAE scores
    self.recommender_algorithm.fit(self.recommender_data.GetTrainSet())
    predictions = self.recommender_algorithm.test(self.recommender_data.GetTestSet())
    self.evaluated_metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
    self.evaluated_metrics["MAE"] = RecommenderMetrics.MAE(predictions)

    # Only do this if we want top-N recommendations: it is a compute-intensive operation
    if (doTopN):
        # Use leave-one-out cross-validation
        self.recommender_algorithm.fit(self.recommender_data.GetLOOCVTrainSet())
        leftout_predictions = self.recommender_algorithm.test(self.recommender_data.GetLOOCVTestSet())
        predictions_all_minus_train = self.recommender_algorithm.test(self.recommender_data.GetLOOCVAntiTestSet())
        topN_predictions = RecommenderMetrics.GetTopN(predictions_all_minus_train, self.N, minimumRating=4.0)

        self.evaluated_metrics["HitRate"] = RecommenderMetrics.HitRate(topN_predictions, leftout_predictions)
        self.evaluated_metrics["CumulativeHitRate"] = RecommenderMetrics.CumulativeHitRate(topN_predictions, leftout_predictions)
        # self.evaluated_metrics["RatingHitRate"] = RecommenderMetrics.RatingHitRate(topN_predictions, leftout_predictions)
        self.evaluated_metrics["AverageReciprocalHitRank"] = RecommenderMetrics.AverageReciprocalHitRank(topN_predictions, leftout_predictions)

        # Use the full dataset for these metrics: UserCoverage, Diversity, Novelty
        trainset_full = self.recommender_data.GetFullTrainSet()
        self.recommender_algorithm.fit(trainset_full)
        predictions_all = self.recommender_algorithm.test(self.recommender_data.GetFullAntiTestSet())
        topN_predictions = RecommenderMetrics.GetTopN(predictions_all, self.N)
        if len(sample_topN_for_userIDs) != 0:
            sample_topN = self.FilterTopN(topN_predictions, sample_topN_for_userIDs)

        self.evaluated_metrics["UserCoverage"] = RecommenderMetrics.UserCoverage(topN_predictions,
                                                                                 trainset_full.n_users,
                                                                                 ratingThreshold=4.0)
        # Diversity uses the similarity matrix
        self.evaluated_metrics["Diversity"] = RecommenderMetrics.Diversity(topN_predictions,
                                                                           self.recommender_data.GetSimilarities())
        # Novelty uses the popularity rankings
        self.evaluated_metrics["Novelty"] = RecommenderMetrics.Novelty(topN_predictions,
                                                                       self.recommender_data.GetPopularityRankings())

    # format: {Algorithm: {evaluated metrics}}
    output[self.recommender_name] = {"metrics": self.evaluated_metrics}
    if len(sample_topN_for_userIDs) != 0:
        # format: {TopN: {userID: {Algorithm: [sample top-N movies recommended]}}}
        output[self.recommender_name].update({"sample_topn": sample_topN})
    return output
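
# --- Hedged usage sketch (not part of the original source) ---
# How this sample_topN_for_userIDs variant might be called. The attribute
# names (recommender_algorithm, recommender_name, FilterTopN) come from the
# method body above; the wrapper object and user IDs below are illustrative
# assumptions, so the call is left commented out:
#
# output = evaluatedSVD.Evaluate(evalData, doTopN=True, n=10,
#                                sample_topN_for_userIDs=[85, 1])
# for name, result in output.items():
#     print(name, "->", result["metrics"])
#     print(result.get("sample_topn", {}))  # per-user sampled top-N lists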