def fit(self, trainset):
    """Fit the algorithm and build the content-based item similarity matrix.

    Delegates to ``AlgoBase.fit`` first, then fills ``self.similarities``
    with a symmetric ``n_items x n_items`` NumPy array. The entry for a pair
    of distinct items is the product of ``computeTypeSimilarity`` and
    ``computeDistanceSimilarity`` evaluated on the two items' raw place ids.
    Diagonal entries are never written and therefore stay at 0.

    Args:
        trainset: Training set forwarded to ``AlgoBase.fit``. The item count
            and raw item ids are subsequently read from ``self.trainset``.

    Returns:
        This instance, so calls can be chained.
    """
    AlgoBase.fit(self, trainset)

    # Restaurant-type data handed to computeTypeSimilarity for every pair.
    ml = Reviews()
    typeR = ml.getRestaurantType()

    print("Computing content-based similarity matrix...")

    n_items = self.trainset.n_items
    self.similarities = np.zeros((n_items, n_items))

    # Map every inner item id to its integer raw place id once, instead of
    # repeating the conversion inside the pairwise loop below.
    placeIDs = [
        int(self.trainset.to_raw_iid(innerID)) for innerID in range(n_items)
    ]

    for thisRating in range(n_items):
        if thisRating % 50 == 0:
            print(thisRating, " of ", n_items)
        thisplaceID = placeIDs[thisRating]
        # Only visit pairs above the diagonal; the result is mirrored.
        for otherRating in range(thisRating + 1, n_items):
            otherplaceID = placeIDs[otherRating]
            typeSimilarity = self.computeTypeSimilarity(
                thisplaceID, otherplaceID, typeR
            )
            distancesSimilarity = self.computeDistanceSimilarity(
                thisplaceID, otherplaceID
            )
            similarity = typeSimilarity * distancesSimilarity
            self.similarities[thisRating, otherRating] = similarity
            self.similarities[otherRating, thisRating] = similarity

    print("...done.")

    return self
def LoadRevsData():
    """Load the review data set together with its popularity rankings.

    Progress messages are printed before each of the two loading steps.

    Returns:
        A 3-tuple ``(reviews, dataset, popularity_ranks)`` holding the
        ``Reviews`` instance, the result of its ``loadRevDataSet()`` call and
        the result of its ``getPopularityRanks()`` call, in that order.
    """
    reviews = Reviews()

    print("Loading restaurants ratings...")
    dataset = reviews.loadRevDataSet()

    print("\nComputing movie popularity ranks so we can measure novelty later...")
    popularity_ranks = reviews.getPopularityRanks()

    return reviews, dataset, popularity_ranks
Created on Fri May 3 16:29:32 2019

@author: hedi
"""
from ReviewsHandeling import Reviews
from surprise import KNNBasic
# heapq, defaultdict and itemgetter are not referenced in the visible part of
# this script; presumably they are used further down - confirm before removing.
import heapq
from collections import defaultdict
from operator import itemgetter

# Raw user id (a string) that is translated to an inner id below.
testSubject = '1068'
# NOTE(review): k is not used in the visible part of the script; presumably
# the number of neighbours to keep - confirm against the code that follows.
k = 10

# Load our data set and compute the user similarity matrix:
# build a training set from the full data set and fit a user-based KNNBasic
# model configured with cosine similarity.
ml = Reviews()
data = ml.loadRevDataSet()
trainSet = data.build_full_trainset()
sim_options = {'name': 'cosine', 'user_based': True}
model = KNNBasic(sim_options=sim_options)
model.fit(trainSet)
# NOTE(review): fit() may already build the similarity matrix internally, in
# which case this explicit call repeats the work - verify against Surprise.
simsMatrix = model.compute_similarities()

# Get top N similar users to our test subject
# (Alternate approach would be to select users up to some similarity threshold - try it!)
# The raw id is converted to the training set's inner user id, which is then
# used to index the test subject's row of the similarity matrix.
testUserInnerID = trainSet.to_inner_uid(testSubject)
similarityRow = simsMatrix[testUserInnerID]
# -*- coding: utf-8 -*- """ Created on Wed Apr 24 14:59:58 2019 @author: hedi """ from ReviewsHandeling import Reviews from KnnContentFiltering import KnnContentFiltering from EvaluationData import EvaluationData ml = Reviews() data = ml.loadRevDataSet() rankings = ml.getPopularityRanks() tt = EvaluationData(data, rankings) tt.GetTrainSet()