def fit(self, trainset):
    """Fit a category-constrained matrix factorisation.

    Learns user factors U (n_users x latent_dimension) and item factors
    M (latent_dimension x n_items) by alternating gradient descent on a
    masked squared-error objective with L2 regularisation (self.mu) plus
    a penalty (self.lamb) pushing the mean predicted rating per product
    category towards self.cat_target.
    """
    AlgoBase.fit(self, trainset)
    n = trainset.n_users
    m = trainset.n_items
    #print(n,m)
    # K is a 0/1 mask of observed ratings; R holds the rating values.
    self.K = agnp.zeros((n,m))
    self.R = agnp.zeros((n,m))
    for u, i, rating in trainset.all_ratings():
        ru, ri = self.add_to_known(u,i)
        self.K[ru,ri]=1
        self.R[ru,ri]=rating
    # Random (standard-normal) initialisation of both factor matrices.
    self.U = agnp.random.normal(size = (n, self.latent_dimension))
    self.M = agnp.random.normal(size = (self.latent_dimension, m))
    # C[ri, c] is True iff item ri belongs to category c (one-hot rows).
    self.C = agnp.array([[self.cat_products[self.from_known_ri(ri)] == c for c in range(len(self.cat_target))] for ri in range(m)])
    # Objective as a function of U alone (M held fixed):
    # masked reconstruction error + L2 on both factors + category penalty.
    self.fun_U = lambda U : (agnp.sum(self.K*(self.R - agnp.dot(U,self.M))**2)+ self.mu * (agnp.sum(U**2) + agnp.sum(self.M**2)) +self.lamb*agnp.sum((1/n * agnp.dot(agnp.dot(agnp.ones(n),agnp.dot(U, self.M)),self.C) -self.cat_target)**2))
    # Same objective as a function of M alone (U held fixed).
    self.fun_M = lambda M : (agnp.sum(self.K*(self.R - agnp.dot(self.U,M))**2)+ self.mu * (agnp.sum(self.U**2) + agnp.sum(M**2)) +self.lamb*agnp.sum((1/n * agnp.dot(agnp.dot(agnp.ones(n),agnp.dot(self.U, M)),self.C) -self.cat_target)**2))
    # Autograd builds the gradient functions for the alternating updates.
    self.grad_U = grad(self.fun_U)
    self.grad_M = grad(self.fun_M)
    # Alternating single-step gradient descent with a decaying learning rate.
    for epoch in range(self.nb_main_epochs):
        self.M = gradient_descent(self.M, self.grad_M, N = 1, lr = self.lr, alpha = 1)
        self.U = gradient_descent(self.U, self.grad_U, N = 1, lr = self.lr, alpha = 1)
        self.lr*=self.alpha
    return self
def fit(self, trainset):
    """Fit the wrapped algorithm, then replace its similarity matrix
    with the AgreeTrust-adjusted version."""
    # Base-class bookkeeping must happen before anything else.
    AlgoBase.fit(self, trainset)
    self.algo.fit(trainset)
    if self.algo.verbose:
        print(
            'Ignore the above similiary matrix generation message, its not used in this algorithm'
        )
    print('Calculating AgreeTrust matrix ...')
    start = time.time()
    tr, comon, noncom = agree_trust_opitmal_a_b(trainset, self.beta, self.epsilon, self.algo.sim, ptype=self.ptype, istrainset=True, activity=False)
    # Boost trust by lambdak, then subtract the non-common penalty.
    boosted = tr**self.lambdak
    penalty = self.epsilon * noncom
    self.algo.sim = boosted - penalty
    print(time.time() - start)
    print('agree_trust_opitmal_a_b fit time')
    return self
def fit(self, trainset):
    """Fit by memorising the mean of all training ratings."""
    # Base method first, as required by the framework.
    AlgoBase.fit(self, trainset)
    # Equivalent to trainset.global_mean, computed explicitly here.
    all_ratings = [rating for (_, _, rating) in self.trainset.all_ratings()]
    self.the_mean = np.mean(all_ratings)
    return self
def fit(self, trainset):
    """Precompute a content-based item-item similarity matrix from
    restaurant type and distance."""
    AlgoBase.fit(self, trainset)
    # Load restaurant-type attributes for every item.
    ml = Reviews ()
    typeR = ml.getRestaurantType()
    print("Computing content-based similarity matrix...")
    n_items = self.trainset.n_items
    self.similarities = np.zeros((n_items, n_items))
    # Upper triangle only; mirror into the lower triangle.
    for left in range(n_items):
        if left % 50 == 0:
            print(left, " of ", n_items)
        left_id = int(self.trainset.to_raw_iid(left))
        for right in range(left + 1, n_items):
            right_id = int(self.trainset.to_raw_iid(right))
            type_sim = self.computeTypeSimilarity(left_id, right_id, typeR)
            dist_sim = self.computeDistanceSimilarity(left_id, right_id)
            combined = type_sim * dist_sim
            self.similarities[left, right] = combined
            self.similarities[right, left] = combined
    print("...done.")
    return self
def fit(self, trainset, movies):
    """Compute a score-weighted mean embedding vector per training user.

    `movies` maps raw movie id -> dict with (at least) an "embedding"
    list; each matched entry is mutated here (a "score" key is written
    into it). Results are stored in self.mean_user_vectors keyed by
    inner user id.
    """
    AlgoBase.fit(self, trainset)
    self.movies = movies
    mean_user_vectors = {}
    for u in trainset.ur:
        weights_sum = 0.0
        user_movies = {}
        for movieId, score in trainset.ur[u]:
            movieRawId = trainset.to_raw_iid(movieId)
            if movieRawId in self.movies:
                user_movies[movieRawId] = self.movies[movieRawId]
                # NOTE(review): this writes into the shared self.movies
                # entry, so the last user to rate a movie overwrites its
                # "score" — confirm this aliasing is intended.
                user_movies[movieRawId]["score"] = score
        # NOTE(review): raises IndexError when none of the user's rated
        # movies appear in `movies` — confirm inputs guarantee coverage.
        sum_vector = [
            0.0 for _ in user_movies[list(user_movies)[0]]["embedding"]
        ]
        for _, movie in user_movies.items():
            sum_vector = [
                x + y * movie["score"]
                for x, y in zip(sum_vector, movie["embedding"])
            ]
            # NOTE(review): `score` here is the leftover loop variable from
            # the ratings loop above (the user's LAST rating), not
            # movie["score"] — looks like a bug: weights_sum becomes
            # last_score * len(user_movies). Confirm intent.
            weights_sum += score
        mean_vector = [x / weights_sum for x in sum_vector]
        mean_user_vectors[u] = mean_vector
    self.mean_user_vectors = mean_user_vectors
    return self
def fit(self,trainset):
    """Fit: estimate rating baselines and the similarity matrix."""
    AlgoBase.fit(self,trainset)
    # Silence progress output from the baseline/similarity computations.
    self.verbose = False
    self.bu,self.bi = self.compute_baselines()
    self.sim = self.compute_similarities()
    return self
def fit(self, trainset):
    """Cache each user's mean rating; unknown users fall back to the
    global mean via the defaultdict factory."""
    AlgoBase.fit(self, trainset)
    self.u_mean = defaultdict(lambda: trainset.global_mean)
    for user, user_ratings in trainset.ur.items():
        ratings_only = [r for _, r in user_ratings]
        self.u_mean[user] = np.mean(ratings_only)
    return self
def fit(self, trainset):
    """Fit after widening the trainset's rating scale.

    NOTE(review): mutating `trainset.rating_scale` to (1, 13) before the
    base fit forces the widened scale — presumably so combined component
    predictions stay in range; confirm against the callers.
    """
    trainset.rating_scale = (1, 13)
    AlgoBase.fit(self, trainset)
    # Fix: removed a large block of dead, commented-out content-similarity
    # code that computed nothing and obscured the method's actual behavior.
    print("...done.")
    return self
def fit(self, trainset):
    """Precompute the genre x year item-item similarity matrix.

    Only the upper triangle is computed; it is mirrored into the lower
    triangle since the similarity is symmetric.
    """
    AlgoBase.fit(self, trainset)
    # Load up attribute vectors for every movie.
    ml = MovieLens()
    genres = ml.getGenres()
    years = ml.getYears()
    # NOTE(review): mise-en-scene data was loaded but its per-pair
    # similarity was never used in the product below; the dead per-pair
    # computation is removed. The load is kept in case getMiseEnScene()
    # has caching side effects — consider removing after confirming.
    mes = ml.getMiseEnScene()
    print("Computing content-based similarity matrix...")
    self.similarities = np.zeros((self.trainset.n_items, self.trainset.n_items))
    for thisRating in range(self.trainset.n_items):
        if (thisRating % 100 == 0):
            print(thisRating, " of ", self.trainset.n_items)
        # Hoisted out of the inner loop: invariant per outer item.
        thisMovieID = int(self.trainset.to_raw_iid(thisRating))
        for otherRating in range(thisRating+1, self.trainset.n_items):
            otherMovieID = int(self.trainset.to_raw_iid(otherRating))
            genreSimilarity = self.computeGenreSimilarity(thisMovieID, otherMovieID, genres)
            yearSimilarity = self.computeYearSimilarity(thisMovieID, otherMovieID, years)
            self.similarities[thisRating, otherRating] = genreSimilarity * yearSimilarity
            self.similarities[otherRating, thisRating] = self.similarities[thisRating, otherRating]
    print("...done.")
    return self
def fit(self, trainset):
    """Train an RBM on one-hot-encoded ratings and cache dense predictions.

    Half-star ratings (0.5-5.0) map to 10 one-hot slots via
    int(rating * 2) - 1; predictions invert the mapping with
    (expected_slot + 1) * 0.5.
    """
    AlgoBase.fit(self, trainset)
    numUsers = trainset.n_users
    numItems = trainset.n_items
    # One-hot cube: users x items x 10 possible rating values.
    trainingMatrix = np.zeros([numUsers, numItems, 10], dtype = np.float32)
    for (uid, iid, rating) in trainset.all_ratings():
        adjustedRating = int(float(rating)*2.0) - 1
        trainingMatrix[int(uid), int(iid), adjustedRating] = 1
    # Flatten to 2D: one row per user, items*10 visible nodes.
    trainingMatrix = np.reshape(trainingMatrix,[trainingMatrix.shape[0], - 1])
    rbm = RBM(trainingMatrix.shape[1], hiddenDimensions = self.hiddenDim, learningRate = self.learningRate, batchSize = self.batchSize)
    rbm.Train(trainingMatrix)
    self.predictedRatings = np.zeros([numUsers, numItems], dtype = np.float32)
    for uiid in range(trainset.n_users):
        if(uiid % 50 == 0):
            # Fix: corrected "Procissing" typo in the progress message.
            print("Processing user ", uiid)
        recs = rbm.GetRecommendations([trainingMatrix[uiid]])
        recs = np.reshape(recs, [numItems, 10])
        for itemID, rec in enumerate(recs):
            # Expectation over the softmax-normalised slot probabilities,
            # mapped back to the 0.5-5.0 rating scale.
            normalized = self.softmax(rec)
            rating = np.average(np.arange(10), weights = normalized)
            self.predictedRatings[uiid,itemID] = (rating + 1)* 0.5
    return self
def fit(self, trainset):
    """Delegate fitting to every wrapped algorithm in the ensemble."""
    AlgoBase.fit(self, trainset)
    for member in self.algorithms:
        member.fit(trainset)
    return self
def fit(self, trainset):
    """Train the RBM on one-hot rating vectors and cache every prediction."""
    AlgoBase.fit(self, trainset)
    user_count = trainset.n_users
    item_count = trainset.n_items
    # users x items x 10 one-hot rating slots (half-star rating -> slot index).
    one_hot = np.zeros([user_count, item_count, 10], dtype=np.float32)
    for (uid, iid, rating) in trainset.all_ratings():
        slot = int(float(rating)*2.0) - 1
        one_hot[int(uid), int(iid), slot] = 1
    # Flatten to a 2D array, with nodes for each possible rating type on
    # each possible item, for every user.
    one_hot = np.reshape(one_hot, [one_hot.shape[0], -1])
    # Create an RBM with (num items * rating values) visible nodes.
    rbm = RBM(one_hot.shape[1], hiddenDimensions=self.hiddenDim,
              learningRate=self.learningRate, batchSize=self.batchSize,
              epochs=self.epochs)
    rbm.Train(one_hot)
    self.predictedRatings = np.zeros([user_count, item_count], dtype=np.float32)
    for inner_uid in range(trainset.n_users):
        if inner_uid % 50 == 0:
            print("Processing user ", inner_uid)
        raw_recs = rbm.GetRecommendations([one_hot[inner_uid]])
        raw_recs = np.reshape(raw_recs, [item_count, 10])
        for item_id, rec in enumerate(raw_recs):
            # A plain argmax ties massively at 5 stars; the RBM paper
            # instead normalises the K slot activations and takes the
            # expectation as the prediction.
            probs = self.softmax(rec)
            expected = np.average(np.arange(10), weights=probs)
            self.predictedRatings[inner_uid, item_id] = (expected + 1) * 0.5
    return self
def fit(self, trainset):
    """Precompute the genre x year x mise-en-scene item similarity matrix."""
    AlgoBase.fit(self, trainset)
    # Load attribute data for every movie.
    ml = MovieLens()
    genres = ml.getGenres()
    years = ml.getYears()
    mes = ml.getMiseEnScene()
    print("Computing content-based similarity matrix...")
    self.similarities = np.zeros(
        (self.trainset.n_items, self.trainset.n_items))
    for thisRating in range(self.trainset.n_items):
        if (thisRating % 100 == 0):
            print(thisRating, " of ", self.trainset.n_items)
        # Hoisted out of the inner loop: invariant per outer item.
        thisMovieID = int(self.trainset.to_raw_iid(thisRating))
        for otherRating in range(thisRating + 1, self.trainset.n_items):
            otherMovieID = int(self.trainset.to_raw_iid(otherRating))
            genreSimilarity = self.computeGenreSimilarity(
                thisMovieID, otherMovieID, genres)
            # Fix: renamed the misspelled local `yeaarSimilarity`.
            yearSimilarity = self.computeYearSimilarity(
                thisMovieID, otherMovieID, years)
            mesSimilarity = self.computeMiseEnSceneSimilarity(
                thisMovieID, otherMovieID, mes)
            combined = genreSimilarity * yearSimilarity * mesSimilarity
            self.similarities[thisRating, otherRating] = combined
            self.similarities[otherRating, thisRating] = combined
    print("...done.")
    return self
def fit(self, trainset):
    """Precompute the cuisine-based item-item similarity matrix."""
    AlgoBase.fit(self, trainset)
    lf = LoadFoods()
    cuisines = lf.getCuisines()
    print("Now computing content-based similarity matrix. Please wait ...")
    # Symmetric matrix: compute the upper triangle and mirror it.
    self.similarities = np.zeros(
        (self.trainset.n_items, self.trainset.n_items))
    for thisRating in range(self.trainset.n_items):
        if (thisRating % 100 == 0 and thisRating != 0):
            print("processed ", thisRating, " of ", self.trainset.n_items,
                  " items")
        thisFoodID = (self.trainset.to_raw_iid(thisRating))
        for otherRating in range(thisRating + 1, self.trainset.n_items):
            # Bug fix: the original resolved BOTH ids from `thisRating`,
            # so every pair compared an item with itself and the whole
            # matrix held self-similarities.
            otherFoodID = (self.trainset.to_raw_iid(otherRating))
            cuisineSimilarity = self.computeCuisineSimilarity(
                thisFoodID, otherFoodID, cuisines)
            self.similarities[thisRating, otherRating] = cuisineSimilarity
            self.similarities[otherRating, thisRating] = cuisineSimilarity
    print("done computing the matrix...")
    return self
def fit(self, trainset):
    """Precompute a genre-based item-item similarity matrix.

    Pairs where either movie has no genre data keep the initial 0.0.
    """
    AlgoBase.fit(self, trainset)
    # Load genre vectors for every movie.
    movies = MoviesContent(False, False)
    genres = movies.getGenres()
    print("Computing content-based similarity matrix...")
    self.similarities = np.zeros(
        (self.trainset.n_items, self.trainset.n_items))
    for thisRating in range(self.trainset.n_items):
        if thisRating % 100 == 0:
            print(thisRating, " of ", self.trainset.n_items)
        # Hoisted out of the inner loop: both the raw-id conversion and
        # the genre-presence test are invariant per outer item.
        thisMovieID = int(self.trainset.to_raw_iid(thisRating))
        thisHasGenres = len(genres[thisMovieID]) > 0
        for otherRating in range(thisRating + 1, self.trainset.n_items):
            otherMovieID = int(self.trainset.to_raw_iid(otherRating))
            if thisHasGenres and len(genres[otherMovieID]) > 0:
                genreSimilarity = self.computeGenreSimilarity(
                    thisMovieID, otherMovieID, genres)
                self.similarities[thisRating, otherRating] = genreSimilarity
                self.similarities[otherRating, thisRating] = genreSimilarity
    print("...done.")
    return self
def fit(self, trainset):
    """Fit: compute rating baselines and the similarity matrix."""
    AlgoBase.fit(self, trainset)
    self.bu, self.bi = self.compute_baselines()
    self.sim = self.compute_similarities()
    # Fix: fit() must return self (Surprise convention, and every sibling
    # fit in this codebase does); the original implicitly returned None.
    return self
def fit(self, trainset):
    """Build the content similarity matrix from genre, year and
    mise-en-scene attributes (symmetric, upper triangle mirrored)."""
    AlgoBase.fit(self, trainset)
    ml = MovieLensData()
    genres = ml.returnGenres()
    years = ml.returnYears()
    mes = ml.returnMES()
    item_count = self.trainset.n_items
    self.similarities = np.zeros((item_count, item_count))
    for left in range(item_count):
        for right in range(left + 1, item_count):
            left_id = int(self.trainset.to_raw_iid(left))
            right_id = int(self.trainset.to_raw_iid(right))
            genre_sim = self.computeGenreSimilarity(left_id, right_id, genres)
            year_sim = self.computeYearSimilarity(left_id, right_id, years)
            mes_sim = self.computeMiseEnSceneSimilarity(left_id, right_id, mes)
            combined = genre_sim * year_sim * mes_sim
            self.similarities[left, right] = combined
            self.similarities[right, left] = combined
    return self
def fit(self, trainset):
    """Train an AutoRec autoencoder on normalised ratings and cache the
    reconstructed (predicted) rating matrix."""
    AlgoBase.fit(self, trainset)
    user_count = trainset.n_users
    item_count = trainset.n_items
    # Ratings normalised to [0, 1] by dividing by the 5-star maximum.
    ratings_matrix = np.zeros([user_count, item_count], dtype=np.float32)
    for (uid, iid, rating) in trainset.all_ratings():
        ratings_matrix[int(uid), int(iid)] = rating / 5.0
    # Create an AutoRec with one visible node per item.
    autoRec = AutoRec(ratings_matrix.shape[1],
                      hiddenDimensions=self.hiddenDim,
                      learningRate=self.learningRate,
                      batchSize=self.batchSize,
                      epochs=self.epochs)
    autoRec.Train(ratings_matrix)
    self.predictedRatings = np.zeros([user_count, item_count], dtype=np.float32)
    for inner_uid in range(trainset.n_users):
        if inner_uid % 50 == 0:
            print("Processing user ", inner_uid)
        reconstruction = autoRec.GetRecommendations([ratings_matrix[inner_uid]])
        for item_id, value in enumerate(reconstruction):
            # Undo the normalisation back to the 0-5 scale.
            self.predictedRatings[inner_uid, item_id] = value * 5.0
    return self
def fit(self, trainset):
    """Precompute genre*year similarity for every item pair."""
    AlgoBase.fit(self, trainset)
    ml = MovieLens()
    genres = ml.getGenres()
    years = ml.getYears()
    print("Computing content-based similarity matrix")
    # Symmetric matrix over all items, initialised to zero.
    item_count = self.trainset.n_items
    self.similarities = np.zeros((item_count, item_count))
    for left in range(item_count):
        if left % 1000 == 0:
            print(left, " of ", item_count)
        for right in range(left + 1, item_count):
            left_id = int(self.trainset.to_raw_iid(left))
            right_id = int(self.trainset.to_raw_iid(right))
            genre_sim = self.similarityBasedOnGenre(left_id, right_id, genres)
            year_sim = self.similarityBasedOnYear(left_id, right_id, years)
            combined = genre_sim * year_sim
            self.similarities[left, right] = combined
            self.similarities[right, left] = combined
    return self
def fit(self, trainset):
    """Fit: precompute the similarity matrix for this trainset."""
    AlgoBase.fit(self, trainset)
    self.sim = self.compute_similarities()
    return self
def fit(self, trainset):
    """Fit by caching the mean of all training ratings."""
    # Base method first, as the framework requires.
    AlgoBase.fit(self, trainset)
    self.the_mean = np.mean(
        [r for (_, _, r) in self.trainset.all_ratings()])
    # Fix: fit() must return self (Surprise convention, and the sibling
    # implementations all do); the original implicitly returned None.
    return self
def fit(self, trainset):
    """Delegate bookkeeping to the base class, then precompute the
    baseline estimates and the similarity matrix."""
    AlgoBase.fit(self, trainset)
    self.bu, self.bi = self.compute_baselines()
    self.sim = self.compute_similarities()
    return self
def fit(self, trainset):
    """Train the RBM and precompute a dense matrix of predicted ratings.

    Parameters
    ----------
    trainset:
        The data used in training.

    Returns
    -------
    model: RBMAlgorithm
        The fitted class instance.
    """
    AlgoBase.fit(self, trainset)
    user_count = trainset.n_users
    item_count = trainset.n_items
    # One-hot cube: users x items x 10 rating slots (half-star granularity).
    one_hot = np.zeros([user_count, item_count, 10], dtype=np.float32)
    for uid, iid, rating in trainset.all_ratings():
        slot = int(float(rating) * 2.0) - 1
        one_hot[int(uid), int(iid), slot] = 1
    # Flatten: one row of item*rating visible nodes per user.
    one_hot = np.reshape(one_hot, [one_hot.shape[0], -1])
    rbm = RBM(one_hot.shape[1],
              hidden_dimensions=self.hidden_dim,
              learning_rate=self.learning_rate,
              batch_size=self.batch_size,
              epochs=self.epochs)
    rbm.train(one_hot)
    self.predicted_ratings = np.zeros([user_count, item_count],
                                      dtype=np.float32)
    for uiid in range(trainset.n_users):
        if uiid % 50 == 0:
            print("Processing user ", uiid)
        recs = np.reshape(rbm.get_recommendations([one_hot[uiid]]),
                          [item_count, 10])
        for item_id, rec in enumerate(recs):
            # A plain argmax ties massively at 5 stars; per the paper,
            # normalise the K slot activations and take the expectation.
            probs = self.softmax(rec)
            expected = np.average(np.arange(10), weights=probs)
            self.predicted_ratings[uiid, item_id] = (expected + 1) * 0.5
    return self
def fit(self, trainset):
    """Build the MinHash-LSH index over every user in the training set."""
    AlgoBase.fit(self, trainset)
    self.lsh = MinHashLSH(threshold=self.tr, num_perm=self.n_perm)
    for uid in tqdm(self.trainset.ur, desc='Computing LSH'):
        signature = self.compute_minhash_signature(uid)
        self.lsh.insert(uid, signature)
    return self
def fit(self, trainset):
    """Widen the rating scale, then fit both component algorithms on the
    same trainset."""
    trainset.rating_scale = (1, 13)
    AlgoBase.fit(self, trainset)
    for component in (self.algorithm1, self.algorithm2):
        component.fit(trainset)
    print("...done.")
    return self
def fit(self, trainset):
    """Fit a power-law distribution to all training ratings and cache its
    parameters (alpha, xmin, KS distance D, and sigma)."""
    AlgoBase.fit(self, trainset)
    ratings = [r for _, _, r in trainset.all_ratings()]
    self._model = powerlaw.Fit(ratings)
    self._alpha = self._model.power_law.alpha
    self._xmin = self._model.power_law.xmin
    self._D = self._model.power_law.D
    self._std = self._model.power_law.sigma
    # Fix: fit() must return self (Surprise convention, matching every
    # other fit in this codebase); the original implicitly returned None.
    return self
def fit(self, trainset):
    """Precompute a content-based item-item similarity matrix for hotels.

    Combines ten attribute similarities (food & drink, things to do,
    comforts, guest/hotel ratings, ...) into a single product per hotel
    pair; the things-to-do term is up-weighted by 1.5. The matrix is
    symmetric, so only the upper triangle is computed and mirrored.
    """
    AlgoBase.fit(self, trainset)
    # Compute item similarity matrix based on content attributes
    # Load up vectors for every hotel
    helper = DatabaseHelper()
    guestRatings = helper.getHotelGuestRatings()
    hotelRatings = helper.getHotelRatings()
    foodAndDrinks = helper.getFoodAndDrink()
    thingsToDo = helper.getThingsToDo()
    homeComforts = helper.getHomeComforts()
    sleepWell = helper.getSleepWell()
    thingsToEnjoy = helper.getThingsToEnjoy()
    freshenUp = helper.getFreshenUp()
    beEntertained = helper.getBeEntertained()
    stayConnected = helper.getStayConnected()
    #Computing content-based similarity matrix
    self.similarities = np.zeros(
        (self.trainset.n_items, self.trainset.n_items))
    for thisRating in range(self.trainset.n_items):
        if (thisRating % 100 == 0):
            # Periodic progress logging.
            self.log.debug(thisRating, ' of ', self.trainset.n_items)
        for otherRating in range(thisRating + 1, self.trainset.n_items):
            thisHotelID = int(self.trainset.to_raw_iid(thisRating))
            otherHotelID = int(self.trainset.to_raw_iid(otherRating))
            foodAndDrinkSimilarity = self.computeMultiDataSimularity(
                thisHotelID, otherHotelID, foodAndDrinks)
            thingsToDoSimilarity = self.computeMultiDataSimularity(
                thisHotelID, otherHotelID, thingsToDo)
            homeComfortsSimilarity = self.computeMultiDataSimularity(
                thisHotelID, otherHotelID, homeComforts)
            sleepWellSimilarity = self.computeMultiDataSimularity(
                thisHotelID, otherHotelID, sleepWell)
            thingsToEnjoySimilarity = self.computeMultiDataSimularity(
                thisHotelID, otherHotelID, thingsToEnjoy)
            freshenUpSimilarity = self.computeMultiDataSimularity(
                thisHotelID, otherHotelID, freshenUp)
            beEntertainedSimilarity = self.computeMultiDataSimularity(
                thisHotelID, otherHotelID, beEntertained)
            stayConnectedSimilarity = self.computeMultiDataSimularity(
                thisHotelID, otherHotelID, stayConnected)
            guestRatingSimilarity = self.computeRatingSimilarity(
                thisHotelID, otherHotelID, guestRatings)
            hotelRatingsSimilarity = self.computeRatingSimilarity(
                thisHotelID, otherHotelID, hotelRatings)
            # Overall similarity = product of all attribute similarities,
            # with the things-to-do factor weighted 1.5x.
            self.similarities[thisRating, otherRating] = foodAndDrinkSimilarity * (
                thingsToDoSimilarity * 1.5
            ) * homeComfortsSimilarity * sleepWellSimilarity * thingsToEnjoySimilarity * freshenUpSimilarity * beEntertainedSimilarity * stayConnectedSimilarity * guestRatingSimilarity * hotelRatingsSimilarity
            self.similarities[otherRating, thisRating] = self.similarities[thisRating, otherRating]
    return self
def fit(self, trainset):
    """Fit: cache the mean of all training ratings (equals
    trainset.global_mean, computed explicitly here)."""
    AlgoBase.fit(self, trainset)
    ratings = [r for (_, _, r) in self.trainset.all_ratings()]
    self.the_mean = np.mean(ratings)
    return self
def fit(self, trainset):
    """Compute each user's running mean rating in one pass.

    Stores (mean, count) per inner user id in self._m_uid using the
    incremental-mean update m += (x - m) / n.
    """
    AlgoBase.fit(self, trainset)
    # Reset any state from a previous fit.
    self._m_uid.clear()
    for uid, iid, rating in self.trainset.all_ratings():
        if uid in self._m_uid:
            m = self._m_uid[uid][0]
            n = self._m_uid[uid][1] + 1
            m += (rating - m) / n
            self._m_uid[uid] = (m, n)
        else:
            self._m_uid[uid] = (rating, 1)
    # Fix: fit() must return self (Surprise convention, matching the
    # sibling implementations); the original implicitly returned None.
    return self
def fit(self, trainset):
    """Orient the data user-based or item-based per sim_options: x is the
    axis similarities are computed over, y is the other axis."""
    AlgoBase.fit(self, trainset)
    if self.sim_options['user_based']:
        self.n_x, self.n_y = self.trainset.n_users, self.trainset.n_items
        self.xr, self.yr = self.trainset.ur, self.trainset.ir
    else:
        self.n_x, self.n_y = self.trainset.n_items, self.trainset.n_users
        self.xr, self.yr = self.trainset.ir, self.trainset.ur
    return self
def train(self, algo: AlgoBase, data):
    """Fit the given algorithm on the full trainset built from `data`.

    :param algo: the algorithm to train
    :param data: the dataset (must expose build_full_trainset())
    :return: the fitted algorithm
    """
    full_trainset = data.build_full_trainset()
    algo.fit(trainset=full_trainset)
    return algo
def fit(self, trainset):
    """Fit skeleton: base-class bookkeeping only; the clustering steps
    sketched below are still TODO."""
    AlgoBase.fit(self, trainset)
    # Fix: removed a leftover debug print that dumped every inner user id
    # (print(list(trainset.all_users()))) — pure console noise on any
    # non-trivial trainset.
    # TODO: build the Ratmatrix
    # TODO: build the categoryMatrix
    # TODO: compute clusters and the clusters matrix
    # self.sim = self.compute_similarities()
    return self
def fit(self, trainset):
    """Fit a stub estimator: a constant estimate (3) and unit baselines,
    plus a counter of how many times fit() has been called."""
    AlgoBase.fit(self, trainset)
    self.est = 3
    self.bu, self.bi = 1, 1
    self.cnt += 1
    # Fix: fit() must return self (Surprise convention, matching every
    # other fit in this codebase); the original implicitly returned None.
    return self