def make_recommendations(self, _id, dot_prod, _id_type='movie', rec_num=5):
    """
    Make recommendations for a user id or for an item id.

    If ``_id_type == 'user'``:
      * known user (``_id`` found in ``self.user_ids_series``): the user's
        row of ``self.user_mat`` is multiplied with ``self.item_mat`` and the
        ``rec_num`` best scored items are returned;
      * unknown user: the result of ``rf.popular_recommendations`` is
        returned as names and no ids are available.

    Any other ``_id_type`` (including the default ``'movie'``) treats
    ``_id`` as an item id:
      * known item (``_id`` found in ``self.items_ids_series``): the first
        ``rec_num`` results of ``rf.find_similar_items`` are returned as
        names and no ids are available;
      * unknown item: nothing can be recommended, only a message is
        returned.

    Input:
    - _id: either a user or an item id
    - dot_prod: forwarded unchanged to ``rf.find_similar_items``
    - _id_type: 'user' selects the user path, anything else the item path
    - rec_num: number of recommendations wanted, Default: 5 (int)

    Output:
    - rec_ids: recommended item ids, or None when the branch has no ids
    - rec_names: recommended item names, or None for an unknown item
    - message: a personalized message (str)
    """
    # Bind the outputs up front: several branches below produce no ids (or
    # no names), and the final return used to raise UnboundLocalError there.
    rec_ids = None
    rec_names = None

    if _id_type == 'user':
        # NOTE(review): if user_ids_series is a pandas Series, `in` tests the
        # index rather than the values - confirm it is an ndarray/list.
        if _id in self.user_ids_series:
            message = 'Glad to see you again! recommended for you:\n'
            idx = np.where(self.user_ids_series == _id)[0][0]

            # predicted score of every item for this user:
            # dot product of the user's row and the item matrix
            preds = np.dot(self.user_mat[idx, :], self.item_mat)

            # best first, then cut; unlike `[-rec_num:]` this also yields an
            # empty result for rec_num == 0 instead of every item
            indices = preds.argsort()[::-1][:rec_num]
            rec_ids = self.items_ids_series[indices]
            rec_names = rf.get_item_names(rec_ids, self.df_items,
                                          self.item_id_colname,
                                          self.item_name_colname)
        else:
            message = "Hey, you are new here, this is for you:\n"
            # if we don't have this user, give just top ratings back
            rec_names = rf.popular_recommendations(_id, self.ranked_items,
                                                   rec_num)
    elif _id in self.items_ids_series:
        message = 'Similar items for this rated item:\n'
        rec_names = list(
            rf.find_similar_items(_id, self.df_items,
                                  self.item_name_colname, dot_prod)
        )[:rec_num]
    else:
        # Unknown item: hand the notice back to the caller as the message
        # instead of printing it and then crashing on the return.
        message = "Please update the database with this item"

    return rec_ids, rec_names, message
def make_recommendations(self, _id, dot_prod_user, tfidf_matrix,
                         _id_type='item', rec_num=5):
    """
    Make recommendations for a particular user or a particular item,
    depending on the value given in the _id_type argument.

    If you choose _id_type='user':
    the _id argument is considered a user id. For a user found in
    ``self.user_ids_series`` the recommendation comes from the factorized
    matrices (the user's row of ``self.user_mat`` times ``self.item_mat``),
    and similar users plus their items are looked up through
    ``rf.find_similar_user`` / ``rf.user_user_cf``. For a new user the
    recommendation comes from ``rf.popular_recommendations``.

    Any other _id_type:
    the _id argument is considered an item id. If the item is found in
    ``self.items_ids_series`` the recommendation comes from
    ``rf.find_similar_items``. If the item is not present, only a message
    asking to update the data with this item is returned.

    Input:
    - _id: either a user or item id (int)
    - dot_prod_user: the dot product matrix computed on your own to find
                     similar users (forwarded to ``rf.find_similar_user``)
    - tfidf_matrix: forwarded to ``rf.find_similar_items`` for the item path
    - _id_type: either 'user' or 'item', Default: 'item' (str)
    - rec_num: number of recommendations that you want, Default: 5 (int)

    Output:
    - rec_ids: recommendation ids (None for an unknown item)
    - rec_names: recommendation names (None for an unknown item)
    - message: a personalized message (str)
    - rec_user_user_ids: result of ``rf.find_similar_user``; None unless
                         _id is a known user
    - rec_user_item_names: result of ``rf.user_user_cf``; None unless _id
                           is a known user
    """
    # Only the known-user branch fills these two in.
    rec_user_user_ids = None
    rec_user_item_names = None

    if _id_type == 'user':
        # NOTE(review): if user_ids_series is a pandas Series, `in` tests the
        # index rather than the values - confirm it is an ndarray/list.
        if _id in self.user_ids_series:
            message = 'Glad to see you again! recommended for you:\n'
            idx = np.where(self.user_ids_series == _id)[0][0]

            # predicted score of every item for this user:
            # dot product of the user's row and the item matrix
            preds = np.dot(self.user_mat[idx, :], self.item_mat)

            # best first, then cut; unlike `[-rec_num:]` this also yields an
            # empty result for rec_num == 0 instead of every item
            indices = preds.argsort()[::-1][:rec_num]
            rec_ids = self.items_ids_series[indices]
            rec_names = rf.get_item_names(rec_ids, self.df_items,
                                          self.item_id_colname,
                                          self.item_name_colname)

            rec_user_user_ids = rf.find_similar_user(
                _id, self.df_reviews, self.user_id_colname, dot_prod_user)
            rec_user_item_names = rf.user_user_cf(rec_user_user_ids,
                                                  self.user_item_df,
                                                  self.df_reviews,
                                                  self.item_id_colname,
                                                  self.item_name_colname)
        else:
            message = "Hey, you are new here, this is for you:\n"
            # if we don't have this user, give just top ratings back
            rec_ids = rf.popular_recommendations(_id, self.ranked_items,
                                                 self.item_id_colname,
                                                 rec_num)
            rec_names = rf.get_item_names(rec_ids, self.df_items,
                                          self.item_id_colname,
                                          self.item_name_colname)
    elif _id in self.items_ids_series:
        name_item_for_message = rf.get_item_names(
            [_id], self.df_items, self.item_id_colname,
            self.item_name_colname)
        message = (f"Similar items for id:{_id}, corresponding to "
                   f"{name_item_for_message[0]}:\n")
        rec_ids = rf.find_similar_items(_id, self.df_items,
                                        self.item_id_colname,
                                        tfidf_matrix)[:rec_num]
        rec_names = rf.get_item_names(rec_ids, self.df_items,
                                      self.item_id_colname,
                                      self.item_name_colname)
    else:
        # The two literals are concatenated by the parser, so the first one
        # needs its trailing space (it used to read "please makesure").
        message = (
            "We can't make recommendation for this item, please make "
            "sure the data was updated with this item.\n")
        rec_ids = None
        rec_names = None

    return rec_ids, rec_names, message, rec_user_user_ids, rec_user_item_names
def make_recommendations(self, _id, _id_type='item', rec_num=5,
                         latent_features=12, learning_rate=0.001, iters=10):
    """
    Fit FunkSVD on the ratings of one user and recommend items to that user.

    The ratings of ``_id`` are taken from ``self.user_item_grouped``,
    pivoted into a user x item matrix and factorized with FunkSVD
    (stochastic gradient descent on the rated cells). If the user is known,
    the ``rec_num`` items with the highest predicted rating are returned.
    If the user is new, the recommendation comes from
    ``rf.popular_recommendations`` (ranked based recommendation).

    Input:
    - _id: a user id (int)
    - _id_type: never read by this method; kept so existing callers that
                pass it keep working
    - rec_num: number of recommendations that you want, Default: 5 (int)
    - latent_features: number of latent features to fit, Default: 12 (int)
    - learning_rate: gradient descent step size, Default: 0.001 (float)
    - iters: number of passes over the ratings, Default: 10 (int)

    Output:
    - recommendation ids
    - recommendation names
    - and a personalized message

    Side effects: stores the hyper-parameters, the pivoted ratings, their
    dimensions, ``self.mse`` and ``self.ranked_items`` on the instance and
    prints the training progress.
    """
    self.latent_features = latent_features
    self.learning_rate = learning_rate
    self.iters = iters

    user_item_reset = self.user_item_grouped.reset_index()
    self.user_ids = user_item_reset[self.user_id_colname].unique()

    # Keep the ratings of the requested user only and pivot them into a
    # user x item frame (highest rating per user/item pair).
    belongs_to_user = user_item_reset[self.user_id_colname] == _id
    current_user = user_item_reset[belongs_to_user]
    current_user = current_user.groupby(
        [self.user_id_colname, self.item_id_colname]
    )[self.rating_col_name].max()
    current_user_item_df = current_user.unstack()
    self.current_user_item_df = current_user_item_df
    self.user_item_mat = np.array(self.current_user_item_df)

    # Set up some useful values for later
    self.n_users = self.user_item_mat.shape[0]
    self.n_items = self.user_item_mat.shape[1]
    self.num_ratings = np.count_nonzero(~np.isnan(self.user_item_mat))
    self.user_ids_series = np.array(
        user_item_reset[self.user_id_colname].unique())
    self.items_ids_series = np.array(
        user_item_reset[self.item_id_colname].unique())

    print('Train data with Funk Singular Value Decomposition...')

    #### FunkSVD ####
    # initialize the user and item matrices with random values
    user_mat = np.random.rand(self.n_users, self.latent_features)
    item_mat = np.random.rand(self.latent_features, self.n_items)

    # Cells that hold a rating (NaN > 0 is False, so missing values are
    # skipped). Found once, in the row-major order the nested i/j loops
    # visited them.
    rated_cells = np.argwhere(self.user_item_mat > 0)

    # A user without any rating leaves nothing to fit, and the per-rating
    # error below would divide by zero, so training is skipped entirely.
    n_passes = self.iters if self.num_ratings else 0

    sse_accum = 0
    print("Iterations \t\t Mean Squared Error ")
    for iteration in range(n_passes):
        sse_accum = 0
        for i, j in rated_cells:
            # error = actual rating minus the dot product of the user and
            # item latent features
            diff = (
                self.user_item_mat[i, j]
                - np.dot(user_mat[i, :], item_mat[:, j])
            )

            # Keep track of the sum of squared errors for the matrix
            sse_accum += diff**2

            # Gradient step on all latent features at once. The item
            # update deliberately uses the already updated user row, as
            # the per-feature loop did.
            user_mat[i, :] += (
                self.learning_rate * (2 * diff * item_mat[:, j])
            )
            item_mat[:, j] += (
                self.learning_rate * (2 * diff * user_mat[i, :])
            )

        print(f"\t{iteration+1} \t\t {sse_accum/self.num_ratings} ")

    self.mse = (
        sse_accum / self.num_ratings if self.num_ratings else float('nan')
    )

    # Create ranked items
    self.ranked_items = rf.ranked_df(self.df_reviews,
                                     self.item_id_colname,
                                     self.rating_col_name,
                                     self.date_col_name)

    # Rows/columns of the factorized matrices follow the pivoted frame of
    # this user, NOT the global user/item lists, so positions must be
    # resolved against the frame's own labels.
    trained_user_ids = np.array(current_user_item_df.index)
    trained_item_ids = np.array(current_user_item_df.columns)
    user_rows = np.where(trained_user_ids == _id)[0]

    if _id in self.user_ids_series and user_rows.size:
        message = 'Glad to see you again! recommended for you:\n'

        # predicted score of every trained item for this user
        preds = np.dot(user_mat[user_rows[0], :], item_mat)

        # best first, then cut; unlike `[-rec_num:]` this also yields an
        # empty result for rec_num == 0 instead of every item
        # NOTE(review): candidates are limited to the items this user has
        # already rated, because the model is fitted on the user's own
        # ratings only - confirm that this is intended.
        indices = preds.argsort()[::-1][:rec_num]
        rec_ids = trained_item_ids[indices]
        rec_names = rf.get_item_names(rec_ids, self.df_items,
                                      self.item_id_colname,
                                      self.item_name_colname)
    else:
        message = "Hey, you are new here, this is for you:\n"
        # if we don't have this user, give just top ratings back
        rec_ids = rf.popular_recommendations(_id, self.ranked_items,
                                             self.item_id_colname, rec_num)
        rec_names = rf.get_item_names(rec_ids, self.df_items,
                                      self.item_id_colname,
                                      self.item_name_colname)

    return rec_ids, rec_names, message