def get_user_articles(self, user_id):
    '''
    INPUT:
    user_id - (int) a user id; must be one of the labels of
              self.user_item's index

    OUTPUT:
    article_ids - (list) the ids of the articles seen by the user, each
                  converted to str
    article_names - (list) whatever Utils.get_article_names returns for
                    those ids when looking at the 'title' column of
                    self.interactions_df

    Description:
    Provides the article ids and article titles that have been seen by a
    user, i.e. the columns of self.user_item that hold a 1 in that user's
    row.

    Raises:
    IndexError - if user_id is not a label of self.user_item's index
    '''
    matching_rows = np.where(self.user_item.index == user_id)[0]
    if matching_rows.size == 0:
        # Same exception type as the bare [0][0] lookup used to produce,
        # but with a message that names the actual problem.
        raise IndexError(
            'user_id {!r} is not in the user-item matrix'.format(user_id)
        )
    user_row = matching_rows[0]

    # Positions of the columns flagged with a 1 for this user.
    seen_positions = np.where(self.user_item.iloc[user_row] == 1)[0]

    # Read the labels straight from the column index instead of
    # materialising one full column per article just to get its name.
    # Ids are returned as str to match the expected output type.
    article_ids = [
        str(article_id)
        for article_id in self.user_item.columns[seen_positions]
    ]
    article_names = Utils.get_article_names(
        article_ids, self.interactions_df, 'title'
    )
    return article_ids, article_names
def make_SVD_recommendations(self, user_id, num_recommendations=10):
    '''
    INPUT:
    user_id - (int) the id of the user we want recommended articles for;
              must be one of the labels of self.user_item's index
    num_recommendations - (int) the number of recommendations expected as
                          an output, by default 10

    OUTPUT:
    recommended_articles - at most num_recommendations article names, as
                           returned by Utils.get_article_names for the
                           'title' column of self.interactions_df

    Description:
    Rebuilds the user's predicted interaction scores from the factor
    matrices (u_matrix . s_matrix . vt_matrix) and recommends the articles
    with the highest scores.

    Raises:
    IndexError - if user_id is not a label of self.user_item's index
    '''
    if num_recommendations <= 0:
        # A [-0:] slice would select everything instead of nothing.
        return []

    matching_rows = np.where(self.user_item.index == user_id)[0]
    if matching_rows.size == 0:
        # Same exception type get_user_articles raises for unknown users.
        raise IndexError(
            'user_id {!r} is not in the user-item matrix'.format(user_id)
        )
    user_row = matching_rows[0]

    # Only the requested user's row is reconstructed; previously the whole
    # prediction matrix was sorted and user_id was never consulted.
    # NOTE(review): assumes row i of u_matrix belongs to row i of
    # self.user_item - confirm against the code that fits the SVD.
    user_factors = np.asarray(self.u_matrix)[user_row]
    scores = np.dot(np.dot(user_factors, self.s_matrix), self.vt_matrix)
    scores = np.asarray(scores).ravel()

    # Rank on the raw scores: rounding is monotone, so this agrees with a
    # ranking of the rounded predictions and only breaks their ties.
    articles_idx = np.argsort(scores, kind='stable')[::-1][:num_recommendations]
    rec_ids = self.user_item.columns[articles_idx]

    recommended_articles = Utils.get_article_names(
        rec_ids, self.interactions_df, 'title'
    )
    return recommended_articles[:num_recommendations]
def make_content_user_recommendations(self, _id, num_recommendations=10):
    '''
    INPUT:
    _id - the id of the user we want recommended articles for
    num_recommendations - the number of recommendations expected as an
                          output, by default 10

    Reads self.interactions_df (columns 'user_id' and 'article_id') and
    self.article_content_df (column 'article_id'); similar articles come
    from self.find_similar_articles.

    OUTPUT:
    recommended_articles - the names returned by Utils.get_article_names
                           for the 'doc_full_name' column of
                           self.article_content_df

    Description:
    Takes the articles the user read that are described in
    self.article_content_df, orders them by how often the user interacted
    with each one, then collects the not-yet-read articles similar to each
    of them, in that order, until num_recommendations are found. A user
    with no such articles gets the names of an empty id list.
    '''
    # get the articles a user read
    user_articles_id, _ = self.get_user_articles(_id)

    # filter out the articles that are not in the df of article details
    user_articles_id = [float(i) for i in user_articles_id]
    content_ids = self.article_content_df['article_id']
    user_articles = content_ids[content_ids.isin(user_articles_id)].tolist()

    # Count the user's interactions per article. The user filter is applied
    # once instead of rescanning the whole interactions frame per article.
    user_rows = self.interactions_df[self.interactions_df['user_id'] == _id]
    user_article_col = user_rows['article_id']
    interaction_counts = {
        article: int((user_article_col == article).sum())
        for article in user_articles
    }

    # Most-interacted articles first; ties keep article_content_df order.
    # An empty dict simply yields an empty ranking (no DataFrame sort that
    # would fail on a missing 'num_interactions' column).
    ranked_articles = sorted(
        interaction_counts, key=interaction_counts.get, reverse=True
    )

    # find similar articles in order
    recommended_articles = []
    already_recommended = set()
    for article in ranked_articles:
        if len(recommended_articles) >= num_recommendations:
            break
        articles_sim = self.find_similar_articles(article)
        # assume_unique=True keeps the order of articles_sim.
        unread_articles = np.setdiff1d(
            articles_sim, ranked_articles, assume_unique=True
        )
        for unread_article in unread_articles:
            if unread_article in already_recommended:
                continue
            already_recommended.add(unread_article)
            recommended_articles.append(unread_article)
            if len(recommended_articles) >= num_recommendations:
                break

    recommended_articles = recommended_articles[:num_recommendations]
    recommended_articles = Utils.get_article_names(
        recommended_articles, self.article_content_df, 'doc_full_name'
    )
    return recommended_articles
def make_user_user_recommendations(self, user_id, num_recommendations=10):
    '''
    INPUT:
    user_id - (int) a user id
    num_recommendations - (int) the number of recommendations you want for
                          the user, by default 10

    OUTPUT:
    recommended_articles - the names returned by Utils.get_article_names
                           (column 'title' of self.interactions_df) for at
                           most num_recommendations recommended article ids

    Description:
    Loops through the neighbours in the order given by
    self.get_top_sorted_users(user_id). For each neighbour, walks the
    articles in the order given by Utils.get_top_articles_df and keeps the
    ones the input user has not seen yet. Stops once num_recommendations
    ids are collected.

    Notes:
    * Ids are kept in the order they are found, so articles from earlier
      neighbours, and earlier-ranked articles of one neighbour, win when
      the list is truncated.
    '''
    neighbors_df = self.get_top_sorted_users(user_id)
    user_articles_id, _ = self.get_user_articles(user_id)

    # get_user_articles returns ids as str, so candidates are compared as
    # str too.
    # NOTE(review): presumably str() of the labels returned by
    # Utils.get_top_articles_df matches that format - confirm.
    already_seen = set(user_articles_id)

    recs = []
    queued = set()
    for neighbor in neighbors_df.index:
        if len(recs) >= num_recommendations:
            break
        neighbor_articles_id, _ = self.get_user_articles(neighbor)
        sorted_neighbor_article_ids = Utils.get_top_articles_df(
            neighbor_articles_id, self.interactions_df
        ).index.values

        # Append in ranking order; merging through np.unique would sort the
        # ids and discard the neighbour/article priority.
        for article_id in sorted_neighbor_article_ids:
            article_id = str(article_id)
            if article_id in already_seen or article_id in queued:
                continue
            queued.add(article_id)
            recs.append(article_id)

    recs = recs[:max(num_recommendations, 0)]
    recommended_articles = Utils.get_article_names(
        recs, self.interactions_df, 'title'
    )
    return recommended_articles
def make_content_article_recommendations(self, _id, num_recommendations=10):
    '''
    INPUT:
    _id - the id of the article we want similar articles for
    num_recommendations - the number of recommendations expected as an
                          output, by default 10

    OUTPUT:
    recommended_articles - the names returned by Utils.get_article_names
                           for the 'doc_full_name' column of
                           self.article_content_df

    Description:
    Asks self.find_similar_articles for the articles similar to _id, keeps
    the first num_recommendations of them and translates those ids to
    names.
    '''
    top_similar_ids = self.find_similar_articles(_id)[:num_recommendations]
    return Utils.get_article_names(
        top_similar_ids, self.article_content_df, 'doc_full_name'
    )