Esempio n. 1
0
    def get_user_articles(self, user_id):
        '''
        INPUT:
        user_id - a user id; must be present in the index of self.user_item

        Reads from the instance:
        self.user_item - (pandas dataframe) matrix of users by articles:
                         1's when a user has interacted with an article, 0 otherwise
        self.interactions_df - dataframe handed to Utils.get_article_names to resolve the 'title' column

        OUTPUT:
        article_ids - (list) the ids of the articles seen by the user, as strings
        article_names - the names associated with article_ids, as returned by
                        Utils.get_article_names(..., 'title')

        Description:
        Provides the article ids and article titles that have been seen by a user.
        Raises IndexError when user_id is not found in the index of self.user_item.
        '''
        # positional row of the user; indexing an empty match with [0] raises IndexError
        row_position = np.nonzero(self.user_item.index == user_id)[0][0]

        # positions of the columns flagged with 1 for this user
        interaction_flags = self.user_item.iloc[row_position]
        seen_positions = np.nonzero((interaction_flags == 1).values)[0]

        # column labels are the article ids; cast to str to match the expected output type
        column_labels = self.user_item.columns
        article_ids = [str(column_labels[position]) for position in seen_positions]

        article_names = Utils.get_article_names(article_ids, self.interactions_df, 'title')

        return article_ids, article_names
Esempio n. 2
0
    def make_SVD_recommendations(self, user_id, num_recommendations=10):
        '''
        INPUT:
        user_id - a user id; must be present in the index of self.user_item
        num_recommendations - (int) the number of recommendations expected as an output, by default 10

        Reads from the instance:
        self.u_matrix, self.s_matrix, self.vt_matrix - the SVD factors whose product
                                                       approximates the user-item matrix
        self.user_item - (pandas dataframe) matrix of users by articles
        self.interactions_df - dataframe handed to Utils.get_article_names to resolve the 'title' column

        OUTPUT:
        recommended_articles - the names of the articles with the highest predicted score
                               for the user, as returned by Utils.get_article_names

        Description:
        Rebuilds the predicted interaction scores of a single user from the SVD factors and
        returns the names of the top scoring articles.
        Raises IndexError when user_id is not found in the index of self.user_item.
        '''
        # NOTE(review): assumes the rows of u_matrix follow self.user_item.index and the
        # columns of vt_matrix follow self.user_item.columns - confirm where the SVD is fitted
        user_rows = np.where(self.user_item.index == user_id)[0]
        if len(user_rows) == 0:
            raise IndexError('user_id {} not found in user_item'.format(user_id))
        user_factors = np.asarray(self.u_matrix)[user_rows[0]]

        # Score only the requested user. The scores are deliberately left unrounded:
        # rounding collapses them into ties and makes the top-N ordering arbitrary.
        preds = np.ravel(np.dot(np.dot(user_factors, self.s_matrix), self.vt_matrix))

        # best scores first; clamp so that 0 (or a negative value) yields no recommendation
        top_n = max(num_recommendations, 0)
        articles_idx = np.argsort(preds)[::-1][:top_n]

        rec_ids = self.user_item.columns[articles_idx]
        recommended_articles = Utils.get_article_names(rec_ids, self.interactions_df, 'title')
        recommended_articles = recommended_articles[:top_n]

        return recommended_articles
Esempio n. 3
0
    def make_content_user_recommendations(self, _id, num_recommendations=10):
        '''
        INPUT:
        _id - the id of the user we want recommended articles for
        num_recommendations - (int) the number of recommendations expected as an output, by default 10

        Reads from the instance:
        self.interactions_df - the dataframe with the interactions of users with articles
                               ('user_id' and 'article_id' columns are used)
        self.article_content_df - the df containing details about the articles
                                  ('article_id' is used here, 'doc_full_name' for the names)
        self.find_similar_articles - gives the articles similar to one article
                                     (presumably ordered from most to least similar - confirm)

        OUTPUT:
        recommended_articles - the names of the recommended articles, as returned by
                               Utils.get_article_names(..., 'doc_full_name')

        Description:
        Walks through the articles the user has read, starting with the ones the user
        interacted with the most, and collects the articles similar to each of them that
        the user has not read yet, until num_recommendations articles are gathered.
        A user without any read article in self.article_content_df gets the result of
        Utils.get_article_names for an empty list of ids.
        '''

        # get the articles a user read (the names are not needed here)
        user_articles_id, _ = self.get_user_articles(_id)

        # filter out the articles that are not in the df of article details
        user_articles_id = [float(i) for i in user_articles_id]
        content_ids = self.article_content_df['article_id']
        user_articles = pd.unique(content_ids[content_ids.isin(user_articles_id)].values)

        # count the interactions of the user with each article in one pass over the df
        user_interactions = self.interactions_df[self.interactions_df['user_id'] == _id]
        interaction_counts = user_interactions['article_id'].value_counts().to_dict()

        # sort the article ids per number of interactions, most read first;
        # sorting a plain list also works when the user has no article to rank
        top_user_articles = sorted(
            user_articles.tolist(),
            key=lambda article: interaction_counts.get(article, 0),
            reverse=True,
        )

        # find similar articles in order
        recommended_articles = []
        already_recommended = set()  # constant-time duplicate check
        for article in top_user_articles:
            articles_sim = self.find_similar_articles(article)
            # assume_unique=True keeps the order of articles_sim instead of sorting the result
            unread_articles = np.setdiff1d(articles_sim, top_user_articles, assume_unique=True)
            for unread_article in unread_articles:
                if unread_article not in already_recommended:
                    already_recommended.add(unread_article)
                    recommended_articles.append(unread_article)

            if len(recommended_articles) >= num_recommendations:
                break

        recommended_articles = recommended_articles[:num_recommendations]
        recommended_articles = Utils.get_article_names(recommended_articles, self.article_content_df, 'doc_full_name')

        return recommended_articles
Esempio n. 4
0
    def make_user_user_recommendations(self, user_id, num_recommendations=10):
        '''
        INPUT:
        user_id - (int) a user id
        num_recommendations - (int) the number of recommendations you want for the user

        Reads from the instance:
        self.get_top_sorted_users - gives a dataframe indexed by neighbor user id
                                    (presumably closest neighbors first - confirm)
        self.interactions_df - the dataframe with the interactions of users with articles

        OUTPUT:
        recommended_articles - the titles of the recommended articles, as returned by
                               Utils.get_article_names(..., 'title')

        Description:
        Loops through the users based on closeness to the input user_id
        For each user - finds articles the user hasn't seen before and provides them as recs
        Does this until num_recommendations recommendations are found

        Notes:
        * Neighbors are visited in the order given by self.get_top_sorted_users.

        * For each neighbor, articles are taken in the order given by
        Utils.get_top_articles_df (presumably most total interactions first - confirm).

        * The order of discovery is kept, so truncating to num_recommendations keeps
        the articles coming from the closest neighbors.
        '''

        recs = []
        already_picked = set()

        neighbors_df = self.get_top_sorted_users(user_id)
        user_articles_id, _ = self.get_user_articles(user_id)
        # get_user_articles returns ids as strings, so every comparison below is done on strings
        already_read = set(user_articles_id)

        for neighbor in neighbors_df.index:
            neighbor_articles_id, _ = self.get_user_articles(neighbor)
            sorted_neighbor_articles_df = Utils.get_top_articles_df(neighbor_articles_id, self.interactions_df)

            for article_id in sorted_neighbor_articles_df.index.values:
                article_id = str(article_id)
                if article_id in already_read or article_id in already_picked:
                    continue
                already_picked.add(article_id)
                # append (rather than re-sort) so earlier, closer neighbors keep priority
                recs.append(article_id)

            if len(recs) >= num_recommendations:
                break

        if len(recs) >= num_recommendations:
            recs = recs[:num_recommendations]

        recommended_articles = Utils.get_article_names(recs, self.interactions_df, 'title')

        return recommended_articles
Esempio n. 5
0
    def make_content_article_recommendations(self, _id, num_recommendations=10):
        '''
        INPUT:
        _id - the id of the article we want similar articles for
        num_recommendations - (int) the number of recommendations expected as an output, by default 10

        Reads from the instance:
        self.find_similar_articles - gives the articles similar to the article _id
        self.article_content_df - the df containing details about the articles,
                                  handed to Utils.get_article_names to resolve 'doc_full_name'

        OUTPUT:
        recommended_articles - the names of the similar articles, as returned by
                               Utils.get_article_names(..., 'doc_full_name')

        Description:
        Keeps the first num_recommendations results of self.find_similar_articles
        and translates them into article names.
        '''

        similar_ids = self.find_similar_articles(_id)
        top_similar_ids = similar_ids[:num_recommendations]

        return Utils.get_article_names(top_similar_ids, self.article_content_df, 'doc_full_name')