Ejemplo n.º 1
0
    def create_test_and_train_user_item(self, train_percent=70):
        '''
        Split the interactions into a training and a test set and build a
        user-item matrix for each of them.

        The split is positional, not random: the first `train_percent`
        percent of the rows of self.interactions_df form the training set
        and all remaining rows form the test set.

        INPUT:
        train_percent - percentage (0 to 100) of the rows used for training;
                        defaults to 70, i.e. a 70/30 train/test split

        OUTPUT:
        user_item_train - a user-item matrix of the training dataframe
                          (unique users for each row and unique articles for each column)
        user_item_test - a user-item matrix of the testing dataframe
                         (unique users for each row and unique articles for each column)

        RAISES:
        ValueError - if train_percent is not between 0 and 100
        '''
        if not 0 <= train_percent <= 100:
            raise ValueError(
                'train_percent must be between 0 and 100, got {}'.format(
                    train_percent))

        num_interactions = len(self.interactions_df)
        # int() truncates, so any fractional row goes to the test set
        len_train = int(train_percent * num_interactions / 100)
        len_test = num_interactions - len_train  # the rest of the rows
        df_train = self.interactions_df.head(len_train)
        df_test = self.interactions_df.tail(len_test)

        # both matrices are built with the shared Utils helper
        user_item_train = Utils.create_user_item_matrix(df_train)
        user_item_test = Utils.create_user_item_matrix(df_test)

        return (user_item_train, user_item_test)
Ejemplo n.º 2
0
import pandas as pd

from classfile import RecommendationEngine
from utility import Utils

# Load the data sources and drop the 'Unnamed: 0' column from each frame
# (presumably the index column written when the CSVs were saved - confirm).
interactions_df = pd.read_csv('data/user-item-interactions.csv')
article_content_df = pd.read_csv('data/articles_community.csv')
del interactions_df['Unnamed: 0']
del article_content_df['Unnamed: 0']

# Replace the 'email' column with the encoded values returned by
# Utils.email_mapper, stored under 'user_id'.
email_encoded = Utils.email_mapper(interactions_df['email'])
del interactions_df['email']
interactions_df['user_id'] = email_encoded

# create a matrix of user-article interactions
user_item = Utils.create_user_item_matrix(interactions_df)

# create an instance of the Recommendation Engine that can be used for multiple situations
rec_engine = RecommendationEngine(interactions_df, article_content_df,
                                  user_item)

# test the code for a few situations (expected returned output of 10 article titles)
# recommendations for an article
_id_type = 'article'
_id = 10
recommended_articles = rec_engine.make_recommendations(_id, _id_type)
print('Test article')
print(
    'The following articles are recommended based on your query for {} id {}:'.
    format(_id_type, _id))
print(recommended_articles)