def create_test_and_train_user_item(self, train_percent=70):
    '''
    Split self.interactions_df by row position into a training part and a
    test part, and build a user-item matrix for each part.

    The first train_percent % of the rows (rounded down) form the training
    set and all remaining rows form the test set. Rows are taken in their
    existing order; nothing is shuffled.

    INPUT:
    train_percent - (int or float, default 70) percentage of the rows,
                    between 0 and 100 inclusive, assigned to training

    OUTPUT:
    user_item_train - the result of Utils.create_user_item_matrix applied to
                      the training rows (per the original notes: unique
                      users for each row and unique articles for each column)
    user_item_test - the result of Utils.create_user_item_matrix applied to
                     the test rows

    RAISES:
    ValueError - if train_percent is not within [0, 100]
    '''
    # A value outside [0, 100] would make one of the lengths negative, and
    # head()/tail() treat negative counts as "all but N rows", which would
    # silently produce overlapping or wrong splits.
    if not 0 <= train_percent <= 100:
        raise ValueError(
            'train_percent must be between 0 and 100, got {}'.format(
                train_percent))

    num_interactions = len(self.interactions_df)

    # Same arithmetic as the historical fixed 70/30 split, so the default
    # produces exactly the same row counts as before.
    len_train = int(train_percent * num_interactions / 100)
    len_test = num_interactions - len_train

    df_train = self.interactions_df.head(len_train)
    df_test = self.interactions_df.tail(len_test)

    # Reuse the shared matrix builder so both halves share one layout logic.
    user_item_train = Utils.create_user_item_matrix(df_train)
    user_item_test = Utils.create_user_item_matrix(df_test)

    return (user_item_train, user_item_test)
# pandas is required for pd.read_csv below; imported here so the script runs
# on its own.
import pandas as pd

from classfile import RecommendationEngine
from utility import Utils

# Import and clean the data sources.
interactions_df = pd.read_csv('data/user-item-interactions.csv')
article_content_df = pd.read_csv('data/articles_community.csv')

# Both CSV files carry an 'Unnamed: 0' column that is not used; drop it.
del interactions_df['Unnamed: 0']
del article_content_df['Unnamed: 0']

# Replace the raw 'email' column with the user ids produced by
# Utils.email_mapper.
email_encoded = Utils.email_mapper(interactions_df['email'])
del interactions_df['email']
interactions_df['user_id'] = email_encoded

# Create a matrix of user-article interactions.
user_item = Utils.create_user_item_matrix(interactions_df)

# Create an instance of the Recommendation Engine that can be used for
# multiple situations.
rec_engine = RecommendationEngine(interactions_df, article_content_df,
                                  user_item)

# Test the code for a few situations (expected returned output of 10 article
# titles).

# Recommendations for an article.
_id_type = 'article'
_id = 10
recommended_articles = rec_engine.make_recommendations(_id, _id_type)

print('Test article')
print(
    f'The following articles are recommended based on your query for '
    f'{_id_type} id {_id}:'
)
print(recommended_articles)