def test_sparse_to_df(test_specs, python_dataset):
    """Round-trip test: DataFrame -> sparse affinity matrix -> DataFrame.

    Builds the affinity matrix from ``python_dataset``, maps it back to a
    DataFrame with ``map_back_sparse``, and checks the reconstructed frame
    matches the original (sorted by userID) element by element.

    Args:
        test_specs: test-configuration fixture (unused here, kept for the
            shared fixture signature).
        python_dataset: pandas DataFrame fixture with userID/itemID/rating.
    """
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # Instantiate the affinity matrix
    am = AffinityMatrix(DF=python_dataset, **header)

    # Generate the sparse matrix representation (X plus user/item maps)
    X, _, _ = am.gen_affinity_matrix()

    # Inverse: regenerate a pandas df from the sparse matrix, ordered by userID
    DF = am.map_back_sparse(X, kind="ratings")

    # BUG FIX: the original asserted ``a.values.all() == b.values.all()``,
    # which collapses each array to a single truthiness boolean first, so the
    # test passed even for completely different data. Compare elementwise.
    # NOTE(review): assumes map_back_sparse emits rows in the same
    # within-user order as the userID-sorted input — confirm; otherwise
    # sort both frames by [userID, itemID] before comparing.
    expected = python_dataset.sort_values(by=["userID"])
    assert (DF.userID.values == expected.userID.values).all()
    assert (DF.itemID.values == expected.itemID.values).all()
    assert (DF.rating.values == expected.rating.values).all()
def test_sparse_to_df(test_specs, python_dataset):
    """Round-trip test: DataFrame -> sparse affinity matrix -> DataFrame.

    NOTE(review): this re-defines ``test_sparse_to_df`` already defined above
    in this module — Python keeps only the last definition, so pytest will
    silently skip the first. One of the two should be renamed or removed.

    Args:
        test_specs: test-configuration fixture (unused here, kept for the
            shared fixture signature).
        python_dataset: pandas DataFrame fixture with userID/itemID/rating.
    """
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # Instantiate the affinity matrix
    am = AffinityMatrix(DF=python_dataset, **header)

    # Generate the sparse matrix representation.
    # NOTE(review): the sibling test unpacks a 3-tuple here
    # (``X, _, _ = ...``); if gen_affinity_matrix returns (matrix, map, map)
    # this single assignment binds the whole tuple and map_back_sparse would
    # fail — verify against the installed AffinityMatrix API version.
    X = am.gen_affinity_matrix()

    # Inverse: regenerate a pandas df from the sparse matrix, ordered by userID
    DF = am.map_back_sparse(X, kind="ratings")

    # BUG FIX: the original asserted ``a.values.all() == b.values.all()``,
    # which collapses each array to a single truthiness boolean first, so the
    # test passed even for completely different data. Compare elementwise.
    expected = python_dataset.sort_values(by=["userID"])
    assert (DF.userID.values == expected.userID.values).all()
    assert (DF.itemID.values == expected.itemID.values).all()
    assert (DF.rating.values == expected.rating.values).all()
def RBMtrain():
    """Train an RBM recommender on the snacks ratings data and persist top-k recs.

    Side effects:
        - Reads ``SnacksData100.csv`` from the working directory.
        - Trains an RBM on a stratified train/test split of the affinity matrix.
        - Writes the top-k recommendation DataFrame to the file ``testdata``
          via ``joblib.dump``.

    Returns:
        None. Results are persisted to disk only.
    """
    data = pd.read_csv("SnacksData100.csv")
    header = {
        # NOTE(review): "******" looks like a redacted column name — restore
        # the real user-id column of SnacksData100.csv before running.
        "col_user": "******",
        "col_item": "Product_Id",
        "col_rating": "Ratings",
    }
    am = AffinityMatrix(DF=data, **header)

    # Sparse user/item affinity matrix, then a stratified train/test split.
    X = am.gen_affinity_matrix()
    Xtr, Xtst = numpy_stratified_split(X)

    model = RBM(
        hidden_units=600,
        training_epoch=30,
        minibatch_size=60,
        keep_prob=0.9,
        with_metrics=True,
    )
    model.fit(Xtr, Xtst)

    # Top-k recommendations for the test users, mapped back to original ids.
    # (The returned inference time and the original's unused
    # ``test_df = am.map_back_sparse(Xtst, kind='ratings')`` dead local
    # are dropped — neither was ever consumed.)
    top_k, _ = model.recommend_k_items(Xtst)
    top_k_df = am.map_back_sparse(top_k, kind='prediction')

    joblib.dump(top_k_df, 'testdata')
We will now fit the model and apply it to the dataset """) mlask(begin="\n") ### TODO HOW TO KEEP THE OUTPUT QUIET?????? train_time = model.fit(Xtr, Xtst) ### TODO HOW TO KEEP THE OUTPUT QUIET?????? top_k, test_time = model.recommend_k_items(Xtst) # Map the index back to original ids????? top_k_df = am.map_back_sparse(top_k, kind='prediction') test_df = am.map_back_sparse(Xtst, kind='ratings') top_k_df.head(10) def ranking_metrics(data_size, data_true, data_pred, time_train, time_test, K): eval_map = map_at_k(data_true, data_pred, col_user="******", col_item="MovieID", col_rating="Rating", col_prediction="prediction", relevancy_method="top_k", k=K)