Esempio n. 1
0
    def test_b_1_lfm_hybrid(self):
        """Fit a LightFM recommender that uses item features and run the shared checks.

        The model is built with the item features of the training observations
        passed as ``external_features`` and ``no_components=50``, fitted for
        20 epochs, and then handed to ``self._test_recommender``.
        """
        self._setup_obs_handler()

        from ml_recsys_tools.recommenders.lightfm_recommender import LightFMRecommender

        # Item features come from the same training observations the model is fitted on.
        item_features = self.state.train_obs.get_item_features()
        hybrid_model = LightFMRecommender(
            external_features=item_features,
            no_components=50,
        )
        hybrid_model.fit(self.state.train_obs, epochs=20)

        self._test_recommender(hybrid_model)
Esempio n. 2
0
    def test_b_1_lfm_recommender(self):
        """Fit a default LightFM recommender, verify it, and keep it on the shared state.

        Checks that the requested epoch count is recorded in ``fit_params``,
        runs ``self._test_recommender`` on the fitted model, and stores the
        model as ``self.state.lfm_rec``.
        """
        self._setup_obs_handler()

        from ml_recsys_tools.recommenders.lightfm_recommender import LightFMRecommender

        n_epochs = 20
        model = LightFMRecommender()
        model.fit(self.state.train_obs, epochs=n_epochs)

        # The epochs passed to fit() must be recorded in the model's fit parameters.
        self.assertEqual(model.fit_params['epochs'], n_epochs)
        self._test_recommender(model)
        # NOTE: per-user prediction check is currently disabled:
        # self._test_predict_for_user(model)

        # Store the fitted model on the shared state object.
        self.state.lfm_rec = model
Esempio n. 3
0
# Quickstart: train and evaluate a plain LightFM recommender on the prepared dataset.
import pandas as pd

from ml_recsys_tools.data_handlers.interaction_handlers_base import ObservationsDF
from ml_recsys_tools.datasets.prep_movielense_data import get_and_prep_data
from ml_recsys_tools.recommenders.lightfm_recommender import LightFMRecommender

# get_and_prep_data() returns the paths of the ratings, users and movies CSV files
rating_csv_path, users_csv_path, movies_csv_path = get_and_prep_data()

# load the interactions, wrap them in a data handler, then split into train and test
ratings_df = pd.read_csv(rating_csv_path)
obs = ObservationsDF(ratings_df, uid_col='userid', iid_col='itemid')
train_obs, test_obs = obs.split_train_test(ratio=0.2)

# fit the LightFM recommender on the training split
lfm_rec = LightFMRecommender()
lfm_rec.fit(train_obs, epochs=10)

# summary evaluation report on the test split
eval_report = lfm_rec.eval_on_test_by_ranking(
    test_obs.df_obs,
    prefix='lfm ',
    n_rec=100,
)
print(eval_report)

# recommendations for all users, then print a sample
# (training interactions are filtered out by default)
recs = lfm_rec.get_recommendations(lfm_rec.all_users, n_rec=5)
recs_sample = recs.sample(5)
print(recs_sample)

# similar items for all items, then print a sample
simils = lfm_rec.get_similar_items(lfm_rec.all_items, n_simil=5)
simils_sample = simils.sample(10)
print(simils_sample)
# Comparison: LightFM with no features, with item features (hybrid), and features only.
import pandas as pd

from ml_recsys_tools.data_handlers.interactions_with_features import ObsWithFeatures
from ml_recsys_tools.recommenders.lightfm_recommender import LightFMRecommender

# load interactions and item metadata from the prepared CSV files
ratings_df = pd.read_csv(rating_csv_path)
movies_df = pd.read_csv(movies_csv_path)

# data handler that carries item features alongside the interactions
obs = ObsWithFeatures(
    df_obs=ratings_df,
    df_items=movies_df,
    uid_col='userid',
    iid_col='itemid',
    item_id_col='itemid',
)
train_obs, test_obs = obs.split_train_test(ratio=0.2)

# 1) no features - just CF
cf_only = LightFMRecommender()
cf_only.fit(train_obs, epochs=20)
cf_report = cf_only.eval_on_test_by_ranking(test_obs.df_obs, prefix='lfm ', n_rec=100)
print(cf_report)

# 2) movie genres together with CF (hybrid mode) - noted by the author as slightly better
feature_columns = list(movies_df.columns.difference(['itemid']))
hybrid_features = train_obs.get_item_features(bin_cols=feature_columns)
hybrid = LightFMRecommender(external_features=hybrid_features)
hybrid.fit(train_obs, epochs=20)
hybrid_report = hybrid.eval_on_test_by_ranking(test_obs.df_obs, prefix='lfm hybrid ', n_rec=100)
print(hybrid_report)

# 3) genres only - noted by the author as much worse than both, but still better than chance
feature_columns = list(movies_df.columns.difference(['item_ind', 'itemid']))
genre_features = train_obs.get_item_features(bin_cols=feature_columns)
only_feat = LightFMRecommender(
    external_features=genre_features,
    # presumably drops the per-item identity features so only genres are used - TODO confirm
    external_features_params=dict(add_identity_mat=False),
)
only_feat.fit(train_obs, epochs=20)