def do_prepare(opts):
    """Build train/test data and baseline recommendations for a data set.

    Loads the MovieLens data set named by the ``-d`` option, holds out a
    test sample of users, and writes both the test ratings and the top-100
    recommendations from two baselines (Popular and implicit-feedback ALS)
    to Parquet files under ``data/``.
    """
    dataset = opts['-d']
    movielens = MovieLens(f'data/{dataset}')

    # One partition: sample 10000 users, holding out 5 ratings each.
    train, test = next(sample_users(movielens.ratings, 1, 10000, SampleN(5)))
    test.to_parquet(f'data/{dataset}-test.parquet', index=False)
    test_users = test['user'].unique()

    _log.info('getting popular recs')
    pop_model = Popular()
    pop_model.fit(train)
    pop_recs = recommend(pop_model, test_users, 100)

    _log.info('getting ALS recs')
    mf_model = Recommender.adapt(ImplicitMF(20, iterations=10))
    # Implicit-feedback model — train on interactions only, no rating values.
    mf_model.fit(train.drop(columns=['rating']))
    als_recs = recommend(mf_model, test_users, 100)

    _log.info('merging recs')
    merged = pd.concat({
        'Popular': pop_recs,
        'ALS': als_recs
    }, names=['Algorithm'])
    merged.reset_index('Algorithm', inplace=True)
    merged.to_parquet(f'data/{dataset}-recs.parquet', index=False)
def test_pop_recommend(ml20m, rng, n_jobs):
    """Popular should yield recommendations for every one of 10000 sampled users."""
    sample = rng.choice(ml20m['user'].unique(), 10000, replace=False)
    algo = Popular()

    _log.info('training %s', algo)
    algo.fit(ml20m)

    _log.info('recommending with %s', algo)
    recommendations = batch.recommend(algo, sample, 10, n_jobs=n_jobs)

    # Every sampled user must appear in the output.
    assert recommendations['user'].nunique() == 10000
def test_store_save(store_cls):
    """A model round-tripped through a store is an equal but distinct copy."""
    original = Popular()
    original.fit(lktu.ml_test.ratings)

    with store_cls() as store:
        key = store.put_model(original)
        restored = store.get_model(key)

        # Distinct objects and distinct backing arrays...
        assert restored is not original
        assert restored.item_pop_ is not original.item_pop_
        # ...but element-wise identical popularity scores.
        assert all(restored.item_pop_ == original.item_pop_)

        # Drop the copy before the store context closes.
        del restored
def test_store_client_pickle(store_cls):
    """A store client and model key survive pickling and still resolve the model."""
    original = Popular()
    original.fit(lktu.ml_test.ratings)

    with store_cls() as store:
        key = store.put_model(original)

        # Round-trip both the client and the key through pickle, as would
        # happen when shipping them to a worker process.
        client = pickle.loads(pickle.dumps(store.client()))
        key = pickle.loads(pickle.dumps(key))

        restored = client.get_model(key)
        assert restored is not original
        assert restored.item_pop_ is not original.item_pop_
        assert all(restored.item_pop_ == original.item_pop_)

        # Drop the copy before the store context closes.
        del restored
def run(self, strategy_context: RecommenderAlgorithmStrategyContext
        ) -> np.ndarray:
    """Train a Popular recommender on the context's data set and return
    recommendations for the held-out test users.

    The data set is read via the configured reader factory, split once with
    a 20% per-user sample, and the trained model's recommendations are
    returned as a 2-D array of item ids, one row per user.
    """
    source = strategy_context.data_set_source
    reader: DataFrameReaderStrategy = self.data_frame_reader_factory.create(
        source)
    ratings: DataFrame = reader.parse(DataFrameReaderStrategyContext(source))

    # Single train/test split: hold out 20% of each user's rows.
    split = list(
        partition_users(data=ratings, partitions=1,
                        method=crossfold.SampleFrac(0.2)))[0]
    test, train = split.test, split.train

    n_recs = strategy_context.number_of_recommendations
    model = Popular().fit(train)
    recommendations = lenskit.batch.recommend(model, test['user'].unique(),
                                              n_recs)

    # NOTE(review): the reshape assumes every user received exactly n_recs
    # items — confirm the recommender never returns short lists here.
    grouped = recommendations.groupby('user')['item'].apply(lambda x: x)
    return grouped.to_numpy().reshape((-1, n_recs))