def main(args):
    """Run the batch-recommendation pipeline described by the docopt *args*.

    Scans the ``--splits`` directory for ``test-*`` CSV files; for each one,
    obtains matching training data (a sibling ``train-*`` file, or derived
    from a registered data set), trains the requested algorithm in an
    isolated subprocess, and writes recommendations — and, for predictors,
    predictions — into the ``-o`` output directory.
    """
    mod_name = args.get('-m')
    # renamed from `input` to avoid shadowing the builtin
    split_dir = args.get('--splits')
    output = args.get('-o')
    n_recs = int(args.get('-n'))
    algo_name = args.get('ALGO')
    _log.info(f'importing from module {mod_name}')
    algorithms = importlib.import_module(mod_name)
    algo = getattr(algorithms, algo_name)
    algo = Recommender.adapt(algo)
    path = Path(split_dir)
    dest = Path(output)
    dest.mkdir(exist_ok=True, parents=True)
    # A data set definition matching the directory name lets us reconstruct
    # training data when no explicit train-* file accompanies a test split.
    ds_def = getattr(datasets, path.name, None)
    for file in path.glob("test-*"):
        test = pd.read_csv(file, sep=',')
        suffix = file.name[5:]
        train_file = path / f'train-{suffix}'
        timer = util.Stopwatch()
        if 'index' in test.columns:
            _log.info('setting test index')
            test = test.set_index('index')
        else:
            # Logger.warn is a deprecated alias of Logger.warning
            _log.warning('no index column found in %s', file.name)
        if train_file.exists():
            _log.info('[%s] loading training data from %s', timer, train_file)
            # reuse the path computed above instead of rebuilding it
            train = pd.read_csv(train_file, sep=',')
        elif ds_def is not None:
            _log.info('[%s] extracting training data from data set %s', timer, path.name)
            train = datasets.ds_diff(ds_def.ratings, test)
            train.reset_index(drop=True, inplace=True)
        else:
            _log.error('could not find training data for %s', file.name)
            continue
        _log.info('[%s] Fitting the model', timer)
        # We train isolated to manage resource use
        model = batch.train_isolated(algo, train)
        try:
            _log.info('[%s] generating recommendations for unique users', timer)
            users = test.user.unique()
            recs = batch.recommend(model, users, n_recs)
            _log.info('[%s] writing recommendations to %s', timer, dest)
            recs.to_csv(dest / f'recs-{suffix}', index=False)
            if isinstance(algo, Predictor) and not args['--no-predict']:
                _log.info('[%s] generating predictions for user-item', timer)
                preds = batch.predict(model, test)
                preds.to_csv(dest / f'pred-{suffix}', index=False)
        finally:
            # always release the isolated model's shared resources
            model.close()
def evaluate(self, algo, train, test, **kwargs):
    """Fit *algo* on *train* (in an isolated process if configured) and
    return top-100 recommendations for each distinct user in *test*.
    Extra keyword arguments are forwarded to ``batch.recommend``.
    """
    _log.info('running training')
    if not self.isolate:
        algo.fit(train)
    else:
        algo = batch.train_isolated(algo, train)
    users = test.user.unique()
    _log.info('testing %d users', len(users))
    return batch.recommend(algo, users, 100, **kwargs)
def test_batch_predict_preshared():
    """Predicting against a pre-shared (isolated) trained model should
    yield one non-missing prediction per test row."""
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf

    algo = basic.Bias()
    train, test = next(xf.sample_users(lktu.ml_test.ratings, 1, 100, xf.SampleN(5)))
    shared = lkb.train_isolated(algo, train)
    preds = lkb.predict(shared, test)
    assert len(preds) == len(test)
    assert all(preds['prediction'].notna())
def test_train_isolate():
    """An isolated-trained recommender can be rehydrated and used."""
    algo = Recommender.adapt(Bias())
    saved = train_isolated(algo, ml_test.ratings)
    try:
        fitted = saved.get()
        assert isinstance(fitted, TopN)
        recs = fitted.recommend(10, 10)
        assert len(recs) == 10
        del recs, fitted
    finally:
        saved.close()
def test_train_isolate_file(tmp_path):
    """Isolated training with an explicit file target persists to that
    path and still yields a usable recommender."""
    fn = tmp_path / 'saved.bpk'
    algo = Recommender.adapt(Bias())
    saved = train_isolated(algo, ml_test.ratings, file=fn)
    try:
        assert saved.path == fn
        fitted = saved.get()
        assert isinstance(fitted, TopN)
        recs = fitted.recommend(10, 10)
        assert len(recs) == 10
        del recs, fitted
    finally:
        saved.close()
def test_als_isolate(ml20m, rng):
    """ALS trained in isolation should recommend for every sampled user
    and predict for every sampled rating pair."""
    users = rng.choice(ml20m['user'].unique(), 5000, replace=False)
    algo = Recommender.adapt(BiasedMF(20, iterations=10))
    _log.info('training %s', algo)
    shared = batch.train_isolated(algo, ml20m)
    try:
        _log.info('recommending with %s', algo)
        recs = batch.recommend(shared, users, 10)
        assert recs['user'].nunique() == 5000
        _log.info('predicting with %s', algo)
        pairs = ml20m.sample(1000)
        preds = batch.predict(shared, pairs)
        assert len(preds) == len(pairs)
    finally:
        shared.close()