def test_svd_clone():
    """Cloning a ``BiasedSVD`` must carry over its configuration.

    Compares the number of factorization components and the user/item bias
    damping of the clone against the algorithm it was cloned from.
    """
    original = svd.BiasedSVD(5, damping=10)
    copy = clone(original)

    # Latent-factor configuration.
    assert copy.factorization.n_components == original.factorization.n_components

    # Bias-model configuration.
    copy_bias, original_bias = copy.bias, original.bias
    assert copy_bias.user_damping == original_bias.user_damping
    assert copy_bias.item_damping == original_bias.item_damping
def eval(aname, algo, train, test, all_preds):
    """Fit a clone of ``algo`` and append its labelled predictions.

    Args:
        aname: Value written to the ``Algorithm`` column of the predictions.
        algo: Algorithm template; the clone (not ``algo``) is what gets fitted.
        train: Training data handed to ``fit``.
        test: Data handed to ``batch.predict``.
        all_preds: Collector with an ``append`` method; receives the
            prediction frame.

    Returns:
        None. The result is delivered through ``all_preds``.
    """
    model = Recommender.adapt(util.clone(algo))
    model.fit(train)

    # Predict ratings and tag them so runs of different algorithms can be
    # told apart once they are combined.
    predictions = batch.predict(model, test)
    predictions['Algorithm'] = aname
    all_preds.append(predictions)
def eval(algo, train, test):
    """Fit a clone of ``algo`` on ``train`` and return its RMSE on ``test``.

    The clone is the object that is fitted and queried. Previously the clone
    was created but never used, and ``algo`` itself was fitted, so every call
    mutated the caller's algorithm object.

    Args:
        algo: Algorithm template to clone.
        train: Training data handed to ``fit``.
        test: Frame handed to ``predict``; its ``rating`` column is the truth
            the predictions are compared against.

    Returns:
        The value of ``predict.rmse`` for the predictions against
        ``test['rating']``.
    """
    fittable = util.clone(algo)
    fittable.fit(train)

    preds = fittable.predict(test)
    return predict.rmse(preds, test['rating'])
def eval(train, test):
    """Fit a clone of ``algo_t`` and recommend 100 items per test user.

    ``algo_t`` is taken from the enclosing scope.

    Args:
        train: Training frame with a ``rating`` column. It is not modified;
            the float conversion is applied to a copy.
        test: Frame whose ``user`` column lists the users to recommend for.

    Returns:
        The result of ``batch.recommend`` for the unique test users.
    """
    _log.info('running training')
    # np.float_ was removed in NumPy 2.0; np.float64 names the same dtype.
    # assign() builds a new frame, so the caller's data is left untouched.
    train = train.assign(rating=train.rating.astype(np.float64))

    algo = util.clone(algo_t)
    algo.fit(train)

    users = test.user.unique()
    _log.info('testing %d users', len(users))
    return batch.recommend(algo, users, 100)
def do_recommend(algo_wrapper, train, test):
    """Produce labelled top-``N`` recommendations for one wrapped algorithm.

    Args:
        algo_wrapper: Object exposing ``algo`` (the algorithm template) and
            ``name`` (the label written to the output).
        train: Training data handed to ``fit``.
        test: Frame whose ``user`` column lists the users to recommend for.

    Returns:
        The ``batch.recommend`` result with an added ``Algorithm`` column.
        The list length ``N`` comes from the enclosing scope.
    """
    model = Recommender.adapt(util.clone(algo_wrapper.algo))
    model.fit(train)

    # Run the recommender for every distinct user in the test data.
    test_users = test.user.unique()
    recommendations = batch.recommend(model, test_users, N)

    # Tag the rows with the algorithm name for later analysis.
    recommendations['Algorithm'] = algo_wrapper.name
    return recommendations
def batch_eval(aname, algo, train, test, n_recs=10):
    """Fit a clone of ``algo`` and recommend items for every test user.

    Args:
        aname: Value written to the ``Algorithm`` column of the result.
        algo: Algorithm template; the clone (not ``algo``) is what gets fitted.
        train: Training data handed to ``fit``.
        test: Frame whose ``user`` column lists the users to recommend for.
        n_recs: Length of each recommendation list. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        The ``batch.recommend`` result with an added ``Algorithm`` column.
    """
    fittable = Recommender.adapt(util.clone(algo))
    fittable.fit(train)

    users = test.user.unique()
    recs = batch.recommend(fittable, users, n_recs)

    # Add the algorithm name for analyzability.
    recs['Algorithm'] = aname
    return recs
def test_fallback_clone():
    """A cloned ``Fallback`` must hold fresh copies of all its sub-algorithms.

    Checks that the clone is a distinct object, that it has as many
    sub-algorithms as the original, and that each one is a distinct object of
    the same type as its counterpart.
    """
    algo = basic.Fallback([basic.Memorized(simple_df), basic.Bias()])
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    cloned = lku.clone(algo)
    assert cloned is not algo

    # zip() stops at the shorter sequence, so without this check a clone that
    # lost sub-algorithms would pass the loop below vacuously.
    assert len(cloned.algorithms) == len(algo.algorithms)

    for a1, a2 in zip(algo.algorithms, cloned.algorithms):
        assert a1 is not a2
        # Exact type identity is intended here, not an isinstance() relation.
        assert type(a2) is type(a1)
def eval(self, aname, algo):
    """
    Train a clone of ``algo`` on ``self.train`` and recommend
    ``self.num_recs`` items for each unique user in ``self.test``.

    The returned frame is tagged with ``aname`` in its ``Algorithm`` column.
    """
    model = Recommender.adapt(util.clone(algo))
    model.fit(self.train)

    test_users = self.test.user.unique()
    recommendations = batch.recommend(model, test_users, self.num_recs)
    recommendations['Algorithm'] = aname
    return recommendations
def test_bias_clone():
    """Cloning a fitted ``Bias`` must yield a distinct, unfitted object.

    Also checks that the parameter names reported by the fitted original are
    exactly ``damping``, ``items`` and ``users``.
    """
    algo = bl.Bias()
    algo.fit(simple_df)

    param_names = sorted(algo.get_params())
    assert param_names == ['damping', 'items', 'users']

    a2 = lku.clone(algo)
    assert a2 is not algo

    # None of the attributes checked here may be set on the clone.
    for fitted_attr in ('mean_', 'item_offsets_', 'user_offsets_'):
        assert getattr(a2, fitted_attr, None) is None
def user_eval(aname, algo, train, userId, n_recs=10):
    """Fit a clone of ``algo`` and recommend items for a single user.

    Args:
        aname: Value written to the ``Algorithm`` column of the result.
        algo: Algorithm template; the clone (not ``algo``) is what gets fitted.
        train: Training data handed to ``fit``.
        userId: User to recommend for.
        n_recs: Length of the recommendation list. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        The result of ``recommend`` with an added ``Algorithm`` column.
    """
    fittable = Recommender.adapt(util.clone(algo))
    fittable.fit(train)

    # NOTE: a variant that loaded the user's own reviews with
    # load_user_reviews_from_table(userId) and passed them to recommend() via
    # ``ratings=`` was disabled here; only the fitted model is used.
    recs = fittable.recommend(userId, n_recs)

    # Add the algorithm name for analyzability.
    recs['Algorithm'] = aname
    return recs
def eval(aname, algo, train, test):
    """Fit a clone of ``algo`` and recommend 100 items per test user.

    Progress markers and the head of the result are printed to stdout.

    Args:
        aname: Value written to the ``Algorithm`` column of the result.
        algo: Algorithm template; the clone (not ``algo``) is what gets fitted.
        train: Training data handed to ``fit``.
        test: Frame whose ``user`` column lists the users to recommend for.

    Returns:
        The ``batch.recommend`` result with an added ``Algorithm`` column.
    """
    print("test")
    model = Recommender.adapt(util.clone(algo))
    model.fit(train)

    # Run the recommender for every distinct test user.
    test_users = test.user.unique()
    recommendations = batch.recommend(model, test_users, 100)

    # Tag the rows with the algorithm name for later analysis.
    recommendations['Algorithm'] = aname

    print("recs")
    print(recommendations.head())
    return recommendations
def objective_fn(params: Dict[str, Any]):
    """Score one ``BiasedMF`` configuration by its negated mean nDCG.

    Args:
        params: Mapping that supplies ``"features"`` and ``"iteration"``;
            these become the model's ``features`` and ``iterations``.

    Returns:
        A dict with ``"loss"`` (the negated mean nDCG, so that a lower loss
        means better ranking) and ``"status"`` set to ``STATUS_OK``.
        ``train_df``, ``test_df``, ``test_users`` and ``recsize`` are read
        from the enclosing scope.
    """
    template = als.BiasedMF(
        features=params["features"],
        iterations=params["iteration"],
        reg=0.1,
        damping=5,
    )
    recommender = Recommender.adapt(util.clone(template))
    recommender.fit(train_df)

    recommendations = batch.recommend(recommender, test_users, recsize)

    analysis = topn.RecListAnalysis()
    analysis.add_metric(topn.ndcg)
    per_user = analysis.compute(recommendations, test_df)

    loss = -per_user.ndcg.mean()
    return {"loss": loss, "status": STATUS_OK}
def eval(aname, algo, train, test):
    """Fit a clone of ``algo`` on ``train``; nothing is returned.

    ``aname`` and ``test`` are accepted but not used by this function.
    """
    model = Recommender.adapt(util.clone(algo))
    model.fit(train)
def test_uu_imp_clone():
    """A clone of an implicit-feedback ``UserUser`` must match the original.

    Both the reported parameters and the full instance state are compared.
    """
    original = knn.UserUser(30, feedback='implicit')
    copy = clone(original)

    assert copy.get_params() == original.get_params()
    # vars(obj) is obj.__dict__, so this compares the complete instance state.
    assert vars(copy) == vars(original)
# Make sure the output directory exists before anything is written to it.
dest.mkdir(parents=True, exist_ok=True)

# Each "test-<suffix>" file is paired with the "train-<suffix>" file that
# shares its suffix; test files without a training partner are skipped.
for file in path.glob("test-*"):
    test = pd.read_csv(file, sep=',')
    suffix = file.name[len("test-"):]

    train_name = f'train-{suffix}'
    try:
        train = pd.read_csv(path / train_name, sep=',')
    except FileNotFoundError:
        _log.error(f'{train_name} does not exists')
        continue

    _log.info('Fitting the model')
    users = test.user.unique()
    fittable = Recommender.adapt(util.clone(algo))
    fittable.fit(train)

    _log.info('generating recommendations for unique users')
    recs = batch.recommend(fittable, users, n_recs)

    _log.info(f'writing recommendations to {dest}')
    # Output files carry the model name in front of the data suffix.
    suffix = model + suffix
    recs.to_csv(dest / f'recs-{suffix}', index=False)

    # Rating predictions are written only when the fitted object is a
    # Predictor.
    if isinstance(fittable, Predictor):
        _log.info('generating predictions for user-item')
        preds = batch.predict(fittable, test)
        preds.to_csv(dest / f'pred-{suffix}', index=False)
def test_ii_imp_clone(): algo = knn.ItemItem(30, save_nbrs=500, feedback='implicit') a2 = clone(algo) assert a2.get_params() == algo.get_params() assert a2.__dict__ == algo.__dict__
def my_clone(obj):
    """Return a copy of ``obj``, preferring the object's own ``clone`` method.

    Objects without a ``clone`` attribute are copied with ``util.clone``.
    """
    if not hasattr(obj, 'clone'):
        return util.clone(obj)
    return obj.clone()
# Load the MovieLens 100k ratings with pandas.
# https://grouplens.org/datasets/movielens/100k/
ratings = pd.read_csv(
    'ml-100k/u.data',
    sep='\t',
    names=['user', 'item', 'rating', 'timestamp'],
)

# Choose the algorithm: matrix factorization trained with alternating least
# squares, using 6 latent features.
# https://lkpy.lenskit.org/en/stable/mf.html#module-lenskit.algorithms.als
algoAls = als.BiasedMF(6)

# Work on a clone of the algorithm, because some algorithms can behave
# strangely after being fitted more than once.
fittableALS = util.clone(algoAls)

# Split the data into a training and a test set: one row per user is held
# out for testing.
data = ratings
nb_partitions = 1
splits = xf.partition_users(data, nb_partitions, xf.SampleN(1))
for trainSet, testSet in splits:
    train, test = trainSet, testSet

# Build the model.
modelAls = fittableALS.fit(train)

# Inspect the first rows of the user-feature matrix (a numpy array).
print(modelAls.user_features_[:10])
.toPandas()

# COMMAND ----------

# Read the per-user preference table from parquet; it is merged onto the test
# data below through its ``userId`` column.
user_pref = spark.read.parquet("/tmp/ml-20m/user_preference.parquet")

# COMMAND ----------

# Configure the matrix-factorization model, then fit a clone of it that has
# been adapted to the Recommender interface.
algo = als.BiasedMF(
    features=382,
    iterations=1,
    reg=0.1,
    damping=5,
)
model = util.clone(algo)
model = Recommender.adapt(model)
model.fit(train_df)

# COMMAND ----------

# have a subset of test users with equal distribution between longtail and shorthead preference
# Left-join keeps every row of test_df and attaches that user's preferences.
df1 = test_df.merge(user_pref.toPandas(), "left", left_on="user", right_on="userId")
# 250 rows from each side of the 0.5 ``longtail_pref`` threshold; the fixed
# random_state makes the draw repeatable.
df2 = df1.query("longtail_pref >= 0.5").sample(n=250, random_state=123)
df3 = df1.query("longtail_pref < 0.5").sample(n=250, random_state=123)
# NOTE(review): sampling is over rows of the merged frame, so if test_df holds
# several rows per user this may yield fewer than 500 distinct users - confirm.
test_users = pd.concat([df2, df3]).user.unique()
def eval(aname, algo, train, test, n):
    """Fit a clone of ``algo`` and score its top-``n`` recommendations.

    A recommended (user, item) pair counts as relevant when ``test`` contains
    a rating of at least 4 for it.

    Args:
        aname: Value written to the ``Algorithm`` column of the
            recommendations.
        algo: Algorithm template; the clone (not ``algo``) is what gets fitted.
        train: Training ratings; must have an ``item`` column.
        test: Test ratings with ``user``, ``item`` and ``rating`` columns.
        n: Length of each user's recommendation list.

    Returns:
        A tuple ``(auc, coverage, hit_rate)``:

        * ``auc`` - ``roc_auc_score`` of the recommendation scores against
          the relevance labels.
        * ``coverage`` - share of all items in ``train`` and ``test`` that
          were recommended with a score of at least 4.
        * ``hit_rate`` - mean over users of the fraction of relevant items
          among their 10 highest-scored recommendations.

    Raises:
        ValueError: Raised by ``roc_auc_score`` when the relevance labels
            contain only one class.
    """
    # Ratings and scores at or above this value count as "liked". The value
    # was chosen for the beer data; a ``> 0`` test was noted for jester.
    like_threshold = 4

    fittable = Recommender.adapt(util.clone(algo))
    fittable.fit(train)

    # Predict ratings; only the sizes are printed, as a sanity check.
    ratings_est = fittable.predict(test[['user', 'item']])
    print(len(ratings_est))
    print(len(test['rating']))

    # Now run the recommender.
    users = test.user.unique()
    recs = batch.recommend(fittable, users, n)
    # Add the algorithm name for analyzability.
    recs['Algorithm'] = aname

    # Relevance labels. A set of liked (user, item) pairs gives one O(1)
    # lookup per recommendation. The previous per-row scan of ``test`` was
    # quadratic, and its ``item_id in series`` test looked at the Series
    # index rather than at the item values.
    liked = test[test['rating'] >= like_threshold]
    relevant = set(zip(liked['user'], liked['item']))
    y_true = [
        int((user_id, item_id) in relevant)
        for user_id, item_id in zip(recs['user'], recs['item'])
    ]

    def coverage(preds, items, num_items):
        # Distinct items recommended with a high enough score, as a share of
        # the catalogue. zip() pairs values by position, so the result does
        # not depend on the index labels of the inputs.
        rec_items = {
            item for pred, item in zip(preds, items) if pred >= like_threshold
        }
        return len(rec_items) / num_items

    def hit_rate(preds, labels, users, topk=10):
        # Group (score, label) pairs by user, keep each user's ``topk``
        # highest-scored pairs and average the per-user share of hits.
        by_user = {}
        for pred, label, user in zip(preds, labels, users):
            by_user.setdefault(user, []).append((pred, label))

        rates = []
        for pairs in by_user.values():
            top = sorted(pairs, key=lambda pair: pair[0])[-topk:]
            rates.append(sum(int(label) > 0 for _, label in top) / topk)
        return np.mean(rates)

    all_df = pd.concat([train, test])
    item_count = len(all_df['item'].unique())

    auc_score = roc_auc_score(y_true, recs['score'])
    cov = coverage(recs['score'], recs['item'], item_count)
    # The labels are the relevance flags; item ids were passed here before.
    hit = hit_rate(recs['score'], y_true, recs['user'])

    # Previously returned the undefined name ``score`` (NameError).
    return auc_score, cov, hit