def eval(train, test):
    _log.info('running training')
    algo.fit(train)
    _log.info('testing %d users', test.user.nunique())
    cand_fun = topn.UnratedCandidates(train)
    recs = batch.recommend(algo, test.user.unique(), 100, cand_fun, test,
                           nprocs=ncpus)
    return recs

def eval(aname, algo, train, test): print("test") fittable = util.clone(algo) fittable = Recommender.adapt(fittable) fittable.fit(train) users = test.user.unique() # now we run the recommender recs = batch.recommend(fittable, users, 100) # add the algorithm name for analyzability recs['Algorithm'] = aname print("recs") print(recs.head()) return recs
def demo_recs():
    """
    A demo set of train, test, and recommendation data.
    """
    train, test = simple_test_pair(ml_test.ratings, f_rates=0.5)

    users = test['user'].unique()
    algo = PopScore()
    algo = PlackettLuce(algo, rng_spec='user')
    algo.fit(train)

    recs = recommend(algo, users, 500)
    return train, test, recs

def eval(train, test):
    _log.info('running training')
    algo.fit(train)
    _log.info('testing %d users', test.user.nunique())
    cand_fun = topn.UnratedCandidates(train)
    recs = batch.recommend(algo, test.user.unique(), 100, cand_fun,
                           nprocs=ncpus)
    # combine with test ratings for relevance data
    res = pd.merge(recs, test, how='left', on=('user', 'item'))
    # fill in missing 0s
    res.loc[res.rating.isna(), 'rating'] = 0
    return res

def objective_fn(params: Dict[str, Any]):
    algo = als.BiasedMF(
        features=params["features"],
        iterations=params["iteration"],
        reg=0.1,
        damping=5,
    )
    model = util.clone(algo)
    model = Recommender.adapt(model)
    model.fit(train_df)

    recs = batch.recommend(model, test_users, recsize)

    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, test_df)

    # hyperopt minimizes the loss, so negate the mean nDCG
    target_metric = -results.ndcg.mean()
    return {"loss": target_metric, "status": STATUS_OK}

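# Hedged sketch of driving objective_fn with hyperopt; the search-space values,
# max_evals, and the globals it relies on (train_df, test_df, test_users,
# recsize) are assumptions for illustration, not taken from the original code.
from hyperopt import fmin, hp, tpe

space = {
    # keys must match what objective_fn reads from `params`
    "features": hp.choice("features", [20, 30, 40, 50]),
    "iteration": hp.choice("iteration", [10, 15, 20]),
}

# TPE search minimizes the returned loss, i.e. maximizes mean nDCG
best = fmin(objective_fn, space, algo=tpe.suggest, max_evals=25)
print(best)
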
def test_uu_implicit_batch_accuracy():
    from lenskit import batch, topn
    import lenskit.crossfold as xf
    import lenskit.metrics.topn as lm

    ratings = lktu.ml100k.load_ratings()

    algo = knn.UserUser(30, center=False, aggregate='sum')

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    rec_lists = []
    for train, test in folds:
        _log.info('running training')
        algo.fit(train.loc[:, ['user', 'item']])
        cands = topn.UnratedCandidates(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(algo, test.user.unique(), 100, cands, test)
        rec_lists.append(recs)
    recs = pd.concat(rec_lists)

    user_dcg = recs.groupby('user').rating.apply(lm.dcg)
    dcg = user_dcg.mean()
    assert dcg >= 0.1

# See that the users are ranked differently in the matrix
print(modelAls.user_index_[0:10])
first = modelAls.user_index_[0]
print(type(first))
firstUser = np.array([first], np.int64)

# Get a recommendation for a user.
# Since als.BiasedMF does not implement the Recommender interface,
# we would normally adapt the model first:
# https://lkpy.lenskit.org/en/stable/interfaces.html#recommendation
# rec = Recommender.adapt(modelAls)
# recs = rec.recommend(first, 10)  # gives an error

# Get 10 recommendations for a user (pandas DataFrame)
recs = batch.recommend(modelAls, firstUser, 10, topn.UnratedCandidates(train), test)
print(recs)

# Get the first recommended item
firstRec = recs.iloc[0, 0]
firstRecScore = recs.iloc[0, 1]
print(firstRec)

# Get the explanation of the recommendation
# Get the index of the items
items = modelAls.item_index_
# Find the index of the first recommended item
indexFirstRec = items.get_loc(firstRec)
# Get the latent feature vectors of the user and the item
userProfile = modelAls.user_features_[0]
itemProfile = modelAls.item_features_[indexFirstRec]

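# Hedged sketch relating the latent factors to the recommendation score: for a
# biased MF model the personalized component is the dot product of the user and
# item feature vectors, with global/user/item bias terms added on top. Exact
# bias handling depends on the LensKit version, so this is illustrative only.
latent_score = np.dot(userProfile, itemProfile)
print('latent-factor contribution:', latent_score)
print('score reported for the recommendation:', firstRecScore)
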
def eval(aname, algo, train, test, n):
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)

    # predict ratings
    ratings_est = fittable.predict(test[['user', 'item']])
    print(len(ratings_est))
    print(len(test['rating']))

    # now we run the recommender
    users = test.user.unique()
    recs = batch.recommend(fittable, users, n)
    # add the algorithm name for analyzability
    recs['Algorithm'] = aname

    # label each recommendation as relevant (1) if the user rated it >= 4 in the test set
    y_true = []
    for i in range(len(recs)):
        row = recs.iloc[i]
        user_id = row['user']
        item_id = row['item']
        boolen_ls = (test['user'] == user_id)
        chosen_rows = [j for j, x in enumerate(boolen_ls) if x]
        focs_test = test.iloc[chosen_rows]
        focs_test = focs_test[focs_test['rating'] >= 4]
        # use .values so membership is tested against item IDs, not the index
        if item_id in focs_test['item'].values:
            y_true.append(1)
        else:
            y_true.append(0)

    def coverage(preds, items, num_items):
        rec_item = []
        for i in range(len(preds)):
            # threshold for beer data; for jester use preds[i] > 0
            if preds[i] >= 4:
                rec_item.append(items[i])
        return len(set(rec_item)) / num_items

    def hit_rate(preds, labels, users, topk=10):
        user_pred_dict = {}
        hit_rates = []
        for i in range(len(preds)):
            if users[i] not in user_pred_dict:
                user_pred_dict[users[i]] = []
            user_pred_dict[users[i]].append((preds[i], labels[i]))
        for user in user_pred_dict:
            user_res = sorted(user_pred_dict[user], key=lambda x: x[0])[-topk:]
            hit_rates.append(np.sum([int(x[1]) > 0 for x in user_res]) / topk)
        return np.mean(hit_rates)

    all_df = pd.concat([train, test])
    item_count = len(all_df['item'].unique())
    auc_score = roc_auc_score(y_true, recs['score'])
    cov = coverage(recs['score'], recs['item'], item_count)
    # pass the relevance labels (not the item IDs) so hit rate reflects actual hits
    hit = hit_rate(recs['score'], y_true, recs['user'])
    return auc_score, cov, hit

def eval(train, test):
    _log.info('running training')
    algo.fit(train)
    _log.info('testing %d users', test.user.nunique())
    recs = batch.recommend(algo, test.user.unique(), 100, nprocs=ncpus)
    return recs

model = Recommender.adapt(model)
model.fit(train_df)

# COMMAND ----------

# build a test-user subset with an equal split between long-tail and short-head preference
df1 = test_df.merge(user_pref.toPandas(), "left", left_on="user", right_on="userId")
df2 = df1.query("longtail_pref >= 0.5").sample(n=250, random_state=123)
df3 = df1.query("longtail_pref < 0.5").sample(n=250, random_state=123)
test_users = pd.concat([df2, df3]).user.unique()

recs = batch.recommend(model, test_users, 100)

rla = topn.RecListAnalysis()
rla.add_metric(topn.ndcg)
results = rla.compute(recs, test_df)
print(f"NDCG: {results.ndcg.mean()}")

# COMMAND ----------

# MAGIC %md
# MAGIC Save the results of `recs`:

# COMMAND ----------
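# Hedged illustration of the elided save step; the output path and the Parquet
# format are assumptions, not taken from the original notebook.
recs.to_parquet("/dbfs/tmp/recs.parquet", index=False)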