Code Example #1
def eval(train, test):
    # `algo` and `ncpus` come from the enclosing scope
    _log.info('running training')
    algo.fit(train)
    _log.info('testing %d users', test.user.nunique())
    # only recommend items the user has not rated in training
    cand_fun = topn.UnratedCandidates(train)
    recs = batch.recommend(algo,
                           test.user.unique(),
                           100,
                           cand_fun,
                           test,
                           nprocs=ncpus)
    return recs
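
This closure relies on `algo` and `ncpus` from its enclosing scope. A minimal driver sketch, assuming a loaded `ratings` frame; the algorithm and worker count are hypothetical choices:

import logging
import pandas as pd
from lenskit import batch, topn, crossfold as xf
from lenskit.algorithms import item_knn as knn

_log = logging.getLogger(__name__)
algo = knn.ItemItem(20)   # hypothetical algorithm choice
ncpus = 4                 # hypothetical worker count

all_recs = []
for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
    all_recs.append(eval(train, test))
all_recs = pd.concat(all_recs, ignore_index=True)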
Code Example #2
def eval(aname, algo, train, test):
    # clone so the original algorithm object stays untrained
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)
    users = test.user.unique()
    # now we run the recommender
    recs = batch.recommend(fittable, users, 100)
    # add the algorithm name for analyzability
    recs['Algorithm'] = aname
    # preview the recommendations
    print(recs.head())
    return recs
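
A hedged driver for this helper, following the LensKit getting-started pattern; the two algorithms are hypothetical choices and `ratings` is assumed to be loaded:

import pandas as pd
from lenskit import crossfold as xf
from lenskit.algorithms import als, item_knn as knn

algo_ii = knn.ItemItem(20)    # hypothetical
algo_als = als.BiasedMF(50)   # hypothetical

all_recs = []
for train, test in xf.partition_users(ratings[['user', 'item', 'rating']],
                                      5, xf.SampleFrac(0.2)):
    all_recs.append(eval('ItemItem', algo_ii, train, test))
    all_recs.append(eval('ALS', algo_als, train, test))
all_recs = pd.concat(all_recs, ignore_index=True)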
Code Example #3
def demo_recs():
    """
    A demo set of train, test, and recommendation data.
    """
    train, test = simple_test_pair(ml_test.ratings, f_rates=0.5)

    users = test['user'].unique()
    algo = PopScore()
    algo = PlackettLuce(algo, rng_spec='user')
    algo.fit(train)

    recs = recommend(algo, users, 500)
    return train, test, recs
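
A hedged usage sketch for this fixture, scoring its stochastic recommendations the same way Examples #5 and #10 do (calling it here as a plain function):

from lenskit import topn

train, test, recs = demo_recs()

rla = topn.RecListAnalysis()
rla.add_metric(topn.ndcg)
results = rla.compute(recs, test)
print(results.ndcg.mean())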
Code Example #4
def eval(train, test):
    _log.info('running training')
    algo.fit(train)
    _log.info('testing %d users', test.user.nunique())
    cand_fun = topn.UnratedCandidates(train)
    recs = batch.recommend(algo,
                           test.user.unique(),
                           100,
                           cand_fun,
                           nprocs=ncpus)
    # combine with test ratings for relevance data
    res = pd.merge(recs, test, how='left', on=('user', 'item'))
    # fill in missing ratings with 0
    res.loc[res.rating.isna(), 'rating'] = 0
    return res
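
Because every recommended item now carries a relevance rating, the result can be scored per user just like Example #6 (a sketch, assuming `algo` and `ncpus` are defined as above):

import lenskit.metrics.topn as lm

res = eval(train, test)
user_dcg = res.groupby('user').rating.apply(lm.dcg)
print(user_dcg.mean())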
Code Example #5
File: optimization.py Project: hsm207/fair-recsys
    def objective_fn(params: Dict[str, Any]):
        algo = als.BiasedMF(
            features=params["features"],
            iterations=params["iteration"],
            reg=0.1,
            damping=5,
        )

        model = util.clone(algo)
        model = Recommender.adapt(model)
        model.fit(train_df)

        recs = batch.recommend(model, test_users, recsize)

        rla = topn.RecListAnalysis()
        rla.add_metric(topn.ndcg)

        results = rla.compute(recs, test_df)

        # hyperopt minimizes the loss, so negate mean nDCG to maximize it
        target_metric = -results.ndcg.mean()

        return {"loss": target_metric, "status": STATUS_OK}
Code Example #6
File: test_knn_user_user.py Project: rburke2233/lkpy
def test_uu_implicit_batch_accuracy():
    from lenskit import batch, topn
    import lenskit.crossfold as xf
    import lenskit.metrics.topn as lm

    ratings = lktu.ml100k.load_ratings()

    algo = knn.UserUser(30, center=False, aggregate='sum')

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    rec_lists = []
    for train, test in folds:
        _log.info('running training')
        algo.fit(train.loc[:, ['user', 'item']])
        cands = topn.UnratedCandidates(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(algo, test.user.unique(), 100, cands, test)
        rec_lists.append(recs)
    recs = pd.concat(rec_lists)

    user_dcg = recs.groupby('user').rating.apply(lm.dcg)
    dcg = user_dcg.mean()
    assert dcg >= 0.1
Code Example #7
File: ALS.py Project: MartijnMillecamp/WebRec
# Note that the model's matrix orders users by its own index, not by raw user ID
print(modelAls.user_index_[0:10])
first = modelAls.user_index_[0]
print(type(first))
firstUser = np.array([first], np.int64)

# Get recommendations for a user.
# Since als.BiasedMF does not implement Recommender,
# the model first needs to be adapted:
# https://lkpy.lenskit.org/en/stable/interfaces.html#recommendation
# rec = Recommender.adapt(modelAls)
# recs = rec.recommend(first, 10)  # gives an error


# Get 10 recommendations for a user (pandas dataframe)
recs = batch.recommend(modelAls, firstUser,
                       10, topn.UnratedCandidates(train), test)
print(recs)

# Get the first recommended item
firstRec = recs.iloc[0, 0]
firstRecScore = recs.iloc[0, 1]
print(firstRec)

# Get the explanation of the recommendation
# Get the index of the items
items = modelAls.item_index_
# Find the index of the first item
indexFirstRec = items.get_loc(firstRec)
# Get the feature values of the user
userProfile = modelAls.user_features_[0]
itemProfile = modelAls.item_features_[indexFirstRec]
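
With both latent-factor vectors in hand, their dot product gives the factor part of the score; a sketch (note that BiasedMF also adds global, user, and item bias terms, omitted here):

# row 0 of user_features_ corresponds to `first` in user_index_
factor_score = np.dot(userProfile, itemProfile)
print(factor_score)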
Code Example #8
    def eval(aname, algo, train, test, n):
        fittable = util.clone(algo)
        fittable = Recommender.adapt(fittable)
        fittable.fit(train)

        # predict ratings; sanity-check that predictions align with the test set
        ratings_est = fittable.predict(test[['user', 'item']])
        print(len(ratings_est))
        print(len(test['rating']))
        # now we run the recommender
        users = test.user.unique()
        recs = batch.recommend(fittable, users, n)
        # add the algorithm name for analyzability
        recs['Algorithm'] = aname

        # a recommendation is relevant if the user rated the item >= 4 in the test set
        y_true = []
        for i in range(len(recs)):
            row = recs.iloc[i]
            user_id = row['user']
            item_id = row['item']
            user_test = test[test['user'] == user_id]
            user_test = user_test[user_test['rating'] >= 4]
            y_true.append(1 if item_id in user_test['item'].values else 0)

        def coverage(preds, items, num_items):
            rec_item = []
            for i in range(len(preds)):
                # for beer
                if preds[i] >= 4:
                    # for jester
                    # if preds[i] > 0:
                    rec_item.append(items[i])
            return len(set(rec_item)) / num_items

        def hit_rate(preds, labels, users, topk=10):
            # group (score, label) pairs by user
            user_pred_dict = {}
            for i in range(len(preds)):
                if users[i] not in user_pred_dict:
                    user_pred_dict[users[i]] = []
                user_pred_dict[users[i]].append((preds[i], labels[i]))
            # score the top-k items of each user
            hit_rates = []
            for user in user_pred_dict:
                user_res = sorted(user_pred_dict[user],
                                  key=lambda x: x[0])[-topk:]
                hit_rates.append(
                    np.sum([int(x[1]) > 0 for x in user_res]) / topk)
            return np.mean(hit_rates)

        all_df = pd.concat([train, test])

        item_count = len(all_df['item'].unique())

        auc_score = roc_auc_score(y_true, recs['score'])
        cov = coverage(recs['score'].values, recs['item'].values, item_count)
        hit = hit_rate(recs['score'].values, y_true, recs['user'].values)

        return auc_score, cov, hit
Code Example #9
def eval(train, test):
    _log.info('running training')
    algo.fit(train)
    _log.info('testing %d users', test.user.nunique())
    recs = batch.recommend(algo, test.user.unique(), 100, nprocs=ncpus)
    return recs
Code Example #10
model = Recommender.adapt(model)
model.fit(train_df)

# COMMAND ----------

# take a subset of test users, split evenly between long-tail and short-head preference
df1 = test_df.merge(user_pref.toPandas(),
                    "left",
                    left_on="user",
                    right_on="userId")
df2 = df1.query("longtail_pref >= 0.5").sample(n=250, random_state=123)
df3 = df1.query("longtail_pref < 0.5").sample(n=250, random_state=123)

test_users = pd.concat([df2, df3]).user.unique()

recs = batch.recommend(model, test_users, 100)

rla = topn.RecListAnalysis()
rla.add_metric(topn.ndcg)

results = rla.compute(recs, test_df)

print(f"NDCG: {results.ndcg.mean()}")

# COMMAND ----------

# MAGIC %md
# MAGIC Save the results of `recs`:

# COMMAND ----------
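
A hypothetical save cell; the original notebook ends here, so the destination and format are assumptions:

# persist the recommendations (hypothetical path and format)
recs.to_parquet("recs.parquet", index=False)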