Example #1
def test_svd_clone():
    algo = svd.BiasedSVD(5, damping=10)

    a2 = clone(algo)
    assert a2.factorization.n_components == algo.factorization.n_components
    assert a2.bias.user_damping == algo.bias.user_damping
    assert a2.bias.item_damping == algo.bias.item_damping
Example #2
def eval(aname, algo, train, test, all_preds):
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)
    # predict ratings
    preds = batch.predict(fittable, test)
    preds['Algorithm'] = aname
    all_preds.append(preds)
Example #3
def eval(algo, train, test):
    fittable = util.clone(algo)
    fittable.fit(train)
    # predict ratings for the held-out test pairs
    preds = fittable.predict(test)

    rmse = predict.rmse(preds, test['rating'])
    return rmse
Example #4
def eval(train, test):
    _log.info('running training')
    train['rating'] = train.rating.astype(np.float64)
    algo = util.clone(algo_t)
    algo.fit(train)
    users = test.user.unique()
    _log.info('testing %d users', len(users))
    recs = batch.recommend(algo, users, 100)
    return recs
Example #5
def do_recommend(algo_wrapper, train, test):
    fittable = util.clone(algo_wrapper.algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)
    users = test.user.unique()
    # now we run the recommender
    recs = batch.recommend(fittable, users, N)
    # add the algorithm name for analyzability
    recs['Algorithm'] = algo_wrapper.name
    return recs
Example #6
def batch_eval(aname, algo, train, test):
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)
    users = test.user.unique()
    # Now we run the recommender
    recs = batch.recommend(fittable, users, 10)
    # Add the algorithm name for analyzability
    recs['Algorithm'] = aname
    return recs
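Helpers like batch_eval are normally driven from a cross-validation loop. A minimal sketch of calling batch_eval above, assuming the MovieLens 100K ratings frame used elsewhere on this page; the algorithm choice and partition counts are illustrative:

import pandas as pd
from lenskit import crossfold as xf
from lenskit.algorithms import item_knn as knn

ratings = pd.read_csv('ml-100k/u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])

all_recs = []
# 5 user partitions, 5 held-out ratings per test user
for train, test in xf.partition_users(ratings, 5, xf.SampleN(5)):
    all_recs.append(batch_eval('ItemItem', knn.ItemItem(20), train, test))
recs = pd.concat(all_recs, ignore_index=True)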
Example #7
def test_fallback_clone():
    algo = basic.Fallback([basic.Memorized(simple_df), basic.Bias()])
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    clone = lku.clone(algo)
    assert clone is not algo
    for a1, a2 in zip(algo.algorithms, clone.algorithms):
        assert a1 is not a2
        assert type(a2) == type(a1)
Example #8
def eval(self, aname, algo):
    """
    Fit the model to the input data and create predictions.
    """
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(self.train)
    users = self.test.user.unique()
    recs = batch.recommend(fittable, users, self.num_recs)
    recs['Algorithm'] = aname
    return recs
Example #9
def test_bias_clone():
    algo = bl.Bias()
    algo.fit(simple_df)

    params = algo.get_params()
    assert sorted(params.keys()) == ['damping', 'items', 'users']

    a2 = lku.clone(algo)
    assert a2 is not algo
    assert getattr(a2, 'mean_', None) is None
    assert getattr(a2, 'item_offsets_', None) is None
    assert getattr(a2, 'user_offsets_', None) is None
Example #10
def user_eval(aname, algo, train, userId):
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)
    #user_ratings = load_user_reviews_from_table(userId)

    # Now we run the recommender
    recs = fittable.recommend(userId, 10)
    #recs = fittable.recommend(userId, 10, ratings=user_ratings)
    # Add the algorithm name for analyzability
    recs['Algorithm'] = aname
    return recs
Example #11
def eval(aname, algo, train, test):
    print("test")
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)
    users = test.user.unique()
    # now we run the recommender
    recs = batch.recommend(fittable, users, 100)
    # add the algorithm name for analyzability
    recs['Algorithm'] = aname
    print("recs")
    print(recs.head())
    return recs
Example #12
    def objective_fn(params: Dict[str, Any]):
        algo = als.BiasedMF(
            features=params["features"],
            iterations=params["iteration"],
            reg=0.1,
            damping=5,
        )

        model = util.clone(algo)
        model = Recommender.adapt(model)
        model.fit(train_df)

        recs = batch.recommend(model, test_users, recsize)

        rla = topn.RecListAnalysis()
        rla.add_metric(topn.ndcg)

        results = rla.compute(recs, test_df)

        target_metric = -results.ndcg.mean()

        return {"loss": target_metric, "status": STATUS_OK}
Example #13
def eval(aname, algo, train, test):
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)

    return fittable
Example #14
def test_uu_imp_clone():
    algo = knn.UserUser(30, feedback='implicit')
    a2 = clone(algo)

    assert a2.get_params() == algo.get_params()
    assert a2.__dict__ == algo.__dict__
Example #15
dest.mkdir(exist_ok=True, parents=True)

for file in path.glob("test-*"):
    test = pd.read_csv(file, sep=',')
    suffix = file.name[5:]

    try:
        train = pd.read_csv(path / f'train-{suffix}', sep=',')
    except FileNotFoundError:
        _log.error(f'train-{suffix} does not exist')
        continue

    _log.info('Fitting the model')

    users = test.user.unique()

    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)

    _log.info(f'generating recommendations for {len(users)} unique users')
    recs = batch.recommend(fittable, users, n_recs)
    _log.info(f'writing recommendations to {dest}')
    suffix = model + suffix
    recs.to_csv(dest / f'recs-{suffix}', index=False)

    if isinstance(fittable, Predictor):
        _log.info('generating predictions for user-item pairs')
        preds = batch.predict(fittable, test)
        preds.to_csv(dest / f'pred-{suffix}', index=False)
Example #16
def test_ii_imp_clone():
    algo = knn.ItemItem(30, save_nbrs=500, feedback='implicit')
    a2 = clone(algo)

    assert a2.get_params() == algo.get_params()
    assert a2.__dict__ == algo.__dict__
Example #17
def my_clone(obj):
    if hasattr(obj, 'clone'):
        return obj.clone()
    else:
        return util.clone(obj)
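Usage note: my_clone only falls back to util.clone when the object does not supply its own clone method. A small sketch with a stock LensKit algorithm (which defines no clone method, so util.clone handles it):

from lenskit.algorithms import basic

algo = basic.Bias(damping=5)
copy = my_clone(algo)   # Bias defines no clone(), so util.clone is used
assert copy is not algo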
Example #18
# read in the movielens 100k ratings with pandas
# https://grouplens.org/datasets/movielens/100k/
ratings = pd.read_csv('ml-100k/u.data', sep='\t',
        names=['user', 'item', 'rating', 'timestamp'])

# define the algorithm we will use
# In this case we use an alternating least squares
# implementation of matrix factorization
# We train 6 features
# https://lkpy.lenskit.org/en/stable/mf.html#module-lenskit.algorithms.als
algoAls = als.BiasedMF(6)

# Clone the algorithm, as otherwise some
# algorithms can behave strangely after being
# fitted multiple times
fittableALS = util.clone(algoAls)

# split the data into a test set and a training set;
# for each user, leave one row out for testing
data = ratings
nb_partitions = 1
splits = xf.partition_users(data, nb_partitions, xf.SampleN(1))
for (trainSet, testSet) in splits:
    train = trainSet
    test = testSet

# Build a model
modelAls = fittableALS.fit(train)

# Inspect the user-feature matrix (numpy array)
print(modelAls.user_features_[0:10])
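Since BiasedMF is also a Predictor, the fitted model can score the held-out pairs directly; a minimal follow-up sketch using LensKit's batch prediction, assuming the train/test split above:

from lenskit import batch

# score the held-out (user, item) pairs with the fitted model
preds = batch.predict(modelAls, test)
print(preds.head())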
Example #19
  .toPandas()

# COMMAND ----------

user_pref = spark.read.parquet("/tmp/ml-20m/user_preference.parquet")

# COMMAND ----------

algo = als.BiasedMF(
    features=382,
    iterations=1,
    reg=0.1,
    damping=5,
)

model = util.clone(algo)
model = Recommender.adapt(model)
model.fit(train_df)

# COMMAND ----------

# build a subset of test users evenly split between longtail and shorthead preference
df1 = test_df.merge(user_pref.toPandas(),
                    "left",
                    left_on="user",
                    right_on="userId")
df2 = df1.query("longtail_pref >= 0.5").sample(n=250, random_state=123)
df3 = df1.query("longtail_pref < 0.5").sample(n=250, random_state=123)

test_users = pd.concat([df2, df3]).user.unique()
Example #20
    def eval(aname, algo, train, test, n):
        fittable = util.clone(algo)
        fittable = Recommender.adapt(fittable)
        fittable.fit(train)

        # predict ratings
        ratings_est = fittable.predict(test[['user', 'item']])
        print(len(ratings_est))
        print(len(test['rating']))
        # now we run the recommender
        users = test.user.unique()
        recs = batch.recommend(fittable, users, n)
        # add the algorithm name for analyzability
        recs['Algorithm'] = aname

        y_true = []
        for i in range(len(recs)):
            row = recs.iloc[i]
            user_id = row['user']
            item_id = row['item']
            # items this user rated >= 4 in the test set
            user_test = test[test['user'] == user_id]
            user_test = user_test[user_test['rating'] >= 4]
            # Series membership checks the index, so compare against values
            if item_id in user_test['item'].values:
                y_true.append(1)
            else:
                y_true.append(0)

        def coverage(preds, items, num_items):
            rec_item = []
            for i in range(len(preds)):
                # for beer
                if preds[i] >= 4:
                    # for jester
                    # if preds[i] > 0:
                    rec_item.append(items[i])
            return len(set(rec_item)) / num_items

        def hit_rate(preds, labels, users, topk=10):
            # group (score, label) pairs by user
            user_pred_dict = {}
            for i in range(len(preds)):
                if users[i] not in user_pred_dict:
                    user_pred_dict[users[i]] = []
                user_pred_dict[users[i]].append((preds[i], labels[i]))
            # for each user, count positives among the top-k scored items
            hit_rates = []
            for user in user_pred_dict:
                user_res = sorted(user_pred_dict[user],
                                  key=lambda x: x[0])[-topk:]
                hit_rates.append(
                    np.sum([int(x[1]) > 0 for x in user_res]) / topk)
            return np.mean(hit_rates)

        all_df = pd.concat([train, test])

        item_count = len(all_df['item'].unique())

        auc_score = roc_auc_score(y_true, recs['score'].values)
        cov = coverage(recs['score'].values, recs['item'].values, item_count)
        hit = hit_rate(recs['score'].values, np.array(y_true),
                       recs['user'].values)

        return auc_score, cov, hit