Exemple #1
0
def test_als_batch_accuracy():
    """Compare batch prediction accuracy of the LU and CD ALS solvers.

    Trains two BiasedMF models on ML-100K with 5-fold user partitioning
    and asserts that MAE and mean per-user RMSE for both solvers fall in
    the historically observed ranges.
    """
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    lu_algo = als.BiasedMF(25, iterations=20, damping=5, method='lu')
    cd_algo = als.BiasedMF(25, iterations=25, damping=5, method='cd')

    def _eval(train, test):
        # renamed from `eval` so the Python builtin is not shadowed
        _log.info('training LU')
        lu_algo.fit(train)
        _log.info('training CD')
        cd_algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return test.assign(lu_pred=lu_algo.predict(test), cd_pred=cd_algo.predict(test))

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(_eval(train, test) for (train, test) in folds)
    preds['abs_diff'] = np.abs(preds.lu_pred - preds.cd_pred)
    _log.info('predictions:\n%s', preds.sort_values('abs_diff', ascending=False))
    _log.info('diff summary:\n%s', preds.abs_diff.describe())

    lu_mae = pm.mae(preds.lu_pred, preds.rating)
    assert lu_mae == approx(0.73, abs=0.025)
    cd_mae = pm.mae(preds.cd_pred, preds.rating)
    assert cd_mae == approx(0.73, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.lu_pred, df.rating))
    assert user_rmse.mean() == approx(0.91, abs=0.05)
    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.cd_pred, df.rating))
    assert user_rmse.mean() == approx(0.91, abs=0.05)
Exemple #2
0
def test_als_method_match():
    """LU and CD solvers should produce closely matching predictions.

    Fits both solvers with the same seed, samples 15 users x 15 items,
    and checks that the 90th percentile of absolute prediction
    differences over ALL sampled predictions is under a quarter star.
    """
    lu = als.BiasedMF(20,
                      iterations=15,
                      reg=(2, 0.001),
                      method='lu',
                      rng_spec=42)
    cd = als.BiasedMF(20,
                      iterations=20,
                      reg=(2, 0.001),
                      method='cd',
                      rng_spec=42)

    ratings = lktu.ml_test.ratings

    timer = Stopwatch()
    lu.fit(ratings)
    timer.stop()
    _log.info('fit with LU solver in %s', timer)

    timer = Stopwatch()
    cd.fit(ratings)
    timer.stop()
    _log.info('fit with CD solver in %s', timer)

    assert lu.global_bias_ == approx(ratings.rating.mean())
    assert cd.global_bias_ == approx(ratings.rating.mean())

    preds = []

    rng = util.rng(42, legacy=True)
    for u in rng.choice(np.unique(ratings.user), 15, replace=False):
        items = rng.choice(np.unique(ratings.item), 15, replace=False)
        lu_preds = lu.predict_for_user(u, items)
        cd_preds = cd.predict_for_user(u, items)
        diff = lu_preds - cd_preds
        adiff = np.abs(diff)
        _log.info(
            'user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f',
            u, np.linalg.norm(diff, 2), np.min(adiff), np.median(adiff),
            np.max(adiff), np.quantile(adiff, 0.9))

        preds.append(
            pd.DataFrame({
                'user': u,
                'item': items,
                'lu': lu_preds,
                'cd': cd_preds,
                'adiff': adiff
            }))

    preds = pd.concat(preds, ignore_index=True)
    _log.info('LU preds:\n%s', preds.lu.describe())
    _log.info('CD preds:\n%s', preds.cd.describe())
    _log.info('overall differences:\n%s', preds.adiff.describe())
    # there are differences. our check: the 90% are under a quarter star
    # BUG FIX: the original asserted on `adiff`, the loop-carried values
    # from the LAST sampled user only; use the full collected column.
    assert np.quantile(preds.adiff, 0.9) <= 0.25
Exemple #3
0
def test_als_predict_for_new_users_with_new_ratings():
    """Fold-in predictions for a new user should approximate the
    predictions for the same user learned at fit time.

    For each sampled known user, predicts via the trained embedding and
    via fold-in of that user's own ratings under a fresh id, and checks
    the two prediction vectors agree within 9% relative tolerance.
    """
    n_users = 3
    n_items = 2
    new_u_id = -1
    ratings = lktu.ml_test.ratings

    np.random.seed(45)
    users = np.random.choice(ratings.user.unique(), n_users)
    items = np.random.choice(ratings.item.unique(), n_items)

    algo = als.BiasedMF(20, iterations=10, method="lu")
    algo.fit(ratings)
    # lazy %-style args: formatting is skipped when DEBUG is disabled
    _log.debug("Items: %s", items)

    for u in users:
        _log.debug("user: %s", u)
        preds = algo.predict_for_user(u, items)

        user_data = ratings[ratings.user == u]

        _log.debug("user_features from fit: %s",
                   algo.user_features_[algo.user_index_.get_loc(u), :])

        new_ratings = pd.Series(
            user_data.rating.to_numpy(),
            index=user_data.item)  # items as index and ratings as values
        new_preds = algo.predict_for_user(new_u_id, items, new_ratings)

        _log.debug("preds: %s", preds.values)
        _log.debug("new_preds: %s", new_preds.values)
        _log.debug("------------")
        assert new_preds.values == approx(preds.values, rel=9e-2)
Exemple #4
0
def test_alogrithms():
    """Run the evaluation pipeline on ML-1M across a suite of algorithms
    and plot the resulting nDCG means."""
    ratings = ML1M('ml-1m').ratings
    print('Initial ratings table head:')
    print(ratings.head())
    algorithms = [
        basic.Bias(damping=5),
        basic.Popular(),
        item_knn.ItemItem(20),
        user_knn.UserUser(20),
        als.BiasedMF(50),
        als.ImplicitMF(50),
        funksvd.FunkSVD(50),
    ]
    folds = list(
        partition_users(ratings[['user', 'item', 'rating']], 5,
                        SampleFrac(0.2)))
    eval_algorithms(dataset=folds, algorithms=algorithms)
    runs = display_runs()
    recs = display_recommendations()
    truth = pd.concat((fold.test for fold in folds), ignore_index=True)
    ndcg_means = check_recommendations(runs, recs, truth)
    print('NDCG means:')
    print(ndcg_means)
    plot_comparison(ndcg_means)
 def _create_non_social_recommender_algorithm(algo_name, aggregation):
     """Build a non-social recommender for the given algorithm code.

     Supported codes: 'ii' (item-item kNN) and 'als' (biased MF).
     Raises ValueError for any other code — the original fell through
     and crashed later with UnboundLocalError on `algo`.
     """
     if algo_name == 'ii':
         algo = knn.ItemItem(NEIGHBORS)
     elif algo_name == 'als':
         algo = als.BiasedMF(NUM_FEATURES)
     else:
         raise ValueError('unknown algorithm name: %r' % (algo_name,))
     return SocialRecommenderAlgorithmFactory._create_recommender_algorithm_with_fallback(
         algo, aggregation)
Exemple #6
0
def test_als_binpickle(tmp_path):
    "Test saving ALS with BinPickle"

    trained = als.BiasedMF(20, iterations=5, method='lu')
    ratings = lktu.ml_test.ratings
    trained.fit(ratings)

    assert trained.global_bias_ == approx(ratings.rating.mean())

    file = tmp_path / 'als.bpk'
    binpickle.dump(trained, file)

    with binpickle.BinPickleFile(file) as bpf:
        last_entry = bpf.entries[-1]
        # the pickle payload itself should stay small
        _log.info('serialized to %d pickle bytes', last_entry.dec_length)
        pickle_dis(bpf._read_buffer(last_entry))
        assert last_entry.dec_length < 1024

        loaded = bpf.load()

        assert loaded.global_bias_ == trained.global_bias_
        # every learned array and index must survive the round trip
        for attr in ('user_bias_', 'item_bias_', 'user_features_',
                     'item_features_', 'item_index_', 'user_index_'):
            assert np.all(getattr(loaded, attr) == getattr(trained, attr))
Exemple #7
0
def test_als_binpickle(tmp_path):
    "Test saving ALS with BinPickle"

    trained = als.BiasedMF(20, iterations=5, method='lu')
    ratings = lktu.ml_test.ratings
    trained.fit(ratings)

    assert trained.bias.mean_ == approx(ratings.rating.mean())

    file = tmp_path / 'als.bpk'
    binpickle.dump(trained, file)

    with binpickle.BinPickleFile(file) as bpf:
        last_entry = bpf.entries[-1]
        # the pickle payload itself should stay small
        _log.info('serialized to %d pickle bytes', last_entry.dec_length)
        pickle_dis(bpf._read_buffer(last_entry))
        assert last_entry.dec_length < 2048

        loaded = bpf.load()

        assert loaded.bias.mean_ == trained.bias.mean_
        # the bias offsets and all learned arrays/indexes must round-trip
        for attr in ('user_offsets_', 'item_offsets_'):
            assert np.all(getattr(loaded.bias, attr) == getattr(trained.bias, attr))
        for attr in ('user_features_', 'item_features_',
                     'item_index_', 'user_index_'):
            assert np.all(getattr(loaded, attr) == getattr(trained, attr))

        # make sure the loaded model still produces predictions
        preds = loaded.predict_for_user(10, np.arange(0, 50, dtype='i8'))
        assert len(preds) == 50
Exemple #8
0
def user_movie_recommend(ratings, optionList, userId):
    """Produce recommendations for `userId` with each selected algorithm.

    Parameters
    ----------
    ratings : ratings frame passed through to `user_eval`
    optionList : iterable of int codes (1-6) selecting algorithms
    userId : target user

    Returns a single concatenated frame of all per-algorithm results.
    Unknown option codes are ignored, matching the original if-chain.
    Raises ValueError (from pd.concat) if no option matched, as before.
    """
    # option code -> (label, model factory); factories keep model
    # construction lazy so only requested models are built
    _BUILDERS = {
        1: ('BasicBias', lambda: basic.Bias()),
        2: ('ItemItem', lambda: iknn.ItemItem(20)),
        3: ('UserUser', lambda: uknn.UserUser(20)),
        4: ('ALS-Biased', lambda: als.BiasedMF(50)),
        5: ('ALS-Implicit', lambda: als.ImplicitMF(50)),
        6: ('FunkSVD', lambda: funksvd.FunkSVD(50)),
    }

    all_recs = []
    for option in optionList:
        entry = _BUILDERS.get(option)
        if entry is None:
            continue
        name, make_model = entry
        all_recs.append(user_eval(name, make_model(), ratings, userId))

    return pd.concat(all_recs, ignore_index=True)
Exemple #9
0
class LegMedLensKit():
    """Script-style class: its body loads ratings, trains ItemItem and ALS
    recommenders, evaluates them with nDCG, and plots the comparison.

    NOTE(review): everything here runs at class-definition (import) time;
    module-level functions would be a cleaner structure.
    """

    def loadData():
        # NOTE(review): hard-coded local path — parameterize before reuse.
        # pandas falls back to the python engine for the '::' separator.
        ratings = pd.read_csv('/Users/josse/Desktop/ratings.dat',
                              sep='::',
                              names=['user', 'item', 'rating', 'timestamp'])
        print(ratings.head())
        return (ratings)

    ratings = loadData()
    # item x user matrix of ratings; unrated cells come out as NaN
    data_matrix = np.array(
        ratings.pivot(index='item', columns='user', values='rating'))
    print(data_matrix)
    data_matrix_rev = np.nan_to_num(data_matrix)
    print(data_matrix_rev)

    algo_ii = knn.ItemItem(20)
    algo_als = als.BiasedMF(50)

    def eval(aname, algo, train, test):
        # fit a clone (wrapped as a Recommender) and produce top-100 recs
        print("test")
        fittable = util.clone(algo)
        fittable = Recommender.adapt(fittable)
        fittable.fit(train)
        users = test.user.unique()
        # now we run the recommender
        recs = batch.recommend(fittable, users, 100)
        # add the algorithm name for analyzability
        recs['Algorithm'] = aname
        print("recs")
        print(recs.head())
        return recs

    all_recs = []
    test_data = []

    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']],
                                          1, xf.SampleFrac(0.2)):
        test_data.append(test)
        all_recs.append(eval('ItemItem', algo_ii, train, test))
        all_recs.append(eval('ALS', algo_als, train, test))

    print("test2")
    # BUG FIX: concatenate BEFORE calling .head() — the original called
    # .head() on the Python list, which raises AttributeError.
    all_recs = pd.concat(all_recs, ignore_index=True)
    print(all_recs.head())
    test_data = pd.concat(test_data, ignore_index=True)

    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(all_recs, test_data)
    results.head()

    results.groupby('Algorithm').ndcg.mean()
    results.groupby('Algorithm').ndcg.mean().plot.bar()
Exemple #10
0
def test_als_predict_bad_user():
    "An unknown user should get a NaN prediction."
    model = als.BiasedMF(20, iterations=10)
    model.fit(simple_df)

    assert model.global_bias_ == approx(simple_df.rating.mean())

    scores = model.predict_for_user(50, [3])
    assert list(scores.index) == [3]
    assert all(np.isnan(scores.values))
Exemple #11
0
def test_als_predict_bad_item():
    "An unknown item should get a NaN prediction."
    model = als.BiasedMF(20, iterations=10)
    model.fit(simple_df)

    assert model.bias.mean_ == approx(simple_df.rating.mean())

    scores = model.predict_for_user(10, [4])
    assert list(scores.index) == [4]
    assert all(np.isnan(scores.values))
Exemple #12
0
def test_als_predict_basic(m):
    "A prediction for a known user/item pair lands in a sane rating range."
    model = als.BiasedMF(20, iterations=10, method=m)
    model.fit(simple_df)

    assert model.global_bias_ == approx(simple_df.rating.mean())

    scores = model.predict_for_user(10, [3])
    assert len(scores) == 1
    assert scores.index[0] == 3
    assert -0.1 <= scores.loc[3] <= 5.1
Exemple #13
0
def test_als_no_bias(m):
    "With bias=None the model trains and predicts without a bias component."
    model = als.BiasedMF(20, iterations=10, bias=None, method=m)
    model.fit(simple_df)

    assert model.bias is None
    assert set(model.user_index_) == {10, 12, 13}
    assert set(model.item_index_) == {1, 2, 3}
    assert model.user_features_.shape == (3, 20)
    assert model.item_features_.shape == (3, 20)

    scores = model.predict_for_user(10, [3])
    assert len(scores) == 1
Exemple #14
0
def test_als_save_load(tmp_path):
    "A trained BiasedMF round-trips through its save/load files intact."
    tmp_path = lktu.norm_path(tmp_path)
    mod_file = tmp_path / 'als.npz'
    trained = als.BiasedMF(20, iterations=5)
    ratings = lktu.ml_pandas.renamed.ratings
    trained.fit(ratings)

    assert trained.global_bias_ == approx(ratings.rating.mean())

    trained.save(mod_file)
    assert mod_file.exists()

    loaded = als.BiasedMF(20)
    loaded.load(mod_file)
    assert loaded.global_bias_ == trained.global_bias_
    # every learned array and index must survive the round trip
    for attr in ('user_bias_', 'item_bias_', 'user_features_',
                 'item_features_', 'item_index_', 'user_index_'):
        assert np.all(getattr(loaded, attr) == getattr(trained, attr))
Exemple #15
0
def test_als_basic_build(m):
    "Fitting on the small frame yields the expected sizes and indexes."
    model = als.BiasedMF(20, iterations=10, progress=util.no_progress, method=m)
    model.fit(simple_df)

    assert model.global_bias_ == approx(simple_df.rating.mean())
    assert set(model.user_index_) == {10, 12, 13}
    assert set(model.item_index_) == {1, 2, 3}
    assert model.user_features_.shape == (3, 20)
    assert model.item_features_.shape == (3, 20)

    assert model.n_features == 20
    assert model.n_users == 3
    assert model.n_items == 3
Exemple #16
0
def test_als_predict_basic_for_new_ratings():
    "A fold-in prediction from fresh ratings lands in a sane rating range."
    model = als.BiasedMF(20, iterations=10)
    model.fit(simple_df)

    assert model.bias.mean_ == approx(simple_df.rating.mean())

    # items as index, ratings as values — the fold-in input format
    fresh_ratings = pd.Series([4.0, 5.0], index=[1, 2])

    scores = model.predict_for_user(15, [3], fresh_ratings)

    assert len(scores) == 1
    assert scores.index[0] == 3
    assert -0.1 <= scores.loc[3] <= 5.1
Exemple #17
0
def test_als_predict_basic_for_new_user_with_new_ratings():
    "Fold-in for a new user id mirrors the trained user's prediction."
    user, item = 10, 3

    model = als.BiasedMF(20, iterations=10)
    model.fit(simple_df)

    trained_score = model.predict_for_user(user, [item])

    # items as index, ratings as values — the fold-in input format
    fresh_ratings = pd.Series([4.0, 5.0], index=[1, 2])
    folded_score = model.predict_for_user(-1, [item], fresh_ratings)

    assert trained_score.loc[item] == approx(folded_score.loc[item], rel=9e-2)
Exemple #18
0
def test_als_train_large():
    "Training on ML-test learns the expected sizes and damped item biases."
    model = als.BiasedMF(20, iterations=10)
    ratings = lktu.ml_test.ratings
    model.fit(ratings)

    assert model.global_bias_ == approx(ratings.rating.mean())
    assert model.n_features == 20
    assert model.n_items == ratings.item.nunique()
    assert model.n_users == ratings.user.nunique()

    # recompute the damped (damping=5) item mean offsets independently
    by_item = ratings.groupby('item').rating
    counts = by_item.count()
    centered_sums = by_item.sum() - counts * ratings.rating.mean()
    expected = centered_sums / (counts + 5)
    learned = pd.Series(model.item_bias_, index=model.item_index_)
    expected, learned = expected.align(learned)
    assert learned.values == approx(expected.values)
def get_topn_algo_class(algo):
    """Return a top-N recommender instance for the given short name.

    Unrecognized names return None, matching the original fall-through.
    Factories keep construction lazy so only the requested model is built.
    """
    factories = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: basic.TopN(
            iknn.ItemItem(nnbrs=-1, center=False, aggregate='sum')),
        'useruser': lambda: basic.TopN(
            uknn.UserUser(nnbrs=5, center=False, aggregate='sum')),
        'biasedmf': lambda: basic.TopN(als.BiasedMF(50, iterations=10)),
        'implicitmf': lambda: basic.TopN(als.ImplicitMF(20, iterations=10)),
        'funksvd': lambda: basic.TopN(svd.FunkSVD(20, iterations=20)),
        'bpr': lambda: basic.TopN(BPR(25)),
    }
    maker = factories.get(algo)
    return maker() if maker is not None else None
Exemple #20
0
 def get_algo_class(self, algo):
     """Return a fresh recommender instance for the given short name.

     Unrecognized names return None, matching the original fall-through.
     """
     factories = {
         'popular': lambda: basic.Popular(),
         'bias': lambda: basic.Bias(users=False),
         'topn': lambda: basic.TopN(basic.Bias()),
         'itemitem': lambda: iknn.ItemItem(nnbrs=-1),
         'useruser': lambda: uknn.UserUser(nnbrs=5),
         'biasedmf': lambda: als.BiasedMF(50, iterations=10),
         'implicitmf': lambda: als.ImplicitMF(20, iterations=10),
         'funksvd': lambda: svd.FunkSVD(20, iterations=20),
     }
     maker = factories.get(algo)
     return maker() if maker is not None else None
Exemple #21
0
def test_als_save_load():
    "A trained model survives a pickle round trip intact."
    trained = als.BiasedMF(20, iterations=5, method='lu')
    ratings = lktu.ml_test.ratings
    trained.fit(ratings)

    assert trained.global_bias_ == approx(ratings.rating.mean())

    blob = pickle.dumps(trained)
    _log.info('serialized to %d bytes', len(blob))

    loaded = pickle.loads(blob)
    assert loaded.global_bias_ == trained.global_bias_
    # every learned array and index must survive the round trip
    for attr in ('user_bias_', 'item_bias_', 'user_features_',
                 'item_features_', 'item_index_', 'user_index_'):
        assert np.all(getattr(loaded, attr) == getattr(trained, attr))
def test_alogrithms():
    """Evaluate a named suite of recommenders on ml-latest-small via nDCG."""
    ratings = MovieLens('ml-latest-small').ratings
    print('Initial ratings table head:')
    print(ratings.head())
    algorithms = dict(
        Bias=basic.Bias(damping=5),
        Popular=basic.Popular(),
        ItemItem=item_knn.ItemItem(20),
        UserUser=user_knn.UserUser(20),
        BiasedMF=als.BiasedMF(50),
        ImplicitMF=als.ImplicitMF(50),
        FunkSVD=funksvd.FunkSVD(50),
    )
    recs, truth = eval_algos(ratings, algorithms)
    ndcg_means = eval_ndcg(recs, truth)
    print('NDCG means:')
    print(ndcg_means)
    plot_comparison(ndcg_means)
Exemple #23
0
def test_als_save_load():
    "Pickle round trip preserves bias, features, indexes; model still predicts."
    trained = als.BiasedMF(5, iterations=5, method='lu')
    ratings = lktu.ml_test.ratings
    trained.fit(ratings)

    assert trained.bias.mean_ == approx(ratings.rating.mean())

    blob = pickle.dumps(trained)
    _log.info('serialized to %d bytes', len(blob))

    loaded = pickle.loads(blob)
    assert loaded.bias.mean_ == trained.bias.mean_
    # bias offsets plus all learned arrays/indexes must round-trip
    for attr in ('user_offsets_', 'item_offsets_'):
        assert np.all(getattr(loaded.bias, attr) == getattr(trained.bias, attr))
    for attr in ('user_features_', 'item_features_',
                 'item_index_', 'user_index_'):
        assert np.all(getattr(loaded, attr) == getattr(trained, attr))

    # make sure the loaded model still produces predictions
    preds = loaded.predict_for_user(10, np.arange(0, 50, dtype='i8'))
    assert len(preds) == 50
Exemple #24
0
    def objective_fn(params: Dict[str, Any]):
        """Hyperopt objective: negative mean nDCG of one BiasedMF config.

        Reads `features` and `iteration` from params; other settings fixed.
        """
        candidate = als.BiasedMF(
            features=params["features"],
            iterations=params["iteration"],
            reg=0.1,
            damping=5,
        )

        model = Recommender.adapt(util.clone(candidate))
        model.fit(train_df)

        recs = batch.recommend(model, test_users, recsize)

        rla = topn.RecListAnalysis()
        rla.add_metric(topn.ndcg)
        results = rla.compute(recs, test_df)

        # hyperopt minimizes, so negate the metric we want to maximize
        return {"loss": -results.ndcg.mean(), "status": STATUS_OK}
Exemple #25
0
def all_movie_recommends(ratings, optionList):
    """Evaluate the selected algorithms over 5 user-partitioned folds.

    Returns (all_recs, test_data) as concatenated frames. As in the
    original, one shared model instance per option code is re-fit on
    every fold, and unknown option codes are ignored.
    """
    # option code -> (label, shared model instance); all six models are
    # constructed up front, exactly as the original did
    models = {
        1: ('BasicBias', basic.Bias()),
        2: ('ItemItem', iknn.ItemItem(20)),
        3: ('UserUser', uknn.UserUser(20)),
        4: ('ALS-Biased', als.BiasedMF(50)),
        5: ('ALS-Implicit', als.ImplicitMF(50)),
        6: ('FunkSVD', funksvd.FunkSVD(50)),
    }

    all_recs = []
    test_data = []

    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']],
                                          5, xf.SampleFrac(0.2)):
        test_data.append(test)
        for option in optionList:
            if option in models:
                name, model = models[option]
                all_recs.append(batch_eval(name, model, train, test))

    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)

    return all_recs, test_data
def get_algo_class(algo):
    """Return a fresh recommender instance for the given short name.

    Unrecognized names return None, matching the original fall-through.
    """
    factories = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.Bias(users=False),
        'topn': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: iknn.ItemItem(nnbrs=-1),
        'useruser': lambda: uknn.UserUser(nnbrs=5),
        'biasedmf': lambda: als.BiasedMF(50, iterations=10),
        'implicitmf': lambda: als.ImplicitMF(20, iterations=10),
        'funksvd': lambda: svd.FunkSVD(20, iterations=20),
        'tf_bpr': lambda: lktf.BPR(20,
                                   batch_size=1024,
                                   epochs=5,
                                   neg_count=2,
                                   rng_spec=42),
    }
    maker = factories.get(algo)
    return maker() if maker is not None else None
Exemple #27
0
def test_als_batch_accuracy():
    """Batch MAE and per-user RMSE of BiasedMF (with Bias fallback) on ML-100K."""
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.load_ratings()

    svd_algo = als.BiasedMF(25, iterations=20, damping=5)
    algo = basic.Fallback(svd_algo, basic.Bias(damping=5))

    def _eval(train, test):
        # renamed from `eval` so the Python builtin is not shadowed
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return test.assign(prediction=algo.predict(test))

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(_eval(train, test) for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.73, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.91, abs=0.05)
    def test(self, path):
        """Fit BiasedMF(5) on the dataset at `path` and print its RMSE.

        The loader class is chosen from the path name ('100k', '1m',
        '10m', otherwise the small MovieLens layout).
        """
        algo_pop = Bias()
        algo_als5 = als.BiasedMF(5)

        def _eval(aname, algo, train, test, all_preds):
            # renamed from `eval` so the Python builtin is not shadowed
            fittable = util.clone(algo)
            fittable = Recommender.adapt(fittable)
            fittable.fit(train)
            # predict ratings
            preds = batch.predict(fittable, test)
            preds['Algorithm'] = aname
            all_preds.append(preds)

        # `dataset` replaces the original's misleading reuse of the name
        # `ml100k` for the 1M and 10M loaders
        if '100k' in path:
            dataset = ML100K(path)
        elif '1m' in path:
            dataset = ML1M(path)
        elif '10m' in path:
            dataset = ML10M(path)
        else:
            dataset = MovieLens(path)
        ratings = dataset.ratings
        print(ratings.head())

        all_preds = []
        test_data = []
        for train, test in xf.partition_users(
                ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
            test_data.append(test)
            _eval('MF', algo_als5, train, test, all_preds)
        preds = pd.concat(all_preds, ignore_index=True)
        preds_mf = preds[preds['Algorithm'].str.match('MF')]
        test_data = pd.concat(test_data, ignore_index=True)
        print('RMSE MF:', rmse(preds_mf['prediction'], preds_mf['rating']))
from lenskit.algorithms import basic
from lenskit.algorithms import item_knn
from lenskit import crossfold as xf
from lenskit.metrics import predict
'''
This code tests different configurations of the number of latent features for the ALS matrix factorization
technique to find which performs best on the RMSE metric, including sanity-check scores from a baseline bias
scorer and an item-item based scorer.
'''
# NOTE(review): hard-coded cluster paths — parameterize before reuse.
# Full ML-20M ratings as the training pool; the truncated user file is the test set.
train = pd.read_csv("/project/naray190/ml-20m/ratings.csv")
test = pd.read_csv("/project/naray190/ml-20m/truncated_user_ratings.csv")
# keep only the columns LensKit needs, renamed to its user/item/rating schema
train = train[['userId', 'movieId', 'rating']]
test = test[['userId', 'movieId', 'rating']]
train.columns = ['user', 'item', 'rating']
test.columns = ['user', 'item', 'rating']
# BiasedMF candidates sweeping the latent-feature count (10 through 80),
# all with the same iteration count and regularization
algo_30als = als.BiasedMF(features=30, iterations=50, reg=0.1)
algo_40als = als.BiasedMF(features=40, iterations=50, reg=0.1)
algo_20als = als.BiasedMF(features=20, iterations=50, reg=0.1)
algo_25als = als.BiasedMF(features=25, iterations=50, reg=0.1)
algo_15als = als.BiasedMF(features=15, iterations=50, reg=0.1)
algo_50als = als.BiasedMF(features=50, iterations=50, reg=0.1)
algo_60als = als.BiasedMF(features=60, iterations=50, reg=0.1)
algo_10als = als.BiasedMF(features=10, iterations=50, reg=0.1)
algo_70als = als.BiasedMF(features=70, iterations=50, reg=0.1)
algo_80als = als.BiasedMF(features=80, iterations=50, reg=0.1)
# sanity-check baselines: damped bias model and item-item kNN
algo_base = basic.Bias()
algo_ii = item_knn.ItemItem(nnbrs=20)


def eval(algo, train, test):
    fittable = util.clone(algo)
Exemple #30
0
from lenskit import crossfold as xf
from lenskit.algorithms import item_knn as knn
from lenskit.algorithms import funksvd as funk
from lenskit.algorithms import als

from flask import make_response, abort, jsonify


# read in the movielens 100k ratings with pandas
# https://grouplens.org/datasets/movielens/100k/
ratings = pd.read_csv('ml-100k/u.data', sep='\t',
        names=['user', 'item', 'rating', 'timestamp'])

# three candidate recommenders: item-item kNN, FunkSVD, and biased ALS
algoKNN = knn.ItemItem(30)
algoFunk = funk.FunkSVD(2)
algoAls = als.BiasedMF(20)


# split the data in a test and a training set
# for each user leave one row out for test purpose
data = ratings
nb_partitions = 1
splits = xf.partition_users(data, nb_partitions, xf.SampleN(1))
for (trainSet, testSet) in splits:
    # NOTE(review): with one partition this loop runs once; with more
    # partitions only the LAST split would be kept
    train = trainSet
    test = testSet

# train model
modelKNN = algoKNN.fit(train)
modelFunk = algoFunk.fit(train)
modelALS = algoAls.fit(train)