Exemple #1
0
def test_fsvd_save_load(tmp_path):
    """Round-trip a trained FunkSVD model through save()/load() on disk."""
    tmp_path = lktu.norm_path(tmp_path)
    mod_file = tmp_path / 'funksvd.npz'

    ratings = lktu.ml_pandas.renamed.ratings

    original = svd.FunkSVD(20, iterations=20)
    original.fit(ratings)

    # sanity-check the trained model before serializing it
    assert original.global_bias_ == approx(ratings.rating.mean())
    assert original.item_features_.shape == (ratings.item.nunique(), 20)
    assert original.user_features_.shape == (ratings.user.nunique(), 20)

    original.save(mod_file)
    assert mod_file.exists()

    algo = svd.FunkSVD(20, iterations=20)
    algo.load(mod_file)

    # every learned attribute must survive the round trip unchanged
    assert algo.global_bias_ == original.global_bias_
    for attr in ('user_bias_', 'item_bias_', 'user_features_',
                 'item_features_', 'item_index_', 'user_index_'):
        assert np.all(getattr(algo, attr) == getattr(original, attr))
Exemple #2
0
def user_movie_recommend(ratings, optionList, userId):
    """Collect per-user evaluation results for each algorithm chosen in optionList.

    Options map 1..6 to the supported recommenders; unknown options are ignored,
    exactly as in the original if-chain.
    """
    # Map each menu option to a (label, factory) pair.  Factories are lambdas so
    # a model is only instantiated when its option is actually requested.
    builders = {
        1: ('BasicBias', lambda: basic.Bias()),
        2: ('ItemItem', lambda: iknn.ItemItem(20)),
        3: ('UserUser', lambda: uknn.UserUser(20)),
        4: ('ALS-Biased', lambda: als.BiasedMF(50)),
        5: ('ALS-Implicit', lambda: als.ImplicitMF(50)),
        6: ('FunkSVD', lambda: funksvd.FunkSVD(50)),
    }

    all_recs = []
    for option in optionList:
        if option in builders:
            name, make = builders[option]
            all_recs.append(user_eval(name, make(), ratings, userId))

    return pd.concat(all_recs, ignore_index=True)
Exemple #3
0
def test_alogrithms():
    """End-to-end evaluation of several recommenders on the ML-1M dataset."""
    data = ML1M('ml-1m')  # MovieLens('ml-latest-small') is a quicker alternative
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())

    algorithms = [
        basic.Bias(damping=5),
        basic.Popular(),
        item_knn.ItemItem(20),
        user_knn.UserUser(20),
        als.BiasedMF(50),
        als.ImplicitMF(50),
        funksvd.FunkSVD(50),
    ]

    # 5-fold partition over users, holding out 20% of each user's ratings
    pairs = list(partition_users(ratings[['user', 'item', 'rating']],
                                 5, SampleFrac(0.2)))
    eval_algorithms(dataset=pairs, algorithms=algorithms)

    runs = display_runs()
    recs = display_recommendations()
    truth = pd.concat((fold.test for fold in pairs), ignore_index=True)

    ndcg_means = check_recommendations(runs, recs, truth)
    print('NDCG means:')
    print(ndcg_means)
    plot_comparison(ndcg_means)
Exemple #4
0
def test_fsvd_batch_accuracy():
    """Check FunkSVD prediction accuracy over a 5-fold user partition of ML-100K."""
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    # fall back to a damped bias model for users/items FunkSVD cannot score
    svd_algo = svd.FunkSVD(25, 125, damping=10)
    algo = basic.Fallback(svd_algo, bias.Bias(damping=10))

    def run_fold(train, test):
        # fit on the training split, then predict the held-out pairs
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(run_fold(train, test) for (train, test) in folds)

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.74, abs=0.025)

    user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.92, abs=0.05)
Exemple #5
0
def test_fsvd_clamp_build():
    """Training with a clamping range should still fit bias and feature matrices."""
    algo = svd.FunkSVD(20, iterations=20, range=(1, 5))
    algo.fit(simple_df)

    assert algo.global_bias_ == approx(simple_df.rating.mean())
    # simple_df has 3 users and 3 items; 20 latent features were requested
    for feats in (algo.item_features_, algo.user_features_):
        assert feats.shape == (3, 20)
Exemple #6
0
def test_fsvd_basic_build():
    """Default training should learn the mean rating and 20-dim feature matrices."""
    algo = svd.FunkSVD(20, iterations=20)
    algo.fit(simple_df)

    assert algo.bias.mean_ == approx(simple_df.rating.mean())
    # simple_df has 3 users and 3 items; 20 latent features were requested
    for feats in (algo.item_features_, algo.user_features_):
        assert feats.shape == (3, 20)
Exemple #7
0
def test_fsvd_train_binary():
    """FunkSVD with bias disabled should train on rating-free (implicit) data."""
    ratings = lktu.ml_test.ratings.drop(columns=['rating', 'timestamp'])

    original = svd.FunkSVD(20, iterations=20, bias=False)
    original.fit(ratings)

    # with bias=False the learned global bias stays at zero
    assert original.global_bias_ == 0
    n_features = 20
    assert original.item_features_.shape == (ratings.item.nunique(), n_features)
    assert original.user_features_.shape == (ratings.user.nunique(), n_features)
Exemple #8
0
def test_fsvd_predict_bad_item_clamp():
    """Predicting an unknown item should yield NaN even with clamping enabled."""
    algo = svd.FunkSVD(20, iterations=20, range=(1, 5))
    algo.fit(simple_df)

    assert algo.global_bias_ == approx(simple_df.rating.mean())
    for feats in (algo.item_features_, algo.user_features_):
        assert feats.shape == (3, 20)

    # item 4 never appeared in the training data
    preds = algo.predict_for_user(10, [4])
    assert len(preds) == 1
    assert preds.index[0] == 4
    assert np.isnan(preds.loc[4])
Exemple #9
0
def test_fsvd_no_bias():
    """With bias=None the model stores no bias yet still predicts known pairs."""
    algo = svd.FunkSVD(20, iterations=20, bias=None)
    algo.fit(simple_df)

    assert algo.bias is None
    for feats in (algo.item_features_, algo.user_features_):
        assert feats.shape == (3, 20)

    preds = algo.predict_for_user(10, [3])
    assert len(preds) == 1
    assert preds.index[0] == 3
    assert preds.notna().all()
Exemple #10
0
def test_fsvd_predict_bad_user():
    """Predicting for a user absent from training should return NaN."""
    algo = svd.FunkSVD(20, iterations=20)
    algo.fit(simple_df)

    assert algo.bias.mean_ == approx(simple_df.rating.mean())
    for feats in (algo.item_features_, algo.user_features_):
        assert feats.shape == (3, 20)

    # user 50 never appeared in the training data
    preds = algo.predict_for_user(50, [3])
    assert len(preds) == 1
    assert preds.index[0] == 3
    assert np.isnan(preds.loc[3])
Exemple #11
0
def test_fsvd_predict_basic():
    """A trained model should predict a known user/item pair within rating bounds."""
    algo = svd.FunkSVD(20, iterations=20)
    algo.fit(simple_df)

    assert algo.global_bias_ == approx(simple_df.rating.mean())
    for feats in (algo.item_features_, algo.user_features_):
        assert feats.shape == (3, 20)

    preds = algo.predict_for_user(10, [3])
    assert len(preds) == 1
    assert preds.index[0] == 3
    assert 0 <= preds.loc[3] <= 5
Exemple #12
0
def test_fsvd_predict_clamp():
    """Predictions trained with range=(1, 5) must come back inside that range."""
    algo = svd.FunkSVD(20, iterations=20, range=(1, 5))
    algo.fit(simple_df)

    assert algo.bias.mean_ == approx(simple_df.rating.mean())
    for feats in (algo.item_features_, algo.user_features_):
        assert feats.shape == (3, 20)

    preds = algo.predict_for_user(10, [3])
    assert isinstance(preds, pd.Series)
    assert len(preds) == 1
    assert preds.index[0] == 3
    assert 1 <= preds.loc[3] <= 5
Exemple #13
0
 def get_algo_class(self, algo):
     """Return a fresh algorithm instance for *algo*, or None if unrecognized."""
     # Name -> zero-argument factory; lambdas keep construction lazy so only
     # the requested algorithm is ever instantiated (matches the if-chain).
     factories = {
         'popular': lambda: basic.Popular(),
         'bias': lambda: basic.Bias(users=False),
         'topn': lambda: basic.TopN(basic.Bias()),
         'itemitem': lambda: iknn.ItemItem(nnbrs=-1),
         'useruser': lambda: uknn.UserUser(nnbrs=5),
         'biasedmf': lambda: als.BiasedMF(50, iterations=10),
         'implicitmf': lambda: als.ImplicitMF(20, iterations=10),
         'funksvd': lambda: svd.FunkSVD(20, iterations=20),
     }
     maker = factories.get(algo)
     if maker is not None:
         return maker()
def get_topn_algo_class(algo):
    """Return a top-N-wrapped algorithm instance for *algo*, or None if unknown."""
    # Name -> zero-argument factory; lambdas keep construction lazy so only
    # the requested algorithm is ever instantiated (matches the if-chain).
    factories = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: basic.TopN(
            iknn.ItemItem(nnbrs=-1, center=False, aggregate='sum')),
        'useruser': lambda: basic.TopN(
            uknn.UserUser(nnbrs=5, center=False, aggregate='sum')),
        'biasedmf': lambda: basic.TopN(als.BiasedMF(50, iterations=10)),
        'implicitmf': lambda: basic.TopN(als.ImplicitMF(20, iterations=10)),
        'funksvd': lambda: basic.TopN(svd.FunkSVD(20, iterations=20)),
        'bpr': lambda: basic.TopN(BPR(25)),
    }
    maker = factories.get(algo)
    if maker is not None:
        return maker()
def test_alogrithms():
    """Evaluate a dictionary of recommenders on ml-latest-small and plot NDCG."""
    data = MovieLens('ml-latest-small')  # ML1M('ml-1m') for the larger dataset
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())

    # label -> configured algorithm instance
    algorithms = {
        'Bias': basic.Bias(damping=5),
        'Popular': basic.Popular(),
        'ItemItem': item_knn.ItemItem(20),
        'UserUser': user_knn.UserUser(20),
        'BiasedMF': als.BiasedMF(50),
        'ImplicitMF': als.ImplicitMF(50),
        'FunkSVD': funksvd.FunkSVD(50),
    }

    all_recs, test_data = eval_algos(ratings, algorithms)
    ndcg_means = eval_ndcg(all_recs, test_data)
    print('NDCG means:')
    print(ndcg_means)
    plot_comparison(ndcg_means)
Exemple #16
0
def test_fsvd_save_load():
    """A trained FunkSVD model must survive a pickle round trip intact."""
    ratings = lktu.ml_test.ratings

    original = svd.FunkSVD(20, iterations=20)
    original.fit(ratings)

    # sanity-check the trained model before serializing it
    assert original.global_bias_ == approx(ratings.rating.mean())
    assert original.item_features_.shape == (ratings.item.nunique(), 20)
    assert original.user_features_.shape == (ratings.user.nunique(), 20)

    blob = pickle.dumps(original)
    _log.info('serialized to %d bytes', len(blob))
    algo = pickle.loads(blob)

    # every learned attribute must survive the round trip unchanged
    assert algo.global_bias_ == original.global_bias_
    for attr in ('user_bias_', 'item_bias_', 'user_features_',
                 'item_features_', 'item_index_', 'user_index_'):
        assert np.all(getattr(algo, attr) == getattr(original, attr))
Exemple #17
0
def all_movie_recommends(ratings, optionList):
    """Run batch evaluation for each selected algorithm across 5 user folds.

    Returns the concatenated recommendations and the concatenated test data.
    """
    # One shared model instance per option, built up front exactly as the
    # original did, so each model accumulates state across all folds.
    models = {
        1: ('BasicBias', basic.Bias()),
        2: ('ItemItem', iknn.ItemItem(20)),
        3: ('UserUser', uknn.UserUser(20)),
        4: ('ALS-Biased', als.BiasedMF(50)),
        5: ('ALS-Implicit', als.ImplicitMF(50)),
        6: ('FunkSVD', funksvd.FunkSVD(50)),
    }

    all_recs = []
    test_data = []

    folds = xf.partition_users(ratings[['user', 'item', 'rating']],
                               5, xf.SampleFrac(0.2))
    for train, test in folds:
        test_data.append(test)
        for option in optionList:
            if option in models:
                name, model = models[option]
                all_recs.append(batch_eval(name, model, train, test))

    return (pd.concat(all_recs, ignore_index=True),
            pd.concat(test_data, ignore_index=True))
def get_algo_class(algo):
    """Return a fresh algorithm instance for *algo*, or None if unrecognized."""
    # Name -> zero-argument factory; lambdas keep construction lazy so only
    # the requested algorithm is ever instantiated (matches the if-chain).
    factories = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.Bias(users=False),
        'topn': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: iknn.ItemItem(nnbrs=-1),
        'useruser': lambda: uknn.UserUser(nnbrs=5),
        'biasedmf': lambda: als.BiasedMF(50, iterations=10),
        'implicitmf': lambda: als.ImplicitMF(20, iterations=10),
        'funksvd': lambda: svd.FunkSVD(20, iterations=20),
        'tf_bpr': lambda: lktf.BPR(20,
                                   batch_size=1024,
                                   epochs=5,
                                   neg_count=2,
                                   rng_spec=42),
    }
    maker = factories.get(algo)
    return maker() if maker is not None else None
Exemple #19
0
def test_fsvd_known_preds():
    """Compare FunkSVD predictions against a stored reference prediction file."""
    algo = svd.FunkSVD(15, iterations=125, lrate=0.001)
    _log.info('training %s on ml data', algo)
    algo.fit(lktu.ml_test.ratings)

    # reference predictions live next to this test module
    pred_file = Path(__file__).parent / 'funksvd-preds.csv'
    _log.info('reading known predictions from %s', pred_file)
    known_preds = pd.read_csv(str(pred_file))
    pairs = known_preds.loc[:, ['user', 'item']]

    preds = algo.predict(pairs)
    known_preds.rename(columns={'prediction': 'expected'}, inplace=True)
    merged = known_preds.assign(prediction=preds)
    merged['error'] = merged.expected - merged.prediction

    # no prediction may go missing where a reference value exists
    assert not any(merged.prediction.isna() & merged.expected.notna())

    err = merged.error.dropna()
    try:
        assert all(err.abs() < 0.01)
    except AssertionError:
        # log the offending rows before re-raising the original failure
        bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)]
        _log.error('erroneous predictions:\n%s', bad)
        raise
Exemple #20
0
from lenskit.algorithms import funksvd
#from lenskit.algorithms import als
from lenskit.algorithms import item_knn as knn
import random
import time
from itertools import product
import statistics

startTime = time.time()
# read in the MovieLens 100K ratings with pandas
ratings = pd.read_csv('ml-100k/u.data',
                      sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])

#algo = knn.ItemItem(20)
algo = funksvd.FunkSVD(50)
#algo = als.BiasedMF(50)
# use als, paralleling computation
'''
# train and test 
# split the data into a test set and a training set, k-fold xf
num_folds = 5
splits = xf.partition_users(ratings, num_folds, xf.SampleFrac(0.2))
for (trainSet, testSet) in splits:
    train = trainSet  #?
    test = testSet    #?
'''
startTime = time.time()  # reset the clock so only training is timed
# fit() is the current LensKit training API (the old train() method was
# removed); it trains in place and returns the algorithm object itself.
model = algo.fit(ratings)
spentTime = time.time() - startTime
Exemple #21
0
from lenskit import batch, topn
from lenskit import crossfold as xf
from lenskit.algorithms import item_knn as knn
from lenskit.algorithms import funksvd as funk
from lenskit.algorithms import als

from flask import make_response, abort, jsonify


# read in the movielens 100k ratings with pandas
# https://grouplens.org/datasets/movielens/100k/
ratings = pd.read_csv('ml-100k/u.data', sep='\t',
        names=['user', 'item', 'rating', 'timestamp'])

# candidate algorithms for the single-holdout experiment below
algoKNN = knn.ItemItem(30)
algoFunk = funk.FunkSVD(2)
algoAls = als.BiasedMF(20)


# split the data in a test and a training set
# for each user leave one row out for test purpose
data = ratings
nb_partitions = 1
splits = xf.partition_users(data, nb_partitions, xf.SampleN(1))
# with one partition this loop runs once; train/test keep the last fold
for (trainSet, testSet) in splits:
    train = trainSet
    test = testSet

# train model
modelKNN = algoKNN.fit(train)
modelFunk = algoFunk.fit(train)
# tee() duplicates the fold generator: one copy feeds MultiEval, the other
# is consumed here to assemble the ground-truth test frame
xf_dataset_batch, xf_dataset_test = tee(xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)))
truth = pd.concat([test for _, test in xf_dataset_test], ignore_index = True)

# batch sweep over several neighbourhood sizes / feature counts;
# results are written under the 'result' directory
# NOTE(review): item_knn, user_knn and funksvd appear to be imported
# elsewhere in the full file — confirm they are in scope here.
runner = batch.MultiEval('result', False, nprocs = 4)
runner.add_algorithms(
    [item_knn.ItemItem(10), item_knn.ItemItem(20), item_knn.ItemItem(30)],
    False,
    ['nnbrs']
)
runner.add_algorithms(
    [user_knn.UserUser(10), user_knn.UserUser(20), user_knn.UserUser(30)],
    True,
    ['nnbrs']
)
runner.add_algorithms(
    [funksvd.FunkSVD(40, damping = 0), funksvd.FunkSVD(50, damping = 5), funksvd.FunkSVD(60, damping = 10)],
    False,
    ['features', 'damping']
)
runner.add_datasets(xf_dataset_batch)
runner.run()

# load the run metadata produced by MultiEval and normalize the column name
runs = pd.read_parquet('result/runs.parquet', 
                       columns = ('AlgoClass','RunId','damping','features','nnbrs'))
runs.rename({'AlgoClass': 'Algorithm'}, axis = 'columns', inplace = True)

def extract_config(x):
    """Build a human-readable config label from a (damping, features, nnbrs) row.

    NOTE(review): this function appears truncated in this excerpt — the body
    ends after initializing `result` and `isnan` is imported but unused here;
    the remainder presumably formats the non-NaN fields into `result`.
    """
    from math import isnan
    
    # x is expected to be an iterable of the three hyper-parameter columns
    damping, features, nnbrs = x
    result = ''
Exemple #23
0
"""
Basic algorithm definitions as starting points.
"""

from lenskit.algorithms import item_knn, user_knn, als, funksvd
from lenskit.algorithms import basic

# Shared baseline algorithm instances, importable by evaluation scripts.
Bias = basic.Bias(damping=5)  # damped user/item bias baseline
Pop = basic.Popular()  # most-popular-items recommender
II = item_knn.ItemItem(20, save_nbrs=2500)  # item-item k-NN, 20 neighbours, capped neighbour storage
UU = user_knn.UserUser(30)  # user-user k-NN, 30 neighbours
ALS = als.BiasedMF(50)  # explicit-feedback ALS, 50 latent features
IALS = als.ImplicitMF(50)  # implicit-feedback ALS, 50 latent features
MFSGD = funksvd.FunkSVD(50)  # FunkSVD gradient-descent MF, 50 latent features
Exemple #24
0
# Algorithm roster for the comparison run; each variant differs only in its
# neighbourhood size / latent-feature count.
# NOTE(review): `random` shadows the stdlib module name and `eval` (below)
# shadows the builtin — harmless here but worth renaming upstream.
random = basic.Random()
popular = basic.Popular()
item_to_item_100 = item_knn.ItemItem(100)
item_to_item_200 = item_knn.ItemItem(200)
item_to_item_500 = item_knn.ItemItem(500)
user_to_user_100 = user_knn.UserUser(100)
user_to_user_200 = user_knn.UserUser(200)
user_to_user_500 = user_knn.UserUser(500)
biased_mf_50 = als.BiasedMF(50)
biased_mf_100 = als.BiasedMF(100)
biased_mf_200 = als.BiasedMF(200)
implicit_mf_50 = als.ImplicitMF(50)
implicit_mf_100 = als.ImplicitMF(100)
implicit_mf_200 = als.ImplicitMF(200)
funk_svd_mf_50 = funksvd.FunkSVD(50)
funk_svd_mf_100 = funksvd.FunkSVD(100)
funk_svd_mf_200 = funksvd.FunkSVD(200)
bayesian = BPR()
hierarchical_poisson_fact_50 = HPF(50)
hierarchical_poisson_fact_100 = HPF(100)
hierarchical_poisson_fact_200 = HPF(200)

# hold out 20% of the ratings for testing
# NOTE(review): ratings, NUM_OF_RECS, BPR, HPF and train_test_split are
# defined/imported outside this excerpt — confirm their origins.
train, test = train_test_split(ratings[['user', 'item', 'rating']],
                               test_size=0.2)

# register the dataset and each algorithm with a MultiEval batch run that
# writes recommendations under ../recs/cf
eval = batch.MultiEval('../recs/cf', recommend=NUM_OF_RECS)
eval.add_datasets((train, test), name='ml-1m')
eval.add_algorithms(random, name='random')
eval.add_algorithms(popular, name='popular')
eval.add_algorithms(item_to_item_100, name='item_to_item_100')