def test_ranking_evaluation_3(model, interactions_ds):
    """Evaluation with k parameter set to a list."""
    assert ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=[1, 5, 10],
                              n_pos_interactions=None, n_neg_interactions=None,
                              generate_negative_pairs=False, novelty=False, verbose=False) == \
           {'HitRatio@1': 0.1953, 'HitRatio@10': 0.4107, 'HitRatio@5': 0.4107,
            'NDCG@1': 0.3968, 'NDCG@10': 0.4189, 'NDCG@5': 0.4189,
            'Precision@1': 0.7447, 'Precision@10': 0.7089, 'Precision@5': 0.7089,
            'Recall@1': 0.1953, 'Recall@10': 0.4107, 'Recall@5': 0.4107}
def test_ranking_evaluation_4(model, interactions_ds):
    """Evaluation with k parameter set to a list and generated negative pairs."""
    assert ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=[1, 2, 3],
                              n_pos_interactions=None, n_neg_interactions=20,
                              generate_negative_pairs=True, novelty=False, verbose=False) == \
           {'HitRatio@1': 0.0397, 'HitRatio@2': 0.0943, 'HitRatio@3': 0.1233,
            'NDCG@1': 0.0965, 'NDCG@2': 0.1249, 'NDCG@3': 0.1303,
            'Precision@1': 0.14, 'Precision@2': 0.16, 'Precision@3': 0.14,
            'Recall@1': 0.0397, 'Recall@2': 0.0943, 'Recall@3': 0.1233}
def test_ranking_evaluation_21(model, interactions_ds):
    """Evaluation with an invalid k value (< 0)."""
    try:
        ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=-1,
                           n_pos_interactions=None, n_neg_interactions=None,
                           generate_negative_pairs=False, novelty=False,
                           metrics=[HitRatio(), NDCG()], verbose=False)
        assert False
    except Exception as e:
        assert str(e) == 'k (-1) should be > 0.'
def test_ranking_evaluation_22(model, interactions_ds):
    """Invalid metrics value (not a list)."""
    try:
        ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=5,
                           n_pos_interactions=None, n_neg_interactions=None,
                           generate_negative_pairs=False, novelty=False, metrics={}, verbose=False)
        assert False
    except Exception as e:
        assert str(e) == 'Expected "metrics" argument to be a list and found <class \'dict\'>. ' \
                         'Should contain instances of RankingMetricABC.'
def test_ranking_evaluation_17(model, interactions_ds):
    """Evaluation with an invalid number of negative interactions (0)."""
    try:
        ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=[1, 2],
                           n_pos_interactions=None, n_neg_interactions=0,
                           generate_negative_pairs=False, novelty=False,
                           metrics=[HitRatio(), NDCG()], verbose=False)
        assert False
    except Exception as e:
        assert str(e) == 'The number of negative interactions (0) should be None or an integer > 0.'
def test_ranking_evaluation_23(model, interactions_ds):
    """Invalid metrics value (list with non-RankingMetricABC instances)."""
    fun = lambda x: 1
    try:
        ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=5,
                           n_pos_interactions=None, n_neg_interactions=None,
                           generate_negative_pairs=False, novelty=False, metrics=[fun],
                           verbose=False)
        assert False
    except Exception as e:
        assert str(e) == f'Expected metric {fun} to be an instance of type RankingMetricABC.'
def test_ranking_evaluation_19(model, interactions_ds):
    """Evaluation with an invalid combination of generate_negative_pairs and n_neg_interactions
    (generate_negative_pairs=True without a set value of n_neg_interactions)."""
    try:
        ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=[1, 2],
                           n_pos_interactions=None, n_neg_interactions=None,
                           generate_negative_pairs=True, novelty=False,
                           metrics=[HitRatio(), NDCG()], verbose=False)
        assert False
    except Exception as e:
        assert str(e) == 'Cannot generate negative interaction pairs when the number of negative interactions per ' \
                         'user is not defined. Either set generate_negative_pairs=False or define the ' \
                         'n_neg_interactions parameter.'
def test_ranking_evaluation_11(model, interactions_ds):
    """Evaluation with custom metrics."""
    assert ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=2,
                              n_pos_interactions=None, n_neg_interactions=None,
                              generate_negative_pairs=False, novelty=False,
                              metrics=[HitRatio(), NDCG()]) == \
           {'HitRatio@2': 0.3137, 'NDCG@2': 0.4093}
def test_ranking_evaluation_7(model, interactions_ds):
    """Evaluation with a limited number of test users."""
    assert ranking_evaluation(model, interactions_ds[1], n_test_users=10, k=2,
                              n_pos_interactions=None, n_neg_interactions=None,
                              generate_negative_pairs=False, novelty=False) == \
           {'HitRatio@2': 0.3383, 'NDCG@2': 0.4339, 'Precision@2': 0.75, 'Recall@2': 0.3383}
def test_ranking_evaluation_6(model, interactions_ds):
    """Evaluation with a limited number of positive interactions."""
    assert ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=2,
                              n_pos_interactions=1, n_neg_interactions=None,
                              generate_negative_pairs=False, novelty=False) == \
           {'HitRatio@2': 0.46, 'NDCG@2': 0.3858, 'Precision@2': 0.4487, 'Recall@2': 0.46}
def test_ranking_evaluation_5(model, interactions_ds):
    """Evaluation with novelty=True."""
    assert ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=2,
                              n_pos_interactions=None, n_neg_interactions=None,
                              generate_negative_pairs=False, novelty=True) == \
           {'HitRatio@2': 0.3137, 'NDCG@2': 0.4093, 'Precision@2': 0.7021, 'Recall@2': 0.3137}
def test_ranking_evaluation_10(model, interactions_ds):
    """Evaluation with a custom interaction threshold."""
    assert ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=2,
                              n_pos_interactions=None, n_neg_interactions=None,
                              generate_negative_pairs=False, novelty=False,
                              interaction_threshold=2) == \
           {'HitRatio@2': 0.3142, 'NDCG@2': 0.4093, 'Precision@2': 0.5638, 'Recall@2': 0.3142}
from DRecPy.Recommender.Baseline import UserKNN  # import path assumed from DRecPy's docs
from DRecPy.Dataset import get_train_dataset
from DRecPy.Dataset import get_test_dataset
from DRecPy.Evaluation.Processes import ranking_evaluation
from DRecPy.Evaluation.Metrics import Precision
from DRecPy.Evaluation.Metrics import Recall
from DRecPy.Evaluation.Metrics import NDCG

ds_train = get_train_dataset('ml-100k')
ds_test = get_test_dataset('ml-100k')

# cosine sim
knn = UserKNN(k=10, m=0, sim_metric='cosine_cf', shrinkage=None, seed=25, use_averages=False,
              verbose=True)
knn.fit(ds_train)
evaluation = ranking_evaluation(knn, ds_test, interaction_threshold=2, k=list(range(1, 11)),
                                generate_negative_pairs=False, n_pos_interactions=None,
                                n_neg_interactions=None, seed=25, verbose=True,
                                metrics=[Precision(), Recall(), NDCG()])
print('cosine sim', evaluation)

# jaccard sim
knn = UserKNN(k=10, m=0, sim_metric='jaccard', shrinkage=None, seed=25, use_averages=False,
              verbose=True)
knn.fit(ds_train)
evaluation = ranking_evaluation(knn, ds_test, interaction_threshold=2, k=list(range(1, 11)),
                                generate_negative_pairs=False, n_pos_interactions=None,
                                n_neg_interactions=None, seed=25, verbose=True,
                                metrics=[Precision(), Recall(), NDCG()])
print('jaccard sim', evaluation)
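# The cosine/jaccard comparison above duplicates every step except sim_metric; a sketch of the
# same experiment written as a loop, using only the parameter values already shown above:
for sim_metric in ['cosine_cf', 'jaccard']:
    knn = UserKNN(k=10, m=0, sim_metric=sim_metric, shrinkage=None, seed=25, use_averages=False,
                  verbose=True)
    knn.fit(ds_train)
    evaluation = ranking_evaluation(knn, ds_test, interaction_threshold=2, k=list(range(1, 11)),
                                    generate_negative_pairs=False, n_pos_interactions=None,
                                    n_neg_interactions=None, seed=25, verbose=True,
                                    metrics=[Precision(), Recall(), NDCG()])
    print(sim_metric, 'sim', evaluation)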
from DRecPy.Recommender import CDAE  # import paths assumed from DRecPy's docs
from DRecPy.Recommender.EarlyStopping import MaxValidationValueRule
from DRecPy.Dataset import get_train_dataset
from DRecPy.Dataset import get_test_dataset
from DRecPy.Evaluation.Processes import ranking_evaluation
from DRecPy.Evaluation.Splits import leave_k_out
from DRecPy.Evaluation.Metrics import NDCG
from DRecPy.Evaluation.Metrics import HitRatio
from DRecPy.Evaluation.Metrics import Precision
import time

ds_train = get_train_dataset('ml-100k')
ds_test = get_test_dataset('ml-100k')
ds_train, ds_val = leave_k_out(ds_train, k=1, min_user_interactions=10, seed=0)


def epoch_callback_fn(model):
    return {'val_' + metric: v for metric, v in
            ranking_evaluation(model, ds_val, n_pos_interactions=1, n_neg_interactions=100,
                               generate_negative_pairs=True, k=10, verbose=False, seed=10,
                               metrics=[HitRatio(), NDCG()]).items()}


start_train = time.time()
cdae = CDAE(hidden_factors=50, corruption_level=0.2, loss='bce', seed=10)
cdae.fit(ds_train, learning_rate=0.001, reg_rate=0.001, epochs=100, batch_size=64, neg_ratio=5,
         epoch_callback_fn=epoch_callback_fn, epoch_callback_freq=10,
         early_stopping_rule=MaxValidationValueRule('val_HitRatio'), early_stopping_freq=10)
print("Training took", time.time() - start_train)

print(ranking_evaluation(cdae, ds_test, k=[1, 5, 10], novelty=True, n_pos_interactions=1,
                         n_neg_interactions=100, generate_negative_pairs=True, seed=10,
                         metrics=[HitRatio(), NDCG(), Precision()], max_concurrent_threads=4,
                         verbose=True))
def test_ranking_evaluation_12(model, interactions_ds):
    """Evaluation with custom metrics and k set to a list."""
    assert ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=[1, 2],
                              n_pos_interactions=None, n_neg_interactions=None,
                              generate_negative_pairs=False, novelty=False,
                              metrics=[HitRatio(), NDCG()], verbose=False) == \
           {'HitRatio@1': 0.1953, 'HitRatio@2': 0.3137, 'NDCG@1': 0.3968, 'NDCG@2': 0.4093}
def test_ranking_evaluation_8(model):
    """Evaluation on the train set (no test set provided)."""
    assert ranking_evaluation(model, n_test_users=None, k=2, n_pos_interactions=None,
                              n_neg_interactions=None, generate_negative_pairs=False,
                              novelty=False) == \
           {'HitRatio@2': 0.0717, 'NDCG@2': 0.3845, 'Precision@2': 0.88, 'Recall@2': 0.0717}
from DRecPy.Recommender import DMF  # import path assumed from DRecPy's docs
from DRecPy.Dataset import get_train_dataset
from DRecPy.Dataset import get_test_dataset
from DRecPy.Evaluation.Processes import ranking_evaluation
from DRecPy.Evaluation.Metrics import HitRatio
from DRecPy.Evaluation.Metrics import NDCG
import time

ds_train = get_train_dataset('ml-100k')
ds_test = get_test_dataset('ml-100k')

# Binarized copies of the train/test sets, used for the non-NCE run below.
ds_train_bin = ds_train.copy()
ds_train_bin.apply('interaction', lambda x: 1)
ds_test_bin = ds_test.copy()
ds_test_bin.apply('interaction', lambda x: 1)

for nce in [True, False]:
    print('NCE =', nce)
    start_train = time.time()
    dmf = DMF(use_nce=nce, user_factors=[128, 64], item_factors=[128, 64], seed=10)
    dmf.fit(ds_train if nce else ds_train_bin, epochs=50, batch_size=256, learning_rate=0.001,
            reg_rate=0.0001, neg_ratio=5)
    print("Training took", time.time() - start_train)
    print(ranking_evaluation(dmf, ds_test if nce else ds_test_bin, n_pos_interactions=1,
                             n_neg_interactions=100, generate_negative_pairs=True, novelty=True,
                             k=list(range(1, 11)), metrics=[HitRatio(), NDCG()], seed=10))
def test_ranking_evaluation_1(model, interactions_ds):
    """Evaluation with generated negative pairs."""
    assert ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=2,
                              n_pos_interactions=None, n_neg_interactions=20,
                              generate_negative_pairs=True, novelty=False) == \
           {'HitRatio@2': 0.0943, 'NDCG@2': 0.1249, 'Precision@2': 0.16, 'Recall@2': 0.0943}
def test_ranking_evaluation_2(model, interactions_ds):
    """Evaluation with a limited number of negative pairs."""
    assert ranking_evaluation(model, interactions_ds[1], n_test_users=None, k=2,
                              n_pos_interactions=None, n_neg_interactions=1,
                              generate_negative_pairs=False, novelty=False) == \
           {'HitRatio@2': 0.3337, 'NDCG@2': 0.4341, 'Precision@2': 0.8111, 'Recall@2': 0.3337}
from DRecPy.Recommender import CDAE  # import path assumed from DRecPy's docs
from DRecPy.Dataset import get_train_dataset
from DRecPy.Dataset import get_test_dataset
from DRecPy.Evaluation.Processes import ranking_evaluation
import time

ds_train = get_train_dataset('ml-100k')
ds_test = get_test_dataset('ml-100k')

start_train = time.time()
cdae = CDAE(hidden_factors=50, corruption_level=0.2, loss='bce', seed=10)
cdae.fit(ds_train, learning_rate=0.001, reg_rate=0.001, epochs=50, batch_size=64, neg_ratio=5)
print("Training took", time.time() - start_train)

print(ranking_evaluation(cdae, ds_test, k=[1, 5, 10], novelty=True, n_test_users=100,
                         n_pos_interactions=1, n_neg_interactions=100,
                         generate_negative_pairs=True, seed=10, max_concurrent_threads=4,
                         verbose=True))
def test_ranking_evaluation_9(model):
    """Train evaluation with novelty=True should result in all 0s."""
    assert ranking_evaluation(model, n_test_users=None, k=2, n_pos_interactions=None,
                              n_neg_interactions=None, generate_negative_pairs=False,
                              novelty=True) == \
           {'HitRatio@2': 0.0, 'NDCG@2': 0.0, 'Precision@2': 0.0, 'Recall@2': 0.0}
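# The tests above rely on two pytest fixtures, `model` and `interactions_ds`, defined elsewhere
# (e.g. in a conftest.py). A minimal sketch of what they might look like - the dataset split and
# the choice of UserKNN are assumptions inferred from the interactions_ds[1] indexing and the
# examples above, not the project's actual fixture definitions:
import pytest
from DRecPy.Dataset import get_train_dataset
from DRecPy.Evaluation.Splits import leave_k_out
from DRecPy.Recommender.Baseline import UserKNN  # import path assumed from DRecPy's docs


@pytest.fixture(scope='module')
def interactions_ds():
    ds = get_train_dataset('ml-100k')
    return leave_k_out(ds, k=1, min_user_interactions=10, seed=0)  # (train, test) tuple


@pytest.fixture(scope='module')
def model(interactions_ds):
    knn = UserKNN(k=10, seed=25, verbose=False)
    knn.fit(interactions_ds[0])  # fit on the train split; the tests evaluate on interactions_ds[1]
    return knn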