コード例 #1
0
def test_leave_k_out_5(interactions_ds):
    """Test if error is thrown with an invalid value of max_concurrent_threads (negative).

    Calls leave_k_out with max_concurrent_threads=-1 and compares the message of
    the raised exception with the expected text. The test fails when no
    exception is raised at all.
    """
    try:
        leave_k_out(interactions_ds, max_concurrent_threads=-1)
    except Exception as e:
        assert str(
            e) == 'The value of max_concurrent_threads (-1) must be > 0.'
    else:
        # Without this branch the test would pass silently whenever the call
        # succeeds, hiding a missing validation.
        raise AssertionError(
            'leave_k_out did not raise for max_concurrent_threads=-1.')
コード例 #2
0
def test_leave_k_out_3(interactions_ds):
    """Test if error is thrown with an invalid value of k (ratio variant with k higher than 1).

    Calls leave_k_out with k=1.5 and compares the message of the raised
    exception with the expected text. The test fails when no exception is
    raised at all.
    """
    try:
        leave_k_out(interactions_ds, 1.5)
    except Exception as e:
        assert str(e) == 'The k parameter should be in the (0, 1) range when it\'s used as the percentage of ' \
                         'interactions to sample to the test set, per user. Current value: 1.5'
    else:
        # Without this branch the test would pass silently whenever the call
        # succeeds, hiding a missing validation.
        raise AssertionError('leave_k_out did not raise for k=1.5.')
コード例 #3
0
def test_leave_k_out_7(interactions_ds):
    """Fixed k variant with k > 1 (k=2, seed=0).

    Intent stated by the test: users where #items <= k should be ignored.
    Both resulting splits are compared against hard-coded records.
    """
    expected_train = [
        [1, 4, 5, 50, 1],
        [2, 2, 5, 100, 3],
        [2, 3, 2, 20, 4],
    ]
    expected_test = [
        [1, 2, 3, 100, 0],
        [1, 5, 2, 25, 2],
    ]

    train_ds, test_ds = leave_k_out(interactions_ds, k=2, seed=0)

    assert expected_train == train_ds.values_list(to_list=True)
    assert expected_test == test_ds.values_list(to_list=True)
コード例 #4
0
def test_leave_k_out_6(interactions_ds):
    """Fixed k variant with k = 1 (seed=0).

    Both resulting splits are compared against hard-coded records.
    """
    expected_train = [
        [1, 4, 5, 50, 1],
        [1, 5, 2, 25, 2],
        [2, 3, 2, 20, 4],
    ]
    expected_test = [
        [1, 2, 3, 100, 0],
        [2, 2, 5, 100, 3],
    ]

    train_ds, test_ds = leave_k_out(interactions_ds, k=1, seed=0)

    assert expected_train == train_ds.values_list(to_list=True)
    assert expected_test == test_ds.values_list(to_list=True)
コード例 #5
0
def test_leave_k_out_14(interactions_ds):
    """Ratio k variant where the value of k (0.3) results in no sampled records.

    All records are expected to stay in the train split and the test split is
    expected to be empty.
    """
    expected_train = [
        [1, 2, 3, 100, 0],
        [1, 4, 5, 50, 1],
        [1, 5, 2, 25, 2],
        [2, 2, 5, 100, 3],
        [2, 3, 2, 20, 4],
    ]
    expected_test = []

    train_ds, test_ds = leave_k_out(interactions_ds, k=0.3, seed=0)

    assert expected_train == train_ds.values_list(to_list=True)
    assert expected_test == test_ds.values_list(to_list=True)
コード例 #6
0
def test_leave_k_out_15(interactions_ds):
    """Ratio k variant (k=0.4) with min_user_interactions > 1.

    Intent stated by the test: users without at least min_user_interactions
    records should be removed from both the train and the test set.
    """
    expected_train = [
        [1, 4, 5, 50, 1],
        [1, 5, 2, 25, 2],
    ]
    expected_test = [
        [1, 2, 3, 100, 0],
    ]

    train_ds, test_ds = leave_k_out(
        interactions_ds, k=0.4, min_user_interactions=3, seed=0)

    assert expected_train == train_ds.values_list(to_list=True)
    assert expected_test == test_ds.values_list(to_list=True)
コード例 #7
0
def test_leave_k_out_16(interactions_ds):
    """Ratio k variant (k=0.5) with last_timestamps = True.

    Both resulting splits are compared against hard-coded records.
    """
    expected_train = [
        [1, 2, 3, 100, 0],
        [1, 4, 5, 50, 1],
        [2, 2, 5, 100, 3],
    ]
    expected_test = [
        [1, 5, 2, 25, 2],
        [2, 3, 2, 20, 4],
    ]

    train_ds, test_ds = leave_k_out(
        interactions_ds, k=0.5, last_timestamps=True, seed=0)

    assert expected_train == train_ds.values_list(to_list=True)
    assert expected_test == test_ds.values_list(to_list=True)
コード例 #8
0
def interactions_ds():
    """Build a pseudo-random interaction table and return its leave_k_out split.

    A random.Random(0) generator walks every (user, item) pair for 50 users and
    200 items. A pair is kept when a draw from randint(0, 4) equals 0, and each
    kept pair gets an interaction value drawn from randint(-1, 5). The rows are
    loaded through InteractionDataset.read_df and split with leave_k_out
    (k=5, min_user_interactions=0, last_timestamps=False, seed=10).

    Returns:
        Whatever leave_k_out returns for the generated dataset.

    NOTE(review): this looks like a test fixture whose decorator is not visible
    here - confirm against the full file.
    """
    rng = random.Random(0)

    rows = []
    for user in range(50):
        for item in range(200):
            # The keep/skip draw must come before the interaction draw so the
            # generator is consumed in the same order for every pair.
            if rng.randint(0, 4) != 0:
                continue
            rows.append([user, item, rng.randint(-1, 5)])

    df = pd.DataFrame(rows, columns=['user', 'item', 'interaction'])
    # Dumps the generated rows to stdout.
    print(df.values)

    full_ds = InteractionDataset.read_df(df)
    return leave_k_out(full_ds,
                       k=5,
                       min_user_interactions=0,
                       last_timestamps=False,
                       seed=10)
コード例 #9
0
def test_leave_k_out_17(interactions_ds_timestamp_label):
    """Ratio k variant (k=0.5) with last_timestamps = True and a custom timestamp label.

    The timestamp column is selected through
    timestamp_label='custom_timestamp_label'. Both resulting splits are
    compared against hard-coded records.
    """
    expected_train = [
        [1, 2, 3, 100, 0],
        [1, 4, 5, 50, 1],
        [2, 2, 5, 100, 3],
    ]
    expected_test = [
        [1, 5, 2, 25, 2],
        [2, 3, 2, 20, 4],
    ]

    train_ds, test_ds = leave_k_out(
        interactions_ds_timestamp_label,
        k=0.5,
        last_timestamps=True,
        timestamp_label='custom_timestamp_label',
        seed=0)

    assert expected_train == train_ds.values_list(to_list=True)
    assert expected_test == test_ds.values_list(to_list=True)
コード例 #10
0
def get_test_dataset(ds_name, force_out_of_memory=False, verbose=True, **kwds):
    """Return the test split of a named dataset.

    When the dataset declares its own test file, that file is loaded. Otherwise
    a previously generated '<ds_name>_test.gen' file is loaded if it exists.
    Failing that, the full dataset is loaded and split with leave_k_out()
    (k=10, min_user_interactions=10, seed=10, so the split is deterministic),
    and both generated splits are saved next to the dataset for future calls.
    Per the original documentation, loading might download the dataset if it
    hasn't been downloaded before.

    Args:
        ds_name: A string with the name of the requested dataset.
            It must be a key of DATASETS, otherwise an error is raised.
        force_out_of_memory: A boolean indicating whether to force dataset loading to out of memory.
            Default: False.
        verbose: A boolean indicating whether to log info messages or not. Default: True.
        **kwds: Extra keyword arguments forwarded unchanged to get_dataset().

    Returns:
        A InteractionDataset containing the test dataset.

    Raises:
        FileNotFoundError: If ds_name is not a key of DATASETS.
    """
    if ds_name not in DATASETS:
        raise FileNotFoundError(f'"{ds_name}" is not a valid dataset. Supported datasets: {", ".join(available_datasets())}.')

    # Loader options shared by every get_dataset() call below.
    load_options = dict(force_out_of_memory=force_out_of_memory, verbose=verbose, **kwds)

    ds_options = DATASETS[ds_name]

    # Datasets that declare their own test file are loaded directly.
    if ds_options.test_file is not None:
        test_path = os.path.join(get_dataset_path(ds_name), ds_options.test_file)
        return get_dataset(ds_name, test_path, **load_options)

    # No dedicated test file: reuse a previously generated split when present.
    generated_path = os.path.join(get_dataset_path(ds_name), ds_name + '_test.gen')
    if os.path.exists(generated_path):
        return get_dataset(ds_name, generated_path, is_generated=True, **load_options)

    # Nothing generated yet: split the full dataset now.
    full_path = os.path.join(get_dataset_path(ds_name), ds_options.full_file)
    full_ds = get_dataset(ds_name, full_path, **load_options)
    train_ds, test_ds = leave_k_out(full_ds, k=10, min_user_interactions=10, seed=10)

    # Persist both splits so later calls can load them instead of re-splitting.
    train_ds.save(os.path.join(get_dataset_path(ds_name), ds_name + '_train.gen'))
    test_ds.save(os.path.join(get_dataset_path(ds_name), ds_name + '_test.gen'))
    return test_ds
コード例 #11
0
ファイル: cdae_validation.py プロジェクト: lasigeBioTM/DRecPy
from DRecPy.Recommender import CDAE
from DRecPy.Recommender.EarlyStopping import MaxValidationValueRule
from DRecPy.Dataset import get_train_dataset
from DRecPy.Dataset import get_test_dataset
from DRecPy.Evaluation.Processes import ranking_evaluation
from DRecPy.Evaluation.Splits import leave_k_out
from DRecPy.Evaluation.Metrics import NDCG
from DRecPy.Evaluation.Metrics import HitRatio
from DRecPy.Evaluation.Metrics import Precision
import time


# Load the ml-100k train and test datasets (train first, then test).
ds_train, ds_test = get_train_dataset('ml-100k'), get_test_dataset('ml-100k')

# Split the train dataset once more; ds_val is the dataset evaluated by the
# epoch callback defined below.
ds_train, ds_val = leave_k_out(
    ds_train,
    k=1,
    min_user_interactions=10,
    seed=0,
)


def epoch_callback_fn(model):
    """Evaluate `model` on ds_val and return the results with 'val_'-prefixed keys.

    Runs ranking_evaluation with the HitRatio and NDCG metrics
    (n_pos_interactions=1, n_neg_interactions=100, generate_negative_pairs=True,
    k=10, seed=10, verbose=False).

    Args:
        model: The model passed on to ranking_evaluation.

    Returns:
        A dict mapping 'val_' + <metric key> to the value reported by
        ranking_evaluation for that key.
    """
    scores = ranking_evaluation(
        model,
        ds_val,
        n_pos_interactions=1,
        n_neg_interactions=100,
        generate_negative_pairs=True,
        k=10,
        verbose=False,
        seed=10,
        metrics=[HitRatio(), NDCG()],
    )

    prefixed = {}
    for metric, value in scores.items():
        prefixed['val_' + metric] = value
    return prefixed


# Wall-clock reference for the whole training run.
start_train = time.time()

cdae = CDAE(
    hidden_factors=50,
    corruption_level=0.2,
    loss='bce',
    seed=10,
)

# The epoch callback and the early stopping rule are both configured with a
# frequency of 10. The rule is keyed on 'val_HitRatio'.
# NOTE(review): presumably this matches the key produced by epoch_callback_fn
# for the HitRatio metric - confirm.
cdae.fit(
    ds_train,
    learning_rate=0.001,
    reg_rate=0.001,
    epochs=100,
    batch_size=64,
    neg_ratio=5,
    epoch_callback_fn=epoch_callback_fn,
    epoch_callback_freq=10,
    early_stopping_rule=MaxValidationValueRule('val_HitRatio'),
    early_stopping_freq=10,
)

print("Training took", time.time() - start_train)
コード例 #12
0
ファイル: dmf.py プロジェクト: lasigeBioTM/DRecPy
from DRecPy.Recommender import DMF
from DRecPy.Dataset import get_full_dataset
from DRecPy.Evaluation.Splits import leave_k_out
from DRecPy.Evaluation.Processes import ranking_evaluation
from DRecPy.Evaluation.Metrics import NDCG
from DRecPy.Evaluation.Metrics import HitRatio
import time

# Load the full ml-100k dataset and split it with leave_k_out
# (k=1, last_timestamps=True, seed=10).
ds = get_full_dataset('ml-100k')
ds_train, ds_test = leave_k_out(
    ds,
    k=1,
    last_timestamps=True,
    seed=10,
)

# Copies of both splits with the 'interaction' column mapped to the constant 1.
# NOTE(review): the return value of apply() is discarded, so this relies on
# apply() modifying the dataset in place - confirm.
ds_train_bin = ds_train.copy()
ds_train_bin.apply('interaction', lambda x: 1)
ds_test_bin = ds_test.copy()
ds_test_bin.apply('interaction', lambda x: 1)

# Train one model per use_nce setting: the original interactions are used when
# nce is True, the constant-1 copy otherwise.
for nce in (True, False):
    print('NCE =', nce)
    start_train = time.time()

    dmf = DMF(
        use_nce=nce,
        user_factors=[128, 64],
        item_factors=[128, 64],
        seed=10,
    )
    dmf.fit(
        ds_train if nce else ds_train_bin,
        epochs=50,
        batch_size=256,
        learning_rate=0.001,
        reg_rate=0.0001,
        neg_ratio=5,
    )

    print("Training took", time.time() - start_train)
コード例 #13
0
from DRecPy.Evaluation.Splits import leave_k_out
from DRecPy.Dataset import get_full_dataset
import time

# Load the complete ml-100k dataset and show it.
dataset = get_full_dataset("ml-100k")
print('Full dataset', dataset)

# leave_k_out moves k interactions of each user out of the train set.
# Users without k interactions keep all of their interactions in the train set,
# while users with fewer than min_user_interactions are dropped from both sets.
start_t = time.time()
dataset_train, dataset_test = leave_k_out(
    dataset,
    k=10,
    min_user_interactions=20,
)
print(f'Splitting complete. Took: {time.time() - start_t}s')

# Show both resulting splits.
print('Train dataset', dataset_train)
print('Test dataset', dataset_test)
コード例 #14
0
def test_leave_k_out_2(interactions_ds):
    """Test if error is thrown with an invalid value of k (ratio variant with negative k).

    Calls leave_k_out with k=-0.5 and compares the message of the raised
    exception with the expected text. The test fails when no exception is
    raised at all.
    """
    try:
        leave_k_out(interactions_ds, -0.5)
    except Exception as e:
        assert str(e) == 'The value of k (-0.5) must be > 0.'
    else:
        # Without this branch the test would pass silently whenever the call
        # succeeds, hiding a missing validation.
        raise AssertionError('leave_k_out did not raise for k=-0.5.')
コード例 #15
0
def test_leave_k_out_1(interactions_ds):
    """Test if error is thrown with an invalid value of k (negative).

    Calls leave_k_out with k=-999 and compares the message of the raised
    exception with the expected text. The test fails when no exception is
    raised at all.
    """
    try:
        leave_k_out(interactions_ds, -999)
    except Exception as e:
        assert str(e) == 'The value of k (-999) must be > 0.'
    else:
        # Without this branch the test would pass silently whenever the call
        # succeeds, hiding a missing validation.
        raise AssertionError('leave_k_out did not raise for k=-999.')
コード例 #16
0
def test_leave_k_out_0(interactions_ds):
    """Test if error is thrown with an invalid value of k (zero).

    Calls leave_k_out with k=0 and compares the message of the raised exception
    with the expected text. The test fails when no exception is raised at all.
    """
    try:
        leave_k_out(interactions_ds, 0)
    except Exception as e:
        assert str(e) == 'The value of k (0) must be > 0.'
    else:
        # Without this branch the test would pass silently whenever the call
        # succeeds, hiding a missing validation.
        raise AssertionError('leave_k_out did not raise for k=0.')