# Example no. 1
# 0
def test_interaction_ds():
    """Return the 'ml-100k' test dataset obtained from get_test_dataset."""
    dataset = get_test_dataset('ml-100k')
    return dataset
# Example no. 2
# 0
from DRecPy.Recommender import CDAE
from DRecPy.Dataset import get_train_dataset
from DRecPy.Dataset import get_test_dataset
from DRecPy.Evaluation.Processes import ranking_evaluation
import time

# Load the ml-100k train and test datasets.
ds_train = get_train_dataset('ml-100k')
ds_test = get_test_dataset('ml-100k')

# Time the training run with perf_counter(): unlike time.time(), it is
# monotonic, so the measured interval cannot be skewed by system clock
# adjustments that happen while the model is fitting.
start_train = time.perf_counter()
cdae = CDAE(hidden_factors=50, corruption_level=0.2, loss='bce', seed=10)
cdae.fit(ds_train,
         learning_rate=0.001,
         reg_rate=0.001,
         epochs=50,
         batch_size=64,
         neg_ratio=5)
print("Training took", time.perf_counter() - start_train)

# Run the ranking evaluation of the fitted model against the test dataset
# and print whatever ranking_evaluation returns.
print(
    ranking_evaluation(cdae,
                       ds_test,
                       k=[1, 5, 10],
                       novelty=True,
                       n_test_users=100,
                       n_pos_interactions=1,
                       n_neg_interactions=100,
                       generate_negative_pairs=True,
                       seed=10,
                       max_concurrent_threads=4,
                       verbose=True))
def test_get_test_dataset_4():
    """Check the size and the first record of the 'bx' test dataset."""
    dataset = get_test_dataset('bx')

    shape = (len(dataset), len(dataset.columns))
    assert shape == (120530, 4)

    first_record = next(dataset.values(columns=['interaction', 'item', 'user']))
    expected_record = {'interaction': 5, 'item': '0380711524', 'user': 276762}
    assert first_record == expected_record
def test_get_test_dataset_5():
    """Check the size and the first record of the 'ml-1m' test dataset."""
    dataset = get_test_dataset('ml-1m')

    shape = (len(dataset), len(dataset.columns))
    assert shape == (60400, 5)

    requested_columns = ['interaction', 'item', 'user', 'timestamp']
    first_record = next(dataset.values(columns=requested_columns))
    expected_record = {'interaction': 4, 'item': 938, 'timestamp': 978301752, 'user': 1}
    assert first_record == expected_record
def test_get_test_dataset_3():
    """With force_out_of_memory=True the result is a DatabaseInteractionDataset."""
    dataset = get_test_dataset('ml-100k', force_out_of_memory=True)
    is_database_backed = isinstance(dataset, DatabaseInteractionDataset)
    assert is_database_backed
def test_get_test_dataset_2():
    """Called with only the dataset name, the result is a MemoryInteractionDataset."""
    dataset = get_test_dataset('ml-100k')
    is_memory_backed = isinstance(dataset, MemoryInteractionDataset)
    assert is_memory_backed
def test_get_test_dataset_1():
    """Check the size and the first record of the 'ml-100k' test dataset."""
    dataset = get_test_dataset('ml-100k')

    shape = (len(dataset), len(dataset.columns))
    assert shape == (9430, 5)

    requested_columns = ['interaction', 'item', 'user', 'timestamp']
    first_record = next(dataset.values(columns=requested_columns))
    expected_record = {'interaction': 4, 'user': 1, 'item': 20, 'timestamp': 887431883}
    assert first_record == expected_record
def test_get_test_dataset_0():
    """An empty dataset name must raise FileNotFoundError with the expected message.

    The test fails explicitly when no exception is raised; without the
    ``else`` branch it would pass vacuously in that case.
    """
    try:
        get_test_dataset('')
    except FileNotFoundError as e:
        assert str(e) == '"" is not a valid dataset. Supported datasets: ml-100k, ml-1m, ml-10m, ml-20m, bx.'
    else:
        # Reached only when the call returned normally, i.e. nothing was raised.
        raise AssertionError('get_test_dataset("") did not raise FileNotFoundError')
# Example no. 9
# 0
from DRecPy.Dataset import get_train_dataset
from DRecPy.Dataset import get_test_dataset
from DRecPy.Dataset import get_full_dataset
from DRecPy.Dataset import available_datasets

print('Available datasets', available_datasets())

# Reading the ml-100k full dataset and prebuilt train and test datasets.
print('ml-100k full dataset', get_full_dataset('ml-100k'))
print('ml-100k train dataset', get_train_dataset('ml-100k'))
print('ml-100k test dataset', get_test_dataset('ml-100k'))

# Reading the ml-1m full dataset and generated train and test datasets using out of memory storage.
print('ml-1m full dataset', get_full_dataset('ml-1m', force_out_of_memory=True))
print('ml-1m train dataset', get_train_dataset('ml-1m', force_out_of_memory=True))
print('ml-1m test dataset', get_test_dataset('ml-1m', force_out_of_memory=True))

# Showcase some dataset operations
ds_ml = get_full_dataset('ml-100k')
print('Minimum rating value:', ds_ml.min('interaction'))
print('Unique rating values:', ds_ml.unique('interaction').values_list())

# Compute the maximum once, before apply() touches the column. Calling
# ds_ml.max() inside the lambda would re-evaluate it for every value, and the
# divisor could change mid-way if apply() rewrites the column incrementally.
max_rating = ds_ml.max('interaction')
ds_ml.apply('interaction', lambda x: x / max_rating)  # scale each rating by the maximum rating
print('New values', ds_ml.values_list()[:5])