from DRecPy.Recommender import RecommenderABC
from DRecPy.Dataset import get_train_dataset


# The class declaration and imports were missing from this snippet; the base class is
# assumed to be DRecPy's RecommenderABC, which the super() call and overridden hooks imply.
class TestRecommenderNonDeepLearning(RecommenderABC):

    def __init__(self, **kwds):
        super(TestRecommenderNonDeepLearning, self).__init__(**kwds)

    def _pre_fit(self, learning_rate, neg_ratio, reg_rate, **kwds):
        # used to declare variables and do the non-deep learning fit process, such as
        # computing similarities and neighbours for knn-based models
        self._info(f'doing pre-fit with learning_rate={learning_rate}, neg_ratio={neg_ratio}, reg_rate={reg_rate}')

    def _sample_batch(self, batch_size, **kwds):
        raise NotImplementedError  # since it's non-deep learning based, there's no need for batch training

    def _predict_batch(self, batch_samples, **kwds):
        raise NotImplementedError  # since it's non-deep learning based, there's no need for batch training

    def _compute_batch_loss(self, predictions, desired_values, **kwds):
        raise NotImplementedError  # since it's non-deep learning based, there's no need for batch training

    def _predict(self, uid, iid, **kwds):
        return 5  # predict for a (user, item) pair


ds_train = get_train_dataset('ml-100k', verbose=False)

print('TestRecommenderNonDeepLearning')
recommender = TestRecommenderNonDeepLearning(verbose=True)
recommender.fit(ds_train, epochs=2, batch_size=10)
print(recommender.predict(1, 1))
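# Optional sketch: besides predict(), DRecPy recommenders also expose ranking helpers
# built on top of _predict(). The recommend()/rank() signatures below are assumptions
# based on the library's public docs, not verified against the installed version.
print(recommender.recommend(1, 5))     # assumed: top-5 (score, item) recommendations for user 1
print(recommender.rank(1, [1, 2, 3]))  # assumed: rank the given candidate items for user 1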
def train_interaction_ds():
    return get_train_dataset('ml-100k')
from DRecPy.Recommender import CDAE
from DRecPy.Dataset import get_train_dataset
from DRecPy.Dataset import get_test_dataset
from DRecPy.Evaluation.Processes import ranking_evaluation
import time

ds_train = get_train_dataset('ml-100k')
ds_test = get_test_dataset('ml-100k')

start_train = time.time()
cdae = CDAE(hidden_factors=50, corruption_level=0.2, loss='bce', seed=10)
cdae.fit(ds_train, learning_rate=0.001, reg_rate=0.001, epochs=50, batch_size=64, neg_ratio=5)
print("Training took", time.time() - start_train)

print(ranking_evaluation(cdae, ds_test, k=[1, 5, 10], novelty=True, n_test_users=100,
                         n_pos_interactions=1, n_neg_interactions=100,
                         generate_negative_pairs=True, seed=10,
                         max_concurrent_threads=4, verbose=True))
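# Optional follow-up sketch: DRecPy also ships a predictive (rating-accuracy) evaluation
# process alongside ranking_evaluation. The import path and the skip_errors keyword below
# are assumptions and may need adjusting to the installed version.
from DRecPy.Evaluation.Processes import predict_evaluation

print(predict_evaluation(cdae, ds_test, skip_errors=True))  # e.g. error metrics over test ratings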
# Imports assumed for the tests below; the internal module paths for the two dataset
# implementations follow DRecPy's layout and may differ between versions.
from DRecPy.Dataset import get_train_dataset
from DRecPy.Dataset.db_dataset import DatabaseInteractionDataset
from DRecPy.Dataset.mem_dataset import MemoryInteractionDataset


def test_get_train_dataset_4():
    ret = get_train_dataset('bx')
    assert (len(ret), len(ret.columns)) == (845183, 4)
    assert next(ret.values(columns=['interaction', 'item', 'user'])) == \
           {'user': 276762, 'item': '034544003X', 'interaction': 0}
def test_get_train_dataset_5():
    ret = get_train_dataset('ml-1m')
    assert (len(ret), len(ret.columns)) == (939809, 5)
    assert next(ret.values(columns=['interaction', 'item', 'user', 'timestamp'])) == \
           {'user': 1, 'item': 1193, 'interaction': 5, 'timestamp': 978300760}
def test_get_train_dataset_3():
    ret = get_train_dataset('ml-100k', force_out_of_memory=True)
    assert isinstance(ret, DatabaseInteractionDataset)
def test_get_train_dataset_2():
    ret = get_train_dataset('ml-100k')
    assert isinstance(ret, MemoryInteractionDataset)
def test_get_train_dataset_1():
    ret = get_train_dataset('ml-100k')
    assert (len(ret), len(ret.columns)) == (90570, 5)
    assert next(ret.values(columns=['interaction', 'item', 'user', 'timestamp'])) == \
           {'user': 1, 'item': 1, 'interaction': 5, 'timestamp': 874965758}
def test_get_train_dataset_0():
    try:
        get_train_dataset('')
        assert False, 'expected a FileNotFoundError for an unknown dataset'  # fail if no exception was raised
    except FileNotFoundError as e:
        assert str(e) == '"" is not a valid dataset. Supported datasets: ml-100k, ml-1m, ml-10m, ml-20m, bx.'
from DRecPy.Dataset import get_train_dataset
from DRecPy.Dataset import get_test_dataset
from DRecPy.Dataset import get_full_dataset
from DRecPy.Dataset import available_datasets

print('Available datasets', available_datasets())

# Reading the ml-100k full dataset and prebuilt train and test datasets.
print('ml-100k full dataset', get_full_dataset('ml-100k'))
print('ml-100k train dataset', get_train_dataset('ml-100k'))
print('ml-100k test dataset', get_test_dataset('ml-100k'))

# Reading the ml-1m full dataset and generated train and test datasets using out of memory storage.
print('ml-1m full dataset', get_full_dataset('ml-1m', force_out_of_memory=True))
print('ml-1m train dataset', get_train_dataset('ml-1m', force_out_of_memory=True))
print('ml-1m test dataset', get_test_dataset('ml-1m', force_out_of_memory=True))

# Showcase some dataset operations
ds_ml = get_full_dataset('ml-100k')
print('Minimum rating value:', ds_ml.min('interaction'))
print('Unique rating values:', ds_ml.unique('interaction').values_list())
ds_ml.apply('interaction', lambda x: x / ds_ml.max('interaction'))  # normalize the rating value to [0, 1]
print('New values', ds_ml.values_list()[:5])
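# Optional sketch of row filtering on the same dataset: the select() query-string syntax
# and count_unique() used below are assumptions based on DRecPy's InteractionDataset docs
# and may need adjusting to the installed version.
ds_high = ds_ml.select('interaction > 0.8')  # keep interactions above 0.8 (after normalization)
print('High-interaction records:', len(ds_high))
print('Distinct users among them:', ds_high.count_unique('user'))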