def get_dataset(ds_name, path, is_generated=False, force_out_of_memory=False, verbose=True, **kwds):
    """Load the data found at ``path`` as an InteractionDataset.

    Parsing settings are taken from the ``DATASETS`` entry registered under
    ``ds_name``. When ``is_stored(ds_name)`` is falsy, the dataset is
    downloaded before being read.

    Args:
        ds_name: Key of the dataset in ``DATASETS``.
        path: Location of the data handed to InteractionDataset.
        is_generated: When True, a ',' delimiter is used and no ``has_header``
            argument is passed; otherwise both the delimiter and the header
            flag come from the dataset's registered options.
        force_out_of_memory: When True, ``in_memory=False`` is passed to
            InteractionDataset.
        verbose: Forwarded unchanged to InteractionDataset.
        **kwds: Additional keyword arguments forwarded to InteractionDataset.

    Returns:
        The InteractionDataset built from ``path``.
    """
    settings = DATASETS[ds_name]

    if not is_stored(ds_name):
        download_dataset(ds_name)

    # Only the delimiter and the header flag differ between the two kinds of
    # file; everything else is shared.
    delimiter = ',' if is_generated else settings.delimiter
    header_args = {} if is_generated else {'has_header': settings.has_header}

    return InteractionDataset(
        path,
        delimiter=delimiter,
        columns=settings.columns,
        encoding=settings.encoding,
        **header_args,
        in_memory=not force_out_of_memory,
        verbose=verbose,
        **kwds,
    )
def test_interactions_ds():
    """Return an InteractionDataset holding four (user, item, interaction) rows.

    Users 1 and 2 have one interaction each; user 3 has two.
    """
    column_names = ['user', 'item', 'interaction']
    records = [
        (1, 1, 2),
        (2, 4, 5),
        (3, 3, 3),
        (3, 6, 1),
    ]
    frame = pd.DataFrame(records, columns=column_names)
    return InteractionDataset.read_df(frame)
def interactions_ds():
    """Return an InteractionDataset with five timestamped interactions.

    Each row is (user, item, interaction, timestamp); user 1 contributes
    three rows and user 2 contributes two.
    """
    column_names = ['user', 'item', 'interaction', 'timestamp']
    records = [
        (1, 2, 3, 100),
        (1, 4, 5, 50),
        (1, 5, 2, 25),
        (2, 2, 5, 100),
        (2, 3, 2, 20),
    ]
    frame = pd.DataFrame(records, columns=column_names)
    return InteractionDataset.read_df(frame)
def interactions_ds():
    """Return a leave-k-out split of a seeded pseudo-random interaction set.

    Every (user, item) pair with user in range(50) and item in range(200) is
    kept when a draw of ``randint(0, 4)`` equals 0; each kept pair receives an
    integer interaction drawn with ``randint(-1, 5)``. The generator is seeded
    with 0, so the generated rows are the same on every call.

    Returns:
        Whatever ``leave_k_out`` returns for the generated dataset with
        ``k=5, min_user_interactions=0, last_timestamps=False, seed=10``.
    """
    rng = random.Random(0)

    rows = []
    for user in range(50):
        for item in range(200):
            # The inclusion draw must come before the value draw so the random
            # stream is consumed in a fixed, reproducible order.
            if rng.randint(0, 4) == 0:
                rows.append([user, item, rng.randint(-1, 5)])

    df = pd.DataFrame(rows, columns=['user', 'item', 'interaction'])
    # NOTE: the debug dump of df.values that used to be printed here was
    # removed; it only added noise to stdout.
    return leave_k_out(InteractionDataset.read_df(df), k=5, min_user_interactions=0,
                       last_timestamps=False, seed=10)
def train_interactions_ds():
    """Return an InteractionDataset with eight (user, item, interaction) rows.

    Users 1 and 3 have three interactions each; user 2 has two.
    """
    column_names = ['user', 'item', 'interaction']
    records = [
        (1, 2, 3),
        (1, 4, 5),
        (1, 5, 2),
        (2, 2, 5),
        (2, 3, 2),
        (3, 2, 2),
        (3, 5, 5),
        (3, 1, 1),
    ]
    frame = pd.DataFrame(records, columns=column_names)
    return InteractionDataset.read_df(frame)
from os import remove

import pandas as pd

from DRecPy.Dataset import InteractionDataset

# Example: build an InteractionDataset from a pandas DataFrame whose columns
# use custom names ('users', 'items', 'interactions').

# create file with sample dataset (header row followed by four interactions)
with open('tmp.csv', 'w') as f:
    f.write('users,items,interactions\n')
    f.write('"john","ps4",4.5\n')
    f.write('"patrick","xbox",4.1\n')
    f.write('"anna","brush",3.6\n')
    f.write('"david","tv",2.0\n')

try:
    # load dataset into memory
    df = pd.read_csv('tmp.csv')
    ds_memory = InteractionDataset.read_df(df, user_label='users', item_label='items',
                                           interaction_label='interactions')
    print('all values:', ds_memory.values_list())
finally:
    # Always delete the previously created sample dataset file, even when
    # loading or printing fails, so no stray tmp.csv is left behind.
    remove('tmp.csv')
from DRecPy.Recommender.Baseline import UserKNN from DRecPy.Dataset import InteractionDataset from DRecPy.Evaluation.Processes import ranking_evaluation from DRecPy.Evaluation.Splits import matrix_split from DRecPy.Evaluation.Metrics import Precision from DRecPy.Evaluation.Metrics import Recall from DRecPy.Evaluation.Metrics import NDCG ds = InteractionDataset('./cheRM_total.csv', columns=['user', 'item', 'interaction'], verbose=False) ds_train, ds_test = matrix_split(ds, min_user_interactions=20, user_test_ratio=0.2, item_test_ratio=0.2, seed=25, verbose=False) # cosine sim knn = UserKNN(k=10, m=0, sim_metric='cosine_cf', shrinkage=None, seed=25, use_averages=False, verbose=True) knn.fit(ds_train) evaluation = ranking_evaluation(knn, ds_test,
from DRecPy.Recommender.Baseline import UserKNN from DRecPy.Dataset import InteractionDataset from DRecPy.Evaluation.Processes import ranking_evaluation from DRecPy.Evaluation.Splits import matrix_split from DRecPy.Evaluation.Metrics import Precision from DRecPy.Evaluation.Metrics import Recall from DRecPy.Evaluation.Metrics import NDCG ds = InteractionDataset('./arm_total_1998_2019.csv', columns=['user', 'item', 'interaction'], verbose=False) ds_train, ds_test = matrix_split(ds, min_user_interactions=20, user_test_ratio=0.2, item_test_ratio=0.2, seed=25, verbose=False) # cosine sim knn = UserKNN(k=10, m=0, sim_metric='cosine_cf', shrinkage=None, seed=25, use_averages=False, verbose=True) knn.fit(ds_train) evaluation = ranking_evaluation(knn, ds_test,
from os import remove

from DRecPy.Dataset import InteractionDataset

# Example: load the same CSV file as an in-memory and as an out-of-memory
# InteractionDataset, then list, filter and rescale its values.

# create file with sample dataset (no header row, four interactions)
with open('tmp.csv', 'w') as f:
    f.write('"john","ps4",4.5\n')
    f.write('"patrick","xbox",4.1\n')
    f.write('"anna","brush",3.6\n')
    f.write('"david","tv",2.0\n')

try:
    # load dataset into memory
    ds_memory = InteractionDataset('tmp.csv', columns=['user', 'item', 'interaction'])
    print('all values:', ds_memory.values_list())
    print('filtered values:', ds_memory.select('interaction > 3.5').values_list())

    # Work on a copy and divide every interaction by the maximum interaction
    # of the unscaled dataset.
    ds_memory_scaled = ds_memory.copy()
    ds_memory_scaled.apply('interaction', lambda x: x / ds_memory.max('interaction'))
    print('all values scaled:', ds_memory_scaled.values_list())

    # load dataset out of memory
    ds_out_of_memory = InteractionDataset('tmp.csv', columns=['user', 'item', 'interaction'],
                                          in_memory=False)
    print('all values:', ds_out_of_memory.values_list())
    print('filtered values:', ds_out_of_memory.select('interaction > 3.5').values_list())
finally:
    # Always delete the previously created sample dataset file, even when one
    # of the steps above fails, so no stray tmp.csv is left behind.
    remove('tmp.csv')