コード例 #1
0
def get_dataset(ds_name, path, is_generated=False, force_out_of_memory=False, verbose=True, **kwds):
    """Build an InteractionDataset from the file at `path` with the settings registered for `ds_name`.

    The settings are looked up in DATASETS. When is_stored reports that the
    dataset is not stored yet, download_dataset is called before the file
    is read.

    Args:
        ds_name: Key used to look up the dataset settings in DATASETS.
        path: Location handed to InteractionDataset as its first argument.
        is_generated: When true, ',' is used as the delimiter and no
            has_header option is forwarded; otherwise the delimiter and
            has_header values come from the dataset settings.
        force_out_of_memory: When true, in_memory=False is passed on.
        verbose: Forwarded unchanged to InteractionDataset.
        **kwds: Extra keyword arguments forwarded to InteractionDataset.

    Returns:
        The InteractionDataset created from `path`.
    """
    settings = DATASETS[ds_name]

    already_stored = is_stored(ds_name)
    if not already_stored:
        download_dataset(ds_name)

    if not is_generated:
        # Regular files follow the format registered for the dataset.
        return InteractionDataset(
            path,
            delimiter=settings.delimiter,
            columns=settings.columns,
            encoding=settings.encoding,
            has_header=settings.has_header,
            in_memory=not force_out_of_memory,
            verbose=verbose,
            **kwds,
        )

    # Generated files are read as comma separated.
    return InteractionDataset(
        path,
        delimiter=',',
        columns=settings.columns,
        encoding=settings.encoding,
        in_memory=not force_out_of_memory,
        verbose=verbose,
        **kwds,
    )
コード例 #2
0
def test_interactions_ds():
    """Return an InteractionDataset holding four (user, item, interaction) records."""
    labels = ['user', 'item', 'interaction']
    records = [
        [1, 1, 2],
        [2, 4, 5],
        [3, 3, 3],
        [3, 6, 1],
    ]
    frame = pd.DataFrame(records, columns=labels)
    return InteractionDataset.read_df(frame)
コード例 #3
0
def interactions_ds():
    """Return an InteractionDataset holding five (user, item, interaction, timestamp) records."""
    labels = ['user', 'item', 'interaction', 'timestamp']
    records = [
        [1, 2, 3, 100],
        [1, 4, 5, 50],
        [1, 5, 2, 25],
        [2, 2, 5, 100],
        [2, 3, 2, 20],
    ]
    frame = pd.DataFrame(records, columns=labels)
    return InteractionDataset.read_df(frame)
コード例 #4
0
def interactions_ds():
    """Return the leave_k_out split of a pseudo-random interaction dataset.

    Candidate (user, item) pairs cover 50 users and 200 items. A pair is
    kept when a draw from randint(0, 4) equals 0, and a kept pair gets an
    interaction value drawn from randint(-1, 5). The generator is seeded
    with 0, so the produced frame is the same on every call.
    """
    rng = random.Random(0)

    records = []
    for user in range(50):
        for item in range(200):
            # The filter draw must happen before the value draw to keep
            # the random sequence identical for every record.
            if rng.randint(0, 4) != 0:
                continue
            records.append([user, item, rng.randint(-1, 5)])

    frame = pd.DataFrame(records, columns=['user', 'item', 'interaction'])
    print(frame.values)

    dataset = InteractionDataset.read_df(frame)
    return leave_k_out(dataset, k=5, min_user_interactions=0, last_timestamps=False, seed=10)
コード例 #5
0
def train_interactions_ds():
    """Return an InteractionDataset holding eight (user, item, interaction) records."""
    labels = ['user', 'item', 'interaction']
    records = [
        [1, 2, 3],
        [1, 4, 5],
        [1, 5, 2],
        [2, 2, 5],
        [2, 3, 2],
        [3, 2, 2],
        [3, 5, 5],
        [3, 1, 1],
    ]
    frame = pd.DataFrame(records, columns=labels)
    return InteractionDataset.read_df(frame)
コード例 #6
0
from DRecPy.Dataset import InteractionDataset
import pandas as pd
from os import remove

# Create a file with the sample dataset; the first row holds the column labels.
with open('tmp.csv', 'w') as f:
    f.write('users,items,interactions\n')
    f.write('"john","ps4",4.5\n')
    f.write('"patrick","xbox",4.1\n')
    f.write('"anna","brush",3.6\n')
    f.write('"david","tv",2.0\n')

try:
    # Load the file into a DataFrame and build the dataset from it, mapping
    # the custom column labels onto the user/item/interaction roles.
    df = pd.read_csv('tmp.csv')
    ds_memory = InteractionDataset.read_df(df, user_label='users', item_label='items', interaction_label='interactions')
    print('all values:', ds_memory.values_list())
finally:
    # Delete the sample file even when loading or printing fails, so a
    # failed run does not leave tmp.csv behind.
    remove('tmp.csv')
コード例 #7
0
from DRecPy.Recommender.Baseline import UserKNN
from DRecPy.Dataset import InteractionDataset
from DRecPy.Evaluation.Processes import ranking_evaluation
from DRecPy.Evaluation.Splits import matrix_split
from DRecPy.Evaluation.Metrics import Precision
from DRecPy.Evaluation.Metrics import Recall
from DRecPy.Evaluation.Metrics import NDCG

# Load the interactions file, labelling its columns as user, item and interaction.
ds = InteractionDataset(
    './cheRM_total.csv',
    columns=['user', 'item', 'interaction'],
    verbose=False,
)

# Split the dataset into two parts with matrix_split, using a fixed seed.
ds_train, ds_test = matrix_split(
    ds,
    min_user_interactions=20,
    user_test_ratio=0.2,
    item_test_ratio=0.2,
    seed=25,
    verbose=False,
)

# UserKNN configured with the 'cosine_cf' similarity metric, fitted on the first part.
knn = UserKNN(
    k=10,
    m=0,
    sim_metric='cosine_cf',
    shrinkage=None,
    seed=25,
    use_averages=False,
    verbose=True,
)
knn.fit(ds_train)

evaluation = ranking_evaluation(knn,
                                ds_test,
コード例 #8
0
ファイル: arm_knn.py プロジェクト: lasigeBioTM/DRecPy
from DRecPy.Recommender.Baseline import UserKNN
from DRecPy.Dataset import InteractionDataset
from DRecPy.Evaluation.Processes import ranking_evaluation
from DRecPy.Evaluation.Splits import matrix_split
from DRecPy.Evaluation.Metrics import Precision
from DRecPy.Evaluation.Metrics import Recall
from DRecPy.Evaluation.Metrics import NDCG

# Load the interactions file, labelling its columns as user, item and interaction.
ds = InteractionDataset(
    './arm_total_1998_2019.csv',
    columns=['user', 'item', 'interaction'],
    verbose=False,
)

# Split the dataset into two parts with matrix_split, using a fixed seed.
ds_train, ds_test = matrix_split(
    ds,
    min_user_interactions=20,
    user_test_ratio=0.2,
    item_test_ratio=0.2,
    seed=25,
    verbose=False,
)

# UserKNN configured with the 'cosine_cf' similarity metric, fitted on the first part.
knn = UserKNN(
    k=10,
    m=0,
    sim_metric='cosine_cf',
    shrinkage=None,
    seed=25,
    use_averages=False,
    verbose=True,
)
knn.fit(ds_train)

evaluation = ranking_evaluation(knn,
                                ds_test,
コード例 #9
0
ファイル: custom_datasets.py プロジェクト: lasigeBioTM/DRecPy
from DRecPy.Dataset import InteractionDataset
from os import remove

# Create a file with the sample dataset (no header row is written).
with open('tmp.csv', 'w') as f:
    f.write('"john","ps4",4.5\n')
    f.write('"patrick","xbox",4.1\n')
    f.write('"anna","brush",3.6\n')
    f.write('"david","tv",2.0\n')

try:
    # Load the dataset into memory.
    ds_memory = InteractionDataset('tmp.csv', columns=['user', 'item', 'interaction'])
    print('all values:', ds_memory.values_list())
    print('filtered values:', ds_memory.select('interaction > 3.5').values_list())

    # Scale a copy by the maximum interaction of the unscaled dataset. The
    # maximum is looked up once instead of on every call of the lambda.
    ds_memory_scaled = ds_memory.copy()
    max_interaction = ds_memory.max('interaction')
    ds_memory_scaled.apply('interaction', lambda x: x / max_interaction)
    print('all values scaled:', ds_memory_scaled.values_list())

    # Load the same file with in_memory=False.
    ds_out_of_memory = InteractionDataset('tmp.csv', columns=['user', 'item', 'interaction'], in_memory=False)
    print('all values:', ds_out_of_memory.values_list())
    print('filtered values:', ds_out_of_memory.select('interaction > 3.5').values_list())
finally:
    # Delete the sample file even when one of the calls above fails, so a
    # failed run does not leave tmp.csv behind.
    remove('tmp.csv')