예제 #1
0
def make_dataset(dataset, specs):
    """Create the experiment splits of one dataset and save them to disk.

    The reader's training matrix is split 75/25 into a fitting part and a
    validation part; the fitting part is split again 85/15 into a smaller
    training part and an early-stopping part. Each resulting matrix is
    written as a compressed ``.npz`` file under the module-level
    ``exp_path``, named ``reader.DATASET_NAME`` plus the matching entry of
    the module-level ``URM_suffixes``.

    Args:
        dataset: Either a string that is a key of ``Movielens.urls`` (in
            which case a ``Movielens`` reader of that version is built) or
            a callable that is invoked as ``dataset(**specs)`` to build
            the reader.
        specs: Mapping of keyword arguments forwarded to the reader
            constructor.

    Returns:
        list: ``[URM_train, URM_test, URM_validation, URM_train_small,
        URM_early_stop]``, in that order.
    """
    # Seed up front (module-level ``seed``) so each dataset is created the
    # same on any machine and in whatever order the datasets are selected.
    set_seed(seed)

    known_version = isinstance(dataset, str) and dataset in Movielens.urls.keys()
    reader = Movielens(version=dataset, **specs) if known_version else dataset(**specs)

    # Options shared by both splitting passes.
    split_options = dict(save_local=False, verbose=False, min_ratings=1)

    full_train = reader.get_URM_train()
    held_out_test = reader.get_URM_test()

    # First pass: carve the validation part out of the full training matrix.
    fit_part, _, validation_part = reader.split_urm(
        full_train.tocoo(), split_ratio=[0.75, 0, 0.25], **split_options)
    # Second pass: carve the early-stopping part out of what is left.
    small_train, _, early_stop_part = reader.split_urm(
        fit_part.tocoo(), split_ratio=[0.85, 0, 0.15], **split_options)

    matrices = [
        full_train,
        held_out_test,
        validation_part,
        small_train,
        early_stop_part,
    ]

    # zip pairs each matrix with its file-name suffix positionally.
    for suffix, matrix in zip(URM_suffixes, matrices):
        target = os.path.join(exp_path, reader.DATASET_NAME + suffix)
        sps.save_npz(target, matrix, compressed=True)

    return matrices
예제 #2
0
    kwargs['force_rebuild'] = False
    kwargs['implicit'] = False
    kwargs['save_local'] = True
    kwargs['verbose'] = False
    kwargs['split'] = False

    datasets = [CiaoDVD, Delicious, LastFM, '100K', '1M', '10M']
    URMs = []

    path = os.path.join(DataReader.all_datasets_dir, 'stats')
    if not os.path.exists(path):
        os.makedirs(path)

    for d in datasets:
        if isinstance(d, str):
            reader = Movielens(version=d, **kwargs)
        else:
            reader = d(**kwargs)
        reader.describe(save_plots=True, path=path)
        URMs.append(reader.get_URM_full().T.tocsr())

    reader = Movielens('1M', **kwargs)
    plot_long_tail(dataset=reader.get_URM_full().T.tocsr(),
                   label=reader.DATASET_NAME,
                   path=path)
    plot_lorenz_curve(datasets=URMs,
                      labels=[
                          'MovieLens ' +
                          d if isinstance(d, str) else d.DATASET_NAME
                          for d in datasets
                      ],
예제 #3
0
# Script setup: silence library logging/warnings, choose run-time switches,
# load the MovieLens 100K splits and build the evaluators and the GANMF model.

# Quieten native-library output through environment variables.
# NOTE(review): 'TF_CPP_MIN_LOG_LEVEL' is presumably TensorFlow's C++ log
# threshold and 'KMP_WARNINGS' the OpenMP runtime's warning switch - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['KMP_WARNINGS'] = '0'
# Ignore every UserWarning and FutureWarning.
# NOTE(review): simplefilter is presumably warnings.simplefilter - confirm
# against the file's imports.
simplefilter(action='ignore', category=UserWarning)
simplefilter(action='ignore', category=FutureWarning)

# Run-time switches. Only use_gpu and transposed are read in the statements
# below; verbose and only_build are not used in this part of the script.
use_gpu = False
verbose = False
only_build = False
transposed = False

if not use_gpu:
    # An empty device list is presumably meant to hide all CUDA devices so
    # the run stays on the CPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

# MovieLens 100K reader; `seed` is defined elsewhere in the file.
# NOTE(review): split_ratio is presumably train/validation/test - confirm
# against the reader's implementation.
reader = Movielens(version='100K',
                   split_ratio=[0.6, 0.2, 0.2],
                   use_local=True,
                   implicit=True,
                   verbose=False,
                   seed=seed)

# Fetch the three splits, all with the same `transposed` setting.
URM_train = reader.get_URM_train(transposed=transposed)
URM_validation = reader.get_URM_validation(transposed=transposed)
URM_test = reader.get_URM_test(transposed=transposed)

# One evaluator on the test split and one on the validation split.
# NOTE(review): the list argument is presumably the recommendation cutoffs
# ([5, 20] for test, [5] for validation) - confirm.
evaluator = EvaluatorHoldout(URM_test, [5, 20], exclude_seen=True)
evaluatorValidation = EvaluatorHoldout(URM_validation, [5], exclude_seen=True)

# Model under test, built on the training split in 'user' mode.
gan = GANMF(URM_train, mode='user')

gan.fit(num_factors=10,
        emb_dim=128,
        d_reg=1e-4,