def make_dataset(dataset, specs):
    """Create the data splits for one dataset and save each of them to disk.

    The module-level ``seed`` is applied first, then a reader is built, its
    train/test matrices are fetched, and the train matrix is split twice:

    * train -> (train-for-validation, validation) with ratios 0.75 / 0 / 0.25
    * train-for-validation -> (small train, early-stop) with ratios
      0.85 / 0 / 0.15

    The middle element returned by each ``split_urm`` call is discarded.

    Args:
        dataset: Either a string that is a key of ``Movielens.urls`` (used as
            the ``version`` of a ``Movielens`` reader) or a callable that
            builds a reader when invoked as ``dataset(**specs)``. A string
            that is not a key of ``Movielens.urls`` is also routed to the
            callable branch.
        specs: Keyword arguments forwarded to the reader constructor.

    Returns:
        The list ``[URM_train, URM_test, URM_validation, URM_train_small,
        URM_early_stop]``.

    Side effects:
        Every returned matrix is written with ``sps.save_npz`` (compressed)
        to ``exp_path`` under the name ``reader.DATASET_NAME + suffix``, with
        suffixes taken in order from the module-level ``URM_suffixes``.
    """
    # Seed before anything else so each dataset is created identically on any
    # machine and regardless of the order in which datasets are selected.
    set_seed(seed)

    is_movielens_version = (
        isinstance(dataset, str) and dataset in Movielens.urls.keys()
    )
    reader = (
        Movielens(version=dataset, **specs)
        if is_movielens_version
        else dataset(**specs)
    )

    URM_train = reader.get_URM_train()
    URM_test = reader.get_URM_test()

    # Options shared by both splits; nothing is persisted by the reader here.
    split_options = dict(save_local=False, verbose=False, min_ratings=1)

    # First split: carve a validation set out of the training data.
    URM_for_train, _, URM_validation = reader.split_urm(
        URM_train.tocoo(), split_ratio=[0.75, 0, 0.25], **split_options
    )
    # Second split: carve an early-stopping set out of what remains.
    URM_train_small, _, URM_early_stop = reader.split_urm(
        URM_for_train.tocoo(), split_ratio=[0.85, 0, 0.15], **split_options
    )

    sets = [
        URM_train,
        URM_test,
        URM_validation,
        URM_train_small,
        URM_early_stop,
    ]

    # Order of `sets` must line up with the order of `URM_suffixes`.
    for suffix, matrix in zip(URM_suffixes, sets):
        target = os.path.join(exp_path, reader.DATASET_NAME + suffix)
        sps.save_npz(target, matrix, compressed=True)

    return sets
kwargs['force_rebuild'] = False kwargs['implicit'] = False kwargs['save_local'] = True kwargs['verbose'] = False kwargs['split'] = False datasets = [CiaoDVD, Delicious, LastFM, '100K', '1M', '10M'] URMs = [] path = os.path.join(DataReader.all_datasets_dir, 'stats') if not os.path.exists(path): os.makedirs(path) for d in datasets: if isinstance(d, str): reader = Movielens(version=d, **kwargs) else: reader = d(**kwargs) reader.describe(save_plots=True, path=path) URMs.append(reader.get_URM_full().T.tocsr()) reader = Movielens('1M', **kwargs) plot_long_tail(dataset=reader.get_URM_full().T.tocsr(), label=reader.DATASET_NAME, path=path) plot_lorenz_curve(datasets=URMs, labels=[ 'MovieLens ' + d if isinstance(d, str) else d.DATASET_NAME for d in datasets ],
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' os.environ['KMP_WARNINGS'] = '0' simplefilter(action='ignore', category=UserWarning) simplefilter(action='ignore', category=FutureWarning) use_gpu = False verbose = False only_build = False transposed = False if not use_gpu: os.environ['CUDA_VISIBLE_DEVICES'] = '' reader = Movielens(version='100K', split_ratio=[0.6, 0.2, 0.2], use_local=True, implicit=True, verbose=False, seed=seed) URM_train = reader.get_URM_train(transposed=transposed) URM_validation = reader.get_URM_validation(transposed=transposed) URM_test = reader.get_URM_test(transposed=transposed) evaluator = EvaluatorHoldout(URM_test, [5, 20], exclude_seen=True) evaluatorValidation = EvaluatorHoldout(URM_validation, [5], exclude_seen=True) gan = GANMF(URM_train, mode='user') gan.fit(num_factors=10, emb_dim=128, d_reg=1e-4,