Example #1
    def test_default_settings(self):
        config = Config(model='BPR', dataset='ml-100k')

        self.assertEqual(config['model'], 'BPR')
        self.assertEqual(config['dataset'], 'ml-100k')

        self.assertIsInstance(config['gpu_id'], int)
        self.assertIsInstance(config['use_gpu'], bool)
        self.assertIsInstance(config['seed'], int)
        self.assertIsInstance(config['state'], str)
        self.assertIsInstance(config['data_path'], str)

        self.assertIsInstance(config['epochs'], int)
        self.assertIsInstance(config['train_batch_size'], int)
        self.assertIsInstance(config['learner'], str)
        self.assertIsInstance(config['learning_rate'], float)
        self.assertIsInstance(config['training_neg_sample_num'], int)
        self.assertIsInstance(config['eval_step'], int)
        self.assertIsInstance(config['stopping_step'], int)
        self.assertIsInstance(config['checkpoint_dir'], str)

        self.assertIsInstance(config['eval_setting'], str)
        self.assertIsInstance(config['group_by_user'], bool)
        self.assertIsInstance(config['split_ratio'], list)
        self.assertIsInstance(config['leave_one_num'], int)
        self.assertIsInstance(config['real_time_process'], bool)
        self.assertIsInstance(config['metrics'], list)
        self.assertIsInstance(config['topk'], list)
        self.assertIsInstance(config['valid_metric'], str)
        self.assertIsInstance(config['eval_batch_size'], int)
Example #2
def objective_function(config_dict=None, config_file_list=None, saved=True):
    r""" The default objective_function used in HyperTuning

    Args:
        config_dict (dict): parameters dictionary used to modify experiment parameters
        config_file_list (list): config files used to modify experiment parameters
        saved (bool): whether to save the model
    """

    config = Config(config_dict=config_dict, config_file_list=config_file_list)
    init_seed(config['seed'], config['reproducibility'])
    logging.basicConfig(level=logging.ERROR)
    dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, dataset)
    model = get_model(config['model'])(config, train_data).to(config['device'])
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
    best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, verbose=False, saved=saved)
    test_result = trainer.evaluate(test_data, load_best_model=saved)

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
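For reference, this objective_function is what RecBole's hyper-parameter search invokes once per trial. Below is a minimal sketch of plugging it into HyperTuning, assuming the interface used in RecBole's run_hyper example; the attribute names and the 'hyper.test' search-space file are assumptions and may differ across versions.

from recbole.trainer import HyperTuning

# 'hyper.test' is a hypothetical search-space file, one parameter per line, e.g.:
#   learning_rate choice [0.005, 0.01, 0.05]
hp = HyperTuning(objective_function, algo='exhaustive', params_file='hyper.test')
hp.run()
print('best params: ', hp.best_params)
print('best result: ', hp.params2result[hp.params2str(hp.best_params)])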
Example #3
def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True):
    r""" A fast running api, which includes the complete process of
    training and testing a model on a specified dataset

    Args:
        model (str): model name
        dataset (str): dataset name
        config_file_list (list): config files used to modify experiment parameters
        config_dict (dict): parameters dictionary used to modify experiment parameters
        saved (bool): whether to save the model
    """
    # configurations initialization
    config = Config(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()

    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)
    print(train_data.dataset.item_feat)
    print(valid_data.dataset.item_feat)

    # model loading and initialization
    model = get_model(config['model'])(config, train_data).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training
    with profiler.profile(enabled=config["monitor"], with_stack=True, profile_memory=True, use_cuda=True) as prof:
        best_valid_score, best_valid_result = trainer.fit(
            train_data, valid_data, saved=saved, show_progress=config['show_progress']
        )
    if prof is not None:
        print(prof.key_averages(group_by_stack_n=5).table(sort_by='self_cpu_time_total'))

    # model evaluation
    with profiler.profile(enabled=config["monitor_eval"], with_stack=True, profile_memory=True, use_cuda=True) as prof:
        test_result = trainer.evaluate(test_data, load_best_model=saved, show_progress=config['show_progress'], cold_warm_distinct_eval=True)
    if prof is not None:
        print(prof.key_averages(group_by_stack_n=5).table(sort_by='self_cpu_time_total'))

    logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}')
    logger.info(set_color('test result', 'yellow') + f': {test_result}')

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
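Note that 'monitor' and 'monitor_eval' here are custom keys added by this modified variant to gate torch.autograd.profiler; they are not standard RecBole settings. A minimal sketch of calling this variant with profiling enabled for training only (key names taken from the body above, values illustrative):

if __name__ == '__main__':
    result = run_recbole(
        model='BPR', dataset='ml-100k',
        config_dict={'monitor': True, 'monitor_eval': False, 'show_progress': False}
    )
    print(result['test_result'])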
Example #4
def save_example():
    # configurations initialization
    config_dict = {'checkpoint_dir': '../saved'}
    config = Config(model='BPR', dataset='ml-100k', config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])
    init_logger(config)

    # dataset filtering
    dataset = create_dataset(config)
    dataset.save('../saved/')

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)
    save_split_dataloaders(config,
                           dataloaders=(train_data, valid_data, test_data))

    model = get_model(config['model'])(config, train_data).to(config['device'])

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training
    # the best model will be saved in here
    best_valid_score, best_valid_result = trainer.fit(
        train_data,
        valid_data,
        saved=True,
        show_progress=config['show_progress'])
Example #5
def run_recbole(model=None,
                dataset=None,
                config_file_list=None,
                config_dict=None,
                saved=True):
    r""" A fast running api, which includes the complete process of
    training and testing a model on a specified dataset

    Args:
        model (str): model name
        dataset (str): dataset name
        config_file_list (list): config files used to modify experiment parameters
        config_dict (dict): parameters dictionary used to modify experiment parameters
        saved (bool): whether to save the model
    """
    # configurations initialization
    config = Config(model=model,
                    dataset=dataset,
                    config_file_list=config_file_list,
                    config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()

    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)

    # model loading and initialization
    model = get_model(config['model'])(config, train_data).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training
    best_valid_score, best_valid_result = trainer.fit(train_data,
                                                      valid_data,
                                                      saved=saved)

    # model evaluation
    test_result = trainer.evaluate(test_data, load_best_model=saved)

    logger.info('best valid result: {}'.format(best_valid_result))
    logger.info('test result: {}'.format(test_result))

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
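A minimal sketch of calling the function above; 'my_overrides.yaml' is a hypothetical config file overriding defaults such as epochs or learning_rate:

if __name__ == '__main__':
    result = run_recbole(model='BPR', dataset='ml-100k',
                         config_file_list=['my_overrides.yaml'])
    print(result['best_valid_result'])
    print(result['test_result'])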
Example #6
    def test_default_context_settings(self):
        config = Config(model='FM', dataset='ml-100k')

        self.assertEqual(config['eval_setting'], 'RO_RS')
        self.assertEqual(config['group_by_user'], False)
        self.assertEqual(config['metrics'], ['AUC', 'LogLoss'])
        self.assertEqual(config['valid_metric'], 'AUC')
        self.assertEqual(config['training_neg_sample_num'], 0)
Example #7
    def test_config_dict(self):
        config = Config(model='BPR',
                        dataset='ml-100k',
                        config_dict=parameters_dict)

        self.assertEqual(config['model'], 'BPR')
        self.assertEqual(config['learning_rate'], 0.2)
        self.assertEqual(config['topk'], [50, 100])
        self.assertEqual(config['eval_setting'], 'RO_RS,full')
Example #8
    def test_config_file_list(self):
        config = Config(model='BPR',
                        dataset='ml-100k',
                        config_file_list=config_file_list)

        self.assertEqual(config['model'], 'BPR')
        self.assertEqual(config['learning_rate'], 0.1)
        self.assertEqual(config['topk'], [5, 20])
        self.assertEqual(config['eval_setting'], 'TO_LS,full')
Example #9
    def test_default_sequential_settings(self):
        para_dict = {'neg_sampling': None}
        config = Config(model='SASRec',
                        dataset='ml-100k',
                        config_dict=para_dict)
        self.assertEqual(config['eval_args']['split'],
                         {'LS': 'valid_and_test'})
        self.assertEqual(config['eval_args']['order'], 'TO')
        self.assertEqual(config['eval_args']['mode'], 'full')
        self.assertEqual(config['eval_args']['group_by'], 'user')
Example #10
    def test_priority(self):
        config = Config(model='BPR',
                        dataset='ml-100k',
                        config_file_list=config_file_list,
                        config_dict=parameters_dict)

        self.assertEqual(config['learning_rate'], 0.2)  # default, file, dict
        self.assertEqual(config['topk'], [50, 100])  # default, file, dict
        self.assertEqual(config['eval_setting'], 'TO_LS,full')  # default, file
        self.assertEqual(config['epochs'], 100)  # default, dict
Example #11
def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True):
    r""" A fast running api, which includes the complete process of
    training and testing a model on a specified dataset

    Args:
        model (str, optional): Model name. Defaults to ``None``.
        dataset (str, optional): Dataset name. Defaults to ``None``.
        config_file_list (list, optional): Config files used to modify experiment parameters. Defaults to ``None``.
        config_dict (dict, optional): Parameters dictionary used to modify experiment parameters. Defaults to ``None``.
        saved (bool, optional): Whether to save the model. Defaults to ``True``.
    """
    # configurations initialization
    config = Config(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])
    # logger initialization
    init_logger(config)
    logger = getLogger()

    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    if config['save_dataset']:
        dataset.save()
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)
    if config['save_dataloaders']:
        save_split_dataloaders(config, dataloaders=(train_data, valid_data, test_data))

    # model loading and initialization
    model = get_model(config['model'])(config, train_data.dataset).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training
    best_valid_score, best_valid_result = trainer.fit(
        train_data, valid_data, saved=saved, show_progress=config['show_progress']
    )

    # model evaluation
    test_result = trainer.evaluate(test_data, load_best_model=saved, show_progress=config['show_progress'])

    logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}')
    logger.info(set_color('test result', 'yellow') + f': {test_result}')

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
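This version adds the save_dataset and save_dataloaders switches; a minimal sketch of enabling them through config_dict (both keys come from the body above):

result = run_recbole(
    model='BPR', dataset='ml-100k',
    config_dict={'save_dataset': True, 'save_dataloaders': True}
)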
Example #12
    def test_default_context_settings(self):
        config = Config(model='FM', dataset='ml-100k')

        self.assertEqual(config['eval_args']['split'], {'RS': [0.8, 0.1, 0.1]})
        self.assertEqual(config['eval_args']['order'], 'RO')
        self.assertEqual(config['eval_args']['mode'], 'labeled')
        self.assertEqual(config['eval_args']['group_by'], None)

        self.assertEqual(config['metrics'], ['AUC', 'LogLoss'])
        self.assertEqual(config['valid_metric'], 'AUC')
        self.assertEqual(config['neg_sampling'], None)
Example #13
    def test_collect(self):
        config = Config('BPR', 'ml-100k', config_dict=parameters_dict)
        metrics = ['GAUC']
        rank_evaluator = RankEvaluator(config, metrics)
        self.assertEqual(
            get_collect_result(rank_evaluator,
                               case=0).squeeze().cpu().numpy().tolist(),
            np.array([
                0, (2 + 3) / 2 * 2, (1 + 2 + 3 + 4) / 4 * 3,
                1 + (2 + 3) / 2 + 4 + 5
            ]).tolist())
Example #14
    def test_config_dict(self):
        config = Config(model='BPR',
                        dataset='ml-100k',
                        config_dict=parameters_dict)

        self.assertEqual(config['model'], 'BPR')
        self.assertEqual(config['learning_rate'], 0.2)
        self.assertEqual(config['topk'], [50, 100])
        self.assertEqual(config['eval_args']['split'], {'RS': [0.8, 0.1, 0.1]})
        self.assertEqual(config['eval_args']['order'], 'RO')
        self.assertEqual(config['eval_args']['mode'], 'full')
        self.assertEqual(config['eval_args']['group_by'], 'user')
Example #15
    def test_config_file_list(self):
        config = Config(model='BPR',
                        dataset='ml-100k',
                        config_file_list=config_file_list)

        self.assertEqual(config['model'], 'BPR')
        self.assertEqual(config['learning_rate'], 0.1)
        self.assertEqual(config['topk'], [5, 20])
        self.assertEqual(config['eval_args']['split'],
                         {'LS': 'valid_and_test'})
        self.assertEqual(config['eval_args']['order'], 'TO')
        self.assertEqual(config['eval_args']['mode'], 'full')
        self.assertEqual(config['eval_args']['group_by'], 'user')
Example #16
    def test_priority(self):
        config = Config(model='BPR',
                        dataset='ml-100k',
                        config_file_list=config_file_list,
                        config_dict=parameters_dict)

        self.assertEqual(config['learning_rate'], 0.2)  # default, file, dict
        self.assertEqual(config['topk'], [50, 100])  # default, file, dict
        self.assertEqual(config['eval_args']['split'],
                         {'LS': 'valid_and_test'})
        self.assertEqual(config['eval_args']['order'], 'TO')
        self.assertEqual(config['eval_args']['mode'], 'full')
        self.assertEqual(config['eval_args']['group_by'], 'user')
        self.assertEqual(config['epochs'], 100)  # default, dict
Example #17
def load_example():
    # configurations initialization
    config_dict = {'checkpoint_dir': '../saved'}
    config = Config(model='BPR', dataset='ml-100k', config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])
    init_logger(config)
    logger = getLogger()

    with open('../saved/ml-100k-dataset.pth',
              'rb') as f:  # You can use your filtered data path here.
        dataset = pickle.load(f)

    train_data, valid_data, test_data = load_split_dataloaders(
        '../saved/ml-100k-for-BPR-dataloader.pth')
    # You can use your split data path here.

    model = get_model(config['model'])(config, train_data).to(config['device'])
    checkpoint = torch.load('../saved/BPR-Mar-20-2021_17-11-05.pth')  # You can replace this with your model path.
    model.load_state_dict(checkpoint['state_dict'])
    logger.info(model)
    logger.info(train_data.dataset)
    logger.info(valid_data.dataset)
    logger.info(test_data.dataset)
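    # A possible continuation (not in the original snippet): evaluate the restored
    # weights with the trainer pattern used in the other examples; load_best_model
    # is False because the state dict was already loaded from the checkpoint above.
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
    test_result = trainer.evaluate(test_data, load_best_model=False)
    logger.info(test_result)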
Example #18
    def test_default_sequential_settings(self):
        config = Config(model='SASRec', dataset='ml-100k')

        self.assertEqual(config['eval_setting'], 'TO_LS,full')
Example #19
"""


import torch
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.utils import get_model, init_seed
from recbole.utils.case_study import full_sort_topk, full_sort_scores


if __name__ == '__main__':
    # this part is to load saved model.
    config_dict = {
        # here you can set some parameters such as `gpu_id` and so on.
    }
    config = Config(model='BPR', dataset='ml-100k', config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])
    dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, dataset)
    # Here you can also use `load_split_dataloaders` to load data.
    # The example code for `load_split_dataloaders` can be found in `save_and_load_example.py`.

    model = get_model(config['model'])(config, train_data)
    checkpoint = torch.load('RecBole/saved/BPR-Dec-08-2020_15-37-37.pth')  # Here you can replace it with your model path.
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    # uid_series = np.array([1, 2])  # internal user id series
    # or you can use dataset.token2id to transfer external user token to internal user id
    uid_series = dataset.token2id(dataset.uid_field, ['200'])
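    # The snippet is cut off here; a possible continuation using the imported helpers
    # (signatures assumed from recbole.utils.case_study) might look like:
    topk_score, topk_iid_list = full_sort_topk(uid_series, model, test_data, k=10)
    print(topk_score)  # scores of the top-10 items for each user in uid_series
    external_item_list = dataset.id2token(dataset.iid_field, topk_iid_list.cpu())
    print(external_item_list)  # external tokens of those top-10 items
    score = full_sort_scores(uid_series, model, test_data)
    print(score)  # scores of all items for each user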
Example #20
def run_trial(model_name, dataset_name, hp_config=None, save_flag=False):

    if not hp_config:
        hp_config = {}
        tuning = False
    else:
        tuning = True

    commons.init_seeds()
    verbose = not tuning
    model_class = statics.model_name_map[model_name]
    try:
        default_config = model_class.default_params
    except AttributeError:
        default_config = {}
        assert model_name in statics.recbole_models

    default_config.update(statics.datasets_params[dataset_name])
    default_config.update(hp_config)

    config = Config(model=model_class, dataset=dataset_name, config_dict=default_config)
    init_seed(config['seed'], config['reproducibility'])

    init_logger(config)
    logger = logging.getLogger()

    # logger initialization
    if verbose:
        logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, dataset)
    train_data = add_graph(train_data)

    if verbose:
        logger.info(dataset)

    model = model_class(config, train_data).to(commons.device)
    trainer = utils.get_trainer(config)(config, model)

    best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, verbose=verbose, show_progress=verbose)
    test_result = trainer.evaluate(test_data)

    if verbose:
        logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}')
        logger.info(set_color('test result', 'yellow') + f': {test_result}')

    metric = str.lower(config['valid_metric'])

    if save_flag:
        os.makedirs(os.path.join("bestmodels" , dataset_name , str(config["topk"])) , exist_ok=True)
        save_path = os.path.join("bestmodels" , dataset_name , str(config["topk"]) , "{}.pth".format(model_name))
    else:
        save_path = None

    if save_path:
        shutil.copyfile(trainer.saved_model_file , save_path)

    return {
        'metric': config['valid_metric'],
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_score': test_result[metric]
    }
Example #21
def new_dataloader(config_dict=None, config_file_list=None):
    config = Config(config_dict=config_dict, config_file_list=config_file_list)
    init_seed(config['seed'], config['reproducibility'])
    logging.basicConfig(level=logging.ERROR)
    dataset = create_dataset(config)
    return data_preparation(config, dataset)
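A minimal sketch of calling this helper; passing model and dataset inside config_dict assumes Config resolves them from there when no explicit arguments are given, as in the objective_function example above:

train_data, valid_data, test_data = new_dataloader(
    config_dict={'model': 'BPR', 'dataset': 'ml-100k'}
)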
Example #22
import os
import sys
import unittest

sys.path.append(os.getcwd())
import numpy as np
from recbole.config import Config
from recbole.evaluator import metrics_dict, Collector

parameters_dict = {
    'model': 'BPR',
    'eval_args': {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'mode': 'uni100'},
    'metric_decimal_place': 4,
}

config = Config('BPR', 'ml-1m', config_dict=parameters_dict)


class MetricsTestCases(object):
    user_len_list0 = np.array([2, 3, 5])
    pos_len_list0 = np.array([1, 2, 3])
    pos_rank_sum0 = np.array([1, 4, 9])

    user_len_list1 = np.array([3, 6, 4])
    pos_len_list1 = np.array([1, 0, 4])
    pos_rank_sum1 = np.array([3, 0, 6])


def get_metric_result(name, case=0):
    Metric = metrics_dict[name](config)
    return Metric.metric_info(
Example #23
from recbole.config import Config

if __name__ == '__main__':

    parameters_dict = {
        'model': 'SASRec',
        'learning_rate': 0.2,
        'topk': [50, 100],
        'epochs': 100,
    }

    config_file_list = ['test_config_example.yaml']

    config = Config(model='BPR', dataset='ml-100k')

    # command line
    assert config['use_gpu'] == False
    assert config['valid_metric'] == 'Recall@10'
    assert config['split_ratio'] == [0.7, 0.2, 0.1]
    # assert config['metrics'] == ['Recall@10']     # bug

    # priority
    assert config['epochs'] == 200
    assert config['eval_setting'] == 'LO_RS'
    assert config['learning_rate'] == 0.3

    print('------------------------------------------------------------')
    print('OK')
Example #24
def new_dataset(config_dict=None, config_file_list=None):
    config = Config(config_dict=config_dict, config_file_list=config_file_list)
    init_seed(config['seed'], config['reproducibility'])
    logging.basicConfig(level=logging.ERROR)
    return create_dataset(config)
Example #25
    args = get_args()

    # configurations initialization
    config_dict = {
        'USER_ID_FIELD': 'session_id',
        'load_col': None,
        'neg_sampling': None,
        'benchmark_filename': ['train', 'test'],
        'alias_of_item_id': ['item_id_list'],
        'topk': [20],
        'metrics': ['Recall', 'MRR'],
        'valid_metric': 'MRR@20'
    }

    config = Config(model=args.model,
                    dataset=f'{args.dataset}',
                    config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()

    logger.info(args)
    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
Example #26
    def test_default_sequential_settings(self):
        para_dict = {'training_neg_sample_num': 0}
        config = Config(model='SASRec',
                        dataset='ml-100k',
                        config_dict=para_dict)
        self.assertEqual(config['eval_setting'], 'TO_LS,full')