def test_default_settings(self):
    config = Config(model='BPR', dataset='ml-100k')

    self.assertEqual(config['model'], 'BPR')
    self.assertEqual(config['dataset'], 'ml-100k')

    self.assertIsInstance(config['gpu_id'], int)
    self.assertIsInstance(config['use_gpu'], bool)
    self.assertIsInstance(config['seed'], int)
    self.assertIsInstance(config['state'], str)
    self.assertIsInstance(config['data_path'], str)

    self.assertIsInstance(config['epochs'], int)
    self.assertIsInstance(config['train_batch_size'], int)
    self.assertIsInstance(config['learner'], str)
    self.assertIsInstance(config['learning_rate'], float)
    self.assertIsInstance(config['training_neg_sample_num'], int)
    self.assertIsInstance(config['eval_step'], int)
    self.assertIsInstance(config['stopping_step'], int)
    self.assertIsInstance(config['checkpoint_dir'], str)

    self.assertIsInstance(config['eval_setting'], str)
    self.assertIsInstance(config['group_by_user'], bool)
    self.assertIsInstance(config['split_ratio'], list)
    self.assertIsInstance(config['leave_one_num'], int)
    self.assertIsInstance(config['real_time_process'], bool)
    self.assertIsInstance(config['metrics'], list)
    self.assertIsInstance(config['topk'], list)
    self.assertIsInstance(config['valid_metric'], str)
    self.assertIsInstance(config['eval_batch_size'], int)
def objective_function(config_dict=None, config_file_list=None, saved=True):
    r"""The default objective_function used in HyperTuning.

    Args:
        config_dict (dict): parameters dictionary used to modify experiment parameters
        config_file_list (list): config files used to modify experiment parameters
        saved (bool): whether to save the model
    """
    config = Config(config_dict=config_dict, config_file_list=config_file_list)
    init_seed(config['seed'], config['reproducibility'])
    logging.basicConfig(level=logging.ERROR)
    dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, dataset)
    model = get_model(config['model'])(config, train_data).to(config['device'])
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
    best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, verbose=False, saved=saved)
    test_result = trainer.evaluate(test_data, load_best_model=saved)

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
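# A minimal sketch of plugging objective_function into RecBole's HyperTuning search.
# 'hyper.test' (the search-space file) and 'config.yaml' are placeholder file names,
# not files defined in this codebase; adjust them to your setup.
from recbole.trainer import HyperTuning

hp = HyperTuning(
    objective_function,
    algo='exhaustive',
    params_file='hyper.test',
    fixed_config_file_list=['config.yaml'],
)
hp.run()
print('best params: ', hp.best_params)
print('best result: ', hp.params2result[hp.params2str(hp.best_params)])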
def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True):
    r"""A fast-running API covering the complete process of training and testing a model
    on a specified dataset.

    Args:
        model (str): model name
        dataset (str): dataset name
        config_file_list (list): config files used to modify experiment parameters
        config_dict (dict): parameters dictionary used to modify experiment parameters
        saved (bool): whether to save the model
    """
    # configurations initialization
    config = Config(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()
    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)
    print(train_data.dataset.item_feat)
    print(valid_data.dataset.item_feat)

    # model loading and initialization
    model = get_model(config['model'])(config, train_data).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training
    with profiler.profile(enabled=config["monitor"], with_stack=True, profile_memory=True, use_cuda=True) as prof:
        best_valid_score, best_valid_result = trainer.fit(
            train_data, valid_data, saved=saved, show_progress=config['show_progress']
        )
    if prof is not None:
        print(prof.key_averages(group_by_stack_n=5).table(sort_by='self_cpu_time_total'))

    # model evaluation
    with profiler.profile(enabled=config["monitor_eval"], with_stack=True, profile_memory=True, use_cuda=True) as prof:
        test_result = trainer.evaluate(
            test_data, load_best_model=saved, show_progress=config['show_progress'], cold_warm_distinct_eval=True
        )
    if prof is not None:
        print(prof.key_averages(group_by_stack_n=5).table(sort_by='self_cpu_time_total'))

    logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}')
    logger.info(set_color('test result', 'yellow') + f': {test_result}')

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
def save_example():
    # configurations initialization
    config_dict = {'checkpoint_dir': '../saved'}
    config = Config(model='BPR', dataset='ml-100k', config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])
    init_logger(config)

    # dataset filtering
    dataset = create_dataset(config)
    dataset.save('../saved/')

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)
    save_split_dataloaders(config, dataloaders=(train_data, valid_data, test_data))

    model = get_model(config['model'])(config, train_data).to(config['device'])

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training: the best model will be saved here
    best_valid_score, best_valid_result = trainer.fit(
        train_data, valid_data, saved=True, show_progress=config['show_progress']
    )
def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True):
    r"""A fast-running API covering the complete process of training and testing a model
    on a specified dataset.

    Args:
        model (str): model name
        dataset (str): dataset name
        config_file_list (list): config files used to modify experiment parameters
        config_dict (dict): parameters dictionary used to modify experiment parameters
        saved (bool): whether to save the model
    """
    # configurations initialization
    config = Config(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()
    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)

    # model loading and initialization
    model = get_model(config['model'])(config, train_data).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training
    best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, saved=saved)

    # model evaluation
    test_result = trainer.evaluate(test_data, load_best_model=saved)

    logger.info('best valid result: {}'.format(best_valid_result))
    logger.info('test result: {}'.format(test_result))

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
def test_default_context_settings(self):
    config = Config(model='FM', dataset='ml-100k')

    self.assertEqual(config['eval_setting'], 'RO_RS')
    self.assertEqual(config['group_by_user'], False)
    self.assertEqual(config['metrics'], ['AUC', 'LogLoss'])
    self.assertEqual(config['valid_metric'], 'AUC')
    self.assertEqual(config['training_neg_sample_num'], 0)
def test_config_dict(self):
    config = Config(model='BPR', dataset='ml-100k', config_dict=parameters_dict)

    self.assertEqual(config['model'], 'BPR')
    self.assertEqual(config['learning_rate'], 0.2)
    self.assertEqual(config['topk'], [50, 100])
    self.assertEqual(config['eval_setting'], 'RO_RS,full')
def test_config_file_list(self):
    config = Config(model='BPR', dataset='ml-100k', config_file_list=config_file_list)

    self.assertEqual(config['model'], 'BPR')
    self.assertEqual(config['learning_rate'], 0.1)
    self.assertEqual(config['topk'], [5, 20])
    self.assertEqual(config['eval_setting'], 'TO_LS,full')
def test_default_sequential_settings(self):
    para_dict = {'neg_sampling': None}
    config = Config(model='SASRec', dataset='ml-100k', config_dict=para_dict)

    self.assertEqual(config['eval_args']['split'], {'LS': 'valid_and_test'})
    self.assertEqual(config['eval_args']['order'], 'TO')
    self.assertEqual(config['eval_args']['mode'], 'full')
    self.assertEqual(config['eval_args']['group_by'], 'user')
def test_priority(self):
    config = Config(model='BPR', dataset='ml-100k', config_file_list=config_file_list, config_dict=parameters_dict)

    self.assertEqual(config['learning_rate'], 0.2)  # default, file, dict
    self.assertEqual(config['topk'], [50, 100])  # default, file, dict
    self.assertEqual(config['eval_setting'], 'TO_LS,full')  # default, file
    self.assertEqual(config['epochs'], 100)  # default, dict
def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True):
    r"""A fast-running API covering the complete process of training and testing a model
    on a specified dataset.

    Args:
        model (str, optional): Model name. Defaults to ``None``.
        dataset (str, optional): Dataset name. Defaults to ``None``.
        config_file_list (list, optional): Config files used to modify experiment parameters. Defaults to ``None``.
        config_dict (dict, optional): Parameters dictionary used to modify experiment parameters. Defaults to ``None``.
        saved (bool, optional): Whether to save the model. Defaults to ``True``.
    """
    # configurations initialization
    config = Config(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()
    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    if config['save_dataset']:
        dataset.save()
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)
    if config['save_dataloaders']:
        save_split_dataloaders(config, dataloaders=(train_data, valid_data, test_data))

    # model loading and initialization
    model = get_model(config['model'])(config, train_data.dataset).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training
    best_valid_score, best_valid_result = trainer.fit(
        train_data, valid_data, saved=saved, show_progress=config['show_progress']
    )

    # model evaluation
    test_result = trainer.evaluate(test_data, load_best_model=saved, show_progress=config['show_progress'])

    logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}')
    logger.info(set_color('test result', 'yellow') + f': {test_result}')

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
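# A minimal usage sketch of the quick-start API defined above; the model, dataset and
# config_dict overrides are illustrative values, not required settings.
if __name__ == '__main__':
    result = run_recbole(
        model='BPR',
        dataset='ml-100k',
        config_dict={'epochs': 10, 'show_progress': False},
    )
    print(result['best_valid_score'])
    print(result['test_result'])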
def test_default_context_settings(self):
    config = Config(model='FM', dataset='ml-100k')

    self.assertEqual(config['eval_args']['split'], {'RS': [0.8, 0.1, 0.1]})
    self.assertEqual(config['eval_args']['order'], 'RO')
    self.assertEqual(config['eval_args']['mode'], 'labeled')
    self.assertEqual(config['eval_args']['group_by'], None)
    self.assertEqual(config['metrics'], ['AUC', 'LogLoss'])
    self.assertEqual(config['valid_metric'], 'AUC')
    self.assertEqual(config['neg_sampling'], None)
def test_collect(self):
    config = Config('BPR', 'ml-100k', config_dict=parameters_dict)
    metrics = ['GAUC']
    rank_evaluator = RankEvaluator(config, metrics)

    self.assertEqual(
        get_collect_result(rank_evaluator, case=0).squeeze().cpu().numpy().tolist(),
        np.array([
            0,
            (2 + 3) / 2 * 2,
            (1 + 2 + 3 + 4) / 4 * 3,
            1 + (2 + 3) / 2 + 4 + 5
        ]).tolist()
    )
def test_config_dict(self):
    config = Config(model='BPR', dataset='ml-100k', config_dict=parameters_dict)

    self.assertEqual(config['model'], 'BPR')
    self.assertEqual(config['learning_rate'], 0.2)
    self.assertEqual(config['topk'], [50, 100])
    self.assertEqual(config['eval_args']['split'], {'RS': [0.8, 0.1, 0.1]})
    self.assertEqual(config['eval_args']['order'], 'RO')
    self.assertEqual(config['eval_args']['mode'], 'full')
    self.assertEqual(config['eval_args']['group_by'], 'user')
def test_config_file_list(self):
    config = Config(model='BPR', dataset='ml-100k', config_file_list=config_file_list)

    self.assertEqual(config['model'], 'BPR')
    self.assertEqual(config['learning_rate'], 0.1)
    self.assertEqual(config['topk'], [5, 20])
    self.assertEqual(config['eval_args']['split'], {'LS': 'valid_and_test'})
    self.assertEqual(config['eval_args']['order'], 'TO')
    self.assertEqual(config['eval_args']['mode'], 'full')
    self.assertEqual(config['eval_args']['group_by'], 'user')
def test_priority(self):
    config = Config(model='BPR', dataset='ml-100k', config_file_list=config_file_list, config_dict=parameters_dict)

    self.assertEqual(config['learning_rate'], 0.2)  # default, file, dict
    self.assertEqual(config['topk'], [50, 100])  # default, file, dict
    self.assertEqual(config['eval_args']['split'], {'LS': 'valid_and_test'})
    self.assertEqual(config['eval_args']['order'], 'TO')
    self.assertEqual(config['eval_args']['mode'], 'full')
    self.assertEqual(config['eval_args']['group_by'], 'user')
    self.assertEqual(config['epochs'], 100)  # default, dict
def load_example():
    # configurations initialization
    config_dict = {'checkpoint_dir': '../saved'}
    config = Config(model='BPR', dataset='ml-100k', config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])
    init_logger(config)
    logger = getLogger()

    with open('../saved/ml-100k-dataset.pth', 'rb') as f:  # You can use your filtered data path here.
        dataset = pickle.load(f)

    train_data, valid_data, test_data = load_split_dataloaders(
        '../saved/ml-100k-for-BPR-dataloader.pth'
    )  # You can use your split data path here.

    model = get_model(config['model'])(config, train_data).to(config['device'])
    checkpoint = torch.load('../saved/BPR-Mar-20-2021_17-11-05.pth')  # Replace this with your own model path.
    model.load_state_dict(checkpoint['state_dict'])

    logger.info(model)
    logger.info(train_data.dataset)
    logger.info(valid_data.dataset)
    logger.info(test_data.dataset)
def test_default_sequential_settings(self):
    config = Config(model='SASRec', dataset='ml-100k')

    self.assertEqual(config['eval_setting'], 'TO_LS,full')
""" import torch from recbole.config import Config from recbole.data import create_dataset, data_preparation from recbole.utils import get_model, init_seed from recbole.utils.case_study import full_sort_topk, full_sort_scores if __name__ == '__main__': # this part is to load saved model. config_dict = { # here you can set some parameters such as `gpu_id` and so on. } config = Config(model='BPR', dataset='ml-100k', config_dict=config_dict) init_seed(config['seed'], config['reproducibility']) dataset = create_dataset(config) train_data, valid_data, test_data = data_preparation(config, dataset) # Here you can also use `load_split_dataloaders` to load data. # The example code for `load_split_dataloaders` can be found in `save_and_load_example.py`. model = get_model(config['model'])(config, train_data) checkpoint = torch.load('RecBole/saved/BPR-Dec-08-2020_15-37-37.pth') # Here you can replace it by your model path. model.load_state_dict(checkpoint['state_dict']) model.eval() # uid_series = np.array([1, 2]) # internal user id series # or you can use dataset.token2id to transfer external user token to internal user id uid_series = dataset.token2id(dataset.uid_field, ['200'])
def run_trial(model_name, dataset_name, hp_config=None, save_flag=False):
    if not hp_config:
        hp_config = {}
        tuning = False
    else:
        tuning = True

    commons.init_seeds()
    verbose = not tuning

    model_class = statics.model_name_map[model_name]
    try:
        default_config = model_class.default_params
    except AttributeError:
        default_config = {}
        assert model_name in statics.recbole_models

    default_config.update(statics.datasets_params[dataset_name])
    default_config.update(hp_config)

    config = Config(model=model_class, dataset=dataset_name, config_dict=default_config)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = logging.getLogger()
    if verbose:
        logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, dataset)
    train_data = add_graph(train_data)
    if verbose:
        logger.info(dataset)

    model = model_class(config, train_data).to(commons.device)
    trainer = utils.get_trainer(config)(config, model)

    best_valid_score, best_valid_result = trainer.fit(
        train_data, valid_data, verbose=verbose, show_progress=verbose
    )
    test_result = trainer.evaluate(test_data)

    if verbose:
        logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}')
        logger.info(set_color('test result', 'yellow') + f': {test_result}')

    metric = str.lower(config['valid_metric'])

    if save_flag:
        os.makedirs(os.path.join("bestmodels", dataset_name, str(config["topk"])), exist_ok=True)
        save_path = os.path.join("bestmodels", dataset_name, str(config["topk"]), "{}.pth".format(model_name))
    else:
        save_path = None

    if save_path:
        shutil.copyfile(trainer.saved_model_file, save_path)

    return {
        'metric': config['valid_metric'],
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_score': test_result[metric]
    }
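# Hypothetical usage sketch for run_trial: the model and dataset names must exist in the
# project's own statics.model_name_map / statics.datasets_params registries (assumed here,
# not shown in this section).
if __name__ == '__main__':
    summary = run_trial('BPR', 'ml-100k', hp_config=None, save_flag=True)
    print(summary['metric'], summary['best_valid_score'], summary['test_score'])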
def new_dataloader(config_dict=None, config_file_list=None):
    config = Config(config_dict=config_dict, config_file_list=config_file_list)
    init_seed(config['seed'], config['reproducibility'])
    logging.basicConfig(level=logging.ERROR)
    dataset = create_dataset(config)
    return data_preparation(config, dataset)
import os
import sys
import unittest

sys.path.append(os.getcwd())
import numpy as np

from recbole.config import Config
from recbole.evaluator import metrics_dict, Collector

parameters_dict = {
    'model': 'BPR',
    'eval_args': {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'mode': 'uni100'},
    'metric_decimal_place': 4,
}

config = Config('BPR', 'ml-1m', config_dict=parameters_dict)


class MetricsTestCases(object):
    user_len_list0 = np.array([2, 3, 5])
    pos_len_list0 = np.array([1, 2, 3])
    pos_rank_sum0 = np.array([1, 4, 9])

    user_len_list1 = np.array([3, 6, 4])
    pos_len_list1 = np.array([1, 0, 4])
    pos_rank_sum1 = np.array([3, 0, 6])


def get_metric_result(name, case=0):
    Metric = metrics_dict[name](config)
    return Metric.metric_info(
# @Email : [email protected]

from recbole.config import Config

if __name__ == '__main__':
    parameters_dict = {
        'model': 'SASRec',
        'learning_rate': 0.2,
        'topk': [50, 100],
        'epochs': 100,
    }
    config_file_list = ['test_config_example.yaml']
    config = Config(model='BPR', dataset='ml-100k')

    # command line
    assert config['use_gpu'] == False
    assert config['valid_metric'] == 'Recall@10'
    assert config['split_ratio'] == [0.7, 0.2, 0.1]
    # assert config['metrics'] == ['Recall@10']  # bug

    # priority
    assert config['epochs'] == 200
    assert config['eval_setting'] == 'LO_RS'
    assert config['learning_rate'] == 0.3

    print('------------------------------------------------------------')
    print('OK')
def new_dataset(config_dict=None, config_file_list=None):
    config = Config(config_dict=config_dict, config_file_list=config_file_list)
    init_seed(config['seed'], config['reproducibility'])
    logging.basicConfig(level=logging.ERROR)
    return create_dataset(config)
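# Minimal usage sketch for the new_dataset / new_dataloader helpers shown in this section;
# passing 'model' and 'dataset' through config_dict is an assumption about how these
# fixtures are driven in the tests.
config_dict = {'model': 'BPR', 'dataset': 'ml-100k'}
dataset = new_dataset(config_dict=config_dict)
train_data, valid_data, test_data = new_dataloader(config_dict=config_dict)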
args = get_args()

# configurations initialization
config_dict = {
    'USER_ID_FIELD': 'session_id',
    'load_col': None,
    'neg_sampling': None,
    'benchmark_filename': ['train', 'test'],
    'alias_of_item_id': ['item_id_list'],
    'topk': [20],
    'metrics': ['Recall', 'MRR'],
    'valid_metric': 'MRR@20',
}
config = Config(model=args.model, dataset=f'{args.dataset}', config_dict=config_dict)
init_seed(config['seed'], config['reproducibility'])

# logger initialization
init_logger(config)
logger = getLogger()
logger.info(args)
logger.info(config)

# dataset filtering
dataset = create_dataset(config)
logger.info(dataset)

# dataset splitting
def test_default_sequential_settings(self):
    para_dict = {'training_neg_sample_num': 0}
    config = Config(model='SASRec', dataset='ml-100k', config_dict=para_dict)

    self.assertEqual(config['eval_setting'], 'TO_LS,full')