import logging
from logging import getLogger

from torch.autograd import profiler

# Imports below assume the standard RecBole package layout.
from recbole.config import Config
from recbole.data import create_dataset, data_preparation, get_dataloader, save_split_dataloaders
from recbole.utils import init_logger, init_seed, get_model, get_trainer, set_color


def objective_function(config_dict=None, config_file_list=None, saved=True):
    r"""The default objective_function used in HyperTuning.

    Args:
        config_dict (dict): parameters dictionary used to modify experiment parameters
        config_file_list (list): config files used to modify experiment parameters
        saved (bool): whether to save the model
    """
    config = Config(config_dict=config_dict, config_file_list=config_file_list)
    init_seed(config['seed'], config['reproducibility'])
    # silence per-trial logging so hyper-parameter search output stays readable
    logging.basicConfig(level=logging.ERROR)
    dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, dataset)
    model = get_model(config['model'])(config, train_data).to(config['device'])
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
    best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, verbose=False, saved=saved)
    test_result = trainer.evaluate(test_data, load_best_model=saved)
    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
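
# Sketch: how objective_function plugs into RecBole's HyperTuning driver. It
# assumes the stock recbole.trainer.HyperTuning interface; 'model.hyper' is a
# hypothetical search-space file (e.g. a line such as: learning_rate loguniform -8,0)
# and 'fixed_config.yaml' is a placeholder for fixed experiment settings.
def hyper_tuning_example():
    from recbole.trainer import HyperTuning

    hp = HyperTuning(objective_function, algo='exhaustive',
                     params_file='model.hyper',
                     fixed_config_file_list=['fixed_config.yaml'])
    hp.run()
    print('best params:', hp.best_params)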
def save_example():
    # configurations initialization
    config_dict = {'checkpoint_dir': '../saved'}
    config = Config(model='BPR', dataset='ml-100k', config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])
    init_logger(config)

    # dataset filtering
    dataset = create_dataset(config)
    dataset.save('../saved/')

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)
    save_split_dataloaders(config, dataloaders=(train_data, valid_data, test_data))

    # model loading and initialization
    model = get_model(config['model'])(config, train_data).to(config['device'])

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training; the best model will be saved under config['checkpoint_dir']
    best_valid_score, best_valid_result = trainer.fit(
        train_data, valid_data, saved=True, show_progress=config['show_progress'])
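
# Counterpart sketch: reload the checkpoint written by save_example and evaluate it.
# Assumes Trainer.evaluate accepts a model_file path (true in recent RecBole
# releases); 'BPR-example.pth' is a placeholder for the file trainer.fit wrote.
def load_example():
    config_dict = {'checkpoint_dir': '../saved'}
    config = Config(model='BPR', dataset='ml-100k', config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])
    init_logger(config)

    dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, dataset)

    model = get_model(config['model'])(config, train_data).to(config['device'])
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # evaluate on the test split using the saved checkpoint
    test_result = trainer.evaluate(
        test_data, load_best_model=True, model_file='../saved/BPR-example.pth')
    print(test_result)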
def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True):
    r"""A fast-running API that covers the complete process of training and testing
    a model on a specified dataset.

    Args:
        model (str): model name
        dataset (str): dataset name
        config_file_list (list): config files used to modify experiment parameters
        config_dict (dict): parameters dictionary used to modify experiment parameters
        saved (bool): whether to save the model
    """
    # configurations initialization
    config = Config(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()
    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)
    # debug output: inspect the item features carried by the split datasets
    print(train_data.dataset.item_feat)
    print(valid_data.dataset.item_feat)

    # model loading and initialization
    model = get_model(config['model'])(config, train_data).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training, profiled when config['monitor'] is set
    with profiler.profile(enabled=config['monitor'], with_stack=True,
                          profile_memory=True, use_cuda=True) as prof:
        best_valid_score, best_valid_result = trainer.fit(
            train_data, valid_data, saved=saved, show_progress=config['show_progress']
        )
    if prof is not None:
        print(prof.key_averages(group_by_stack_n=5).table(sort_by='self_cpu_time_total'))

    # model evaluation, profiled when config['monitor_eval'] is set
    with profiler.profile(enabled=config['monitor_eval'], with_stack=True,
                          profile_memory=True, use_cuda=True) as prof:
        test_result = trainer.evaluate(test_data, load_best_model=saved,
                                       show_progress=config['show_progress'],
                                       cold_warm_distinct_eval=True)
    if prof is not None:
        print(prof.key_averages(group_by_stack_n=5).table(sort_by='self_cpu_time_total'))

    logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}')
    logger.info(set_color('test result', 'yellow') + f': {test_result}')

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
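
# Usage sketch for the profiling variant above. The 'monitor' and 'monitor_eval'
# config keys gate torch.autograd profiling of training and evaluation respectively;
# they appear to be fork-specific additions rather than stock RecBole options, so
# they must be declared in the fork's config for this call to work.
def profiled_run_example():
    run_recbole(model='BPR', dataset='ml-100k',
                config_dict={'monitor': True, 'monitor_eval': False})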
def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True):
    r"""A fast-running API that covers the complete process of training and testing
    a model on a specified dataset.

    Args:
        model (str): model name
        dataset (str): dataset name
        config_file_list (list): config files used to modify experiment parameters
        config_dict (dict): parameters dictionary used to modify experiment parameters
        saved (bool): whether to save the model
    """
    # configurations initialization
    config = Config(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()
    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)

    # model loading and initialization
    model = get_model(config['model'])(config, train_data).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training
    best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, saved=saved)

    # model evaluation
    test_result = trainer.evaluate(test_data, load_best_model=saved)

    logger.info('best valid result: {}'.format(best_valid_result))
    logger.info('test result: {}'.format(test_result))

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
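
# Sketch of the argparse shim that RecBole's run_recbole.py entry script uses to
# drive this function from the command line; shown here as a helper for reference.
def cli_example():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='BPR', help='model name')
    parser.add_argument('--dataset', type=str, default='ml-100k', help='dataset name')
    parser.add_argument('--config_files', type=str, default=None,
                        help='space-separated list of config files')
    args, _ = parser.parse_known_args()

    config_file_list = args.config_files.strip().split(' ') if args.config_files else None
    run_recbole(model=args.model, dataset=args.dataset, config_file_list=config_file_list)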
def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True):
    r"""A fast-running API that covers the complete process of training and testing
    a model on a specified dataset.

    Args:
        model (str, optional): Model name. Defaults to ``None``.
        dataset (str, optional): Dataset name. Defaults to ``None``.
        config_file_list (list, optional): Config files used to modify experiment parameters. Defaults to ``None``.
        config_dict (dict, optional): Parameters dictionary used to modify experiment parameters. Defaults to ``None``.
        saved (bool, optional): Whether to save the model. Defaults to ``True``.
    """
    # configurations initialization
    config = Config(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()
    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    if config['save_dataset']:
        dataset.save()
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)
    if config['save_dataloaders']:
        save_split_dataloaders(config, dataloaders=(train_data, valid_data, test_data))

    # model loading and initialization
    model = get_model(config['model'])(config, train_data.dataset).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training
    best_valid_score, best_valid_result = trainer.fit(
        train_data, valid_data, saved=saved, show_progress=config['show_progress']
    )

    # model evaluation
    test_result = trainer.evaluate(test_data, load_best_model=saved, show_progress=config['show_progress'])

    logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}')
    logger.info(set_color('test result', 'yellow') + f': {test_result}')

    return {
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }
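
# Minimal quick-start usage of the version above: train BPR on ml-100k and, via
# the save_dataset / save_dataloaders switches it reads, cache the filtered
# dataset and split dataloaders as a side effect.
def quick_start_example():
    run_recbole(model='BPR', dataset='ml-100k',
                config_dict={'save_dataset': True, 'save_dataloaders': True})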
# NOTE: this fragment begins inside a conditional; the opening ``if`` branch
# (and its condition) is elided in the source.
    test_data = get_dataloader(config, 'test')(config, new_test_dataset, None, shuffle=False)
else:
    train_data = get_dataloader(config, 'train')(config, train_dataset, None, shuffle=True)
    test_data = get_dataloader(config, 'test')(config, test_dataset, None, shuffle=False)

# model loading and initialization
model = get_model(config['model'])(config, train_data.dataset).to(config['device'])
logger.info(model)

# trainer loading and initialization
trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

# model training and evaluation (the test split doubles as the validation set here)
test_score, test_result = trainer.fit(
    train_data, test_data, saved=True, show_progress=config['show_progress'])

logger.info(set_color('test result', 'yellow') + f': {test_result}')
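
# Because trainer.fit above receives the test dataloader as its validation set,
# early stopping and model selection are driven by test performance. Where a
# separate validation split exists, a cleaner pattern is the sketch below
# (valid_dataset is a hypothetical split object; the phase argument follows
# get_dataloader's usual 'train'/'valid'/'test' convention).
valid_data = get_dataloader(config, 'valid')(config, valid_dataset, None, shuffle=False)
best_valid_score, best_valid_result = trainer.fit(
    train_data, valid_data, saved=True, show_progress=config['show_progress'])
test_result = trainer.evaluate(test_data, load_best_model=True,
                               show_progress=config['show_progress'])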