class BaysOptimizer(object): """Bayesian optimizer class for tuning hyperparameter. This class implements the Bayesian Optimizer for tuning the hyper-parameter. Args: args (object): The Argument Parser object providing arguments. name_dataset (str): The name of the dataset. sampling (str): sampling to be used for generating negative triples Examples: >>> from pykg2vec.config.hyperparams import KGETuneArgParser >>> from pykg2vec.utils.bayesian_optimizer import BaysOptimizer >>> model = Complex() >>> args = KGETuneArgParser().get_args(sys.argv[1:]) >>> bays_opt = BaysOptimizer(args=args) >>> bays_opt.optimize() """ def __init__(self, args=None): """store the information of database""" model_name = args.model.lower() self.args = args self.knowledge_graph = KnowledgeGraph(dataset=args.dataset_name, negative_sample=args.sampling) hyper_params = None try: self.model_obj = getattr( importlib.import_module(model_path + ".%s" % modelMap[model_name]), modelMap[model_name]) self.config_obj = getattr(importlib.import_module(config_path), configMap[model_name]) hyper_params = getattr(importlib.import_module(hyper_param_path), hypMap[model_name])() except ModuleNotFoundError: print("%s not implemented! Select from: %s" % (model_name, ' '.join(map(str, modelMap.values())))) config = self.config_obj() config.data = args.dataset_name self.trainer = Trainer(model=self.model_obj(config), debug=self.args.debug, tuning=True) self.search_space = self.define_search_space(hyper_params) self.max_evals = self.args.max_number_trials if not self.args.debug else 1 def define_search_space(self, hyper_params): """Function to perform search space addition""" space = { k: hp.choice(k, v) for k, v in hyper_params.__dict__.items() if not k.startswith('__') and not callable(k) } return space def optimize(self): """Function that performs bayesian optimization""" space = self.search_space trials = Trials() best_result = fmin(fn=self.get_loss, space=space, algo=tpe.suggest, max_evals=self.max_evals, trials=trials) columns = list(space.keys()) results = pd.DataFrame(columns=['iteration'] + columns + ['loss']) for idx, trial in enumerate(trials.trials): row = [] row.append(idx) translated_eval = space_eval( self.search_space, {k: v[0] for k, v in trial['misc']['vals'].items()}) for k in columns: row.append(translated_eval[k]) row.append(trial['result']['loss']) results.loc[idx] = row path = self.trainer.config.result / self.trainer.model.model_name path.mkdir(parents=True, exist_ok=True) results.to_csv(str(path / "trials.csv"), index=False) print(results) print('Found Golden Setting:') pprint(space_eval(space, best_result)) def get_loss(self, params): """Function that defines and acquires the loss""" self.trainer.config.L1_flag = params['L1_flag'] self.trainer.config.batch_size = params['batch_size'] self.trainer.config.epochs = params['epochs'] if 'hidden_size' in params: self.trainer.config.hidden_size = params['hidden_size'] if 'ent_hidden_size' in params: self.trainer.config.ent_hidden_size = params['ent_hidden_size'] if 'rel_hidden_size' in params: self.trainer.config.rel_hidden_size = params['rel_hidden_size'] self.trainer.config.learning_rate = params['learning_rate'] self.trainer.config.margin = params['margin'] self.trainer.config.disp_result = False self.trainer.config.disp_summary = False self.trainer.config.save_model = False self.trainer.config.debug = True self.trainer.config.test_num = 1000 self.trainer.build_model() self.trainer.summary_hyperparameter() loss = self.trainer.tune_model() # loss = 
self.trainer.train_model(tuning=True) return {'loss': loss, 'status': STATUS_OK}
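# A minimal, self-contained sketch (not part of the original module) of the
# hyperopt pattern used by optimize()/get_loss() above: build a search space
# with hp.choice, minimize an objective with fmin/tpe, then decode each
# trial's raw index vector back to parameter values with space_eval. The toy
# objective stands in for Trainer.tune_model(); all names and values here are
# illustrative assumptions.
def _demo_hyperopt_pattern():
    toy_space = {
        'learning_rate': hp.choice('learning_rate', [0.001, 0.01, 0.1]),
        'hidden_size': hp.choice('hidden_size', [50, 100, 200]),
    }

    def toy_loss(params):
        # Stand-in for a real training run; returns a deterministic pseudo-loss.
        loss = params['learning_rate'] * 10 + 100.0 / params['hidden_size']
        return {'loss': loss, 'status': STATUS_OK}

    trials = Trials()
    best = fmin(fn=toy_loss, space=toy_space, algo=tpe.suggest, max_evals=5, trials=trials)

    # trial['misc']['vals'] holds index lists for hp.choice dimensions;
    # space_eval maps them back to actual values, exactly as optimize() does.
    rows = []
    for idx, trial in enumerate(trials.trials):
        decoded = space_eval(toy_space, {k: v[0] for k, v in trial['misc']['vals'].items()})
        rows.append({'iteration': idx, **decoded, 'loss': trial['result']['loss']})
    print(pd.DataFrame(rows))
    print('Best:', space_eval(toy_space, best))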
class BaysOptimizer:
    """Bayesian optimizer class for tuning hyperparameters.

    This class implements the Bayesian Optimizer for tuning the hyper-parameters.

    Args:
        args (object): The Argument Parser object providing arguments.

    Examples:
        >>> import sys
        >>> from pykg2vec.common import KGEArgParser
        >>> from pykg2vec.utils.bayesian_optimizer import BaysOptimizer
        >>> args = KGEArgParser().get_args(sys.argv[1:])
        >>> bays_opt = BaysOptimizer(args=args)
        >>> bays_opt.optimize()
    """

    _logger = Logger().get_logger(__name__)

    def __init__(self, args):
        """Stores the dataset information and resolves the model and config classes."""
        if args.model_name.lower() in ["conve", "convkb", "proje_pointwise", "interacte", "hyper", "acre"]:
            raise Exception("Model %s has not been supported in tuning hyperparameters!" % args.model_name)

        self.model_name = args.model_name
        self.knowledge_graph = KnowledgeGraph(dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
        self.kge_args = args
        self.max_evals = args.max_number_trials if not args.debug else 3
        self.config_obj, self.model_obj = Importer().import_model_config(self.model_name.lower())
        self.config_local = self.config_obj(self.kge_args)
        self.search_space = HyperparameterLoader(args).load_search_space(self.model_name.lower())
        self._best_result = None
        self.trainer = None

    def optimize(self):
        """Performs Bayesian optimization and writes the per-trial results to disk."""
        trials = Trials()
        self._best_result = fmin(fn=self._get_loss, space=self.search_space, trials=trials,
                                 algo=tpe.suggest, max_evals=self.max_evals)

        columns = list(self.search_space.keys())
        results = pd.DataFrame(columns=['iteration'] + columns + ['loss'])

        for idx, trial in enumerate(trials.trials):
            row = [idx]
            translated_eval = space_eval(self.search_space,
                                         {k: v[0] for k, v in trial['misc']['vals'].items()})
            for k in columns:
                row.append(translated_eval[k])
            row.append(trial['result']['loss'])
            results.loc[idx] = row

        path = self.config_local.path_result / self.model_name
        path.mkdir(parents=True, exist_ok=True)
        results.to_csv(str(path / "trials.csv"), index=False)

        self._logger.info(results)
        self._logger.info('Found golden setting:')
        self._logger.info(space_eval(self.search_space, self._best_result))

    def return_best(self):
        """Returns the best hyper-parameters found by optimize()."""
        assert self._best_result is not None, 'Cannot find golden setting. Has optimize() been called?'
        return space_eval(self.search_space, self._best_result)

    def _get_loss(self, params):
        """Trains the model once with the sampled hyper-parameters and returns the tuning loss."""
        # Copy the sampled hyperparameters into the local config.
        for key, value in params.items():
            self.config_local.__dict__[key] = value

        self.config_local.__dict__['device'] = self.kge_args.device
        model = self.model_obj(**self.config_local.__dict__)
        self.trainer = Trainer(model, self.config_local)

        # Configure common settings for a tuning run.
        self.config_local.disp_result = False
        self.config_local.disp_summary = False
        self.config_local.save_model = False

        # Do not overwrite the test number if it is already set.
        if self.config_local.test_num is None:
            self.config_local.test_num = 1000

        if self.kge_args.debug:
            self.config_local.epochs = 1

        # Start the trial.
        self.trainer.build_model()
        loss = self.trainer.tune_model()

        return {'loss': loss, 'status': STATUS_OK}
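# Hedged sketch (not part of the module): the class above loads its search
# space from HyperparameterLoader, but since _get_loss() simply copies each
# sampled key into the config, any hyperopt space whose parameter names the
# model's config accepts could stand in for it. The names and ranges below
# are illustrative assumptions, not pykg2vec's shipped defaults.
import numpy as np
from hyperopt.pyll import scope

_example_space = {
    'learning_rate': hp.loguniform('learning_rate', np.log(1e-4), np.log(1e-1)),
    'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256), 1)),
    'margin': hp.uniform('margin', 0.5, 8.0),
}
# e.g. bays_opt.search_space = _example_space  # assign before calling optimize()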
class BaysOptimizer(object):

    def __init__(self, name_dataset='Freebase15k', sampling="uniform", args=None):
        """Stores the dataset information and resolves the model, config, and hyper-parameter classes."""
        model_name = args.model.lower()
        self.args = args
        self.knowledge_graph = KnowledgeGraph(dataset=name_dataset, negative_sample=sampling)
        hyper_params = None
        try:
            self.model_obj = getattr(importlib.import_module(model_path + ".%s" % modelMap[model_name]),
                                     modelMap[model_name])
            self.config_obj = getattr(importlib.import_module(config_path), configMap[model_name])
            hyper_params = getattr(importlib.import_module(hyper_param_path), hypMap[model_name])()
        except ModuleNotFoundError:
            print("%s not implemented! Select from: %s" % (model_name, ' '.join(map(str, modelMap.values()))))

        config = self.config_obj()
        config.data = name_dataset  # config.set_dataset(name_dataset)

        self.trainer = Trainer(model=self.model_obj(config), debug=self.args.debug, tuning=True)
        self.search_space = self.define_search_space(hyper_params)

    def define_search_space(self, hyper_params):
        """Builds the hyperopt search space from the hyper-parameter class attributes."""
        space = {k: hp.choice(k, v) for k, v in hyper_params.__dict__.items()
                 if not k.startswith('__') and not callable(v)}
        return space

    def optimize(self):
        """Performs Bayesian optimization; the number of evaluations is hard-coded to 2 in this version."""
        space = self.search_space
        trials = Trials()
        best_result = fmin(fn=self.get_loss, space=space, algo=tpe.suggest, max_evals=2, trials=trials)

        columns = list(space.keys())
        results = pd.DataFrame(columns=['iteration'] + columns + ['loss'])

        for idx, trial in enumerate(trials.trials):
            row = [idx]
            translated_eval = space_eval(self.search_space,
                                         {k: v[0] for k, v in trial['misc']['vals'].items()})
            for k in columns:
                row.append(translated_eval[k])
            row.append(trial['result']['loss'])
            results.loc[idx] = row

        path = self.trainer.config.result / self.trainer.model.model_name
        path.mkdir(parents=True, exist_ok=True)
        results.to_csv(str(path / "trials.csv"), index=False)

        print(results)
        print('Found Golden Setting:')
        pprint(space_eval(space, best_result))

    def get_loss(self, params):
        """Applies one sampled hyper-parameter set to the trainer config and returns the tuning loss."""
        self.trainer.config.L1_flag = params['L1_flag']
        self.trainer.config.batch_size = params['batch_size']
        self.trainer.config.epochs = params['epochs']
        self.trainer.config.hidden_size = params['hidden_size']
        self.trainer.config.learning_rate = params['learning_rate']
        self.trainer.config.margin = params['margin']
        self.trainer.config.disp_result = False
        self.trainer.config.disp_summary = False
        self.trainer.config.save_model = False
        self.trainer.config.debug = True
        self.trainer.config.test_num = 1000

        self.trainer.build_model()
        self.trainer.summary_hyperparameter()

        loss = self.trainer.tune_model()
        # loss = self.trainer.train_model(tuning=True)
        return {'loss': loss, 'status': STATUS_OK}
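# Hedged sketch (illustrative, not the actual pykg2vec defaults) of the kind
# of hyper-parameter container define_search_space() above expects: a plain
# object whose instance attributes are lists of candidate values, each of
# which becomes one hp.choice dimension of the search space.
class _ExampleTransEParams:
    def __init__(self):
        self.learning_rate = [0.00001, 0.0001, 0.001, 0.01]
        self.L1_flag = [True, False]
        self.hidden_size = [8, 16, 32, 64, 128]
        self.batch_size = [128, 256, 512]
        self.epochs = [2, 5, 10]
        self.margin = [0.4, 1.0, 2.0]

# define_search_space(_ExampleTransEParams()) then yields
# {'learning_rate': hp.choice('learning_rate', [...]), 'L1_flag': ..., ...}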
class BaysOptimizer(object):
    """Bayesian optimizer class for tuning hyperparameters.

    This class implements the Bayesian Optimizer for tuning the hyper-parameters.

    Args:
        args (object): The Argument Parser object providing arguments.

    Examples:
        >>> import sys
        >>> from pykg2vec.config.hyperparams import KGETuneArgParser
        >>> from pykg2vec.utils.bayesian_optimizer import BaysOptimizer
        >>> args = KGETuneArgParser().get_args(sys.argv[1:])
        >>> bays_opt = BaysOptimizer(args=args)
        >>> bays_opt.optimize()
    """

    _logger = Logger().get_logger(__name__)

    def __init__(self, args=None):
        """Stores the dataset information and resolves the model, config, and hyper-parameter classes."""
        if args.model.lower() in ["tucker", "tucker_v2", "conve", "convkb", "proje_pointwise"]:
            raise Exception("Model %s has not been supported in tuning hyperparameters!" % args.model)

        model_name = args.model.lower()
        self.args = args
        self.knowledge_graph = KnowledgeGraph(dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
        hyper_params = None
        try:
            self.model_obj = getattr(importlib.import_module(model_path + ".%s" % moduleMap[model_name]),
                                     modelMap[model_name])
            self.config_obj = getattr(importlib.import_module(config_path), configMap[model_name])
            hyper_params = getattr(importlib.import_module(hyper_param_path), hypMap[model_name])()
        except ModuleNotFoundError:
            self._logger.error("%s not implemented! Select from: %s" %
                               (model_name, ' '.join(map(str, modelMap.values()))))

        from pykg2vec.config.config import KGEArgParser
        kge_args = KGEArgParser().get_args([])
        kge_args.dataset_name = args.dataset_name
        kge_args.debug = self.args.debug
        config = self.config_obj(kge_args)
        model = self.model_obj(config)

        self.trainer = Trainer(model)
        self.search_space = hyper_params.search_space
        self.max_evals = self.args.max_number_trials if not self.args.debug else 1

    def optimize(self):
        """Performs Bayesian optimization and writes the per-trial results to disk."""
        trials = Trials()
        self.best_result = fmin(fn=self.get_loss, space=self.search_space, trials=trials,
                                algo=tpe.suggest, max_evals=self.max_evals)

        columns = list(self.search_space.keys())
        results = pd.DataFrame(columns=['iteration'] + columns + ['loss'])

        for idx, trial in enumerate(trials.trials):
            row = [idx]
            translated_eval = space_eval(self.search_space,
                                         {k: v[0] for k, v in trial['misc']['vals'].items()})
            for k in columns:
                row.append(translated_eval[k])
            row.append(trial['result']['loss'])
            results.loc[idx] = row

        path = self.trainer.config.path_result / self.trainer.model.model_name
        path.mkdir(parents=True, exist_ok=True)
        results.to_csv(str(path / "trials.csv"), index=False)

        self._logger.info(results)
        self._logger.info('Found Golden Setting:')
        self._logger.info(space_eval(self.search_space, self.best_result))

    def return_best(self):
        """Returns the best hyper-parameters found by optimize()."""
        return space_eval(self.search_space, self.best_result)

    def get_loss(self, params):
        """Trains the model once with the sampled hyper-parameters and returns the tuning loss."""
        # Copy the hyperparameters to the trainer config and hyperparameter set.
        for key, value in params.items():
            self.trainer.config.__dict__[key] = value
            self.trainer.config.hyperparameters[key] = value

        # Configure common settings for a tuning run.
        self.trainer.config.disp_result = False
        self.trainer.config.disp_summary = False
        self.trainer.config.save_model = False

        # Do not overwrite the test number if it is already set.
        if self.trainer.config.test_num is None:
            self.trainer.config.test_num = 1000

        if self.args.debug:
            self.trainer.config.epochs = 1
            self.trainer.config.hyperparameters['epochs'] = 1

        # Start the trial.
        self.trainer.build_model()
        loss = self.trainer.tune_model()

        return {'loss': loss, 'status': STATUS_OK}
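# Follow-up sketch (illustrative, not part of the module): reading back the
# per-trial table that optimize() persists and ranking it by loss. The file
# location mirrors the code above: <path_result>/<model name>/trials.csv;
# the result directory and model name in the usage comment are assumptions.
from pathlib import Path

def _load_trials(result_dir, model_name):
    """Load a persisted trials.csv and return it sorted by ascending loss."""
    csv_path = Path(result_dir) / model_name / "trials.csv"
    trials_df = pd.read_csv(csv_path)
    return trials_df.sort_values('loss').reset_index(drop=True)

# Example usage (assumed paths):
#   ranked = _load_trials('./results', 'TransE')
#   print(ranked.head(1))  # the golden setting found by the tuner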