def test_exception_on_search_space_file_not_exist():
    with pytest.raises(FileNotFoundError) as e:
        hp_loader = HyperparameterLoader(KGEArgParser().get_args(["-ssf", "not_exist_file"]))
        hp_loader.load_search_space("analogy")
    assert str(e.value) == "Cannot find configuration file not_exist_file"
def test_exception_on_search_space_file_with_wrong_extension():
    custom_hyperparamter_file = os.path.join(os.path.dirname(__file__), "resource", "custom_hyperparams", "custom.txt")
    with pytest.raises(ValueError) as e:
        hp_loader = HyperparameterLoader(KGEArgParser().get_args(["-ssf", custom_hyperparamter_file]))
        hp_loader.load_search_space("analogy")
    assert str(e.value) == "Configuration file must have .yaml or .yml extension: %s" % custom_hyperparamter_file
    def __init__(self, args):
        """Store the information of the database."""
        if args.model_name.lower() in ["tucker", "conve", "convkb", "proje_pointwise"]:
            raise Exception("Model %s has not been supported in tuning hyperparameters!" % args.model_name)

        self.model_name = args.model_name
        self.knowledge_graph = KnowledgeGraph(dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
        self.kge_args = KGEArgParser().get_args([])
        self.kge_args.dataset_name = args.dataset_name
        self.kge_args.debug = args.debug
        self.kge_args.device = args.device
        self.max_evals = args.max_number_trials if not args.debug else 3
        self.config_obj, self.model_obj = Importer().import_model_config(self.model_name.lower())
        self.config_local = self.config_obj(self.kge_args)
        self.search_space = HyperparameterLoader(args).load_search_space(self.model_name.lower())
        self._best_result = None
        self.trainer = None
def test_load_default_hyperparameter_file():
    hp_loader = HyperparameterLoader(KGEArgParser().get_args([]))
    hyperparams = hp_loader.load_hyperparameter("freebase15k", "analogy")
    search_space = hp_loader.load_search_space("analogy")
    assert hyperparams["learning_rate"] == 0.1
    assert hyperparams["hidden_size"] == 200
    assert str(search_space["epochs"].inputs()[1]) == "0 Literal{10}"
def test_load_custom_hyperparameter_file():
    custom_hyperparamter_file = os.path.join(os.path.dirname(__file__), "resource", "custom_hyperparams", "custom.yaml")
    hp_loader = HyperparameterLoader(KGEArgParser().get_args(["-hpf", custom_hyperparamter_file, "-ssf", custom_hyperparamter_file]))
    hyperparams = hp_loader.load_hyperparameter("freebase15k", "analogy")
    search_space = hp_loader.load_search_space("analogy")
    assert hyperparams["learning_rate"] == 0.01
    assert hyperparams["hidden_size"] == 200
    assert str(search_space["epochs"].inputs()[1]) == "0 Literal{100}"
# NOTE: the parametrize values below are an assumed example; the original list
# of model names is not shown in this excerpt.
@pytest.mark.parametrize("model_name", ["analogy"])
def test_search_space_loader(model_name):
    knowledge_graph = KnowledgeGraph(dataset="freebase15k")
    knowledge_graph.prepare_data()

    # Get the customized configurations from the command-line arguments.
    args = KGEArgParser().get_args([])

    hyperparams = HyperparameterLoader(args).load_search_space(model_name)

    assert hyperparams["epochs"] is not None
    def __init__(self, args):
        for arg_name in vars(args):
            self.__dict__[arg_name] = getattr(args, arg_name)

        # Training and evaluation related variables
        self.hits = [1, 3, 5, 10]
        self.disp_result = False
        self.patience = 3  # should make this configurable as well.

        # Visualization related,
        # p.s. the visualizer is disabled for most of the KGE methods for now.
        self.disp_triple_num = 20
        self.plot_training_result = True
        self.plot_testing_result = True

        # Knowledge Graph Information
        self.knowledge_graph = KnowledgeGraph(dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
        for key in self.knowledge_graph.kg_meta.__dict__:
            self.__dict__[key] = self.knowledge_graph.kg_meta.__dict__[key]

        # The results of training will be stored in the following folders,
        # which are relative to the parent folder (the path of the dataset).
        dataset_path = self.knowledge_graph.dataset.dataset_path
        self.path_tmp = dataset_path / 'intermediate'
        self.path_tmp.mkdir(parents=True, exist_ok=True)
        self.path_result = dataset_path / 'results'
        self.path_result.mkdir(parents=True, exist_ok=True)
        self.path_figures = dataset_path / 'figures'
        self.path_figures.mkdir(parents=True, exist_ok=True)
        self.path_embeddings = dataset_path / 'embeddings'
        self.path_embeddings.mkdir(parents=True, exist_ok=True)

        if args.exp is True:
            paper_params = HyperparameterLoader(args).load_hyperparameter(args.dataset_name, args.model_name)
            for key, value in paper_params.items():
                self.__dict__[key] = value  # copy all the settings from the paper.
class BaysOptimizer:
    """Bayesian optimizer class for tuning hyperparameters.

    This class implements the Bayesian optimizer for tuning the hyperparameters.

    Args:
        args (object): The Argument Parser object providing arguments.

    Examples:
        >>> from pykg2vec.common import KGEArgParser
        >>> from pykg2vec.utils.bayesian_optimizer import BaysOptimizer
        >>> args = KGEArgParser().get_args(sys.argv[1:])
        >>> bays_opt = BaysOptimizer(args=args)
        >>> bays_opt.optimize()
    """

    _logger = Logger().get_logger(__name__)

    def __init__(self, args):
        """Store the information of the database."""
        if args.model_name.lower() in ["conve", "convkb", "proje_pointwise", "interacte", "hyper", "acre"]:
            raise Exception("Model %s has not been supported in tuning hyperparameters!" % args.model_name)

        self.model_name = args.model_name
        self.knowledge_graph = KnowledgeGraph(dataset=args.dataset_name, custom_dataset_path=args.dataset_path)
        self.kge_args = args
        self.max_evals = args.max_number_trials if not args.debug else 3
        self.config_obj, self.model_obj = Importer().import_model_config(self.model_name.lower())
        self.config_local = self.config_obj(self.kge_args)
        self.search_space = HyperparameterLoader(args).load_search_space(self.model_name.lower())
        self._best_result = None
        self.trainer = None

    def optimize(self):
        """Perform Bayesian optimization over the loaded search space."""
        trials = Trials()
        self._best_result = fmin(fn=self._get_loss,
                                 space=self.search_space,
                                 trials=trials,
                                 algo=tpe.suggest,
                                 max_evals=self.max_evals)

        columns = list(self.search_space.keys())
        results = pd.DataFrame(columns=['iteration'] + columns + ['loss'])

        for idx, trial in enumerate(trials.trials):
            row = [idx]
            translated_eval = space_eval(self.search_space, {k: v[0] for k, v in trial['misc']['vals'].items()})
            for k in columns:
                row.append(translated_eval[k])
            row.append(trial['result']['loss'])
            results.loc[idx] = row

        path = self.config_local.path_result / self.model_name
        path.mkdir(parents=True, exist_ok=True)
        results.to_csv(str(path / "trials.csv"), index=False)

        self._logger.info(results)
        self._logger.info('Found golden setting:')
        self._logger.info(space_eval(self.search_space, self._best_result))

    def return_best(self):
        """Return the best hyperparameters found by optimize()."""
        assert self._best_result is not None, 'Cannot find golden setting. Has optimize() been called?'
        return space_eval(self.search_space, self._best_result)

    def _get_loss(self, params):
        """Define and acquire the loss for a single trial."""
        # Copy the sampled hyperparameters into the trainer config.
        for key, value in params.items():
            self.config_local.__dict__[key] = value
        self.config_local.__dict__['device'] = self.kge_args.device

        model = self.model_obj(**self.config_local.__dict__)
        self.trainer = Trainer(model, self.config_local)

        # Configure common settings for a tuning run.
        self.config_local.disp_result = False
        self.config_local.disp_summary = False
        self.config_local.save_model = False

        # Do not overwrite the test number if it has already been set.
        if self.config_local.test_num is None:
            self.config_local.test_num = 1000
        if self.kge_args.debug:
            self.config_local.epochs = 1

        # Start the trial.
        self.trainer.build_model()
        loss = self.trainer.tune_model()
        return {'loss': loss, 'status': STATUS_OK}
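
# Illustrative sketch (not part of pykg2vec): a minimal, self-contained hyperopt
# run that mirrors the fmin / Trials / space_eval pattern used by
# BaysOptimizer.optimize() and return_best() above. The objective function and
# search space below are invented for demonstration only.
if __name__ == "__main__":
    from hyperopt import fmin, tpe, hp, Trials, STATUS_OK, space_eval

    demo_space = {
        "learning_rate": hp.loguniform("learning_rate", -7, 0),
        "hidden_size": hp.choice("hidden_size", [50, 100, 200]),
    }

    def demo_objective(params):
        # Stand-in for Trainer.tune_model(): pretend that smaller learning
        # rates and larger hidden sizes yield a smaller loss.
        loss = params["learning_rate"] + 1.0 / params["hidden_size"]
        return {"loss": loss, "status": STATUS_OK}

    demo_trials = Trials()
    best = fmin(fn=demo_objective, space=demo_space, algo=tpe.suggest,
                max_evals=10, trials=demo_trials)

    # `best` holds index-encoded values (e.g. the choice index for hidden_size);
    # space_eval translates them back into concrete hyperparameter values,
    # exactly as return_best() does above.
    print(space_eval(demo_space, best))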