def _run_ablation_experiments( directories: List[Tuple[str, str]], best_replicates: Optional[int] = None, dry_run: bool = False, move_to_cpu: bool = True, discard_replicates: bool = False, ): """Run ablation experiments.""" if dry_run: return from pykeen.hpo import hpo_pipeline_from_path for output_directory, rv_config_path in directories: hpo_pipeline_result = hpo_pipeline_from_path(rv_config_path) hpo_pipeline_result.save_to_directory(output_directory) if not best_replicates: continue best_pipeline_dir = os.path.join(output_directory, 'best_pipeline') os.makedirs(best_pipeline_dir, exist_ok=True) logger.info('Re-training best pipeline and saving artifacts in %s', best_pipeline_dir) hpo_pipeline_result.replicate_best_pipeline( replicates=best_replicates, move_to_cpu=move_to_cpu, save_replicates=not discard_replicates, directory=best_pipeline_dir, )
def _run_ablation_experiments( directories: Sequence[Tuple[Union[str, pathlib.Path], Union[str, pathlib.Path]]], best_replicates: Optional[int] = None, dry_run: bool = False, move_to_cpu: bool = True, discard_replicates: bool = False, ) -> None: """Run ablation experiments.""" if dry_run: return from pykeen.hpo import hpo_pipeline_from_path for output_directory, rv_config_path in directories: if isinstance(output_directory, str): output_directory = pathlib.Path(output_directory).resolve() hpo_pipeline_result = hpo_pipeline_from_path(rv_config_path) hpo_pipeline_result.save_to_directory(output_directory) if not best_replicates: continue best_pipeline_dir = output_directory.joinpath('best_pipeline') best_pipeline_dir.mkdir(exist_ok=True, parents=True) logger.info('Re-training best pipeline and saving artifacts in %s', best_pipeline_dir) hpo_pipeline_result.replicate_best_pipeline( replicates=best_replicates, move_to_cpu=move_to_cpu, save_replicates=not discard_replicates, directory=best_pipeline_dir, )
def ablation(
    path: str,
    directory: Optional[str],
    dry_run: bool,
    best_replicates: int,
    save_artifacts: bool,
    move_to_cpu: bool,
    discard_replicates: bool,
) -> None:
    """Generate a set of HPO configurations.

    A sample file can be run with ``pykeen experiment ablation tests/resources/hpo_complex_nations.json``.
    """
    # NOTE(review): this is presumably registered as a CLI command, in which case the
    # docstring doubles as help text - parameters are therefore documented here instead:
    #   path:               forwarded to ``prepare_ablation`` as ``path``.
    #   directory:          parent of the freshly created, uniquely named output directory;
    #                       ``None`` falls back to the current working directory.
    #   dry_run:            only prepare the configurations, then exit with status 0.
    #   best_replicates:    number of replicates for re-training the best pipeline;
    #                       a falsy value skips re-training.
    #   save_artifacts:     forwarded to ``prepare_ablation``.
    #   move_to_cpu:        forwarded to ``replicate_best_pipeline``.
    #   discard_replicates: if true, ``save_replicates=False`` is forwarded.
    from pykeen.ablation import prepare_ablation

    # ``directory`` is annotated as optional, but ``os.path.join(None, ...)`` raises a
    # TypeError, so fall back to the current working directory.
    if directory is None:
        directory = os.getcwd()

    # Timestamp plus UUID keeps repeated runs from overwriting each other.
    datetime = time.strftime('%Y-%m-%d-%H-%M')
    directory = os.path.join(directory, f'{datetime}_{uuid4()}')

    directories = prepare_ablation(path=path, directory=directory, save_artifacts=save_artifacts)
    if dry_run:
        # ``sys.exit`` raises SystemExit, so no ``return`` is needed.
        sys.exit(0)

    from pykeen.hpo import hpo_pipeline_from_path

    for output_directory, rv_config_path in directories:
        hpo_pipeline_result = hpo_pipeline_from_path(rv_config_path)
        hpo_pipeline_result.save_to_directory(output_directory)

        if not best_replicates:
            continue

        # Replicates of the best pipeline live in a dedicated sub-directory.
        best_pipeline_dir = os.path.join(output_directory, 'best_pipeline')
        os.makedirs(best_pipeline_dir, exist_ok=True)
        click.echo(f'Re-training best pipeline and saving artifacts in {best_pipeline_dir}')
        hpo_pipeline_result.replicate_best_pipeline(
            replicates=best_replicates,
            move_to_cpu=move_to_cpu,
            save_replicates=not discard_replicates,
            directory=best_pipeline_dir,
        )
def optimize(path: str, directory: str):
    """Run a single HPO experiment."""
    # Function-scope import: pykeen.hpo is only loaded when this function is called.
    from pykeen.hpo import hpo_pipeline_from_path

    # Run the HPO pipeline configured by the file at ``path`` and store its
    # result in ``directory``.
    hpo_pipeline_from_path(path).save_to_directory(directory)
def ablation_pipeline(
    datasets: Union[str, List[str]],
    models: Union[str, List[str]],
    losses: Union[str, List[str]],
    optimizers: Union[str, List[str]],
    training_loops: Union[str, List[str]],
    *,
    create_inverse_triples: Union[bool, List[bool]] = False,
    regularizers: Union[None, str, List[str]] = None,
    model_to_model_kwargs: Optional[Mapping2D] = None,
    model_to_model_kwargs_ranges: Optional[Mapping2D] = None,
    model_to_trainer_to_training_kwargs: Optional[Mapping3D] = None,
    model_to_trainer_to_training_kwargs_ranges: Optional[Mapping3D] = None,
    ablation_config: Optional[Mapping3D] = None,
    evaluator: Optional[str] = None,
    optuna_config: Optional[Mapping[str, Any]] = None,
    evaluator_kwargs: Optional[Mapping[str, Any]] = None,
    evaluation_kwargs: Optional[Mapping[str, Any]] = None,
    directory: Optional[str] = None,
    dry_run: bool = False,
    best_replicates: Optional[int] = None,
    save_artifacts: bool = True,
    move_to_cpu: bool = True,
    discard_replicates: bool = False,
) -> None:
    """Generate a set of HPO configurations and, unless ``dry_run`` is set, run them.

    A sample file can be run with ``pykeen experiment ablation tests/resources/hpo_complex_nations.json``.

    :param datasets: A dataset name or list of dataset names
    :param models: A model name or list of model names
    :param losses: A loss function name or list of loss function names
    :param optimizers: An optimizer name or list of optimizer names
    :param training_loops: A training loop name or list of training loop names
    :param create_inverse_triples: Either a boolean for a single entry or a list of booleans
    :param regularizers: A regularizer name, list of regularizer names, or None if no regularizer is desired.
        Defaults to None.
    :param evaluator: The name of the evaluator to be used. Defaults to rank-based evaluator.
    :param evaluator_kwargs: The keyword arguments passed to the evaluator (in the pipeline)
    :param evaluation_kwargs: The keyword arguments passed during evaluation (in the pipeline)
    :param model_to_model_kwargs: A mapping from model name to dictionaries of default keyword arguments for
        the instantiation of that model
    :param model_to_model_kwargs_ranges: A mapping from model name to dictionaries of keyword argument
        ranges for that model to be used in HPO.
    :param model_to_trainer_to_training_kwargs: A mapping from model name to a mapping of trainer name to a mapping
        of default keyword arguments for the instantiation of that trainer. This is useful because for some models,
        you might want to set the number of epochs differently.
    :param model_to_trainer_to_training_kwargs_ranges: A mapping from model name to a mapping of trainer name
        to a mapping of keyword arguments for that trainer to be used in HPO.
    :param ablation_config: Additional third-order and fourth-order ablation configuration for all other ablation
        keys to models to either kwargs or kwarg ranges
    :param optuna_config: Configuration passed to optuna for HPO over all ablation studies
    :param directory: The directory in which the experimental artifacts will be saved. A uniquely named
        sub-directory is created inside it for each call. If None, the current working directory is used.
    :param dry_run: Defines whether only the configurations for the single experiments should be created without
        running them.
    :param best_replicates: Defines how often the final model should be re-trained and evaluated based on the best
        hyper-parameters enabling to measure the variance in performance.
    :param save_artifacts: Defines, whether each trained model sampled during HPO should be saved.
    :param move_to_cpu: Defines, whether a replicate of the best model should be moved to CPU.
        We recommend to set this flag to 'True' to avoid unnecessary GPU usage.
    :param discard_replicates: Defines, whether the best model should be discarded after training and evaluation.
    """
    # ``directory`` defaults to None, but ``os.path.join(None, ...)`` raises a TypeError,
    # so fall back to the current working directory.
    if directory is None:
        directory = os.getcwd()

    # Timestamp plus UUID keeps repeated runs from overwriting each other.
    datetime = time.strftime('%Y-%m-%d-%H-%M')
    directory = os.path.join(directory, f'{datetime}_{uuid4()}')

    directories = prepare_ablation(
        datasets=datasets,
        create_inverse_triples=create_inverse_triples,
        models=models,
        model_to_model_kwargs=model_to_model_kwargs,
        model_to_model_kwargs_ranges=model_to_model_kwargs_ranges,
        model_to_trainer_to_training_kwargs=model_to_trainer_to_training_kwargs,
        model_to_trainer_to_training_kwargs_ranges=model_to_trainer_to_training_kwargs_ranges,
        losses=losses,
        regularizers=regularizers,
        optimizers=optimizers,
        training_loops=training_loops,
        evaluator=evaluator,
        optuna_config=optuna_config,
        ablation_config=ablation_config,
        evaluator_kwargs=evaluator_kwargs,
        evaluation_kwargs=evaluation_kwargs,
        directory=directory,
        save_artifacts=save_artifacts,
    )
    if dry_run:
        # The configurations have been prepared above; nothing is executed.
        return

    # Function-scope import: pykeen.hpo is only loaded when experiments are actually run.
    from pykeen.hpo import hpo_pipeline_from_path

    for output_directory, rv_config_path in directories:
        hpo_pipeline_result = hpo_pipeline_from_path(rv_config_path)
        hpo_pipeline_result.save_to_directory(output_directory)

        if not best_replicates:
            continue

        # Replicates of the best pipeline live in a dedicated sub-directory.
        best_pipeline_dir = os.path.join(output_directory, 'best_pipeline')
        os.makedirs(best_pipeline_dir, exist_ok=True)
        logger.info('Re-training best pipeline and saving artifacts in %s', best_pipeline_dir)
        hpo_pipeline_result.replicate_best_pipeline(
            replicates=best_replicates,
            move_to_cpu=move_to_cpu,
            save_replicates=not discard_replicates,
            directory=best_pipeline_dir,
        )