def init(self):
    """Initialize the data_dir directory.

    Needs to be called exactly once before any other CLI/API commands of
    the package are executed.
    """
    # The processed kwargs are not used by this command; the call itself is kept.
    self._process_kwargs_optional()
    NerBlackBoxMain("init").main()
def download(self):
    """Download & prepare built-in datasets, prepare experiment configuration.

    Needs to be called exactly once before any other CLI/API commands of
    the package are executed in case built-in datasets shall be used.
    """
    # The processed kwargs are not used by this command; the call itself is kept.
    self._process_kwargs_optional()
    NerBlackBoxMain("download").main()
def _run_nerblackbox_main(_ctx_obj: Dict[str, Any], _kwargs: Dict[str, str]) -> None:
    """Invoke NerBlackBoxMain with the context merged with the command arguments.

    Is used by every nerbb command.

    Args:
        _ctx_obj: context key-value pairs.
        _kwargs: all relevant arguments of the command.
    """
    # dict(**a, **b) raises a TypeError if a key occurs in both mappings
    # (unlike {**a, **b}, which would silently let the second one win).
    merged_arguments = dict(**_ctx_obj, **_kwargs)
    NerBlackBoxMain(**merged_arguments).main()
def show_experiment_config(self, experiment_name: str):
    """Show one experiment configuration in detail, or an overview of all of them.

    Args:
        experiment_name: e.g. "exp0" or "all"
    """
    main_kwargs = self._process_kwargs_optional()
    main_kwargs.update(experiment_name=experiment_name)
    NerBlackBoxMain("show_experiment_config", **main_kwargs).main()
def analyze_data(self, dataset_name: str, **kwargs_optional: Any):
    """Analyze a dataset.

    Args:
        dataset_name: e.g. "swedish_ner_corpus".
        kwargs_optional: with optional key-value pairs {"verbose": [bool]}.
    """
    main_kwargs = self._process_kwargs_optional(kwargs_optional)
    main_kwargs.update(dataset_name=dataset_name)
    NerBlackBoxMain("analyze_data", **main_kwargs).main()
def get_experiments(self, **kwargs_optional: Any) -> pd.DataFrame:
    """Show list of experiments that have been run.

    Args:
        kwargs_optional: with optional key-value pairs
            {"ids": [tuple of int], "as_df": [bool]}

    Returns:
        experiments_overview
    """
    main_kwargs = self._process_kwargs_optional(kwargs_optional)
    main_kwargs.update(usage="api")
    return NerBlackBoxMain("get_experiments", **main_kwargs).main()
def predict(self, experiment_name: str, text_input: Union[str, List[str]]):
    """Predict labels for text_input using the best model of a single experiment.

    Args:
        experiment_name: e.g. "exp0"
        text_input: e.g. "this text needs to be tagged"

    Returns:
        whatever NerBlackBoxMain("predict", ...).main() returns
    """
    main_kwargs = self._process_kwargs_optional()
    main_kwargs.update(
        usage="api",
        experiment_name=experiment_name,
        text_input=text_input,
    )
    return NerBlackBoxMain("predict", **main_kwargs).main()
def get_model_from_experiment(
    self,
    experiment_name: str,
) -> Optional[NerModelPredict]:
    """Get the (best) model from an experiment.

    Args:
        experiment_name: e.g. "exp0"

    Returns:
        ner_model_predict
    """
    main_kwargs = self._process_kwargs_optional()
    main_kwargs.update(usage="api", experiment_name=experiment_name)
    return NerBlackBoxMain("get_model_from_experiment", **main_kwargs).main()
def get_experiment_results(
    self,
    experiment_name: str,
) -> List[ExperimentResults]:
    """Get results for a single experiment.

    Args:
        experiment_name: e.g. "exp0"

    Returns:
        see ExperimentResults
    """
    main_kwargs = self._process_kwargs_optional()
    main_kwargs.update(
        usage="api",
        experiment_name=experiment_name,
        from_config=True,
    )
    return NerBlackBoxMain("get_experiment_results", **main_kwargs).main()
def set_up_dataset(
    self,
    dataset_name: str,
    dataset_subset_name: str = "",
    **kwargs_optional: Any,
):
    """Set up a dataset using the associated Formatter class.

    Args:
        dataset_name: e.g. "swedish_ner_corpus"
        dataset_subset_name: e.g. "simple_cased"
        kwargs_optional: with optional key-value pairs
            {"modify": [bool], "val_fraction": [float], "verbose": [bool]}
    """
    main_kwargs = self._process_kwargs_optional(kwargs_optional)
    main_kwargs.update(
        dataset_name=dataset_name,
        dataset_subset_name=dataset_subset_name,
    )
    NerBlackBoxMain("set_up_dataset", **main_kwargs).main()
class TestMain:
    """Tests for NerBlackBoxMain, run against one shared instance."""

    # Shared instance used by the test cases below.
    main = NerBlackBoxMain(flag="xyz", from_config=True)

    # 1 ################################################################################################################
    @pytest.mark.parametrize(
        "hparams, from_preset, hparams_processed",
        [
            # no hparams, no preset -> None
            (None, None, None),
            # hparams without preset -> expected to come back unchanged
            ({"multiple_runs": "2"}, None, {"multiple_runs": "2"}),
            # hparams with the "adaptive" preset -> expected to gain the extra keys below
            (
                {"multiple_runs": "2"},
                "adaptive",
                {
                    "multiple_runs": "2",
                    "max_epochs": 250,
                    "early_stopping": True,
                    "lr_schedule": "constant",
                },
            ),
        ],
    )
    def test_process_hparams(
        self,
        hparams: Optional[Dict[str, Union[str, int, bool]]],
        from_preset: Optional[str],
        hparams_processed: Optional[Dict[str, str]],
    ):
        """Check that _process_hparams(hparams, from_preset) equals hparams_processed."""
        test_hparams_processed = self.main._process_hparams(hparams, from_preset)
        failure_message = (
            f"ERROR! test_hparams_processed = {test_hparams_processed} != {hparams_processed}"
        )
        assert test_hparams_processed == hparams_processed, failure_message
def run_experiment(
    self,
    experiment_name: str,
    from_config: bool = False,
    model: Optional[str] = None,
    dataset: Optional[str] = None,
    from_preset: Optional[str] = "adaptive",
    **kwargs_optional: Any,
):
    """Run a single experiment.

    Note:
        - from_config == True -> experiment config file is used,
          no other optional arguments will be used.
        - from_config == False -> experiment config file is created
          dynamically, optional arguments will be used.
            - model and dataset are mandatory.
            - All other arguments relate to hyperparameters and are
              optional. If not specified, they are taken using the
              following hierarchy:
                1) optional argument
                2) from_preset (adaptive, original, stable), which specifies
                   e.g. the hyperparameters "max_epochs", "early_stopping",
                   "lr_schedule"
                3) default experiment configuration

    Args:
        experiment_name: e.g. 'exp0'
        from_config: e.g. False
        model: if experiment config file is to be created dynamically,
            e.g. 'bert-base-uncased'
        dataset: if experiment config file is to be created dynamically,
            e.g. 'conll-2003'
        from_preset: if experiment config file is to be created dynamically,
            e.g. 'adaptive'
        kwargs_optional: with optional key-value pairs, e.g.
            {"multiple_runs": [int], "run_name": [str],
            "device": [torch device], "fp16": [bool]}
    """
    main_kwargs = self._process_kwargs_optional(kwargs_optional)
    main_kwargs["experiment_name"] = experiment_name

    # model / dataset are only forwarded (under their internal names) when given
    for internal_name, value in (
        ("pretrained_model_name", model),
        ("dataset_name", dataset),
    ):
        if value is not None:
            main_kwargs[internal_name] = value

    main_kwargs["hparams"] = self._extract_hparams(main_kwargs)
    main_kwargs["from_config"] = from_config
    if not from_config:
        main_kwargs["from_preset"] = from_preset

    # get rid of keys in main_kwargs that are present in main_kwargs["hparams"]
    for hparam_name in main_kwargs["hparams"].keys():
        main_kwargs.pop(hparam_name)

    # an empty hparams dict is passed on as None
    if main_kwargs["hparams"] == {}:
        main_kwargs["hparams"] = None

    NerBlackBoxMain("run_experiment", **main_kwargs).main()