    def _parse_split_config(self, instruction_key: str, instruction: dict, split_key: str, symbol_table: SymbolTable,
                            settings_count: int, label_config: LabelConfiguration) -> SplitConfig:
        try:
            # merge user-provided parameters over the defaults shipped for SplitConfig
            default_params = DefaultParamsLoader.load("instructions/", SplitConfig.__name__)
            report_config_input = self._prepare_report_config(instruction_key, instruction, split_key, symbol_table)
            instruction[split_key] = {**default_params, **instruction[split_key]}

            split_strategy = SplitType[instruction[split_key]["split_strategy"].upper()]
            # training_percentage is only meaningful for random splits; use -1 as a sentinel otherwise
            training_percentage = float(instruction[split_key]["training_percentage"]) if split_strategy == SplitType.RANDOM else -1

            if split_strategy == SplitType.RANDOM and training_percentage == 1 and settings_count > 1:
                raise ValueError(f"{TrainMLModelParser.__name__}: all data under {instruction_key}/{split_key} was specified to be used for "
                                 f"training, but {settings_count} settings were specified for evaluation. Please define a test/validation "
                                 f"set by reducing the training percentage (e.g., to 0.7) or use only one hyperparameter setting to run "
                                 f"the analysis.")

            if split_strategy == SplitType.STRATIFIED_K_FOLD and len(label_config.get_labels_by_name()) != 1:
                raise ValueError(f"{TrainMLModelParser.__name__}: Stratified k-fold cross-validation cannot be used when "
                                 f"{len(label_config.get_labels_by_name())} labels are specified. It supports only one label "
                                 f"(and multiple classes).")

            return SplitConfig(split_strategy=split_strategy,
                               split_count=int(instruction[split_key]["split_count"]),
                               training_percentage=training_percentage,
                               reports=ReportConfig(**report_config_input),
                               manual_config=ManualSplitConfig(**instruction[split_key]["manual_config"])
                               if "manual_config" in instruction[split_key] else None,
                               leave_one_out_config=LeaveOneOutConfig(**instruction[split_key]["leave_one_out_config"])
                               if "leave_one_out_config" in instruction[split_key] else None)

        except KeyError as key_error:
            raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under {split_key}.") from key_error
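    # A minimal sketch of the YAML fragment _parse_split_config consumes, based only on
    # the keys read above; the "selection" key and the concrete values are illustrative
    # assumptions, not taken from the parser itself:
    #
    #   selection:                    # <- split_key
    #     split_strategy: random      # any SplitType member name, case-insensitive
    #     split_count: 5
    #     training_percentage: 0.7    # only read when split_strategy is random
    #     manual_config: {...}        # optional, forwarded to ManualSplitConfig
    #     leave_one_out_config: {...} # optional, forwarded to LeaveOneOutConfig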
    def __init__(self, split_index: int, train_val_dataset, test_dataset, path: Path, label_configuration: LabelConfiguration):
        self.split_index = split_index
        self.train_val_dataset = train_val_dataset
        self.test_dataset = test_dataset
        self.path = path
        self.train_val_data_reports = []
        self.test_data_reports = []

        # computed
        self.label_states = {label: HPLabelState(label, label_configuration.get_auxiliary_labels(label))
                             for label in label_configuration.get_labels_by_name()}
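    # Hedged usage sketch: the enclosing class name is not shown in this excerpt, so
    # SplitState below is a placeholder; "disease", the datasets, and the path are
    # illustrative. One such state object exists per train/test split, holding one
    # HPLabelState per configured label:
    #
    #   state = SplitState(split_index=0, train_val_dataset=train_ds, test_dataset=test_ds,
    #                      path=Path("results/split_0"), label_configuration=label_config)
    #   state.label_states  # e.g. {"disease": HPLabelState("disease", aux_labels)}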