Example #1
0
    def _parse_split_config(self, instruction_key, instruction: dict, split_key: str, symbol_table: SymbolTable, settings_count: int,
                            label_config: LabelConfiguration) -> SplitConfig:

        try:

            default_params = DefaultParamsLoader.load("instructions/", SplitConfig.__name__)
            report_config_input = self._prepare_report_config(instruction_key, instruction, split_key, symbol_table)
            instruction[split_key] = {**default_params, **instruction[split_key]}

            split_strategy = SplitType[instruction[split_key]["split_strategy"].upper()]
            training_percentage = float(instruction[split_key]["training_percentage"]) if split_strategy == SplitType.RANDOM else -1

            if split_strategy == SplitType.RANDOM and training_percentage == 1 and settings_count > 1:
                raise ValueError(f"{TrainMLModelParser.__name__}: all data under {instruction_key}/{split_key} was specified to be used for "
                                 f"training, but {settings_count} settings were specified for evaluation. Please define a test/validation set by "
                                 f"reducing the training percentage (e.g., to 0.7) or use only one hyperparameter setting to run the analysis.")

            if split_strategy == SplitType.STRATIFIED_K_FOLD and len(label_config.get_labels_by_name()) != 1:
                raise ValueError(f"{TrainMLModelParser.__name__}: Stratified k-fold cross-validation cannot be used when "
                                 f"{len(label_config.get_labels_by_name())} labels are specified. It support only one label (and multiple classes).")

            return SplitConfig(split_strategy=split_strategy,
                               split_count=int(instruction[split_key]["split_count"]),
                               training_percentage=training_percentage,
                               reports=ReportConfig(**report_config_input),
                               manual_config=ManualSplitConfig(**instruction[split_key]["manual_config"]) if "manual_config" in instruction[split_key] else None,
                               leave_one_out_config=LeaveOneOutConfig(**instruction[split_key]["leave_one_out_config"])
                               if "leave_one_out_config" in instruction[split_key] else None)

        except KeyError as key_error:
            raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under {split_key}.")
    def __init__(self, split_index: int, train_val_dataset, test_dataset, path: Path, label_configuration: LabelConfiguration):
        self.split_index = split_index
        self.train_val_dataset = train_val_dataset
        self.test_dataset = test_dataset
        self.path = path
        self.train_val_data_reports = []
        self.test_data_reports = []

        # computed
        self.label_states = {label: HPLabelState(label, label_configuration.get_auxiliary_labels(label))
                             for label in label_configuration.get_labels_by_name()}