Beispiel #1
0
    def _compute_optimal_item(self, label: str, index_repr: str):
        """Pick the optimal item for ``label`` and record it on the state.

        The optimal hyperparameter setting is the one that occurs most often
        among the per-assessment-state optimal settings for this label. The
        outcome is stored in ``self.state.optimal_hp_items[label]``; nothing
        is returned.

        Args:
            label: key used to look up each assessment state's ``label_states``
                entry and to store the result in ``optimal_hp_items``.
            index_repr: text that is only interpolated into the progress
                messages printed while refitting.
        """
        per_split_choices = [
            assessment_state.label_states[label].optimal_hp_setting
            for assessment_state in self.state.assessment_states
        ]
        # Majority vote over the settings chosen in the individual splits.
        winning_setting, _ = Counter(per_split_choices).most_common(1)[0]

        if not self.state.refit_optimal_model:
            # No refit requested: reuse the item of the first split whose
            # optimal setting equals the winning one.
            winning_split = per_split_choices.index(winning_setting)
            chosen_state = self.state.assessment_states[winning_split]
            self.state.optimal_hp_items[label] = \
                chosen_state.label_states[label].optimal_assessment_item
            return

        print(
            f"{datetime.datetime.now()}: TrainMLModel: retraining optimal model for label {label} {index_repr}.\n",
            flush=True)

        # Refit with the winning setting; no test dataset is passed (None).
        refit_process = MLProcess(
            self.state.dataset,
            None,
            label,
            self.state.metrics,
            self.state.optimization_metric,
            self.state.path / f"optimal_{label}",
            number_of_processes=self.state.number_of_processes,
            label_config=self.state.label_configuration,
            hp_setting=winning_setting,
            store_encoded_data=self.state.store_encoded_data)
        self.state.optimal_hp_items[label] = refit_process.run(0)

        print(
            f"{datetime.datetime.now()}: TrainMLModel: finished retraining optimal model for label {label} {index_repr}.\n",
            flush=True)
Beispiel #2
0
    def reeval_on_assessment_split(state, train_val_dataset: Dataset,
                                   test_dataset: Dataset,
                                   hp_setting: HPSetting, path: Path,
                                   label: Label, split_index: int) -> "TrainMLModelState":
        """Retrain the model for a specific label, assessment split and hp_setting.

        Runs an ``MLProcess`` configured from ``state`` (metrics, optimization
        metric, report context, assessment reports, number of processes) and
        stores what it returns in
        ``state.assessment_states[split_index].label_states[label.name]
        .assessment_items[str(hp_setting)]``.

        Args:
            state: object holding the configuration and the per-split
                assessment states; it is modified in place.
            train_val_dataset: dataset passed to the process as training data.
            test_dataset: dataset passed to the process as test data.
            hp_setting: hyperparameter setting to run; its ``str()`` form is
                the key under which the result is stored.
            path: path handed to the process.
            label: label to train for; wrapped alone in a
                ``LabelConfiguration`` for the process.
            split_index: index of the assessment split; also the argument
                given to ``MLProcess.run``.

        Returns:
            The same ``state`` object that was passed in (not the trained
            method, which the previous ``-> MLMethod`` annotation suggested).
        """
        # NOTE(review): the return type is a forward-reference string because
        # the import of TrainMLModelState is not visible here - confirm that
        # it is imported in this module.
        assessment_item = MLProcess(
            train_dataset=train_val_dataset,
            test_dataset=test_dataset,
            label=label,
            metrics=state.metrics,
            optimization_metric=state.optimization_metric,
            path=path,
            hp_setting=hp_setting,
            report_context=state.context,
            ml_reports=state.assessment.reports.model_reports.values(),
            number_of_processes=state.number_of_processes,
            encoding_reports=state.assessment.reports.encoding_reports.values(
            ),
            label_config=LabelConfiguration([label])).run(split_index)

        state.assessment_states[split_index].label_states[
            label.name].assessment_items[str(hp_setting)] = assessment_item

        return state
Beispiel #3
0
    def run_setting(state: TrainMLModelState, hp_setting, train_dataset, val_dataset, split_index: int,
                    current_path: Path, label: Label, assessment_index: int):
        """Run one hyperparameter setting on a selection split and record the result.

        An ``MLProcess`` is built from the given datasets and the selection
        reports / configuration found on ``state``, and run with
        ``split_index``. The item it returns is appended to
        ``state.assessment_states[assessment_index].label_states[label.name]
        .selection_state.hp_items[hp_setting.get_key()]``.

        Returns:
            The item's performance entry for the lower-cased name of
            ``state.optimization_metric``, or ``None`` when the item has no
            performance.
        """
        process = MLProcess(
            train_dataset=train_dataset,
            test_dataset=val_dataset,
            encoding_reports=state.selection.reports.encoding_reports.values(),
            label_config=LabelConfiguration([label]),
            report_context=state.context,
            number_of_processes=state.number_of_processes,
            metrics=state.metrics,
            optimization_metric=state.optimization_metric,
            ml_reports=state.selection.reports.model_reports.values(),
            label=label,
            path=current_path,
            hp_setting=hp_setting,
        )
        hp_item = process.run(split_index)

        # Keep every run of this setting under the current assessment split.
        label_state = state.assessment_states[assessment_index].label_states[label.name]
        label_state.selection_state.hp_items[hp_setting.get_key()].append(hp_item)

        if hp_item.performance is None:
            return None
        return hp_item.performance[state.optimization_metric.name.lower()]