def _compute_optimal_item(self, label: str, index_repr: str):
    """Select the optimal item for ``label`` and store it in the state.

    The optimal hyperparameter setting is the one that was chosen most often
    across ``self.state.assessment_states``; on a tie, the setting that was
    encountered first wins (``Counter.most_common`` ordering).

    If ``self.state.refit_optimal_model`` is truthy, an ``MLProcess`` is run
    with that setting on ``self.state.dataset`` and its result is stored.
    Otherwise the ``optimal_assessment_item`` of the first assessment state
    that picked the winning setting is reused.

    In both cases the result is written to
    ``self.state.optimal_hp_items[label]``; nothing is returned.

    Args:
        label: key used to look up the per-label state in each assessment state.
        index_repr: text appended to the progress messages printed when refitting.

    Raises:
        IndexError: if ``self.state.assessment_states`` is empty.
    """
    run_state = self.state

    # One chosen setting per assessment split, in split order.
    chosen_per_split = [
        split_state.label_states[label].optimal_hp_setting
        for split_state in run_state.assessment_states
    ]
    best_setting = Counter(chosen_per_split).most_common(1)[0][0]

    if not run_state.refit_optimal_model:
        # Reuse the item of the first split that selected the winning setting.
        source_split = run_state.assessment_states[chosen_per_split.index(best_setting)]
        run_state.optimal_hp_items[label] = source_split.label_states[label].optimal_assessment_item
        return

    print(f"{datetime.datetime.now()}: TrainMLModel: retraining optimal model for label {label} {index_repr}.\n", flush=True)

    refit_process = MLProcess(
        run_state.dataset,
        None,
        label,
        run_state.metrics,
        run_state.optimization_metric,
        run_state.path / f"optimal_{label}",
        number_of_processes=run_state.number_of_processes,
        label_config=run_state.label_configuration,
        hp_setting=best_setting,
        store_encoded_data=run_state.store_encoded_data,
    )
    run_state.optimal_hp_items[label] = refit_process.run(0)

    print(f"{datetime.datetime.now()}: TrainMLModel: finished retraining optimal model for label {label} {index_repr}.\n", flush=True)
def reeval_on_assessment_split(state, train_val_dataset: Dataset, test_dataset: Dataset, hp_setting: HPSetting, path: Path, label: Label,
                               split_index: int) -> MLMethod:
    """Retrain the model for a specific label, assessment split and hp_setting.

    Runs an ``MLProcess`` trained on ``train_val_dataset`` and tested on
    ``test_dataset``, using the metrics, context, process count and assessment
    reports taken from ``state``. The resulting item is stored under
    ``str(hp_setting)`` in the assessment items of the given split and label
    (looked up by ``label.name``).

    NOTE(review): the return annotation says ``MLMethod``, but the value
    actually returned is the ``state`` argument — confirm with callers and
    correct the annotation.

    Args:
        state: object holding the run configuration and ``assessment_states``.
        train_val_dataset: dataset passed to the process as training data.
        test_dataset: dataset passed to the process as test data.
        hp_setting: hyperparameter setting to retrain with.
        path: path handed to the process.
        label: label to retrain for; also wrapped in a one-label ``LabelConfiguration``.
        split_index: index of the assessment split; passed to ``run`` and
            used to pick the assessment state to update.

    Returns:
        The same ``state`` object, after the new assessment item was recorded.
    """
    assessment_reports = state.assessment.reports

    process = MLProcess(
        train_dataset=train_val_dataset,
        test_dataset=test_dataset,
        label=label,
        metrics=state.metrics,
        optimization_metric=state.optimization_metric,
        path=path,
        hp_setting=hp_setting,
        report_context=state.context,
        ml_reports=assessment_reports.model_reports.values(),
        number_of_processes=state.number_of_processes,
        encoding_reports=assessment_reports.encoding_reports.values(),
        label_config=LabelConfiguration([label]),
    )
    # Run first, then resolve the storage location, mirroring assignment order.
    assessment_item = process.run(split_index)

    items_for_label = state.assessment_states[split_index].label_states[label.name].assessment_items
    items_for_label[str(hp_setting)] = assessment_item

    return state
def run_setting(state: TrainMLModelState, hp_setting, train_dataset, val_dataset, split_index: int, current_path: Path, label: Label,
                assessment_index: int):
    """Run one hyperparameter setting on a selection split and record the result.

    Runs an ``MLProcess`` trained on ``train_dataset`` and tested on
    ``val_dataset``, using the metrics, context, process count and selection
    reports taken from ``state``. The resulting item is appended to the
    selection state's ``hp_items`` list keyed by ``hp_setting.get_key()``,
    for the assessment state at ``assessment_index`` and the label looked up
    by ``label.name``.

    Args:
        state: run state holding configuration and ``assessment_states``.
        hp_setting: hyperparameter setting to evaluate.
        train_dataset: dataset passed to the process as training data.
        val_dataset: dataset passed to the process as test data.
        split_index: index passed to ``MLProcess.run``.
        current_path: path handed to the process.
        label: label to train for; also wrapped in a one-label ``LabelConfiguration``.
        assessment_index: index of the assessment state to update.

    Returns:
        The item's performance for the optimization metric (looked up by the
        metric's lower-cased name), or ``None`` if the item has no performance.
    """
    selection_reports = state.selection.reports

    process = MLProcess(
        train_dataset=train_dataset,
        test_dataset=val_dataset,
        encoding_reports=selection_reports.encoding_reports.values(),
        label_config=LabelConfiguration([label]),
        report_context=state.context,
        number_of_processes=state.number_of_processes,
        metrics=state.metrics,
        optimization_metric=state.optimization_metric,
        ml_reports=selection_reports.model_reports.values(),
        label=label,
        path=current_path,
        hp_setting=hp_setting,
    )
    hp_item = process.run(split_index)

    selection_state = state.assessment_states[assessment_index].label_states[label.name].selection_state
    selection_state.hp_items[hp_setting.get_key()].append(hp_item)

    if hp_item.performance is None:
        return None
    metric_key = state.optimization_metric.name.lower()
    return hp_item.performance[metric_key]