コード例 #1
0
ファイル: MLProcess.py プロジェクト: sailfish009/immuneML
    def _assess_on_test_dataset(self, encoded_train_dataset,
                                encoding_train_results, method,
                                split_index) -> HPItem:
        """Assemble the HPItem for this setting, assessing on test data when present.

        When ``self.test_dataset`` is ``None`` or reports zero examples, the
        returned item carries only the training-side information and
        ``test_predictions_path=None``. Otherwise the test dataset is
        preprocessed, encoded with ``learn_model=False``, scored through
        ``HPUtil.assess_performance``, and the encoding and ML reports are run
        before the item is built.

        Args:
            encoded_train_dataset: encoded training data, forwarded to the ML reports.
            encoding_train_results: encoding report results for the training
                data, stored on the returned item.
            method: the ML method to assess.
            split_index: index of the current split.

        Returns:
            The HPItem describing this hyperparameter setting for the split.
        """
        has_test_examples = (self.test_dataset is not None
                             and self.test_dataset.get_example_count() > 0)

        if not has_test_examples:
            # No test data to assess on: return an item without test-side fields.
            return HPItem(method=method,
                          hp_setting=self.hp_setting,
                          train_predictions_path=self.train_predictions_path,
                          test_predictions_path=None,
                          ml_details_path=self.ml_details_path,
                          train_dataset=self.train_dataset,
                          split_index=split_index,
                          encoding_train_results=encoding_train_results,
                          encoder=self.hp_setting.encoder)

        preprocessed_test = HPUtil.preprocess_dataset(
            self.test_dataset,
            self.hp_setting.preproc_sequence,
            self.path / "preprocessed_test_dataset")

        # learn_model=False is passed for the test data (the training run passes True).
        encoded_test = HPUtil.encode_dataset(
            preprocessed_test,
            self.hp_setting,
            self.path / "encoded_datasets",
            learn_model=False,
            context=self.report_context,
            number_of_processes=self.number_of_processes,
            label_configuration=self.label_config,
            store_encoded_data=self.store_encoded_data)

        test_performance = HPUtil.assess_performance(
            method, self.metrics, self.optimization_metric, encoded_test,
            split_index, self.path, self.test_predictions_path, self.label,
            self.ml_score_path)

        test_encoding_results = ReportUtil.run_encoding_reports(
            encoded_test, self.encoding_reports,
            self.report_path / "encoding_test")

        ml_report_results = ReportUtil.run_ML_reports(
            encoded_train_dataset, encoded_test, method, self.ml_reports,
            self.report_path / "ml_method", self.hp_setting, self.label,
            self.report_context)

        return HPItem(method=method,
                      hp_setting=self.hp_setting,
                      train_predictions_path=self.train_predictions_path,
                      test_predictions_path=self.test_predictions_path,
                      ml_details_path=self.ml_details_path,
                      train_dataset=self.train_dataset,
                      test_dataset=self.test_dataset,
                      split_index=split_index,
                      model_report_results=ml_report_results,
                      encoding_train_results=encoding_train_results,
                      encoding_test_results=test_encoding_results,
                      performance=test_performance,
                      encoder=self.hp_setting.encoder)
コード例 #2
0
ファイル: HPAssessment.py プロジェクト: sailfish009/immuneML
    def run_assessment_split(state, train_val_dataset, test_dataset,
                             split_index: int, n_splits):
        """Process one split of the outer CV loop.

        Runs the inner CV loop (selection) and retrains on the full
        train_val_dataset after the optimal model is chosen, then runs the
        data split reports on both the train/validation and the test dataset.

        Args:
            state: overall training state; a new assessment state for this
                split is appended to ``state.assessment_states``.
            train_val_dataset: data used for selection and data reports.
            test_dataset: held-out data for this split.
            split_index: zero-based index of the split (printed as index + 1).
            n_splits: total number of splits, used only in progress messages.

        Returns:
            The state returned by the per-label assessment step.
        """
        print(
            f'{datetime.datetime.now()}: Training ML model: running outer CV loop: started split {split_index + 1}/{n_splits}.\n',
            flush=True)

        split_path = HPAssessment.create_assessment_path(state, split_index)

        # Register the split's state before selection runs.
        split_state = HPAssessmentState(split_index, train_val_dataset,
                                        test_dataset, split_path,
                                        state.label_configuration)
        state.assessment_states.append(split_state)

        state = HPSelection.run_selection(state, train_val_dataset, split_path,
                                          split_index)
        state = HPAssessment.run_assessment_split_per_label(state, split_index)

        # The same report collection is applied to both halves of the split.
        split_reports = state.assessment.reports.data_split_reports.values()
        split_state.train_val_data_reports = ReportUtil.run_data_reports(
            train_val_dataset, split_reports,
            split_path / "data_report_train", state.context)
        split_state.test_data_reports = ReportUtil.run_data_reports(
            test_dataset, split_reports,
            split_path / "data_report_test", state.context)

        print(
            f'{datetime.datetime.now()}: Training ML model: running outer CV loop: finished split {split_index + 1}/{n_splits}.\n',
            flush=True)

        return state
コード例 #3
0
    def run_selection_reports(state: TrainMLModelState, dataset, train_datasets: list, val_datasets: list, selection_state: HPSelectionState):
        """Run the selection-level data reports and record them on selection_state.

        For every train split, the data split reports are run on that split
        and on the validation split at the same position; results are
        accumulated in ``selection_state.train_data_reports`` and
        ``selection_state.val_data_reports``. Afterwards the data reports are
        run once on ``dataset`` and stored in ``selection_state.data_reports``.

        Args:
            state: training state providing the report definitions and context.
            dataset: the dataset the overall data reports are run on.
            train_datasets: training splits, one per selection split.
            val_datasets: validation splits, indexed in parallel with train_datasets.
            selection_state: receives the report results; its ``path`` is the
                root directory for the report outputs.
        """
        root_path = selection_state.path
        split_reports = state.selection.reports.data_split_reports.values()

        for index, train_split in enumerate(train_datasets):
            split_dir = root_path / f"split_{index + 1}"

            selection_state.train_data_reports += ReportUtil.run_data_reports(
                train_split, split_reports,
                split_dir / "data_reports_train", state.context)
            # The validation split is looked up only after the train reports have run.
            selection_state.val_data_reports += ReportUtil.run_data_reports(
                val_datasets[index], split_reports,
                split_dir / "data_reports_test", state.context)

        selection_state.data_reports = ReportUtil.run_data_reports(
            dataset, state.selection.reports.data_reports.values(),
            root_path / "reports", state.context)
コード例 #4
0
ファイル: MLProcess.py プロジェクト: sailfish009/immuneML
    def run(self, split_index: int) -> HPItem:
        """Evaluate the hyperparameter setting held by this process on one split.

        Steps, in order: build the output directory and set the paths,
        preprocess the training dataset, encode it with ``learn_model=True``,
        train the ML method, run the encoding reports on the training data,
        and hand over to ``_assess_on_test_dataset`` to produce the result.

        Args:
            split_index: index of the current split, forwarded to the assessment.

        Returns:
            The HPItem produced by ``_assess_on_test_dataset``.
        """
        print(
            f"{datetime.datetime.now()}: Evaluating hyperparameter setting: {self.hp_setting}...",
            flush=True)

        PathBuilder.build(self.path)
        self._set_paths()

        preprocessed_train = HPUtil.preprocess_dataset(
            self.train_dataset,
            self.hp_setting.preproc_sequence,
            self.path / "preprocessed_train_dataset")

        encoded_train = HPUtil.encode_dataset(
            preprocessed_train,
            self.hp_setting,
            self.path / "encoded_datasets",
            learn_model=True,
            context=self.report_context,
            number_of_processes=self.number_of_processes,
            label_configuration=self.label_config,
            store_encoded_data=self.store_encoded_data)

        trained_method = HPUtil.train_method(
            self.label, encoded_train, self.hp_setting, self.path,
            self.train_predictions_path, self.ml_details_path,
            self.number_of_processes, self.optimization_metric)

        train_encoding_results = ReportUtil.run_encoding_reports(
            encoded_train, self.encoding_reports,
            self.report_path / "encoding_train")

        result = self._assess_on_test_dataset(encoded_train,
                                              train_encoding_results,
                                              trained_method, split_index)

        print(
            f"{datetime.datetime.now()}: Completed hyperparameter setting {self.hp_setting}.\n",
            flush=True)

        return result