예제 #1
0
    def _prepare_data(self, X, y=None, **kwargs):
        """Split the data for the current outer fold and record sample statistics.

        The rows selected by the outer fold's ``train_indices`` are stored in
        ``self._validation_X``, ``self._validation_y`` and
        ``self._validation_kwargs``; the rows selected by its ``test_indices``
        are stored in ``self._test_X``, ``self._test_y`` and
        ``self._test_kwargs``. The sample counts of both splits, and for
        classifiers their class distributions, are then written to
        ``self.result_object``.

        :param X: data handed to ``PhotonDataHelper.split_data``.
        :param y: targets split alongside ``X``.
        :param kwargs: extra keyword arguments; passed to
            ``PhotonDataHelper.split_data`` as one dict and split with the
            same indices.
        """
        # NOTE(review): y defaults to None, yet .shape[0] is read from the
        # split targets below -- confirm callers always supply y.
        current_fold = self.cross_validaton_info.outer_folds[self.outer_fold_id]
        message = "Preparing data for outer fold " + str(current_fold.fold_nr) + "..."
        logger.info(message)

        fold_train_idx = current_fold.train_indices
        fold_test_idx = current_fold.test_indices

        # The outer fold's training portion is kept under the "_validation_*"
        # attributes; the held-out portion goes to the "_test_*" attributes.
        validation_split = PhotonDataHelper.split_data(
            X, y, kwargs, indices=fold_train_idx
        )
        self._validation_X, self._validation_y, self._validation_kwargs = validation_split

        test_split = PhotonDataHelper.split_data(X, y, kwargs, indices=fold_test_idx)
        self._test_X, self._test_y, self._test_kwargs = test_split

        # Record the split sizes on the result object.
        results = self.result_object
        results.number_samples_validation = self._validation_y.shape[0]
        results.number_samples_test = self._test_y.shape[0]

        # Class distributions are only stored for classification pipelines.
        if self._pipe._estimator_type == "classifier":
            results.class_distribution_validation = FoldInfo.data_overview(
                self._validation_y
            )
            results.class_distribution_test = FoldInfo.data_overview(self._test_y)
예제 #2
0
 def test_data_overview(self):
     """Check the per-group counts returned by ``FoldInfo.data_overview``.

     The integer-cast ``groups`` entry of ``self.kwargs`` must produce a
     dict with the string keys ``"0"`` through ``"9"``, each mapped to 10.
     """
     group_labels = self.kwargs['groups'].astype(int)
     observed_counts = FoldInfo.data_overview(group_labels)
     # Ten groups, keyed by their stringified number, ten samples each.
     wanted_counts = dict.fromkeys(map(str, range(10)), 10)
     self.assertDictEqual(wanted_counts, observed_counts)
예제 #3
0
 def test_class_distribution_info(self):
     """Compare one class count from ``FoldInfo.data_overview`` with NumPy.

     The count at index 1 of ``np.unique(..., return_counts=True)`` must
     equal the value stored under the key ``"1"`` of the overview dict.
     """
     # Only the counts are used; the unique labels themselves are discarded.
     _, label_counts = np.unique(self.__y, return_counts=True)
     overview = FoldInfo.data_overview(self.__y)
     self.assertEqual(label_counts[1], overview["1"])