def _prepare_data(self, X, y=None, **kwargs):
    """Split the data for the current outer fold and record its sample statistics.

    The outer fold is selected via ``self.outer_fold_id``. The fold's
    ``train_indices`` select the data stored in the ``_validation_*``
    attributes, and its ``test_indices`` select the data stored in the
    ``_test_*`` attributes. Sample counts are then written to
    ``self.result_object``; for classifiers the class distribution of both
    parts is written as well.

    :param X: feature data handed to ``PhotonDataHelper.split_data``.
    :param y: targets handed to ``PhotonDataHelper.split_data``.
    :param kwargs: additional per-sample data, split alongside ``X`` and ``y``.
    """
    outer_fold = self.cross_validaton_info.outer_folds[self.outer_fold_id]

    logger.info(
        "Preparing data for outer fold " + str(outer_fold.fold_nr) + "..."
    )

    # Part of the data selected by the fold's train indices.
    validation_part = PhotonDataHelper.split_data(
        X, y, kwargs, indices=outer_fold.train_indices
    )
    self._validation_X, self._validation_y, self._validation_kwargs = validation_part

    # Part of the data selected by the fold's test indices.
    test_part = PhotonDataHelper.split_data(
        X, y, kwargs, indices=outer_fold.test_indices
    )
    self._test_X, self._test_y, self._test_kwargs = test_part

    # Record the sample counts on the result object.
    results = self.result_object
    results.number_samples_validation = self._validation_y.shape[0]
    results.number_samples_test = self._test_y.shape[0]

    # Class distributions are only recorded for classifiers.
    if self._pipe._estimator_type != "classifier":
        return
    results.class_distribution_validation = FoldInfo.data_overview(
        self._validation_y
    )
    results.class_distribution_test = FoldInfo.data_overview(self._test_y)
def test_data_overview(self):
    """Check ``FoldInfo.data_overview`` against the expected group counts.

    The ``groups`` entry of ``self.kwargs`` is cast to int and passed to
    ``FoldInfo.data_overview``; the result must equal a dict mapping each of
    the string keys ``"0"`` .. ``"9"`` to a count of 10.
    """
    group_labels = self.kwargs['groups'].astype(int)
    expected_counts = dict.fromkeys((str(label) for label in range(10)), 10)

    observed_counts = FoldInfo.data_overview(group_labels)

    self.assertDictEqual(expected_counts, observed_counts)
def test_class_distribution_info(self):
    """Compare ``FoldInfo.data_overview`` with ``np.unique`` counts.

    The count at position 1 of the ``np.unique`` result is compared with the
    ``"1"`` entry of the overview dict.
    NOTE(review): this presumably relies on the second sorted unique label
    being class 1 -- confirm against the fixture that sets up the targets.
    """
    _, occurrences = np.unique(self.__y, return_counts=True)
    overview = FoldInfo.data_overview(self.__y)

    self.assertEqual(occurrences[1], overview["1"])