Code example #1
    def test_concatenate_dict(self):
        dict_a = {
            "variable_one": np.random.randn(10),
            "variable_two": np.random.randn(15),
        }
        dict_b = {
            "variable_one": np.random.randn(20),
            "variable_two": np.random.randn(20),
        }
        dict_c = {
            "variable_one": np.random.randn(10, 10),
            "variable_two": np.random.randn(15, 15),
        }
        dict_d = {
            "variable_one": np.random.randn(20, 10),
            "variable_two": np.random.randn(20, 15),
        }
        dict_e = {}

        dict_a_b = PhotonDataHelper.join_dictionaries(dict_a, dict_b)
        dict_c_d = PhotonDataHelper.join_dictionaries(dict_c, dict_d)
        dict_e_a = PhotonDataHelper.join_dictionaries(dict_e, dict_a)
        self.assertEqual(len(dict_a_b["variable_one"]), 30)
        self.assertEqual(len(dict_a_b["variable_two"]), 35)
        self.assertEqual(dict_c_d["variable_one"].shape, (30, 10))
        self.assertEqual(dict_c_d["variable_two"].shape, (35, 15))
        self.assertEqual(len(dict_e_a["variable_one"]), 10)
        self.assertEqual(len(dict_e_a["variable_two"]), 15)
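
The assertions above pin down the contract of PhotonDataHelper.join_dictionaries: arrays are concatenated key by key along the first axis, and an empty dictionary acts as a neutral element. A minimal sketch of a helper with that contract (a hypothetical stand-in for illustration, not the PHOTON implementation):

import numpy as np

def join_dictionaries_sketch(dict_a, dict_b):
    # an empty dictionary acts as a neutral element, as the dict_e case above shows
    if not dict_a:
        return {k: np.asarray(v) for k, v in dict_b.items()}
    if not dict_b:
        return {k: np.asarray(v) for k, v in dict_a.items()}
    # otherwise concatenate key by key along the first axis,
    # matching the length/shape assertions in the test
    return {k: np.concatenate([np.asarray(dict_a[k]), np.asarray(dict_b[k])], axis=0)
            for k in dict_a}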
Code example #2
File: neuro_branch.py Project: nkourkou/photon
    def apply_transform_parallelized(self, X):
        """

        :param X: the data to which the delegate should be applied in parallel
        """

        if self.nr_of_processes > 1:

            jobs_to_do = list()

            # distribute the data equally to all available cores
            number_of_items_to_process = PhotonDataHelper.find_n(X)
            number_of_items_for_each_core = int(
                np.ceil(number_of_items_to_process / self.nr_of_processes))
            logger.info("NeuroBranch " + self.name + ": Using " +
                        str(self.nr_of_processes) + " cores calculating " +
                        str(number_of_items_for_each_core) + " items each")
            for start, stop in PhotonDataHelper.chunker(
                    number_of_items_to_process, number_of_items_for_each_core):
                X_batched, _, _ = PhotonDataHelper.split_data(
                    X, None, {}, start, stop)

                # copy my pipeline
                new_pipe_mr = self.copy_me()
                new_pipe_copy = new_pipe_mr.base_element
                new_pipe_copy.cache_folder = self.base_element.cache_folder
                new_pipe_copy.skip_loading = True
                new_pipe_copy._parallel_use = True

                del_job = dask.delayed(NeuroBranch.parallel_application)(
                    new_pipe_copy, X_batched)
                jobs_to_do.append(del_job)

            dask.compute(*jobs_to_do)
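
The method chunks the data, wraps one pipeline copy per chunk in dask.delayed, and triggers everything with a single dask.compute call. A self-contained sketch of that chunk-and-delay pattern (generic, assuming dask is installed; not tied to NeuroBranch):

import numpy as np
import dask

def process_chunk(chunk):
    # stand-in for the per-batch pipeline transform
    return chunk.sum()

data = np.arange(100)
nr_of_processes = 4
chunk_size = int(np.ceil(len(data) / nr_of_processes))

jobs_to_do = []
for start in range(0, len(data), chunk_size):
    batch = data[start:start + chunk_size]
    jobs_to_do.append(dask.delayed(process_chunk)(batch))

results = dask.compute(*jobs_to_do)  # tuple with one entry per chunk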
Code example #3
File: outer_folds.py Project: nkourkou/photon
    def _prepare_data(self, X, y=None, **kwargs):
        logger.info(
            "Preparing data for outer fold "
            + str(self.cross_validaton_info.outer_folds[self.outer_fold_id].fold_nr)
            + "..."
        )
        # Prepare Train and validation set data
        train_indices = self.cross_validaton_info.outer_folds[
            self.outer_fold_id
        ].train_indices
        test_indices = self.cross_validaton_info.outer_folds[
            self.outer_fold_id
        ].test_indices
        self._validation_X, self._validation_y, self._validation_kwargs = PhotonDataHelper.split_data(
            X, y, kwargs, indices=train_indices
        )
        self._test_X, self._test_y, self._test_kwargs = PhotonDataHelper.split_data(
            X, y, kwargs, indices=test_indices
        )

        # write numbers to database info object
        self.result_object.number_samples_validation = self._validation_y.shape[0]
        self.result_object.number_samples_test = self._test_y.shape[0]
        if self._pipe._estimator_type == "classifier":
            self.result_object.class_distribution_validation = FoldInfo.data_overview(
                self._validation_y
            )
            self.result_object.class_distribution_test = FoldInfo.data_overview(
                self._test_y
            )
Code example #4
 def test_index_dict(self):
     labels = np.asarray([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
     dict_a = {
         "variable_one": np.random.randn(10),
         "variable_two": np.random.randn(10, 10),
     }
     dict_a_1 = PhotonDataHelper.index_dict(dict_a, labels == 0)
     dict_a_2 = PhotonDataHelper.index_dict(dict_a, labels == 1)
     self.assertEqual(len(dict_a_1["variable_one"]), 5)
     self.assertEqual(dict_a_2["variable_two"].shape, (5, 10))
Code example #5
    def test_split_join_resorting(self):
        X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        y = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2])
        kwargs = {"test": np.array([-1, -2, -3, -4, -5, -6, -7, -8, -9, -10])}

        X_new, y_new, kwargs_new = list(), list(), dict()

        # first randomly split the data and append them to X_new, y_new, kwargs_new
        idx_list_one, idx_list_two = list(), list()
        for idx in range(len(X)):
            if bool(random.getrandbits(1)):
                idx_list_one.append(idx)
            else:
                idx_list_two.append(idx)

        for ilist in [idx_list_two, idx_list_one]:
            for idx in ilist:

                X_batched, y_batched, kwargs_batched = PhotonDataHelper.split_data(
                    X, y, kwargs, idx, idx)

                # test if batching works
                self.assertEqual(X_batched, X[idx])
                self.assertEqual(y_batched, y[idx])
                self.assertDictEqual(kwargs_batched,
                                     {"test": [kwargs["test"][idx]]})

                # then join again
                X_new, y_new, kwargs_new = PhotonDataHelper.join_data(
                    X_new, X_batched, y_new, y_batched, kwargs_new,
                    kwargs_batched)

        # test if joining works
        joined_idx = PhotonDataHelper.stack_data_vertically(
            idx_list_two, idx_list_one)
        self.assertTrue(np.array_equal(X_new, X[joined_idx]))
        self.assertTrue(np.array_equal(y_new, y[joined_idx]))
        self.assertTrue(
            np.array_equal(kwargs_new["test"], kwargs["test"][joined_idx]))

        # now resort and see if that works too
        X_resorted, y_resorted, kwargs_resorted = PhotonDataHelper.resort_splitted_data(
            X_new, y_new, kwargs_new, joined_idx)
        self.assertTrue(np.array_equal(X_resorted, X))
        self.assertTrue(np.array_equal(y_resorted, y))
        self.assertListEqual(list(kwargs_resorted.keys()), list(kwargs.keys()))
        self.assertTrue(np.array_equal(kwargs_resorted["test"],
                                       kwargs["test"]))
Code example #6
 def compute_learning_curves(self, new_pipe, train_X, train_y, train,
                             kwargs_cv_train, test_X, test_y, test,
                             kwargs_cv_test):
     self.cross_validation_infos.learning_curves_cut.transform()
     cut_range = [
         round(cut * train_X.shape[0])
         for cut in self.cross_validation_infos.learning_curves_cut.values
     ]
     learning_curves = []
     for i, cut in enumerate(cut_range[1:]):
         cut_indices = np.arange(cut)
         train_cut_X, train_cut_y, train_cut_kwargs = PhotonDataHelper.split_data(
             train_X, train_y, kwargs_cv_train, indices=cut_indices)
         train_cut = train[:cut]
         job_data = self.InnerCVJob(
             pipe=new_pipe,
             config=dict(self.params),
             metrics=self.optimization_infos.metrics,
             callbacks=self.optimization_constraints,
             train_data=self.JobData(train_cut_X, train_cut_y, train_cut,
                                     train_cut_kwargs),
             test_data=self.JobData(test_X, test_y, test, kwargs_cv_test))
         curr_test_cut, curr_train_cut = InnerFoldManager.fit_and_score(
             job_data)
         learning_curves.append([
             self.cross_validation_infos.learning_curves_cut.values[i],
             curr_test_cut.metrics, curr_train_cut.metrics
         ])
     return learning_curves
Code example #7
    def predict(self, X, training=False, **kwargs):
        """
        Transforms the data for every step that offers a transform function
        and then calls the estimator with predict on transformed data.
        It returns the predictions made.

        In case the last step is no estimator, it returns the transformed data.
        """

        # first transform
        if not training:
            X, _, kwargs = self.transform(X, y=None, **kwargs)

        # then call predict on final estimator
        if self._final_estimator is not None:
            if self._final_estimator.is_estimator:
                logger.debug('PhotonPipeline: Predicting with ' +
                             self._final_estimator.name + ' ...')
                predict_start_time = datetime.datetime.now()
                y_pred = self._final_estimator.predict(X, **kwargs)
                predict_duration = (datetime.datetime.now() -
                                    predict_start_time).total_seconds()
                n = PhotonDataHelper.find_n(X)
                self.time_monitor['predict'].append(
                    (self.elements[-1][0], predict_duration, n))
                return y_pred
            else:
                return X
        else:
            return None
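
Stripped of logging, timing and caching, the control flow the docstring describes is roughly the following (a sketch, not the PhotonPipeline source):

def predict_flow_sketch(transformers, final_estimator, X):
    # push X through every transformer first
    for transformer in transformers:
        X = transformer.transform(X)
    if final_estimator is None:
        return None
    if final_estimator.is_estimator:
        return final_estimator.predict(X)
    # last step offers no predict: return the transformed data instead
    return X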
Code example #8
        def objective_function_simple(self, cfg):
            cfg = {k: cfg[k] for k in cfg if cfg[k]}
            values = []

            train_indices = list(
                self.pipe.cross_validation.outer_folds.values())[0].train_indices
            self._validation_X, self._validation_y, _ = PhotonDataHelper.split_data(
                self.X, self.y, kwargs=None, indices=train_indices)

            inner_folds = list(self.pipe.cross_validation.inner_folds.values())[0]
            for inner_fold in inner_folds.values():
                sc = PipelineElement("StandardScaler", {})
                pca = PipelineElement("PCA", {}, random_state=42)
                svc = PipelineElement("SVC", {}, random_state=42, gamma='auto')
                my_pipe = PhotonPipeline([('StandardScaler', sc), ('PCA', pca),
                                          ('SVC', svc)])
                my_pipe.set_params(**cfg)
                my_pipe.fit(self._validation_X[inner_fold.train_indices, :],
                            self._validation_y[inner_fold.train_indices])
                values.append(
                    accuracy_score(
                        self._validation_y[inner_fold.test_indices],
                        my_pipe.predict(
                            self._validation_X[inner_fold.test_indices, :])))
            return 1 - np.mean(values)
Code example #9
    def transform(self, X, y=None, **kwargs):
        """
        Generates "new samples" by computing the mean between all or n_draws pairs of existing samples and appends them to X
        The target for each new sample is computed as the mean between the constituent targets
        :param X: data
        :param y: targets (optional)
        :param draw_limit: in case the full number of combinations is > 10k, how many to draw?
        :param rand_seed: sets seed for random sampling of combinations (for reproducibility only)
        :return: X_new: X and X_augmented; (y_new: the correspoding targets)
        """

        logger.debug("Pairing " + str(self.draw_limit) + " samples...")

        # ensure class balance in the training set if balance_classes is True
        unique_classes = np.unique(y)
        n_pairs = list()
        for label in unique_classes:
            if self.balance_classes:
                n_pairs.append(self.draw_limit - np.sum(y == label))
            else:
                n_pairs.append(self.draw_limit)

        # run get_samples for each class independently
        X_extended = list()
        y_extended = list()
        kwargs_extended = dict()

        for label, limit in zip(unique_classes, n_pairs):
            X_new_class, y_new_class, kwargs_new_class = self._return_samples(
                X[y == label],
                y[y == label],
                PhotonDataHelper.index_dict(kwargs, y == label),
                generator=self.generator,
                distance_metric=self.distance_metric,
                draw_limit=limit,
                rand_seed=self.random_state,
            )

            X_extended.extend(X_new_class)
            y_extended.extend(y_new_class)

            # get the corresponding kwargs
            if kwargs:
                kwargs_extended = PhotonDataHelper.join_dictionaries(
                    kwargs_extended, kwargs_new_class)

        return X_extended, y_extended, kwargs_extended
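
The docstring describes the augmentation idea: new samples are the means of pairs of existing samples, and their targets are the means of the paired targets. A minimal numpy sketch of that pairing scheme (hypothetical helper that ignores the draw limit, class balancing, and kwargs handling):

import itertools
import numpy as np

def mean_pair_augment(X, y):
    # X is assumed to be a 2-D array of shape (n_samples, n_features)
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    pairs = list(itertools.combinations(range(len(X)), 2))
    X_aug = np.array([(X[i] + X[j]) / 2.0 for i, j in pairs])
    y_aug = np.array([(y[i] + y[j]) / 2.0 for i, j in pairs])
    # append the averaged samples and targets to the originals
    return np.vstack([X, X_aug]), np.concatenate([y, y_aug])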
Code example #10
 def test_transform(self):
     for elements, stack in self.stacks:
         np.random.seed(42)
         Xt_stack, _, _ = stack.fit(self.X, self.y).transform(self.X)
         np.random.seed(42)
         Xt_elements = None
         for i, element in enumerate(elements):
             Xt_element, _, _ = element.fit(self.X,
                                            self.y).transform(self.X)
             Xt_elements = PhotonDataHelper.stack_data_horizontally(
                 Xt_elements, Xt_element)
         np.testing.assert_array_equal(Xt_stack, Xt_elements)
Code example #11
 def inverse_transform(self, X, y=None, **kwargs):
     new_X = None
     for i in range(X.shape[1]):
         feature = X[:, i]
         transformer = self.encoder_list[i]
         if transformer is not None:
             feature = np.reshape(feature, (-1, 1))
             trans_X = transformer.inverse_transform(feature)
         else:
             trans_X = feature
         new_X = PhotonDataHelper.stack_data_horizontally(new_X, trans_X)
     return new_X
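
This example and the stack tests above both grow a feature matrix by stacking columns onto an accumulator that starts as None. A minimal sketch of that horizontal-stacking contract (an assumption about stack_data_horizontally's behavior, not its source):

import numpy as np

def stack_data_horizontally_sketch(accumulator, new_block):
    new_block = np.asarray(new_block)
    if new_block.ndim == 1:
        # treat a 1-D result as a single column
        new_block = new_block.reshape(-1, 1)
    if accumulator is None:
        # first call: nothing accumulated yet
        return new_block
    return np.hstack([accumulator, new_block])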
Code example #12
 def test_predict(self):
     for elements, stack in [
         ([self.svc, self.tree], self.estimator_stack),
         ([self.estimator_branch_1,
           self.estimator_branch_2], self.estimator_branch_stack)
     ]:
         np.random.seed(42)
         stack = stack.fit(self.X, self.y)
         yt_stack = stack.predict(self.X)
         np.random.seed(42)
         Xt_elements = None
         for i, element in enumerate(elements):
             Xt_element = element.fit(self.X, self.y).predict(self.X)
             Xt_elements = PhotonDataHelper.stack_data_horizontally(
                 Xt_elements, Xt_element)
         np.testing.assert_array_equal(yt_stack, Xt_elements)
Code example #13
    def fit(self, X, y=None, **kwargs):

        self._validate_elements()
        X, y, kwargs = self._caching_fit_transform(X, y, kwargs, fit=True)

        if self._final_estimator is not None:
            logger.debug("PhotonPipeline: Fitting " + self._final_estimator.name)
            fit_start_time = datetime.datetime.now()
            if self.random_state:
                self._final_estimator.random_state = self.random_state
            self._final_estimator.fit(X, y, **kwargs)
            # TODO: post-fit actions of the final estimator go here
            n = PhotonDataHelper.find_n(X)
            fit_duration = (datetime.datetime.now() - fit_start_time).total_seconds()
            self.time_monitor["fit"].append((self.elements[-1][0], fit_duration, n))
        return self
Code example #14
 def test_data_split_indices(self):
     vals = np.array([-1, -2, -3, -4, -5, -6, -7, -8, -9, -10])
     vals_str = np.array([ascii(i) for i in vals])
     random_features = np.random.randn(10, 20)
     kwargs = {"test": vals, "subtest": vals_str, "random": random_features}
     pick_list = [1, 3, 5]
     splitted_X, splitted_y, splitted_example = PhotonDataHelper.split_data(
         random_features, vals, kwargs, indices=pick_list)
     self.assertTrue(np.array_equal(splitted_X, random_features[pick_list]))
     self.assertTrue(np.array_equal(splitted_y, vals[pick_list]))
     self.assertTrue(
         np.array_equal(splitted_example["test"], vals[pick_list]))
     self.assertTrue(
         np.array_equal(splitted_example["subtest"], vals_str[pick_list]))
     self.assertTrue(
         np.array_equal(splitted_example["random"],
                        random_features[pick_list]))
Code example #15
    def _do_timed_fit_transform(self, name, transformer, fit, X, y, **kwargs):

        n = PhotonDataHelper.find_n(X)
        if self.random_state:
            transformer.random_state = self.random_state

        if fit:
            logger.debug("PhotonPipeline: Fitting " + transformer.name)
            fit_start_time = datetime.datetime.now()
            transformer.fit(X, y, **kwargs)
            fit_duration = (datetime.datetime.now() - fit_start_time).total_seconds()
            self.time_monitor["fit"].append((name, fit_duration, n))

        logger.debug("PhotonPipeline: Transforming data with " + transformer.name)
        transform_start_time = datetime.datetime.now()
        X, y, kwargs = transformer.transform(X, y, **kwargs)
        transform_duration = (
            datetime.datetime.now() - transform_start_time
        ).total_seconds()
        self.time_monitor["transform_computed"].append((name, transform_duration, n))
        return X, y, kwargs
Code example #16
        def objective_function_switch(self, cfg):
            cfg = {k: cfg[k] for k in cfg if cfg[k]}
            values = []

            train_indices = list(
                self.pipe.cross_validation.outer_folds.values())[0].train_indices
            self._validation_X, self._validation_y, _ = PhotonDataHelper.split_data(
                self.X, self.y, kwargs=None, indices=train_indices)

            switch = cfg["Estimator_switch"]
            del cfg["Estimator_switch"]
            inner_folds = list(self.pipe.cross_validation.inner_folds.values())[0]
            for inner_fold in inner_folds.values():
                sc = PipelineElement("StandardScaler", {})
                pca = PipelineElement("PCA", {}, random_state=42)
                if switch == 'svc':
                    est = PipelineElement("SVC", {},
                                          random_state=42,
                                          gamma='auto')
                    name = 'SVC'
                else:
                    est = PipelineElement("RandomForestClassifier", {},
                                          random_state=42)
                    name = "RandomForestClassifier"
                my_pipe = PhotonPipeline([('StandardScaler', sc), ('PCA', pca),
                                          (name, est)])
                my_pipe.set_params(**cfg)
                my_pipe.fit(self._validation_X[inner_fold.train_indices, :],
                            self._validation_y[inner_fold.train_indices])
                values.append(
                    accuracy_score(
                        self._validation_y[inner_fold.test_indices],
                        my_pipe.predict(
                            self._validation_X[inner_fold.test_indices, :])))
            return 1 - np.mean(values)
Code example #17
    def load_or_save_cached_data(self,
                                 name,
                                 X,
                                 y,
                                 kwargs,
                                 transformer,
                                 fit=False,
                                 needed_for_further_computation=False,
                                 initial_X=None):
        if not self.single_subject_caching:
            # if we do it group-wise, then it's easy
            if self.skip_loading and not needed_for_further_computation:
                # check if data is already calculated
                if self.cache_man.check_cache(name):
                    # if so, do nothing
                    return X, y, kwargs
                else:
                    # otherwise, do the calculation and save it
                    cached_result = None
            else:
                start_time_for_loading = datetime.datetime.now()
                cached_result = self.cache_man.load_cached_data(name)

            if cached_result is None:
                X, y, kwargs = self._do_timed_fit_transform(
                    name, transformer, fit, X, y, **kwargs)

                start_time_saving = datetime.datetime.now()
                self.cache_man.save_data_to_cache(name, (X, y, kwargs))
                saving_duration = (datetime.datetime.now() -
                                   start_time_saving).total_seconds()
                self.time_monitor['transform_cached'].append(
                    (name, saving_duration, 1))
            else:
                X, y, kwargs = cached_result[0], cached_result[1], cached_result[2]
                loading_duration = (datetime.datetime.now() -
                                    start_time_for_loading).total_seconds()
                n = PhotonDataHelper.find_n(X)
                self.time_monitor['transform_cached'].append(
                    (name, loading_duration, n))
            return X, y, kwargs
        else:
            # if we do it subject-wise we need to iterate and collect the results
            processed_X, processed_y, processed_kwargs = list(), list(), dict()
            X_uncached, y_uncached, initial_X_uncached = list(), list(), list()
            kwargs_uncached = dict()
            list_of_idx_cached, list_of_idx_non_cached = list(), list()

            nr = PhotonDataHelper.find_n(X)
            for start, stop in PhotonDataHelper.chunker(nr, 1):
                # split data in single entities, find key from first element = PATH to file
                X_key, _, _ = PhotonDataHelper.split_data(
                    initial_X, None, {}, start, stop)
                X_batched, y_batched, kwargs_dict_batched = PhotonDataHelper.split_data(
                    X, y, kwargs, start, stop)
                self.cache_man.update_single_subject_state_info(X_key)

                # check if item has been processed
                if self.cache_man.check_cache(name):
                    list_of_idx_cached.append(start)
                else:
                    list_of_idx_non_cached.append(start)
                    X_uncached = PhotonDataHelper.stack_data_vertically(
                        X_uncached, X_batched)
                    y_uncached = PhotonDataHelper.stack_data_vertically(
                        y_uncached, y_batched)
                    initial_X_uncached = PhotonDataHelper.stack_data_vertically(
                        initial_X_uncached, X_key)
                    kwargs_uncached = PhotonDataHelper.join_dictionaries(
                        kwargs_uncached, kwargs_dict_batched)

            # now we know which part can be loaded and which part should be transformed
            # first apply the transformation to the group, then save it single-subject-wise
            if len(list_of_idx_non_cached) > 0:

                # apply transformation groupwise
                new_group_X, new_group_y, new_group_kwargs = self._do_timed_fit_transform(
                    name, transformer, fit, X_uncached, y_uncached,
                    **kwargs_uncached)

                # then save it single
                nr = PhotonDataHelper.find_n(new_group_X)
                for start in range(nr):
                    # split data in single entities
                    X_batched, y_batched, kwargs_dict_batched = PhotonDataHelper.split_data(
                        new_group_X, new_group_y, new_group_kwargs, start,
                        start)
                    X_key, _, _ = PhotonDataHelper.split_data(
                        initial_X_uncached, None, {}, start, start)
                    # we save the data in relation to the input path (X_key = hash(input X))
                    self.cache_man.update_single_subject_state_info(X_key)

                    start_time_saving = datetime.datetime.now()
                    self.cache_man.save_data_to_cache(
                        name, (X_batched, y_batched, kwargs_dict_batched))
                    saving_duration = (datetime.datetime.now() -
                                       start_time_saving).total_seconds()
                    self.time_monitor['transform_cached'].append(
                        (name, saving_duration, 1))

                # we need to collect the data only when we want to load them
                # we can skip that process if we only want them to get into the cache (case: parallelisation)
                if not self.skip_loading or needed_for_further_computation:
                    # stack results
                    processed_X, processed_y, processed_kwargs = new_group_X, new_group_y, new_group_kwargs

            # afterwards load everything that has been cached
            if len(list_of_idx_cached) > 0:
                if not self.skip_loading or needed_for_further_computation:
                    for cache_idx in list_of_idx_cached:
                        # we identify the data according to the input path (X before any transformation)
                        self.cache_man.update_single_subject_state_info(
                            [initial_X[cache_idx]])

                        # time the loading of the cached item
                        start_time_for_loading = datetime.datetime.now()
                        transformed_X, transformed_y, transformed_kwargs = self.cache_man.load_cached_data(
                            name)
                        loading_duration = (
                            datetime.datetime.now() -
                            start_time_for_loading).total_seconds()
                        self.time_monitor['transform_cached'].append(
                            (name, loading_duration,
                             PhotonDataHelper.find_n(X)))

                        processed_X, processed_y, processed_kwargs = PhotonDataHelper.join_data(
                            processed_X, transformed_X, processed_y,
                            transformed_y, processed_kwargs,
                            transformed_kwargs)

            logger.debug(name + " loaded " + str(len(list_of_idx_cached)) +
                         " items from cache and computed " +
                         str(len(list_of_idx_non_cached)))
            if not self.skip_loading or needed_for_further_computation:
                # now sort the data in the correct order again
                processed_X, processed_y, processed_kwargs = PhotonDataHelper.resort_splitted_data(
                    processed_X, processed_y, processed_kwargs,
                    PhotonDataHelper.stack_data_vertically(
                        list_of_idx_cached, list_of_idx_non_cached))

            return processed_X, processed_y, processed_kwargs
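
The subject-wise branch boils down to a split-compute-merge pattern: separate cached from uncached items, transform only the uncached group, store each result individually, and reassemble everything in the original order. A compact sketch of that idea (hypothetical helper, with a plain dict standing in for the cache manager):

import numpy as np

def subject_wise_cache_sketch(items, cache, transform_group):
    # split indices into already-cached and still-missing items
    cached_idx = [i for i, item in enumerate(items) if item in cache]
    missing_idx = [i for i, item in enumerate(items) if item not in cache]

    # transform the missing items as one group, then cache them one by one
    # (transform_group is assumed to return results in the same order as its input)
    new_results = list(transform_group([items[i] for i in missing_idx]))
    for i, result in zip(missing_idx, new_results):
        cache[items[i]] = result

    # stitch cached and freshly computed results back into the original order
    results = [cache[items[i]] for i in cached_idx] + new_results
    order = np.argsort(cached_idx + missing_idx)
    return [results[i] for i in order]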
Code example #18
    def fit(self, X, y, **kwargs):
        """
        Iterates over cross-validation folds and trains the pipeline, then uses it for predictions.
        Calculates metrics per fold and averages them across folds.
        :param X: Training and test data
        :param y: Training and test targets
        :returns: configuration class for result tree that monitors training and test performance
        """

        # needed for testing Timeboxed Random Grid Search
        # time.sleep(35)

        config_item = MDBConfig()
        config_item.config_dict = self.params
        config_item.inner_folds = []
        config_item.metrics_test = []
        config_item.metrics_train = []
        config_item.computation_start_time = datetime.datetime.now()

        try:
            # do inner cv
            for idx, (inner_fold_id, inner_fold) in enumerate(
                    self.cross_validation_infos.inner_folds[
                        self.outer_fold_id].items()):

                train, test = inner_fold.train_indices, inner_fold.test_indices

                # split kwargs according to cross validation
                train_X, train_y, kwargs_cv_train = PhotonDataHelper.split_data(
                    X, y, kwargs, indices=train)
                test_X, test_y, kwargs_cv_test = PhotonDataHelper.split_data(
                    X, y, kwargs, indices=test)

                new_pipe = self.pipe()
                if self.cache_folder is not None and self.cache_updater is not None:
                    self.cache_updater(new_pipe, self.cache_folder,
                                       inner_fold_id)

                if not config_item.human_readable_config:
                    config_item.human_readable_config = PhotonPrintHelper.config_to_human_readable_dict(
                        new_pipe, self.params)
                    logger.clean_info(
                        json.dumps(config_item.human_readable_config,
                                   indent=4,
                                   sort_keys=True))

                job_data = InnerFoldManager.InnerCVJob(
                    pipe=new_pipe,
                    config=dict(self.params),
                    metrics=self.optimization_infos.metrics,
                    callbacks=self.optimization_constraints,
                    train_data=InnerFoldManager.JobData(
                        train_X, train_y, train, kwargs_cv_train),
                    test_data=InnerFoldManager.JobData(test_X, test_y, test,
                                                       kwargs_cv_test),
                )

                # only for unparallel processing
                # inform children in which inner fold we are
                # self.pipe.distribute_cv_info_to_hyperpipe_children(inner_fold_counter=fold_cnt)
                # self.mother_inner_fold_handle(fold_cnt)

                # --> write that output in InnerFoldManager!
                # logger.debug(config_item.human_readable_config)
                fold_nr = idx + 1
                logger.debug("calculating inner fold " + str(fold_nr) + "...")

                curr_test_fold, curr_train_fold = InnerFoldManager.fit_and_score(
                    job_data)
                logger.debug("Performance inner fold " + str(fold_nr))
                print_double_metrics(
                    curr_train_fold.metrics,
                    curr_test_fold.metrics,
                    photon_system_log=False,
                )

                durations = job_data.pipe.time_monitor

                self.update_config_item_with_inner_fold(
                    config_item=config_item,
                    fold_cnt=fold_nr,
                    curr_train_fold=curr_train_fold,
                    curr_test_fold=curr_test_fold,
                    time_monitor=durations,
                    feature_importances=new_pipe.feature_importances_,
                )

                if isinstance(self.optimization_constraints, list):
                    break_cv = 0
                    for cf in self.optimization_constraints:
                        if not cf.shall_continue(config_item):
                            logger.info(
                                "Skipped further cross validation after fold "
                                + str(fold_nr) +
                                " due to performance constraints in " +
                                cf.metric)
                            break_cv += 1
                            break
                    if break_cv > 0:
                        break
                elif self.optimization_constraints is not None:
                    if not self.optimization_constraints.shall_continue(
                            config_item):
                        logger.info(
                            "Skipped further cross validation after fold " +
                            str(fold_nr) +
                            " due to performance constraints in " + cf.metric)
                        break

            InnerFoldManager.process_fit_results(
                config_item,
                self.cross_validation_infos.calculate_metrics_across_folds,
                self.cross_validation_infos.calculate_metrics_per_fold,
                self.optimization_infos.metrics,
            )

        except Exception as e:
            if self.raise_error:
                raise e
            logger.error(e)
            logger.error(traceback.format_exc())
            traceback.print_exc()
            if not isinstance(e, Warning):
                config_item.config_failed = True
            config_item.config_error = str(e)
            warnings.warn("One test iteration of pipeline failed with error")

        logger.debug("...done with")
        logger.debug(
            json.dumps(config_item.human_readable_config,
                       indent=4,
                       sort_keys=True))

        config_item.computation_end_time = datetime.datetime.now()
        return config_item