Example #1
    def _add_mask_to_library(self, mask_name: str = '', target_affine=None, target_shape=None, mask_threshold=0.5):
        # Todo: find solution for multiprocessing spamming

        if mask_name in self.photon_masks.keys():
            original_mask_object = self.photon_masks[mask_name]
        else:
            logger.debug("Checking custom mask")
            original_mask_object = self._check_custom_mask(mask_name)

        mask_object = MaskObject(name=mask_name, mask_file=original_mask_object.mask_file)

        #mask_object.mask = image.threshold_img(mask_object.mask_file, threshold=mask_threshold)
        mask_object.mask = image.math_img('img > {}'.format(mask_threshold), img=mask_object.mask_file)

        if target_affine is not None and target_shape is not None:
            mask_object.mask = self._resample(mask_object.mask, target_affine=target_affine, target_shape=target_shape)

        # check if roi is empty
        if np.sum(mask_object.mask.dataobj != 0) == 0:
            mask_object.is_empty = True
            msg = 'No voxels in mask after resampling (' + mask_object.name + ').'
            logger.error(msg)
            raise ValueError(msg)

        AtlasLibrary.LIBRARY[(mask_object.name, str(target_affine), str(target_shape), str(mask_threshold))] = mask_object
        logger.debug("BrainMask: Done adding mask to library!")
Example #2
    def fit_and_score(job: InnerCVJob):

        pipe = job.pipe

        # set params to current config
        pipe.set_params(**job.config)

        # start fitting
        pipe.fit(job.train_data.X, job.train_data.y,
                 **job.train_data.cv_kwargs)

        logger.debug("Scoring Training Data")

        # score test data
        curr_test_fold = InnerFoldManager.score(pipe,
                                                job.test_data.X,
                                                job.test_data.y,
                                                job.metrics,
                                                indices=job.test_data.indices,
                                                **job.test_data.cv_kwargs)

        logger.debug("Scoring Test Data")
        # score train data
        curr_train_fold = InnerFoldManager.score(
            pipe,
            job.train_data.X,
            job.train_data.y,
            job.metrics,
            indices=job.train_data.indices,
            training=True,
            **job.train_data.cv_kwargs)

        return curr_test_fold, curr_train_fold
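The InnerCVJob and JobData containers referenced above are not shown in this excerpt; below is a plausible minimal sketch of the fields they carry, inferred from the attribute accesses and constructor calls in these examples. This is an assumption for illustration, not the PHOTON implementation:

from dataclasses import dataclass, field
from typing import Any, List

@dataclass
class JobData:
    X: Any                                   # features of this fold
    y: Any                                   # targets of this fold
    indices: Any                             # original sample indices
    cv_kwargs: dict = field(default_factory=dict)

@dataclass
class InnerCVJob:
    pipe: Any                                # pipeline to fit and score
    config: dict                             # hyperparameter configuration
    metrics: List[str]                       # metric names to compute
    train_data: JobData
    test_data: JobData
    callbacks: Any = None                    # optional performance constraints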
Example #3
    def transform(self, X, y=None, **kwargs):
        """
        Calls transform on every step that offers a transform function
        including the last step if it has the transformer flag,
        and excluding the last step if it has the estimator flag but no transformer flag.

        Returns transformed X, y and kwargs
        """
        if self.single_subject_caching:
            initial_X = np.array(X)
        else:
            initial_X = None

        X, y, kwargs = self._caching_fit_transform(X, y, kwargs)

        if self._final_estimator is not None:
            if self._estimator_type is None:
                if self.caching:
                    X, y, kwargs = self.load_or_save_cached_data(
                        self._final_estimator.name,
                        X,
                        y,
                        kwargs,
                        self._final_estimator,
                        initial_X=initial_X)
                else:
                    logger.debug('PhotonPipeline: Transforming data with ' +
                                 self._final_estimator.name)
                    X, y, kwargs = self._final_estimator.transform(
                        X, y, **kwargs)

        return X, y, kwargs
Example #4
    def predict(self, X, training=False, **kwargs):
        """
        Transforms the data for every step that offers a transform function
        and then calls the estimator with predict on transformed data.
        It returns the predictions made.

        If the last step is not an estimator, the transformed data is returned.
        """

        # first transform
        if not training:
            X, _, kwargs = self.transform(X, y=None, **kwargs)

        # then call predict on final estimator
        if self._final_estimator is not None:
            if self._final_estimator.is_estimator:
                logger.debug('PhotonPipeline: Predicting with ' +
                             self._final_estimator.name + ' ...')
                predict_start_time = datetime.datetime.now()
                y_pred = self._final_estimator.predict(X, **kwargs)
                predict_duration = (datetime.datetime.now() -
                                    predict_start_time).total_seconds()
                n = PhotonDataHelper.find_n(X)
                self.time_monitor['predict'].append(
                    (self.elements[-1][0], predict_duration, n))
                return y_pred
            else:
                return X
        else:
            return None
Example #5
    def transform(self, X, y=None, **kwargs):

        if self.base_element.cache_folder is not None:
            # make sure we cache individually
            self.base_element.single_subject_caching = True
            self.base_element.caching = True
        if self.nr_of_processes > 1:

            if self.base_element.cache_folder is not None:
                # first apply the transformation on several cores; everything gets written to the cache,
                # so the next step only has to reload the data ...
                self.apply_transform_parallelized(X)
            else:
                logger.error(
                    "Cannot use parallelization without a cache folder specified in the hyperpipe. "
                    "Using a single core instead.")

            logger.debug("NeuroBranch " + self.name +
                         " is collecting data from the different cores...")
        X_new, _, _ = self.base_element.transform(X)

        # check if we have a list of niftis; we should avoid this, except when output_img is True
        if not self.output_img:
            if ((isinstance(X_new, list) and len(X_new) > 0) or
                (isinstance(X_new, np.ndarray)
                 and len(X_new.shape) == 1)) and isinstance(
                     X_new[0], Nifti1Image):
                X_new = np.asarray([i.dataobj for i in X_new])
        return X_new, y, kwargs
Example #6
def print_double_metrics(metric_dict_train, metric_dict_test, photon_system_log=True):
    t = PrettyTable(["METRIC", "PERFORMANCE TRAIN", "PERFORMANCE TEST"])
    for m_key, m_value in metric_dict_train.items():
        t.add_row([m_key, "%.4f" % m_value, "%.4f" % metric_dict_test[m_key]])
    if photon_system_log:
        logger.photon_system_log(t)
    else:
        logger.debug(t)
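A hedged usage sketch of the same PrettyTable layout; the metric names and values below are made up for illustration, and the result is printed instead of being routed through the PHOTON logger:

from prettytable import PrettyTable

metric_dict_train = {"mean_squared_error": 0.1234, "explained_variance": 0.8765}
metric_dict_test = {"mean_squared_error": 0.2345, "explained_variance": 0.8123}

t = PrettyTable(["METRIC", "PERFORMANCE TRAIN", "PERFORMANCE TEST"])
for m_key, m_value in metric_dict_train.items():
    t.add_row([m_key, "%.4f" % m_value, "%.4f" % metric_dict_test[m_key]])
print(t)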
Example #7
    def _generate_log(self, conf, subset, result, cost, tracking_vars):
        """
        Generates the log, stores it in the logfile, and calculates the incumbent if the constructor parameter log['incumbents'] was true

        :param conf: used configuration
        :type conf: dict
        :param subset: used subset-fragmentation
        :type subset: int
        :param result: the result of the evaluation
        :type result: float
        :param cost: the cost of the evaluation
        :type cost: float
        :param tracking_vars: Fabolas' tracking vars
        :type tracking_vars: dict
        """
        if self._log is None:
            return

        logger.debug("Fabolas: generating log")
        l = {
            "config": conf,
            "subset_frac": subset,
            "config_result": result,
            "config_cost": cost,
            "iteration": self._it,
            "operation": "init" if self._it < self._n_init else "opt",
        }
        if self._it == self._num_iterations:
            l["operation"] = "final"

        if self._log["incumbents"] and self._it < self._num_iterations:
            start = time()
            if self._it < self._n_init:
                best_i = np.argmin(self._Y)
                l["incumbents"], _, track = self._create_param_dict(
                    (self._X[best_i][:-1], 1), {}
                )
                l["incumbents_estimated_performance"] = -1
                l["incumbents_log"] = track["config_log"]
            else:
                inc, inc_val = self._projected_incumbent_estimation(
                    self._model_objective, self._X[:, :-1]
                )
                l["incumbents"], _, track = self._create_param_dict((inc[:-1], 1), {})
                l["incumbents_estimated_performance"] = inc_val
                l.update({"incumbent_time": time() - start})

        l.update(tracking_vars)

        with open(
            os.path.join(
                self._log["path"], self._log["bn"] + "_it{it}.json".format(it=self._it)
            ),
            "w",
        ) as f:
            json.dump(l, f)
Example #8
    def train(self, X, y, do_optimize=True):
        """
        Computes the Cholesky decomposition of the covariance of X and
        estimates the GP hyperparameters by optimizing the marginal
        log-likelihood. The prior mean of the GP is set to the empirical
        mean of y.

        :param X: Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        :type X: np.ndarray (N, D)
        :param y: The corresponding target values.
        :type y: np.ndarray (N,)
        :param do_optimize: If set to true, the hyperparameters are optimized;
            otherwise the default hyperparameters of the kernel are used.
        :type do_optimize: boolean
        """

        if self.normalize_input:
            # Normalize input to be in [0, 1]
            self.X, self.lower, self.upper = normalization.zero_one_normalization(
                X, self.lower, self.upper)
        else:
            self.X = X

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(
                y)
            if self.y_std == 0:
                raise ValueError(
                    "Cannot normalize output. All targets have the same value")
        else:
            self.y = y

        # Use the empirical mean of the data as mean for the GP
        self.mean = np.mean(self.y, axis=0)

        self.gp = george.GP(self.kernel, mean=self.mean)

        if do_optimize:
            self.hypers = self.optimize()
            self.gp.kernel[:] = self.hypers[:-1]
            self.noise = np.exp(self.hypers[-1])  # sigma^2
        else:
            self.hypers = self.gp.kernel[:]
            self.hypers = np.append(self.hypers, np.log(self.noise))

        if self.verbose:
            logger.debug("Fabolas.GaussianProcess: GP Hyperparameters: " +
                         str(self.hypers))

        self.gp.compute(self.X, yerr=np.sqrt(self.noise))

        self.is_trained = True
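The normalization helpers themselves are not part of this excerpt. Below is a plausible sketch of what normalization.zero_one_normalization and zero_mean_unit_var_normalization compute, matching the return signatures used above; this is an assumption, not the library code:

import numpy as np

def zero_one_normalization(X, lower=None, upper=None):
    # scale each input dimension into [0, 1] using the given (or empirical) bounds
    if lower is None:
        lower = np.min(X, axis=0)
    if upper is None:
        upper = np.max(X, axis=0)
    return (X - lower) / (upper - lower), lower, upper

def zero_mean_unit_var_normalization(y):
    # standardize targets to zero mean and unit standard deviation
    y_mean, y_std = np.mean(y), np.std(y)
    return (y - y_mean) / y_std, y_mean, y_std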
Example #9
    def _generate_log(self, conf, subset, result, cost, tracking_vars):
        '''
        Generates the log, stores it in the logfile, and calculates the incumbent if the constructor parameter log['incumbents'] was true

        :param conf: used configuration
        :type conf: dict
        :param subset: used subset-fragmentation
        :type subset: int
        :param result: the result of the evaluation
        :type result: float
        :param cost: the cost of the evaluation
        :type cost: float
        :param tracking_vars: Fabolas' tracking vars
        :type tracking_vars: dict
        '''
        if self._log is None:
            return

        logger.debug("Fabolas: generating log")
        l = {
            'config': conf,
            'subset_frac': subset,
            'config_result': result,
            'config_cost': cost,
            'iteration': self._it,
            'operation': 'init' if self._it < self._n_init else 'opt'
        }
        if self._it == self._num_iterations:
            l['operation'] = 'final'

        if self._log['incumbents'] and self._it < self._num_iterations:
            start = time()
            if self._it < self._n_init:
                best_i = np.argmin(self._Y)
                l['incumbents'], _, track = self._create_param_dict(
                    (self._X[best_i][:-1], 1), {})
                l['incumbents_estimated_performance'] = -1
                l['incumbents_log'] = track['config_log']
            else:
                inc, inc_val = self._projected_incumbent_estimation(
                    self._model_objective, self._X[:, :-1])
                l['incumbents'], _, track = self._create_param_dict(
                    (inc[:-1], 1), {})
                l['incumbents_estimated_performance'] = inc_val
                l.update({'incumbent_time': time() - start})

        l.update(tracking_vars)

        with open(
                os.path.join(
                    self._log['path'],
                    self._log['bn'] + '_it{it}.json'.format(it=self._it)),
                'w') as f:
            json.dump(l, f)
Example #10
    def save_data_to_cache(self, pipe_element_name, data):
        cache_query = self.generate_cache_key(pipe_element_name)
        filename = os.path.join(self.cache_folder, str(cache_query) + ".p")
        self.cache_index[cache_query] = filename
        if not self.single_subject_caching:
            logger.debug("Saving data to cache for " + pipe_element_name +
                         ": " + str(self.state.nr_items) + " items " +
                         self.state.first_data_str + " - " +
                         str(self.state.config))

        # write cached data to filesystem
        with open(filename, 'wb') as f:
            joblib.dump(data, f)
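A minimal roundtrip of the joblib-based file cache used above; the cache folder and key below are hypothetical:

import os
import joblib

cache_folder = "/tmp/photon_cache"              # hypothetical cache folder
os.makedirs(cache_folder, exist_ok=True)
filename = os.path.join(cache_folder, "example_cache_key.p")

data = ([[1.0, 2.0], [3.0, 4.0]], [0, 1], {})   # an (X, y, kwargs) tuple, as cached above

with open(filename, "wb") as f:                 # write cached data to filesystem
    joblib.dump(data, f)

with open(filename, "rb") as f:                 # load it back, as in load_cached_data
    X, y, kwargs = joblib.load(f)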
Example #11
    def ask_generator(self):
        while True:
            self.flag = False
            start_time = time.time()

            X, Y = self.optimizer.rh2EPM.transform(self.optimizer.runhistory)

            self.optimizer.logger.debug("Search for next configuration.")
            # get all configurations sorted according to the acquisition function
            challengers = self.optimizer.choose_next(X, Y)
            self.test += 1
            print("TEST # of trains", self.test)
            time_spent = time.time() - start_time
            time_left = self.optimizer._get_timebound_for_intensification(time_spent)

            self.to_run = self.intensify(
                challengers=challengers,
                incumbent=self.optimizer.incumbent,
                run_history=self.optimizer.runhistory,
                aggregate_func=self.optimizer.aggregate_func,
                time_bound=max(self.optimizer.intensifier._min_time, time_left),
            )

            if self.flag:
                if self.optimizer.stats.is_budget_exhausted():
                    # yield self.optimizer.incumbent.get_dictionary()
                    return None
                else:
                    yield self.check(self.to_run.get_dictionary())

            else:
                print("Size of challenger list: ", len(self.to_run))
                for challenger in self.to_run[: min(len(self.to_run), 25)]:
                    if self.optimizer.stats.is_budget_exhausted():
                        # yield self.optimizer.incumbent.get_dictionary()
                        return None
                    else:
                        yield self.check(challenger.get_dictionary())

            logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
                % (
                    self.optimizer.stats.get_remaing_time_budget(),
                    self.optimizer.stats.get_remaining_ta_budget(),
                    self.optimizer.stats.get_remaining_ta_runs(),
                )
            )

            self.optimizer.stats.print_stats(debug_out=True)
Example #12
    def fit(self, X, y=None, **kwargs):

        self._validate_elements()
        X, y, kwargs = self._caching_fit_transform(X, y, kwargs, fit=True)

        if self._final_estimator is not None:
            logger.debug("PhotonPipeline: Fitting " + self._final_estimator.name)
            fit_start_time = datetime.datetime.now()
            if self.random_state:
                self._final_estimator.random_state = self.random_state
            self._final_estimator.fit(X, y, **kwargs)
            # Todo: handle post-fit actions of the final estimator here
            n = PhotonDataHelper.find_n(X)
            fit_duration = (datetime.datetime.now() - fit_start_time).total_seconds()
            self.time_monitor["fit"].append((self.elements[-1][0], fit_duration, n))
        return self
Example #13
    def _standardize(self, covariates, is_fit):
        logger.debug('Standardizing confounder prior to removal.')
        scaled_covs = list()
        if is_fit:
            # standardize covariates
            for cov in covariates.T:
                self.scalers.append(StandardScaler())
                scaled_covs.append(self.scalers[-1].fit_transform(
                    cov.reshape(-1, 1)).squeeze())
            scaled_covs = np.asarray(scaled_covs).T
        else:
            for i, cov in enumerate(covariates.T):
                scaled_covs.append(self.scalers[i].transform(cov.reshape(
                    -1, 1)).squeeze())
            scaled_covs = np.asarray(scaled_covs).T
        return scaled_covs
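The same per-column standardization can be reproduced standalone with scikit-learn; the toy covariates below are made up for illustration:

import numpy as np
from sklearn.preprocessing import StandardScaler

covariates = np.array([[25.0, 1.0], [40.0, 2.0], [33.0, 1.0], [58.0, 3.0]])  # e.g. age, site

scalers, scaled_cols = [], []
for cov in covariates.T:                       # one scaler per covariate column
    scalers.append(StandardScaler())
    scaled_cols.append(scalers[-1].fit_transform(cov.reshape(-1, 1)).squeeze())
scaled_covs = np.asarray(scaled_cols).T        # same shape as the input covariates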
Example #14
    def transform(self, X, y=None, **kwargs):
        """
        Generates "new samples" by computing the mean between all or n_draws pairs of existing samples and appends them to X
        The target for each new sample is computed as the mean between the constituent targets
        :param X: data
        :param y: targets (optional)
        :param draw_limit: in case the full number of combinations is > 10k, how many to draw?
        :param rand_seed: sets seed for random sampling of combinations (for reproducibility only)
        :return: X_new: X and X_augmented; (y_new: the corresponding targets)
        """

        logger.debug("Pairing " + str(self.draw_limit) + " samples...")

        # ensure class balance in the training set if balance_classes is True
        unique_classes = np.unique(y)
        n_pairs = list()
        for label in unique_classes:
            if self.balance_classes:
                n_pairs.append(self.draw_limit - np.sum(y == label))
            else:
                n_pairs.append(self.draw_limit)

        # run get_samples for each class independently
        X_extended = list()
        y_extended = list()
        kwargs_extended = dict()

        for label, limit in zip(unique_classes, n_pairs):
            X_new_class, y_new_class, kwargs_new_class = self._return_samples(
                X[y == label],
                y[y == label],
                PhotonDataHelper.index_dict(kwargs, y == label),
                generator=self.generator,
                distance_metric=self.distance_metric,
                draw_limit=limit,
                rand_seed=self.random_state,
            )

            X_extended.extend(X_new_class)
            y_extended.extend(y_new_class)

            # get the corresponding kwargs
            if kwargs:
                kwargs_extended = PhotonDataHelper.join_dictionaries(
                    kwargs_extended, kwargs_new_class)

        return X_extended, y_extended, kwargs_extended
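A simplified, hedged sketch of the pairing idea described in the docstring (feature-wise means of sample pairs appended as new samples); it ignores class balancing and kwargs and is not the _return_samples implementation:

import numpy as np
from itertools import combinations

def pair_samples(X, y, draw_limit=100, rand_seed=None):
    # draw up to draw_limit pairs and append their feature-wise means to X;
    # each new target is the mean of the two constituent targets
    rng = np.random.default_rng(rand_seed)
    pairs = list(combinations(range(len(X)), 2))
    if not pairs:
        return X, y
    if len(pairs) > draw_limit:
        idx = rng.choice(len(pairs), size=draw_limit, replace=False)
        pairs = [pairs[i] for i in idx]
    X_aug = np.array([(X[a] + X[b]) / 2.0 for a, b in pairs])
    y_aug = np.array([(y[a] + y[b]) / 2.0 for a, b in pairs])
    return np.vstack([X, X_aug]), np.concatenate([y, y_aug])

# usage with toy data
X_new, y_new = pair_samples(np.random.rand(10, 3), np.random.rand(10), draw_limit=5)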
Example #15
    def load_cached_data(self, pipe_element_name):

        cache_query = self.generate_cache_key(pipe_element_name)
        if cache_query in self.cache_index:
            if not self.single_subject_caching:
                logger.debug("Loading data from cache for " +
                             pipe_element_name + ": " +
                             str(self.state.nr_items) + " items " +
                             self.state.first_data_str + " - " +
                             str(self.state.config))
            filename = self.cache_index[cache_query]
            # lock = Lock(filename)
            # lock.acquire()
            with open(filename, 'rb') as f:
                (X, y, kwargs) = joblib.load(f)

            return X, y, kwargs
        return None
Example #16
    def _add_mask_to_library(
        self,
        mask_name: str = "",
        target_affine=None,
        target_shape=None,
        mask_threshold=0.5,
    ):
        # Todo: find solution for multiprocessing spamming
        # print('Adding mask to library: {} - Shape {} - Affine {} - Threshold {}'.format(mask_name,
        #                                                                                      target_shape,
        #                                                                                      target_affine,
        #                                                                                      mask_threshold))

        if mask_name in self.photon_masks.keys():
            original_mask_object = self.photon_masks[mask_name]
        else:
            logger.debug("Checking custom mask")
            original_mask_object = self._check_custom_mask(mask_name)

        mask_object = MaskObject(name=mask_name,
                                 mask_file=original_mask_object.mask_file)

        # mask_object.mask = image.threshold_img(mask_object.mask_file, threshold=mask_threshold)
        mask_object.mask = math_img("img > {}".format(mask_threshold),
                                    img=mask_object.mask_file)

        if target_affine is not None and target_shape is not None:
            mask_object.mask = self._resample(mask_object.mask,
                                              target_affine=target_affine,
                                              target_shape=target_shape)

        # check if roi is empty
        if np.sum(mask_object.mask.dataobj != 0) == 0:
            logger.error("No voxels in mask after resampling (" +
                         mask_object.name + ").")
            mask_object.is_empty = True

        self.library[(
            mask_object.name,
            str(target_affine),
            str(target_shape),
            str(mask_threshold),
        )] = mask_object
        logger.debug("BrainMask: Done adding mask to library!")
Example #17
    def save(self):

        if self.output_settings.mongodb_connect_url:
            connect(self.output_settings.mongodb_connect_url,
                    alias="photon_core")
            logger.debug("Write results to mongodb...")
            try:
                self.results.save()
            except DocumentTooLarge as e:
                logger.error(
                    "Could not save document into MongoDB: Document too large")
                # try to reduce the amount of configs saved
                # if len(results_tree.outer_folds[0].tested_config_list) > 100:
                #     for outer_fold in results_tree.outer_folds:
                #         metrics_configs = [outer_fold.tested_configlist

        if self.output_settings.save_output:
            logger.info("Writing results to project folder...")
            self.write_result_tree_to_file()
Example #18
    def calc_config(self):
        '''
            Calculates the configurations and the subset-fragmentation to evaluate.
            Implemented as a generator.

            The returned tracking vars are for internal use and need to be passed to process_result.
        :return: next configuration to test, subset-frag to use, tracking-vars
        :rtype: dict, int, dict
        '''
        logger.info('**Fabolas: Starting initialization')
        for self._it in range(0, self._n_init):
            logger.debug('Fabolas: step ' + str(self._it) + ' (init)')
            start = time()
            result = self._init_models()
            tracking = {'overhead_time': time() - start}
            logger.debug(
                'Fabolas: needed {t!s}s'.format(t=tracking['overhead_time']))
            yield self._create_param_dict(result, tracking)

        self._X = np.array(self._X)
        self._Y = np.array(self._Y)
        self._cost = np.array(self._cost)

        logger.info('**Fabolas: Starting optimization')
        for self._it in range(self._n_init, self._num_iterations):
            logger.debug('Fabolas: step ' + str(self._it) + ' (opt)')
            start = time()
            result = self._optimize_config()
            tracking = {'overhead_time': time() - start}
            logger.debug(
                'Fabolas: needed {t!s}s'.format(t=tracking['overhead_time']))
            yield self._create_param_dict(result, tracking)

        logger.info('Fabolas: Final config')
        start = time()
        self._model_objective.train(self._X, self._Y, do_optimize=True)
        result = self.get_incumbent()
        tracking = {'overhead_time': time() - start}
        logger.debug(
            'Fabolas: needed {t!s}s'.format(t=tracking['overhead_time']))
        yield self._create_param_dict(result, tracking)
Example #19
    def sample_representer_points(self):
        self.sampling_acquisition.update(self.model)

        for i in range(5):
            restarts = np.zeros((self.Nb, self.D))
            restarts[0:self.Nb, ] = self.lower + (self.upper - self.lower) \
                                                 * self.rng.uniform(size=(self.Nb, self.D))
            sampler = emcee.EnsembleSampler(self.Nb, self.D,
                                            self.sampling_acquisition_wrapper)
            # zb are the representer points and lmb are their log EI values
            self.zb, self.lmb, _ = sampler.run_mcmc(restarts, 50)
            if not np.any(np.isinf(self.lmb)):
                break
            else:
                if self.verbose:
                    logger.debug("Fabolas.InformationGain: Infinity")

        if len(self.zb.shape) == 1:
            self.zb = self.zb[:, None]
        if len(self.lmb.shape) == 1:
            self.lmb = self.lmb[:, None]
Example #20
    def transform(self, X, y=None, **kwargs):
        logger.debug('Regress out confounder.')
        sample_ols_confounder = self._check_for_confounders(kwargs)
        self._validate_dimension(X, sample_ols_confounder)

        # standardize covariates
        if self.standardize_covariates:
            sample_ols_confounder = self._standardize(sample_ols_confounder,
                                                      is_fit=False)

        sample_ols_confounder = sm.add_constant(sample_ols_confounder)
        X_new = np.empty(X.shape)
        for i in range(X.shape[1]):
            preds = np.matmul(sample_ols_confounder,
                              np.squeeze(self.olsModel_params[i]))
            residuum_feature_vector = np.squeeze(X[:, i]) - preds
            # residuum_feature_vector += self.olsModel_params[i, 0]  # add intercept
            X_new[:, i] = np.asarray(
                residuum_feature_vector
            )  # writing back the residuum of the feature vector
        return X_new, kwargs
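For context, a self-contained sketch of the residualization idea used above (regress each feature on the covariates and keep the residuals); the toy data is made up and this is not the class's fit/transform implementation:

import numpy as np
import statsmodels.api as sm

def remove_confounds(X, covariates):
    # fit an OLS model per feature and subtract the covariate-explained part
    C = sm.add_constant(covariates)
    X_clean = np.empty_like(X, dtype=float)
    for i in range(X.shape[1]):
        params = sm.OLS(X[:, i], C).fit().params
        X_clean[:, i] = X[:, i] - C @ params
    return X_clean

rng = np.random.default_rng(42)
covs = rng.normal(size=(100, 2))
X = covs @ np.array([[1.0, 0.5], [0.3, -0.2]]).T + rng.normal(scale=0.1, size=(100, 2))
X_clean = remove_confounds(X, covs)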
Example #21
    def _do_timed_fit_transform(self, name, transformer, fit, X, y, **kwargs):

        n = PhotonDataHelper.find_n(X)
        if self.random_state:
            transformer.random_state = self.random_state

        if fit:
            logger.debug("PhotonPipeline: Fitting " + transformer.name)
            fit_start_time = datetime.datetime.now()
            transformer.fit(X, y, **kwargs)
            fit_duration = (datetime.datetime.now() - fit_start_time).total_seconds()
            self.time_monitor["fit"].append((name, fit_duration, n))

        logger.debug("PhotonPipeline: Transforming data with " + transformer.name)
        transform_start_time = datetime.datetime.now()
        X, y, kwargs = transformer.transform(X, y, **kwargs)
        transform_duration = (
            datetime.datetime.now() - transform_start_time
        ).total_seconds()
        self.time_monitor["transform_computed"].append((name, transform_duration, n))
        return X, y, kwargs
Example #22
    def sample_representer_points(self):
        # Sample representer points only in the
        # configuration space by setting all environmental
        # variables to 1
        D = np.where(self.is_env == 0)[0].shape[0]

        lower = self.lower[np.where(self.is_env == 0)]
        upper = self.upper[np.where(self.is_env == 0)]

        self.sampling_acquisition.update(self.model)

        for i in range(5):
            restarts = np.random.uniform(low=lower,
                                         high=upper,
                                         size=(self.Nb, D))
            sampler = emcee.EnsembleSampler(self.Nb, D,
                                            self.sampling_acquisition_wrapper)

            self.zb, self.lmb, _ = sampler.run_mcmc(restarts, 50)
            if not np.any(np.isinf(self.lmb)):
                break
            else:
                if self.verbose:
                    logger.debug(
                        "Fabolas.InformationGainPerUnitCost: Infinity")
        if np.any(np.isinf(self.lmb)):
            raise ValueError(
                "Could not sample valid representer points! LogEI is -infinity"
            )
        if len(self.zb.shape) == 1:
            self.zb = self.zb[:, None]
        if len(self.lmb.shape) == 1:
            self.lmb = self.lmb[:, None]

        # Project representer points to subspace
        proj = np.ones(
            [self.zb.shape[0], self.upper[self.is_env == 1].shape[0]])
        proj *= self.upper[self.is_env == 1].shape[0]
        self.zb = np.concatenate((self.zb, proj), axis=1)
Example #23
    def _optimize_config(self):
        """
        Train models and calculate the configuration and subset-fragmentation in the optimization-phase

        :return: configuration, subset-fragmentation
        :rtype: list, int
        """
        # Train models
        logger.debug("Fabolas: Train model_objective")
        self._model_objective.train(self._X, self._Y, do_optimize=True)
        logger.debug("Fabolas: Train model_cost")
        self._model_cost.train(self._X, self._cost, do_optimize=True)

        # Maximize acquisition function
        logger.debug("Fabolas: Update acquisition func")
        self._acquisition_func.update(self._model_objective, self._model_cost)
        logger.debug("Fabolas: Generate new config by maximizing")
        new_x = self._maximizer.maximize()

        s = self._s_max / self._retransform(new_x[-1])
        logger.debug("Fabolas: config generation done for this step")

        return new_x[:-1], int(s)
Example #24
    def load_or_save_cached_data(self,
                                 name,
                                 X,
                                 y,
                                 kwargs,
                                 transformer,
                                 fit=False,
                                 needed_for_further_computation=False,
                                 initial_X=None):
        if not self.single_subject_caching:
            # if we do it group-wise then it's easy
            if self.skip_loading and not needed_for_further_computation:
                # check if data is already calculated
                if self.cache_man.check_cache(name):
                    # if so, do nothing
                    return X, y, kwargs
                else:
                    # otherwise, do the calculation and save it
                    cached_result = None
            else:
                start_time_for_loading = datetime.datetime.now()
                cached_result = self.cache_man.load_cached_data(name)

            if cached_result is None:
                X, y, kwargs = self._do_timed_fit_transform(
                    name, transformer, fit, X, y, **kwargs)

                start_time_saving = datetime.datetime.now()
                self.cache_man.save_data_to_cache(name, (X, y, kwargs))
                saving_duration = (datetime.datetime.now() -
                                   start_time_saving).total_seconds()
                self.time_monitor['transform_cached'].append(
                    (name, saving_duration, 1))
            else:
                X, y, kwargs = cached_result[0], cached_result[
                    1], cached_result[2]
                loading_duration = (datetime.datetime.now() -
                                    start_time_for_loading).total_seconds()
                n = PhotonDataHelper.find_n(X)
                self.time_monitor['transform_cached'].append(
                    (name, loading_duration, n))
            return X, y, kwargs
        else:
            # if we do it subject-wise we need to iterate and collect the results
            processed_X, processed_y, processed_kwargs = list(), list(), dict()
            X_uncached, y_uncached, kwargs_uncached, initial_X_uncached = list(
            ), list(), dict(), list()
            list_of_idx_cached, list_of_idx_non_cached = list(), list()

            nr = PhotonDataHelper.find_n(X)
            for start, stop in PhotonDataHelper.chunker(nr, 1):
                # split data in single entities, find key from first element = PATH to file
                X_key, _, _ = PhotonDataHelper.split_data(
                    initial_X, None, {}, start, stop)
                X_batched, y_batched, kwargs_dict_batched = PhotonDataHelper.split_data(
                    X, y, kwargs, start, stop)
                self.cache_man.update_single_subject_state_info(X_key)

                # check if item has been processed
                if self.cache_man.check_cache(name):
                    list_of_idx_cached.append(start)
                else:
                    list_of_idx_non_cached.append(start)
                    X_uncached = PhotonDataHelper.stack_data_vertically(
                        X_uncached, X_batched)
                    y_uncached = PhotonDataHelper.stack_data_vertically(
                        y_uncached, y_batched)
                    initial_X_uncached = PhotonDataHelper.stack_data_vertically(
                        initial_X_uncached, X_key)
                    kwargs_uncached = PhotonDataHelper.join_dictionaries(
                        kwargs_uncached, kwargs_dict_batched)

            # now we know which part can be loaded and which part should be transformed
            # first apply the transformation to the group, then save it single-subject-wise
            if len(list_of_idx_non_cached) > 0:

                # apply transformation groupwise
                new_group_X, new_group_y, new_group_kwargs = self._do_timed_fit_transform(
                    name, transformer, fit, X_uncached, y_uncached,
                    **kwargs_uncached)

                # then save it single
                nr = PhotonDataHelper.find_n(new_group_X)
                for start in range(nr):
                    # split data in single entities
                    X_batched, y_batched, kwargs_dict_batched = PhotonDataHelper.split_data(
                        new_group_X, new_group_y, new_group_kwargs, start,
                        start)
                    X_key, _, _ = PhotonDataHelper.split_data(
                        initial_X_uncached, None, {}, start, start)
                    # we save the data in relation to the input path (X_key = hash(input X))
                    self.cache_man.update_single_subject_state_info(X_key)

                    start_time_saving = datetime.datetime.now()
                    self.cache_man.save_data_to_cache(
                        name, (X_batched, y_batched, kwargs_dict_batched))
                    saving_duration = (datetime.datetime.now() -
                                       start_time_saving).total_seconds()
                    self.time_monitor['transform_cached'].append(
                        (name, saving_duration, 1))

                # we need to collect the data only when we want to load them
                # we can skip that process if we only want them to get into the cache (case: parallelisation)
                if not self.skip_loading or needed_for_further_computation:
                    # stack results
                    processed_X, processed_y, processed_kwargs = new_group_X, new_group_y, new_group_kwargs

            # afterwards load everything that has been cached
            if len(list_of_idx_cached) > 0:
                if not self.skip_loading or needed_for_further_computation:
                    for cache_idx in list_of_idx_cached:
                        # we identify the data according to the input path (X before any transformation)
                        self.cache_man.update_single_subject_state_info(
                            [initial_X[cache_idx]])

                        # time the loading of the cached item
                        start_time_for_loading = datetime.datetime.now()
                        transformed_X, transformed_y, transformed_kwargs = self.cache_man.load_cached_data(
                            name)
                        loading_duration = (
                            datetime.datetime.now() -
                            start_time_for_loading).total_seconds()
                        self.time_monitor['transform_cached'].append(
                            (name, loading_duration,
                             PhotonDataHelper.find_n(X)))

                        processed_X, processed_y, processed_kwargs = PhotonDataHelper.join_data(
                            processed_X, transformed_X, processed_y,
                            transformed_y, processed_kwargs,
                            transformed_kwargs)

            logger.debug(name + " loaded " + str(len(list_of_idx_cached)) +
                         " items from cache and computed " +
                         str(len(list_of_idx_non_cached)))
            if not self.skip_loading or needed_for_further_computation:
                # now sort the data in the correct order again
                processed_X, processed_y, processed_kwargs = PhotonDataHelper.resort_splitted_data(
                    processed_X, processed_y, processed_kwargs,
                    PhotonDataHelper.stack_data_vertically(
                        list_of_idx_cached, list_of_idx_non_cached))

            return processed_X, processed_y, processed_kwargs
Example #25
    def fit(self, X, y, **kwargs):
        """
        Iterates over cross-validation folds and trains the pipeline, then uses it for predictions.
        Calculates metrics per fold and averages them across folds.
        :param X: Training and test data
        :param y: Training and test targets
        :returns: configuration class for result tree that monitors training and test performance
        """

        # needed for testing Timeboxed Random Grid Search
        # time.sleep(35)

        config_item = MDBConfig()
        config_item.config_dict = self.params
        config_item.inner_folds = []
        config_item.metrics_test = []
        config_item.metrics_train = []
        config_item.computation_start_time = datetime.datetime.now()

        try:
            # do inner cv
            for idx, (inner_fold_id, inner_fold) in enumerate(
                    self.cross_validation_infos.inner_folds[
                        self.outer_fold_id].items()):

                train, test = inner_fold.train_indices, inner_fold.test_indices

                # split kwargs according to cross validation
                train_X, train_y, kwargs_cv_train = PhotonDataHelper.split_data(
                    X, y, kwargs, indices=train)
                test_X, test_y, kwargs_cv_test = PhotonDataHelper.split_data(
                    X, y, kwargs, indices=test)

                new_pipe = self.pipe()
                if self.cache_folder is not None and self.cache_updater is not None:
                    self.cache_updater(new_pipe, self.cache_folder,
                                       inner_fold_id)

                if not config_item.human_readable_config:
                    config_item.human_readable_config = PhotonPrintHelper.config_to_human_readable_dict(
                        new_pipe, self.params)
                    logger.clean_info(
                        json.dumps(config_item.human_readable_config,
                                   indent=4,
                                   sort_keys=True))

                job_data = InnerFoldManager.InnerCVJob(
                    pipe=new_pipe,
                    config=dict(self.params),
                    metrics=self.optimization_infos.metrics,
                    callbacks=self.optimization_constraints,
                    train_data=InnerFoldManager.JobData(
                        train_X, train_y, train, kwargs_cv_train),
                    test_data=InnerFoldManager.JobData(test_X, test_y, test,
                                                       kwargs_cv_test),
                )

                # only for non-parallel processing
                # inform children in which inner fold we are
                # self.pipe.distribute_cv_info_to_hyperpipe_children(inner_fold_counter=fold_cnt)
                # self.mother_inner_fold_handle(fold_cnt)

                # --> write that output in InnerFoldManager!
                # logger.debug(config_item.human_readable_config)
                fold_nr = idx + 1
                logger.debug("calculating inner fold " + str(fold_nr) + "...")

                curr_test_fold, curr_train_fold = InnerFoldManager.fit_and_score(
                    job_data)
                logger.debug("Performance inner fold " + str(fold_nr))
                print_double_metrics(
                    curr_train_fold.metrics,
                    curr_test_fold.metrics,
                    photon_system_log=False,
                )

                durations = job_data.pipe.time_monitor

                self.update_config_item_with_inner_fold(
                    config_item=config_item,
                    fold_cnt=fold_nr,
                    curr_train_fold=curr_train_fold,
                    curr_test_fold=curr_test_fold,
                    time_monitor=durations,
                    feature_importances=new_pipe.feature_importances_,
                )

                if isinstance(self.optimization_constraints, list):
                    break_cv = 0
                    for cf in self.optimization_constraints:
                        if not cf.shall_continue(config_item):
                            logger.info(
                                "Skipped further cross validation after fold "
                                + str(fold_nr) +
                                " due to performance constraints in " +
                                cf.metric)
                            break_cv += 1
                            break
                    if break_cv > 0:
                        break
                elif self.optimization_constraints is not None:
                    if not self.optimization_constraints.shall_continue(
                            config_item):
                        logger.info(
                            "Skipped further cross validation after fold " +
                            str(fold_nr) +
                            " due to performance constraints in " +
                            self.optimization_constraints.metric)
                        break

            InnerFoldManager.process_fit_results(
                config_item,
                self.cross_validation_infos.calculate_metrics_across_folds,
                self.cross_validation_infos.calculate_metrics_per_fold,
                self.optimization_infos.metrics,
            )

        except Exception as e:
            if self.raise_error:
                raise e
            logger.error(e)
            logger.error(traceback.format_exc())
            traceback.print_exc()
            if not isinstance(e, Warning):
                config_item.config_failed = True
            config_item.config_error = str(e)
            warnings.warn("One test iteration of pipeline failed with error")

        logger.debug("...done with")
        logger.debug(
            json.dumps(config_item.human_readable_config,
                       indent=4,
                       sort_keys=True))

        config_item.computation_end_time = datetime.datetime.now()
        return config_item
Example #26
    def transform(self, X, y=None, **kwargs):
        """

        :param X: input data
        :param y: targets
        :param kwargs:
        :return: roi_data: np.ndarray, ROI data for the given brain atlas in concat or list form.
        """

        X, n_subjects = NiftiConverter.transform(X)

        if self.collection_mode == 'list' or self.collection_mode == 'concat':
            collection_mode = self.collection_mode
        else:
            msg = "Collection mode {} not supported. Use 'list' or 'concat' instead." +\
                           "Falling back to concat mode.".format(self.collection_mode)
            logger.error(msg)
            raise ValueError(msg)

        # 1. validate if all X are in the same space and have the same voxelsize and have the same orientation

        # get ROI mask
        self.affine, self.shape = BrainMask.get_format_info_from_first_image(X)
        atlas_obj = AtlasLibrary().get_atlas(self.atlas_name, self.affine, self.shape, self.mask_threshold)
        roi_objects = self._get_rois(atlas_obj, which_rois=self.rois, background_id=self.background_id)

        roi_data = [list() for i in range(n_subjects)]
        roi_data_concat = list()
        t1 = time.time()

        # convert to series and C ordering since this will speed up the masking process
        series = _utils.as_ndarray(_utils.niimg._safe_get_data(X), dtype='float32', order="C", copy=True)
        mask_indices = list()

        # calculate roi_data for every ROI object by looping
        for i, roi in enumerate(roi_objects):
            self.roi_allocation[roi.label] = i

            logger.debug("Extracting ROI {}".format(roi.label))
            # simply call apply_mask to extract one roi
            extraction = self.apply_mask(series, roi.mask)
            if collection_mode == 'list':
                for sub_i in range(extraction.shape[0]):
                    roi_data[sub_i].append(extraction[sub_i])
                mask_indices.append(i)
            else:
                roi_data_concat.append(extraction)
                mask_indices.append(np.ones(extraction[0].size) * i)

        if self.collection_mode == 'concat':
            if n_subjects > 1:
                roi_data = np.concatenate(roi_data_concat, axis=1)
                self.mask_indices = np.concatenate(mask_indices)
            else:
                roi_data = np.array(roi_data_concat)
                self.mask_indices = mask_indices
        else:
            self.mask_indices = mask_indices

        elapsed_time = time.time() - t1
        logger.debug("Time for extracting {} ROIs in {} subjects: {} seconds".format(len(roi_objects),
                                                                                     n_subjects, elapsed_time))
        return roi_data
Example #27
    def transform(self, X, y=None, **kwargs):

        if len(X) < 1:
            raise Exception("Brain Atlas: Did not get any data in parameter X")

        if self.collection_mode == "list" or self.collection_mode == "concat":
            collection_mode = self.collection_mode
        else:
            collection_mode = "concat"
            logger.error(
                "Collection mode {} not supported. Use 'list' or 'concat' instead. "
                "Falling back to concat mode.".format(self.collection_mode))

        # 1. validate if all X are in the same space and have the same voxelsize and have the same orientation

        # 2. load sample data to get target affine and target shape to adapt the brain atlas

        self.affine, self.shape = BrainMask.get_format_info_from_first_image(X)

        # load all niftis to memory
        if isinstance(X, list):
            n_subjects = len(X)
            X = image.load_img(X)
        elif isinstance(X, str):
            n_subjects = 1
            X = image.load_img(X)
        elif isinstance(X, np.ndarray):
            n_subjects = X.shape[0]
            X = image.load_img(X)
        else:
            n_subjects = X.shape[-1]

        # get ROI mask
        atlas_obj = AtlasLibrary().get_atlas(self.atlas_name, self.affine,
                                             self.shape, self.mask_threshold)
        roi_objects = self._get_rois(atlas_obj,
                                     which_rois=self.rois,
                                     background_id=self.background_id)

        roi_data = [list() for i in range(n_subjects)]
        roi_data_concat = list()
        t1 = time.time()

        # convert to series and C ordering since this will speed up the masking process
        series = _utils.as_ndarray(_safe_get_data(X),
                                   dtype="float32",
                                   order="C",
                                   copy=True)
        mask_indices = list()

        for i, roi in enumerate(roi_objects):
            logger.debug("Extracting ROI {}".format(roi.label))
            # simply call apply_mask to extract one roi
            extraction = self.apply_mask(series, roi.mask)
            if collection_mode == "list":
                for sub_i in range(extraction.shape[0]):
                    roi_data[sub_i].append(extraction[sub_i])
                mask_indices.append(i)
            else:
                roi_data_concat.append(extraction)
                mask_indices.append(np.ones(extraction[0].size) * i)

        if self.collection_mode == "concat":
            roi_data = np.concatenate(roi_data_concat, axis=1)
            self.mask_indices = np.concatenate(mask_indices)
        else:
            self.mask_indices = mask_indices

        elapsed_time = time.time() - t1
        logger.debug(
            "Time for extracting {} ROIs in {} subjects: {} seconds".format(
                len(roi_objects), n_subjects, elapsed_time))
        return roi_data
Example #28
    def fit(self, X, y=None, **kwargs):
        logger.photon_system_log('')
        logger.photon_system_log(
            '***************************************************************************************************************'
        )
        logger.photon_system_log('Outer Cross validation Fold {}'.format(
            self.cross_validaton_info.outer_folds[self.outer_fold_id].fold_nr))
        logger.photon_system_log(
            '***************************************************************************************************************'
        )

        self._prepare_data(X, y, **kwargs)
        self._fit_dummy()
        self._generate_inner_folds()
        self._prepare_optimization()

        outer_fold_fit_start_time = datetime.datetime.now()
        self.best_metric_yet = None
        self.tested_config_counter = 0

        # distribute number of folds to encapsulated child hyperpipes
        # self.__distribute_cv_info_to_hyperpipe_children(num_of_folds=num_folds,
        #                                                 outer_fold_counter=outer_fold_counter)

        if self.cross_validaton_info.calculate_metrics_per_fold:
            self.fold_operation = FoldOperations.MEAN
        else:
            self.fold_operation = FoldOperations.RAW

        self.max_nr_of_configs = ''
        if hasattr(self.optimizer, 'n_configurations'):
            self.max_nr_of_configs = str(self.optimizer.n_configurations)

        if isinstance(self.optimizer, PhotonMasterOptimizer):
            self.optimizer.optimize()
        else:
            # do the optimizing
            for current_config in self.optimizer.ask:
                self.objective_function(current_config)

        logger.clean_info(
            '---------------------------------------------------------------------------------------------------------------'
        )
        logger.info(
            'Hyperparameter Optimization finished. Now finding best configuration .... '
        )
        print(self.tested_config_counter)
        # now go on with the best config found
        if self.tested_config_counter > 0:
            best_config_outer_fold = self.optimization_info.get_optimum_config(
                self.result_object.tested_config_list, self.fold_operation)

            if not best_config_outer_fold:
                raise Exception("No best config was found!")

            # ... and create optimal pipeline
            optimum_pipe = self.copy_pipe_fnc()
            if self.cache_updater is not None:
                self.cache_updater(optimum_pipe, self.cache_folder,
                                   "fixed_fold_id")
            optimum_pipe.caching = False
            # set self to best config
            optimum_pipe.set_params(**best_config_outer_fold.config_dict)

            # Todo: set all children to best config and inform to NOT optimize again, ONLY fit
            # for child_name, child_config in best_config_outer_fold_mdb.children_config_dict.items():
            #     if child_config:
            #         # in case we have a pipeline stacking we need to identify the particular subhyperpipe
            #         splitted_name = child_name.split('__')
            #         if len(splitted_name) > 1:
            #             stacking_element = self.optimum_pipe.named_steps[splitted_name[0]]
            #             pipe_element = stacking_element.elements[splitted_name[1]]
            #         else:
            #             pipe_element = self.optimum_pipe.named_steps[child_name]
            #         pipe_element.set_params(**child_config)
            #         pipe_element.is_final_fit = True

            # self.__distribute_cv_info_to_hyperpipe_children(reset=True)

            logger.debug(
                'Fitting model with best configuration of outer fold...')
            optimum_pipe.fit(self._validation_X, self._validation_y,
                             **self._validation_kwargs)

            self.result_object.best_config = best_config_outer_fold

            # save test performance
            best_config_performance_mdb = MDBInnerFold()
            best_config_performance_mdb.fold_nr = -99
            best_config_performance_mdb.number_samples_training = self._validation_y.shape[
                0]
            best_config_performance_mdb.number_samples_validation = self._test_y.shape[
                0]
            best_config_performance_mdb.feature_importances = optimum_pipe.feature_importances_

            if self.cross_validaton_info.eval_final_performance:
                # Todo: generate mean and std over outer folds as well. Move these items to the top.
                logger.info(
                    'Calculating best model performance on test set...')

                logger.debug('...scoring test data')
                test_score_mdb = InnerFoldManager.score(
                    optimum_pipe,
                    self._test_X,
                    self._test_y,
                    indices=self.cross_validaton_info.outer_folds[
                        self.outer_fold_id].test_indices,
                    metrics=self.optimization_info.metrics,
                    **self._test_kwargs)

                logger.debug('... scoring training data')

                train_score_mdb = InnerFoldManager.score(
                    optimum_pipe,
                    self._validation_X,
                    self._validation_y,
                    indices=self.cross_validaton_info.outer_folds[
                        self.outer_fold_id].train_indices,
                    metrics=self.optimization_info.metrics,
                    training=True,
                    **self._validation_kwargs)

                best_config_performance_mdb.training = train_score_mdb
                best_config_performance_mdb.validation = test_score_mdb

                print_double_metrics(train_score_mdb.metrics,
                                     test_score_mdb.metrics)
            else:

                def _copy_inner_fold_means(metric_dict):
                    # We copy all mean values from validation to the best config
                    # training
                    train_item_metrics = {}
                    for m in metric_dict:
                        if m.operation == str(self.fold_operation):
                            train_item_metrics[m.metric_name] = m.value
                    train_item = MDBScoreInformation()
                    train_item.metrics_copied_from_inner = True
                    train_item.metrics = train_item_metrics
                    return train_item

                # training
                best_config_performance_mdb.training = _copy_inner_fold_means(
                    best_config_outer_fold.metrics_train)
                # validation
                best_config_performance_mdb.validation = _copy_inner_fold_means(
                    best_config_outer_fold.metrics_test)

            # write best config performance to best config item
            self.result_object.best_config.best_config_score = best_config_performance_mdb

        logger.info('Computations in outer fold {} took {} minutes.'.format(
            self.cross_validaton_info.outer_folds[self.outer_fold_id].fold_nr,
            (datetime.datetime.now() -
             outer_fold_fit_start_time).total_seconds() / 60))
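The fallback branch above (used when eval_final_performance is disabled) simply copies the aggregated inner-fold metric values into the best configuration's training/validation slots instead of re-scoring on a held-out test set. A minimal, self-contained sketch of that step follows; FoldMetric and ScoreInfo are simplified stand-ins for the PHOTON MDB classes, and the fold_operation string stands in for str(self.fold_operation) in the original.

from dataclasses import dataclass, field

@dataclass
class FoldMetric:
    operation: str      # aggregation used, e.g. "mean"
    metric_name: str    # e.g. "accuracy"
    value: float

@dataclass
class ScoreInfo:
    metrics: dict = field(default_factory=dict)
    metrics_copied_from_inner: bool = False

def copy_inner_fold_means(metric_list, fold_operation="mean"):
    # keep only the aggregated values and repackage them as a score item,
    # flagged so downstream code knows they were copied from the inner folds
    item = ScoreInfo(metrics_copied_from_inner=True)
    item.metrics = {m.metric_name: m.value
                    for m in metric_list if m.operation == fold_operation}
    return item

# usage: fill the best-config "validation" slot from the mean inner-fold test metrics
inner_metrics = [FoldMetric("mean", "accuracy", 0.82), FoldMetric("std", "accuracy", 0.05)]
print(copy_inner_fold_means(inner_metrics).metrics)   # only the "mean" entry survives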
Example #29
0
    def _add_atlas_to_library(self,
                              atlas_name,
                              target_affine=None,
                              target_shape=None,
                              mask_threshold=None):
        # Todo: find solution for multiprocessing spamming
        # print('Adding atlas to library: {} - Shape {} - Affine {} - Threshold {}'.format(atlas_name,
        #                                                                                        target_shape,
        #                                                                                        target_affine,
        #                                                                                        mask_threshold))

        # load atlas object from photon_atlasses
        if atlas_name in self.photon_atlases.keys():
            original_atlas_object = self.photon_atlases[atlas_name]
        else:
            logger.debug("Checking custom atlas")
            original_atlas_object = self._check_custom_atlas(atlas_name)

        # now create new atlas object with different affine, shape and mask_threshold
        atlas_object = AtlasObject(
            name=original_atlas_object.name,
            path=original_atlas_object.path,
            labels_file=original_atlas_object.labels_file,
            mask_threshold=mask_threshold,
            affine=target_affine,
            shape=target_shape,
        )

        # load atlas
        img = image.load_img(atlas_object.path)
        resampled_img = self._resample(img,
                                       target_affine=target_affine,
                                       target_shape=target_shape)
        atlas_object.atlas = resampled_img
        atlas_object.map = np.asarray(atlas_object.atlas.get_data())

        # apply mask threshold
        if mask_threshold is not None:
            atlas_object.map[atlas_object.map < mask_threshold] = 0
            atlas_object.map = atlas_object.map.astype(int)

        # now get indices
        atlas_object.indices = list(np.unique(atlas_object.map))

        # check labels
        if Path(atlas_object.labels_file).is_file():  # if we have a file with indices and labels
            labels = pd.read_table(atlas_object.labels_file, header=None)
            labels_dict = pd.Series(labels.iloc[:, 1].values,
                                    index=labels.iloc[:, 0]).to_dict()

            # check if background has been defined in labels.txt
            if 0 not in labels_dict.keys() and 0 in atlas_object.indices:
                # add 0 as background
                labels_dict[0] = "Background"

            # check if map indices correspond with indices in the labels file
            if sorted(atlas_object.indices) != sorted(labels_dict.keys()):
                logger.error("""
                The indices in the map image ARE NOT the same as those in your *_labels.txt! Ignoring *_labels.txt.
                MapImage:
                {}
                File:
                {}
                """.format(str(sorted(atlas_object.indices)),
                           str(sorted(labels_dict.keys()))))

                atlas_object.roi_list = [
                    RoiObject(index=i,
                              label=str(i),
                              size=np.sum(i == atlas_object.map))
                    for i in atlas_object.indices
                ]
            else:
                for i in range(len(atlas_object.indices)):
                    roi_index = atlas_object.indices[i]
                    new_roi = RoiObject(
                        index=roi_index,
                        label=labels_dict[roi_index].replace("\n", ""),
                        size=np.sum(roi_index == atlas_object.map),
                    )
                    atlas_object.roi_list.append(new_roi)

        else:  # if we don't have a labels file, we just use str(indices) as labels
            atlas_object.roi_list = [
                RoiObject(index=i,
                          label=str(i),
                          size=np.sum(i == atlas_object.map))
                for i in atlas_object.indices
            ]

        # check for empty ROIs and create roi mask
        for roi in atlas_object.roi_list:

            if roi.size == 0:
                continue

            roi.mask = image.new_img_like(atlas_object.path,
                                          atlas_object.map == roi.index)

            # check if roi is empty
            if np.sum(roi.mask.dataobj != 0) == 0:
                roi.is_empty = True

        # finally add atlas to atlas library
        self.library[(atlas_name, str(target_affine), str(target_shape),
                      str(mask_threshold))] = atlas_object
        logger.debug("BrainAtlas: Done adding atlas to library!")
Example #30
0
    def objective_function(self, current_config):
        if current_config is None:
            return
        logger.clean_info(
            '---------------------------------------------------------------------------------------------------------------'
        )
        self.tested_config_counter += 1

        if hasattr(self.optimizer, 'ask_for_pipe'):
            pipe_ctor = self.optimizer.ask_for_pipe()
        else:
            pipe_ctor = self.copy_pipe_fnc

        # self.__distribute_cv_info_to_hyperpipe_children(reset=True, config_counter=tested_config_counter)

        hp = InnerFoldManager(pipe_ctor,
                              current_config,
                              self.optimization_info,
                              self.cross_validaton_info,
                              self.outer_fold_id,
                              self.constraint_objects,
                              cache_folder=self.cache_folder,
                              cache_updater=self.cache_updater)

        # Test the configuration cross validated by inner_cv object
        current_config_mdb = hp.fit(self._validation_X, self._validation_y,
                                    **self._validation_kwargs)
        current_config_mdb.config_nr = self.tested_config_counter

        if not current_config_mdb.config_failed:
            metric_train = MDBHelper.get_metric(
                current_config_mdb, self.fold_operation,
                self.optimization_info.best_config_metric)
            metric_test = MDBHelper.get_metric(
                current_config_mdb,
                self.fold_operation,
                self.optimization_info.best_config_metric,
                train=False)

            if metric_train is None or metric_test is None:
                raise Exception(
                    "Config did not fail, but did not return any metrics either!"
                )
            config_performance = (metric_train, metric_test)
            if self.best_metric_yet is None:
                self.best_metric_yet = config_performance
                self.current_best_config = current_config_mdb
            else:
                # check if we have the next superstar around that exceeds any old performance
                if self.optimization_info.maximize_metric:
                    if metric_test > self.best_metric_yet[1]:
                        self.best_metric_yet = config_performance
                        self.current_best_config.save_memory()
                        self.current_best_config = current_config_mdb
                    else:
                        current_config_mdb.save_memory()
                else:
                    if metric_test < self.best_metric_yet[1]:
                        self.best_metric_yet = config_performance
                        self.current_best_config.save_memory()
                        self.current_best_config = current_config_mdb
                    else:
                        current_config_mdb.save_memory()

            # Print Result for config
            computation_duration = current_config_mdb.computation_end_time - current_config_mdb.computation_start_time
            logger.info('Computed configuration ' +
                        str(self.tested_config_counter) + "/" +
                        str(self.max_nr_of_configs) + " in " +
                        str(computation_duration))
            logger.info("Performance:             " +
                        self.optimization_info.best_config_metric +
                        " - Train: " + "%.4f" % config_performance[0] +
                        ", Validation: " + "%.4f" % config_performance[1])
            logger.info("Best Performance So Far: " +
                        self.optimization_info.best_config_metric +
                        " - Train: " + "%.4f" % self.best_metric_yet[0] +
                        ", Validation: " + "%.4f" % self.best_metric_yet[1])
        else:
            config_performance = (-1, -1)
            # Print Result for config
            logger.debug('...failed:')
            logger.error(current_config_mdb.config_error)

        # add config to result tree
        self.result_object.tested_config_list.append(current_config_mdb)

        # 3. inform optimizer about performance
        logger.debug(
            "Telling hyperparameter optimizer about recent performance.")
        if isinstance(self.optimizer, PhotonSlaveOptimizer):
            self.optimizer.tell(current_config, config_performance)
        logger.debug("Asking hyperparameter optimizer for new config.")

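        # convention (assumption): the calling optimizer minimizes the returned value,
        # so a metric that should be maximized is converted to 1 - validation score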
        if self.optimization_info.maximize_metric:
            return 1 - config_performance[1]
        else:
            return config_performance[1]
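A tiny standalone sketch of the conversion noted in the comment above: optimizers that call objective_function are assumed to minimize, so a score that should be maximized (e.g. accuracy) is flipped to 1 - score, while an error-like metric (e.g. MAE) is passed through unchanged. The function name below is illustrative, not part of the PHOTON API.

def to_minimization_objective(score: float, maximize_metric: bool) -> float:
    # convert a validation score into a value a minimizing optimizer can consume directly
    return 1 - score if maximize_metric else score

print(to_minimization_objective(0.87, maximize_metric=True))    # accuracy-like: lower is now better
print(to_minimization_objective(4.20, maximize_metric=False))   # error-like: returned as-is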