Python PhotonDataHelper.chunker Beispiele

Programmiersprache: Python

Namespace / Paketname: photonai.helper.helper

Klasse / Typ: PhotonDataHelper

Methode / Funktion: chunker

Beispiele auf hotexamples.com: 2

Python PhotonDataHelper.chunker - 2 Beispiele gefunden. Dies sind die am besten bewerteten Python Beispiele für die photonai.helper.helper.PhotonDataHelper.chunker, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

split_data(9)

find_n(5)

join_dictionaries(3)

stack_data_horizontally(3)

chunker(2)

index_dict(2)

join_data(2)

resort_splitted_data(2)

stack_data_vertically(2)

Beispiel #1

Datei anzeigen

Datei: neuro_branch.py Projekt: nkourkou/photon

    def apply_transform_parallelized(self, X):
        """

        :param X: the data to which the delegate should be applied in parallel
        """

        if self.nr_of_processes > 1:

            jobs_to_do = list()

            # distribute the data equally to all available cores
            number_of_items_to_process = PhotonDataHelper.find_n(X)
            number_of_items_for_each_core = int(
                np.ceil(number_of_items_to_process / self.nr_of_processes))
            logger.info("NeuroBranch " + self.name + ": Using " +
                        str(self.nr_of_processes) + " cores calculating " +
                        str(number_of_items_for_each_core) + " items each")
            for start, stop in PhotonDataHelper.chunker(
                    number_of_items_to_process, number_of_items_for_each_core):
                X_batched, _, _ = PhotonDataHelper.split_data(
                    X, None, {}, start, stop)

                # copy my pipeline
                new_pipe_mr = self.copy_me()
                new_pipe_copy = new_pipe_mr.base_element
                new_pipe_copy.cache_folder = self.base_element.cache_folder
                new_pipe_copy.skip_loading = True
                new_pipe_copy._parallel_use = True

                del_job = dask.delayed(NeuroBranch.parallel_application)(
                    new_pipe_copy, X_batched)
                jobs_to_do.append(del_job)

            dask.compute(*jobs_to_do)

Beispiel #2

Datei anzeigen

    def load_or_save_cached_data(self,
                                 name,
                                 X,
                                 y,
                                 kwargs,
                                 transformer,
                                 fit=False,
                                 needed_for_further_computation=False,
                                 initial_X=None):
        if not self.single_subject_caching:
            # if we do it group-wise then its easy
            if self.skip_loading and not needed_for_further_computation:
                # check if data is already calculated
                if self.cache_man.check_cache(name):
                    # if so, do nothing
                    return X, y, kwargs
                else:
                    # otherwise, do the calculation and save it
                    cached_result = None
            else:
                start_time_for_loading = datetime.datetime.now()
                cached_result = self.cache_man.load_cached_data(name)

            if cached_result is None:
                X, y, kwargs = self._do_timed_fit_transform(
                    name, transformer, fit, X, y, **kwargs)

                start_time_saving = datetime.datetime.now()
                self.cache_man.save_data_to_cache(name, (X, y, kwargs))
                saving_duration = (datetime.datetime.now() -
                                   start_time_saving).total_seconds()
                self.time_monitor['transform_cached'].append(
                    (name, saving_duration, 1))
            else:
                X, y, kwargs = cached_result[0], cached_result[
                    1], cached_result[2]
                loading_duration = (datetime.datetime.now() -
                                    start_time_for_loading).total_seconds()
                n = PhotonDataHelper.find_n(X)
                self.time_monitor['transform_cached'].append(
                    (name, loading_duration, n))
            return X, y, kwargs
        else:
            # if we do it subject-wise we need to iterate and collect the results
            processed_X, processed_y, processed_kwargs = list(), list(), dict()
            X_uncached, y_uncached, kwargs_uncached, initial_X_uncached = list(
            ), list(), dict(), list()
            list_of_idx_cached, list_of_idx_non_cached = list(), list()

            nr = PhotonDataHelper.find_n(X)
            for start, stop in PhotonDataHelper.chunker(nr, 1):
                # split data in single entities, find key from first element = PATH to file
                X_key, _, _ = PhotonDataHelper.split_data(
                    initial_X, None, {}, start, stop)
                X_batched, y_batched, kwargs_dict_batched = PhotonDataHelper.split_data(
                    X, y, kwargs, start, stop)
                self.cache_man.update_single_subject_state_info(X_key)

                # check if item has been processed
                if self.cache_man.check_cache(name):
                    list_of_idx_cached.append(start)
                else:
                    list_of_idx_non_cached.append(start)
                    X_uncached = PhotonDataHelper.stack_data_vertically(
                        X_uncached, X_batched)
                    y_uncached = PhotonDataHelper.stack_data_vertically(
                        y_uncached, y_batched)
                    initial_X_uncached = PhotonDataHelper.stack_data_vertically(
                        initial_X_uncached, X_key)
                    kwargs_uncached = PhotonDataHelper.join_dictionaries(
                        kwargs_uncached, kwargs_dict_batched)

            # now we know which part can be loaded and which part should be transformed
            # first apply the transformation to the group, then save it single-subject-wise
            if len(list_of_idx_non_cached) > 0:

                # apply transformation groupwise
                new_group_X, new_group_y, new_group_kwargs = self._do_timed_fit_transform(
                    name, transformer, fit, X_uncached, y_uncached,
                    **kwargs_uncached)

                # then save it single
                nr = PhotonDataHelper.find_n(new_group_X)
                for start in range(nr):
                    # split data in single entities
                    X_batched, y_batched, kwargs_dict_batched = PhotonDataHelper.split_data(
                        new_group_X, new_group_y, new_group_kwargs, start,
                        start)
                    X_key, _, _ = PhotonDataHelper.split_data(
                        initial_X_uncached, None, {}, start, start)
                    # we save the data in relation to the input path (X_key = hash(input X))
                    self.cache_man.update_single_subject_state_info(X_key)

                    start_time_saving = datetime.datetime.now()
                    self.cache_man.save_data_to_cache(
                        name, (X_batched, y_batched, kwargs_dict_batched))
                    saving_duration = (datetime.datetime.now() -
                                       start_time_saving).total_seconds()
                    self.time_monitor['transform_cached'].append(
                        (name, saving_duration, 1))

                # we need to collect the data only when we want to load them
                # we can skip that process if we only want them to get into the cache (case: parallelisation)
                if not self.skip_loading or needed_for_further_computation:
                    # stack results
                    processed_X, processed_y, processed_kwargs = new_group_X, new_group_y, new_group_kwargs

            # afterwards load everything that has been cached
            if len(list_of_idx_cached) > 0:
                if not self.skip_loading or needed_for_further_computation:
                    for cache_idx in list_of_idx_cached:
                        # we identify the data according to the input path (X before any transformation)
                        self.cache_man.update_single_subject_state_info(
                            [initial_X[cache_idx]])

                        # time the loading of the cached item
                        start_time_for_loading = datetime.datetime.now()
                        transformed_X, transformed_y, transformed_kwargs = self.cache_man.load_cached_data(
                            name)
                        loading_duration = (
                            datetime.datetime.now() -
                            start_time_for_loading).total_seconds()
                        self.time_monitor['transform_cached'].append(
                            (name, loading_duration,
                             PhotonDataHelper.find_n(X)))

                        processed_X, processed_y, processed_kwargs = PhotonDataHelper.join_data(
                            processed_X, transformed_X, processed_y,
                            transformed_y, processed_kwargs,
                            transformed_kwargs)

            logger.debug(name + " loaded " + str(len(list_of_idx_cached)) +
                         " items from cache and computed " +
                         str(len(list_of_idx_non_cached)))
            if not self.skip_loading or needed_for_further_computation:
                # now sort the data in the correct order again
                processed_X, processed_y, processed_kwargs = PhotonDataHelper.resort_splitted_data(
                    processed_X, processed_y, processed_kwargs,
                    PhotonDataHelper.stack_data_vertically(
                        list_of_idx_cached, list_of_idx_non_cached))

            return processed_X, processed_y, processed_kwargs