def test_concatenate_dict(self):
    dict_a = {
        "variable_one": np.random.randn(10),
        "variable_two": np.random.randn(15),
    }
    dict_b = {
        "variable_one": np.random.randn(20),
        "variable_two": np.random.randn(20),
    }
    dict_c = {
        "variable_one": np.random.randn(10, 10),
        "variable_two": np.random.randn(15, 15),
    }
    dict_d = {
        "variable_one": np.random.randn(20, 10),
        "variable_two": np.random.randn(20, 15),
    }
    dict_e = {}

    dict_a_b = PhotonDataHelper.join_dictionaries(dict_a, dict_b)
    dict_c_d = PhotonDataHelper.join_dictionaries(dict_c, dict_d)
    dict_e_a = PhotonDataHelper.join_dictionaries(dict_e, dict_a)

    self.assertEqual(len(dict_a_b["variable_one"]), 30)
    self.assertEqual(len(dict_a_b["variable_two"]), 35)
    self.assertEqual(dict_c_d["variable_one"].shape, (30, 10))
    self.assertEqual(dict_c_d["variable_two"].shape, (35, 15))
    self.assertEqual(len(dict_e_a["variable_one"]), 10)
    self.assertEqual(len(dict_e_a["variable_two"]), 15)
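
# The behaviour the assertions above rely on can be summarised in a minimal sketch.
# NOTE: this is not the actual PhotonDataHelper.join_dictionaries implementation, only an
# illustration of the contract the test encodes: per-key concatenation along axis 0, with
# an empty dictionary acting as a neutral element. The name join_dictionaries_sketch and
# the re-import of numpy exist only to keep the example self-contained.
import numpy as np


def join_dictionaries_sketch(dict_a: dict, dict_b: dict) -> dict:
    # an empty dictionary on either side leaves the other one unchanged
    if not dict_a:
        return dict(dict_b)
    if not dict_b:
        return dict(dict_a)
    # otherwise concatenate the arrays per key along the sample axis (axis 0)
    return {key: np.concatenate([dict_a[key], dict_b[key]], axis=0) for key in dict_a}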
def transform(self, X, y=None, **kwargs):
    """
    Generates "new samples" by computing the mean between all (or up to draw_limit) pairs of
    existing samples and appends them to X. The target for each new sample is computed as the
    mean of the constituent targets.

    The pairing is controlled by the instance attributes ``draw_limit`` (in case the full
    number of combinations is > 10k, how many pairs to draw) and ``random_state`` (seed for
    the random sampling of combinations, for reproducibility only).

    :param X: data
    :param y: targets (optional)
    :return: X_new: X and X_augmented; (y_new: the corresponding targets)
    """
    logger.debug("Pairing " + str(self.draw_limit) + " samples...")

    # ensure class balance in the training set if balance_classes is True
    unique_classes = np.unique(y)
    n_pairs = list()
    for label in unique_classes:
        if self.balance_classes:
            n_pairs.append(self.draw_limit - np.sum(y == label))
        else:
            n_pairs.append(self.draw_limit)

    # run get_samples for each class independently
    X_extended = list()
    y_extended = list()
    kwargs_extended = dict()

    for label, limit in zip(unique_classes, n_pairs):
        X_new_class, y_new_class, kwargs_new_class = self._return_samples(
            X[y == label],
            y[y == label],
            PhotonDataHelper.index_dict(kwargs, y == label),
            generator=self.generator,
            distance_metric=self.distance_metric,
            draw_limit=limit,
            rand_seed=self.random_state,
        )
        X_extended.extend(X_new_class)
        y_extended.extend(y_new_class)

        # get the corresponding kwargs
        if kwargs:
            kwargs_extended = PhotonDataHelper.join_dictionaries(
                kwargs_extended, kwargs_new_class)

    return X_extended, y_extended, kwargs_extended
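
# As a rough illustration of what the pairing in the docstring produces, independent of the
# actual generator and distance-metric logic inside _return_samples, a simplified and
# purely hypothetical per-class augmentation could look like the sketch below. It assumes
# X is a 2-D numpy array of samples of one class and y the matching 1-D target vector;
# the function name and signature are illustrative, not part of the library.
import numpy as np
from itertools import combinations


def pairwise_mean_augment(X, y, draw_limit, rand_seed=None):
    # enumerate all sample pairs and, if there are more than draw_limit,
    # randomly draw draw_limit of them (seeded for reproducibility)
    rng = np.random.default_rng(rand_seed)
    pairs = list(combinations(range(len(X)), 2))
    if len(pairs) > draw_limit:
        idx = rng.choice(len(pairs), size=draw_limit, replace=False)
        pairs = [pairs[i] for i in idx]
    # each new sample is the element-wise mean of the two originals;
    # the new target is the mean of the constituent targets
    X_new = np.array([(X[i] + X[j]) / 2.0 for i, j in pairs])
    y_new = np.array([(y[i] + y[j]) / 2.0 for i, j in pairs])
    return X_new, y_new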
def load_or_save_cached_data(self, name, X, y, kwargs, transformer, fit=False,
                             needed_for_further_computation=False, initial_X=None):
    if not self.single_subject_caching:
        # if we cache group-wise, a single cache lookup is enough
        if self.skip_loading and not needed_for_further_computation:
            # check if the data has already been calculated
            if self.cache_man.check_cache(name):
                # if so, do nothing
                return X, y, kwargs
            else:
                # otherwise, do the calculation and save it
                cached_result = None
        else:
            start_time_for_loading = datetime.datetime.now()
            cached_result = self.cache_man.load_cached_data(name)

        if cached_result is None:
            X, y, kwargs = self._do_timed_fit_transform(name, transformer, fit, X, y, **kwargs)

            start_time_saving = datetime.datetime.now()
            self.cache_man.save_data_to_cache(name, (X, y, kwargs))
            saving_duration = (datetime.datetime.now() - start_time_saving).total_seconds()
            self.time_monitor['transform_cached'].append((name, saving_duration, 1))
        else:
            X, y, kwargs = cached_result[0], cached_result[1], cached_result[2]
            loading_duration = (datetime.datetime.now() - start_time_for_loading).total_seconds()
            n = PhotonDataHelper.find_n(X)
            self.time_monitor['transform_cached'].append((name, loading_duration, n))

        return X, y, kwargs
    else:
        # if we cache subject-wise, we need to iterate and collect the results
        processed_X, processed_y, processed_kwargs = list(), list(), dict()
        X_uncached, y_uncached, kwargs_uncached, initial_X_uncached = list(), list(), dict(), list()
        list_of_idx_cached, list_of_idx_non_cached = list(), list()

        nr = PhotonDataHelper.find_n(X)
        for start, stop in PhotonDataHelper.chunker(nr, 1):
            # split the data into single entities; the key is derived from the first element (= PATH to file)
            X_key, _, _ = PhotonDataHelper.split_data(initial_X, None, {}, start, stop)
            X_batched, y_batched, kwargs_dict_batched = PhotonDataHelper.split_data(X, y, kwargs, start, stop)

            self.cache_man.update_single_subject_state_info(X_key)

            # check if this item has already been processed
            if self.cache_man.check_cache(name):
                list_of_idx_cached.append(start)
            else:
                list_of_idx_non_cached.append(start)
                X_uncached = PhotonDataHelper.stack_data_vertically(X_uncached, X_batched)
                y_uncached = PhotonDataHelper.stack_data_vertically(y_uncached, y_batched)
                initial_X_uncached = PhotonDataHelper.stack_data_vertically(initial_X_uncached, X_key)
                kwargs_uncached = PhotonDataHelper.join_dictionaries(kwargs_uncached, kwargs_dict_batched)

        # now we know which part can be loaded from the cache and which part has to be transformed:
        # first apply the transformation to the uncached group, then save it single-subject-wise
        if len(list_of_idx_non_cached) > 0:

            # apply the transformation group-wise
            new_group_X, new_group_y, new_group_kwargs = self._do_timed_fit_transform(
                name, transformer, fit, X_uncached, y_uncached, **kwargs_uncached)

            # then save each item individually
            nr = PhotonDataHelper.find_n(new_group_X)
            for start in range(nr):
                # split the data into single entities
                X_batched, y_batched, kwargs_dict_batched = PhotonDataHelper.split_data(
                    new_group_X, new_group_y, new_group_kwargs, start, start)
                X_key, _, _ = PhotonDataHelper.split_data(initial_X_uncached, None, {}, start, start)

                # we save the data in relation to the input path (X_key = hash(input X))
                self.cache_man.update_single_subject_state_info(X_key)

                start_time_saving = datetime.datetime.now()
                self.cache_man.save_data_to_cache(name, (X_batched, y_batched, kwargs_dict_batched))
                saving_duration = (datetime.datetime.now() - start_time_saving).total_seconds()
                self.time_monitor['transform_cached'].append((name, saving_duration, 1))

            # we only need to collect the data when we actually want to load it;
            # we can skip that step if we only want it to end up in the cache (case: parallelisation)
            if not self.skip_loading or needed_for_further_computation:
                # stack results
                processed_X, processed_y, processed_kwargs = new_group_X, new_group_y, new_group_kwargs

        # afterwards load everything that has been cached
        if len(list_of_idx_cached) > 0:
            if not self.skip_loading or needed_for_further_computation:
                for cache_idx in list_of_idx_cached:
                    # we identify the data according to the input path (X before any transformation)
                    self.cache_man.update_single_subject_state_info([initial_X[cache_idx]])

                    # time the loading of the cached item
                    start_time_for_loading = datetime.datetime.now()
                    transformed_X, transformed_y, transformed_kwargs = self.cache_man.load_cached_data(name)
                    loading_duration = (datetime.datetime.now() - start_time_for_loading).total_seconds()
                    self.time_monitor['transform_cached'].append(
                        (name, loading_duration, PhotonDataHelper.find_n(X)))

                    processed_X, processed_y, processed_kwargs = PhotonDataHelper.join_data(
                        processed_X, transformed_X,
                        processed_y, transformed_y,
                        processed_kwargs, transformed_kwargs)

        logger.debug(name + " loaded " + str(len(list_of_idx_cached)) + " items from cache and computed "
                     + str(len(list_of_idx_non_cached)))

        if not self.skip_loading or needed_for_further_computation:
            # now sort the data back into its original order
            processed_X, processed_y, processed_kwargs = PhotonDataHelper.resort_splitted_data(
                processed_X, processed_y, processed_kwargs,
                PhotonDataHelper.stack_data_vertically(list_of_idx_cached, list_of_idx_non_cached))

        return processed_X, processed_y, processed_kwargs
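
# The method above only relies on four calls of the cache manager: check_cache,
# load_cached_data, save_data_to_cache and update_single_subject_state_info. The class
# below is a hypothetical, in-memory stand-in that illustrates this interface; it is NOT
# the real PHOTONAI cache manager, which persists results to disk and hashes the pipeline
# configuration. It only shows how the (name, subject-state) combination acts as the key.
class InMemoryCacheManagerSketch:
    def __init__(self):
        self._store = {}
        self._state = None

    def update_single_subject_state_info(self, X_key):
        # remember which single subject (input path / hash of the input X) the
        # following check/load/save calls refer to
        self._state = str(X_key)

    def check_cache(self, name):
        # True if a result for this transformer name and current subject state exists
        return (name, self._state) in self._store

    def save_data_to_cache(self, name, data):
        self._store[(name, self._state)] = data

    def load_cached_data(self, name):
        # returns None when nothing is cached, mirroring the "compute and save" branch above
        return self._store.get((name, self._state))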