Ejemplo n.º 1
0
    def test_postprocessing_valid(self):
        """Check that valid postprocessing methods are detected and applied.

        Three empty lists are handed to ``_internal.post_processing``
        together with ``MFETestClass`` as the custom class; afterwards every
        one of the three lists is expected to be non-empty.
        """
        collected = ([], [], [])

        _internal.post_processing(
            results=collected,
            groups=(),
            custom_class_=MFETestClass,
        )

        # Each list must have received at least one item.
        assert all(len(entries) > 0 for entries in collected)
Ejemplo n.º 2
0
    def test_postprocessing_invalid_2(self):
        """Check that an incorrect postprocessing return value warns.

        ``number_of_lists=2`` is forwarded as an extra keyword argument to
        ``_internal.post_processing``; the call is expected to emit a
        ``UserWarning``.
        """
        collected = ([], [], [])

        with pytest.warns(UserWarning):
            _internal.post_processing(
                results=collected,
                groups=(),
                custom_class_=MFETestClass,
                number_of_lists=2,
            )
Ejemplo n.º 3
0
    def test_postprocessing_invalid_1(self):
        """Check that an exception inside a postprocessing method warns.

        ``raise_exception=True`` is forwarded as an extra keyword argument
        to ``_internal.post_processing``; the call is expected to emit a
        ``UserWarning``.
        """
        collected = ([], [], [])

        with pytest.warns(UserWarning):
            _internal.post_processing(
                results=collected,
                groups=(),
                custom_class_=MFETestClass,
                raise_exception=True,
            )
Ejemplo n.º 4
0
    def test_mem_err_postprocess(self):
        """Check that a memory error inside a postprocessing method warns.

        ``raise_mem_err=True`` is forwarded as an extra keyword argument to
        ``_internal.post_processing``; the call is expected to emit a
        ``UserWarning``.
        """
        collected = ([], [], [])

        with pytest.warns(UserWarning):
            _internal.post_processing(results=collected,
                                      groups=(),
                                      custom_class_=MFETestClass,
                                      raise_mem_err=True)
Ejemplo n.º 5
0
    def extract(
            self,
            remove_nan: bool = True,
            verbose: bool = False,
            enable_parallel: bool = False,
            suppress_warnings: bool = False,
            **kwargs) -> t.Tuple[t.Sequence, ...]:
        """Extract the metafeatures from the previously fitted dataset.

        The feature extraction methods are run first, then the
        postprocessing step is applied to their results, and finally the
        results are sorted by metafeature name.

        Parameters
        ----------
        remove_nan : :obj:`bool`, optional
            If True, remove any non-numeric values from the features before
            summarizing the values produced by the feature extraction
            methods. Note that the summary methods may still remove
            non-numeric values by themselves. In this case, the user must
            modify this behavior using the built-in summary method arguments
            via kwargs, if possible.

        verbose : :obj:`bool`, optional
            If True, print messages related to the metafeature extraction
            process. Warning messages are not affected by this option (see
            the ``suppress_warnings`` argument below).

        enable_parallel : :obj:`bool`, optional
            If True, then the metafeature extraction is done with
            multi-processes. Currently, this argument has no effect (to be
            implemented).

        suppress_warnings : :obj:`bool`, optional
            If True, do not show warnings about unknown user custom
            parameters for feature extraction and summary methods passed via
            kwargs. Note that both feature extraction and summary methods may
            still raise warnings by themselves. In this case, just like the
            ``remove_nan`` situation, the user must suppress them with the
            built-in arguments of these methods via kwargs, if possible.

        kwargs:
            Custom arguments for the feature extraction and summary methods;
            they are also forwarded to the postprocessing step. The expected
            format is the following:

            {``mtd_name``: {``arg_name``: arg_value, ...}, ...}

            In words, each key of ``**kwargs`` is the name of the target
            method which receives the custom arguments, and each value is
            another dictionary mapping that method's argument names to their
            corresponding values. See the ``Examples`` subsection for a
            clearer explanation.

        Returns
        -------
        :obj:`tuple`(:obj:`list`, :obj:`list`) or
        :obj:`tuple`(:obj:`list`, :obj:`list`, :obj:`list`)
            A tuple containing two lists, or three lists if the ``timeopt``
            attribute of this instance is set.

            The first field is the identifiers of each summarized value in
            the form ``feature_name.summary_mtd_name`` (i.e., the feature
            extraction name concatenated with the summary method name,
            separated by a dot).

            The second field is the summarized values.

            The third field, present only when ``timeopt`` is set, is the
            time measurement associated with each value.

            All lists are sorted by identifier and have a 1-1 correspondence
            by index (i.e., the value at index ``i`` in the second list has
            its identifier at the same index in the first list and
            vice-versa).

            Example:
                ([``attr_ent.mean``, ``attr_ent.sd``], [``0.983``, ``0.344``])
                is the return value for the feature ``attr_ent`` summarized
                by both ``mean`` and ``sd`` (standard deviation), giving the
                values ``0.983`` and ``0.344``, respectively.

        Raises
        ------
        TypeError
            If calling ``extract`` method before ``fit`` method (i.e., the
            fitted ``X`` or ``y`` is None).

        Examples
        --------
        Using kwargs. Option 1 to pass ft. extraction custom arguments:

        >>> args = {
        ...     'sd': {'ddof': 2},
        ...     '1NN': {'metric': 'minkowski', 'p': 2},
        ...     'leaves': {'max_depth': 4},
        ... }

        >>> model = MFE().fit(X=data, y=labels)
        >>> result = model.extract(**args)

        Option 2 (note: metafeatures with name starting with numbers are not
        allowed!):

        >>> model = MFE().fit(X=data, y=labels)
        >>> res = model.extract(sd={'ddof': 2}, leaves={'max_depth': 4})

        """
        if self.X is None or self.y is None:
            raise TypeError("Fitted data not found. Call "
                            '"fit" method before "extract".')

        # Re-validate the fitted data unless both parts are numpy arrays.
        data_is_ndarray = (isinstance(self.X, np.ndarray)
                           and isinstance(self.y, np.ndarray))
        if not data_is_ndarray:
            self.X, self.y = _internal.check_data(self.X, self.y)

        if verbose:
            print("Started the metafeature extraction process.")

        collected = self._call_feature_methods(
            remove_nan=remove_nan,
            verbose=verbose,
            enable_parallel=enable_parallel,
            suppress_warnings=suppress_warnings,
            **kwargs)

        _internal.post_processing(
            results=collected,
            groups=self.groups,
            suppress_warnings=suppress_warnings,
            **self._postprocess_args_ft,
            **kwargs)

        if collected and collected[0]:
            # Transpose the parallel lists into rows, order the rows by
            # metafeature name, then transpose back into parallel lists.
            ordered_rows = sorted(zip(*collected), key=lambda row: row[0])
            collected = tuple(list(column) for column in zip(*ordered_rows))

        names, values, times = collected

        if verbose:
            time_type = "average" if self._timeopt_type_is_avg() else "total"
            summary = ("Total of {0} values obtained. Time elapsed "
                       "({1}) = {2:.8f} seconds.").format(
                           len(values), time_type, sum(times))
            print("Metafeature extraction process done.", summary, sep="\n")

        # The time measurements are only returned when timing is enabled.
        if not self.timeopt:
            return names, values

        return names, values, times