Ejemplo n.º 1
0
        def extract_mtf_by_group():
            """Extract metafeatures one group at a time, ordered by name.

            A separate MFE model is fitted for every entry of ``mtf_groups``.
            The gathered values are sorted by metafeature name and returned
            without the names.
            """
            names = []
            values = []

            for group in mtf_groups:
                # Precompute only the group currently being extracted.
                precomp = group if precompute else None

                model = MFE(groups=group, summary="mean", random_state=1234)
                fitted = model.fit(
                    X.values,
                    y.values if supervised else None,
                    precomp_groups=precomp,
                )

                group_names, group_vals = fitted.extract()

                names.extend(group_names)
                values.extend(group_vals)

            ordered_pairs = sorted(
                zip(names, values), key=lambda pair: pair[0]
            )
            _, ordered_vals = zip(*ordered_pairs)

            return ordered_vals
Ejemplo n.º 2
0
 def test_extract_with_time_output_dictionary(self):
     """Timed extraction with ``out_type=dict`` yields a 3-entry dict."""
     X, y = load_xy(2)
     model = MFE(groups="general", measure_time="total")
     fitted = model.fit(X.values, y.values)
     output = fitted.extract(out_type=dict)
     assert isinstance(output, dict)
     assert len(output) == 3
Ejemplo n.º 3
0
def extract_from_object(dataset: Union[np.ndarray, list], mfe_params: dict = None) -> Sequence:
    """Fit an MFE model on ``dataset`` and return item 1 of its extraction.

    ``mfe_params`` is forwarded to the ``MFE`` constructor as keyword
    arguments. When it is ``None`` or empty, ``__default_mfe_params`` is
    used instead. Warnings are suppressed for both the fit and the extract
    steps.
    """
    if mfe_params is not None and len(mfe_params) > 0:
        params = mfe_params
    else:
        params = __default_mfe_params

    extractor = MFE(**params)
    extractor.fit(dataset, suppress_warnings=True)
    extracted = extractor.extract(suppress_warnings=True)
    return extracted[1]
Ejemplo n.º 4
0
 def test_extract_with_confidence_output_dictionary_unsupervised(self):
     """Confidence extraction without a target yields a 3-entry dict."""
     X, _ = load_xy(2)
     fitted = MFE(groups="general").fit(X.values)
     extract_kwargs = {"out_type": dict}
     output = fitted.extract_with_confidence(
         3, arguments_extract=extract_kwargs)
     assert isinstance(output, dict)
     assert len(output) == 3
Ejemplo n.º 5
0
    def test_none_cancor(self):
        """Empty canonical-correlation arguments must give all-NaN values."""
        X, y = load_xy(0)

        feat_names = ["w_lambda", "p_trace", "lh_trace", "roy_root"]

        model = MFE(groups=[GNAME], features=feat_names)

        empty_cancor_args = {
            "can_cors": np.array([]),
            "can_cor_eigvals": np.array([]),
        }

        model.fit(X.values, y.values, precomp_groups=None)

        # Every feature receives the very same argument dictionary.
        per_feat_args = dict.fromkeys(feat_names, empty_cancor_args)
        values = model.extract(**per_feat_args, suppress_warnings=True)[1]

        all_nan = np.full(shape=len(values), fill_value=np.nan)
        assert np.allclose(values, all_nan, equal_nan=True)
Ejemplo n.º 6
0
 def test_extract_with_time_output_pandas_dataframe_unsupervised(self):
     """Timed unsupervised DataFrame output has 2 rows and named columns."""
     X, _ = load_xy(2)
     model = MFE(measure_time="total", groups="general")
     fitted = model.fit(X.values)
     expected_names = fitted.extract_metafeature_names()
     frame = fitted.extract(out_type=pd.DataFrame)
     assert isinstance(frame, pd.DataFrame)
     assert frame.values.shape == (2, len(expected_names))
     assert np.array_equal(frame.columns, expected_names)
Ejemplo n.º 7
0
    def test_one_hot_encoding_02(self):
        """'one-hot-full' must create one column per unique attribute value."""
        X, y = utils.load_xy(1)
        model = MFE()
        model.fit(X.values, y.values, transform_cat="one-hot-full")

        # Count the distinct values of each attribute (columns of X).
        unique_counts = [np.unique(column).size for column in X.values.T]
        expected_cols = np.sum(unique_counts)

        assert model._custom_args_ft["N"].shape[1] == expected_cols
Ejemplo n.º 8
0
 def test_extract_output_pandas_dataframe(self):
     """DataFrame output has a single row and metafeature-named columns."""
     X, y = load_xy(2)
     fitted = MFE(groups="general").fit(X.values, y.values)
     expected_names = fitted.extract_metafeature_names()
     frame = fitted.extract(out_type=pd.DataFrame)
     assert isinstance(frame, pd.DataFrame)
     assert frame.values.shape == (1, len(expected_names))
     assert np.array_equal(frame.columns, expected_names)
Ejemplo n.º 9
0
    def test_one_hot_encoding_03(self):
        """With 'one-hot' on dataset 2, "N" keeps the column count of X."""
        X, y = utils.load_xy(2)
        model = MFE()
        model.fit(X.values, y.values, transform_cat="one-hot")

        expected_cols = X.values.shape[1]
        encoded = model._custom_args_ft["N"]

        assert encoded.shape[1] == expected_cols
Ejemplo n.º 10
0
    def test_silhouette_subsampling(self, precompute):
        """Check the 'sil' value obtained with ``sample_frac=0.5``."""
        X, y = load_xy(0)

        precomp = None
        if precompute:
            precomp = GNAME

        model = MFE(groups="clustering", features="sil", random_state=1234)
        fitted = model.fit(X.values, y.values, precomp_groups=precomp)

        sil_args = {"sample_frac": 0.5}
        value = fitted.extract(sil=sil_args)[1]

        assert np.allclose(value, -0.07137712254830314)
Ejemplo n.º 11
0
    def test_threshold_attr_conc(self):
        """Check 'attr_conc' extracted with ``max_attr_num=25``."""
        X, y = load_xy(1)
        model = MFE(features="attr_conc", random_state=1234)
        fitted = model.fit(X.values, y.values, precomp_groups=False)

        attr_conc_args = {"max_attr_num": 25}
        value = fitted.extract(attr_conc=attr_conc_args)[1]

        expected = [0.01682327, 0.04715381]
        assert np.allclose(value, expected, rtol=0.2)
Ejemplo n.º 12
0
 def _get_feats(cls):
     """Return iris metafeature names with every '.' replaced by '_'.

     A default ``MFE`` is fitted on the scikit-learn iris data and target;
     the names come from the first item of the extraction result.
     """
     from sklearn.datasets import load_iris
     from pymfe.mfe import MFE
     iris = load_iris()
     extractor = MFE()
     extractor.fit(iris.data, iris.target)
     extracted = extractor.extract()
     return [name.replace(".", "_") for name in extracted[0]]
Ejemplo n.º 13
0
    def test_one_hot_encoding_04(self):
        """'one-hot' fit must raise ValueError with an extra all-ones column."""
        X, y = utils.load_xy(2)
        mfe = MFE()

        # Append one string-typed column filled with ones to the attributes.
        ones_col = np.ones((y.size, 1), dtype=str)
        features = np.hstack((X.values, ones_col))
        target = y.values

        with pytest.raises(ValueError):
            mfe.fit(X=features, y=target, transform_cat="one-hot")
Ejemplo n.º 14
0
    def test_integration_general(self, dt_id, exp_value, precompute):
        """Compare mean-summarized GNAME metafeatures with ``exp_value``."""
        precomp = None
        if precompute:
            precomp = GNAME

        X, y = load_xy(dt_id)
        model = MFE(groups=[GNAME], summary="mean")
        fitted = model.fit(X.values, y.values, precomp_groups=precomp)
        extracted_vals = fitted.extract()[1]

        assert np.allclose(extracted_vals, exp_value, equal_nan=True)
Ejemplo n.º 15
0
    def test_integration_infotheo(self, dt_id, exp_value, precompute):
        """Function to test all info-theory meta-features.

        Fits an MFE restricted to the ``GNAME`` group with the "mean"
        summary on dataset ``dt_id`` and compares the extracted values with
        ``exp_value`` (NaNs in matching positions compare equal).
        """
        precomp_group = GNAME if precompute else None
        X, y = load_xy(dt_id)
        mfe = MFE(groups=[GNAME], summary="mean").fit(
            X.values, y.values, precomp_groups=precomp_group
        )
        value = mfe.extract()[1]

        # The comparison must be asserted: a bare ``np.allclose(...)`` call
        # discards its result and the test could never fail.
        assert np.allclose(
            value, exp_value, atol=0.001, rtol=0.05, equal_nan=True
        )
Ejemplo n.º 16
0
 def transform(self, X, y):
     """Extract "general" group metafeatures from ``X`` and ``y``.

     A pandas DataFrame ``X`` is converted to an int8 NumPy array and a
     pandas Series ``y`` to an int32 NumPy array; other inputs are passed
     to ``MFE.fit`` unchanged. The metafeatures are summarized with
     kurtosis, min, max, median and skewness.

     Returns the extracted values as a NumPy array with NaN replaced by 0.
     """
     if isinstance(X, pd.DataFrame):
         X = X.to_numpy(dtype='int8')
     if isinstance(y, pd.Series):
         y = y.to_numpy(dtype='int32')
     mfe = MFE(groups=["general"],
               summary=['kurtosis', 'min', 'max', 'median', 'skewness'])
     mfe.fit(X, y)
     ft = mfe.extract()[1]
     # Name the replacement explicitly: the second positional parameter of
     # ``np.nan_to_num`` is ``copy``, so a positional 0 never meant "nan=0".
     return np.nan_to_num(np.array(ft), nan=0.0)
Ejemplo n.º 17
0
    def test_extract_metafeature_names_unsupervised_01(self, groups, summary):
        """Names listed before fitting must match the names extracted."""
        X, _ = utils.load_xy(0)

        model = MFE(groups=groups, summary=summary)

        # Query the names first, while the model has not been fitted yet.
        names_before_fit = model.extract_metafeature_names(supervised=False)

        fitted = model.fit(X.values)
        names_extracted = fitted.extract(suppress_warnings=True)[0]

        assert names_before_fit == tuple(names_extracted)
Ejemplo n.º 18
0
    def test_integration_clustering(self, dt_id, exp_value, precompute):
        """Test all clustering meta-features against ``exp_value``."""
        precomp = None
        if precompute:
            precomp = GNAME

        X, y = load_xy(dt_id)
        model = MFE(groups=[GNAME], summary="mean")
        fitted = model.fit(X.values, y.values, precomp_groups=precomp)
        extracted_vals = fitted.extract()[1]

        assert np.allclose(extracted_vals, exp_value, equal_nan=True)
Ejemplo n.º 19
0
    def test_gray_encoding_missing_value(self):
        """'gray' fit must raise ValueError when X has a NaN entry."""
        X, y = utils.load_xy(1)
        mfe = MFE()

        # Work on a copy so the loaded dataset itself is left untouched.
        features = np.copy(X.values)
        features[5, 0] = np.nan
        target = y.values

        with pytest.raises(ValueError):
            mfe.fit(features, target, transform_cat="gray")
Ejemplo n.º 20
0
        def extract_all_mtf():
            """Extract every group in ``mtf_groups`` with one MFE model.

            Returns the second item of the extraction result (the values).
            """
            model = MFE(groups=mtf_groups, summary="mean", random_state=1234)
            fitted = model.fit(
                X.values,
                y.values if supervised else None,
                precomp_groups=precomp_group,
            )

            return fitted.extract()[1]
Ejemplo n.º 21
0
    def test_integration_complexity(self, dt_id, exp_value, precompute):
        """Test all complexity meta-features against ``exp_value``."""
        precomp = None
        if precompute:
            precomp = GNAME

        X, y = load_xy(dt_id)

        model = MFE(groups=[GNAME], summary="mean", random_state=1234)
        model.fit(X.values, y.values, precomp_groups=precomp)

        extracted_vals = model.extract()[1]

        assert np.allclose(
            extracted_vals, exp_value, equal_nan=True, rtol=0.025
        )
Ejemplo n.º 22
0
    def test_verbose(self, capsys):
        """Verbose extraction must print the expected number of lines."""
        X, y = load_xy(0)
        feat_names = ["freq_class", "mean", "class_conc", "one_nn", "nodes"]
        fitted = MFE(features=feat_names).fit(X=X.values, y=y.values)
        fitted.extract(verbose=True)
        stdout_text = capsys.readouterr().out

        # Expected number of messages in verbose mode of mtf extraction,
        # measured as the number of newlines written to stdout.
        expected_newlines = 21

        assert stdout_text.count("\n") == expected_newlines
Ejemplo n.º 23
0
    def test_integration_model_based(self, dt_id, exp_value, precompute):
        """Test all model-based meta-features against ``exp_value``."""
        precomp = None
        if precompute:
            precomp = GNAME

        X, y = load_xy(dt_id)

        model = MFE(groups=[GNAME], summary="mean", random_state=1234)
        model.fit(X.values, y.values, precomp_groups=precomp)

        extracted_vals = model.extract()[1]

        assert np.allclose(extracted_vals, exp_value, equal_nan=True)
Ejemplo n.º 24
0
    def test_error__set_data_numeric(self):
        """``_set_data_numeric`` must raise TypeError in both scenarios.

        Scenario 1: a freshly built MFE on which ``X`` was never assigned.
        Scenario 2: an MFE whose ``X`` is an empty NumPy array.
        """
        # Setup stays outside ``pytest.raises`` so that only the call under
        # test can satisfy the expected TypeError.
        mfe = MFE()
        with pytest.raises(TypeError):
            mfe._set_data_numeric(True)

        mfe = MFE()
        mfe.X = np.array([])
        with pytest.raises(TypeError):
            mfe._set_data_numeric(True)
Ejemplo n.º 25
0
    def test_extract_with_confidence_random_state1(self):
        """Two runs with the same ``random_state`` must match entirely."""
        X, y = utils.load_xy(2)

        def run_extraction():
            # Build, fit and extract from scratch with the fixed seed.
            model = MFE(features=["mean", "sd"], random_state=16)
            fitted = model.fit(X=X.values, y=y.values)
            return fitted.extract_with_confidence(sample_num=3)

        _, vals_first, conf_first = run_extraction()
        _, vals_second, conf_second = run_extraction()

        assert np.allclose(vals_first, vals_second)
        assert np.allclose(conf_first, conf_second)
Ejemplo n.º 26
0
    def test_extract_with_confidence_random_state3(self):
        """Without ``random_state``, seeding NumPy must not fix the intervals.

        The extracted values of both runs must match, while at least one
        confidence-interval entry must differ.
        """
        X, y = utils.load_xy(2)

        def run_extraction():
            # Reseed the global NumPy generator before each run.
            np.random.seed(1234)
            model = MFE(features=["mean", "sd"])
            fitted = model.fit(X=X.values, y=y.values)
            return fitted.extract_with_confidence(sample_num=3)

        _, vals_first, conf_first = run_extraction()
        _, vals_second, conf_second = run_extraction()

        assert np.allclose(vals_first, vals_second)
        assert np.any(~np.isclose(conf_first, conf_second))
Ejemplo n.º 27
0
    def test_ft_methods_general(self, dt_id, ft_name, exp_value, precompute):
        """Function to test each meta-feature belongs to general group.

        Fits an MFE restricted to ``ft_name`` on dataset ``dt_id`` and
        compares the extracted values with ``exp_value``. When ``exp_value``
        is ``np.nan``, only the first extracted value is checked for NaN.
        """
        precomp_group = GNAME if precompute else None
        X, y = load_xy(dt_id)
        mfe = MFE(groups=[GNAME], features=[ft_name]).fit(
            X.values, y.values, precomp_groups=precomp_group
        )
        value = mfe.extract()[1]

        if exp_value is np.nan:
            # NaN is tested by value: an identity check would only pass if
            # the extractor returned the ``np.nan`` singleton object itself.
            assert np.isnan(value[0])

        else:
            assert np.allclose(value, exp_value)
Ejemplo n.º 28
0
    def test_roy_largest_root(self, dt_id, exp_value, precompute, criterion):
        """Check 'roy_root' extracted with the given ``criterion``."""
        precomp = None
        if precompute:
            precomp = GNAME

        X, y = load_xy(dt_id)
        model = MFE(groups=[GNAME], features="roy_root")
        fitted = model.fit(X.values, y.values, precomp_groups=precomp)

        roy_args = {"criterion": criterion}
        extracted_vals = fitted.extract(roy_root=roy_args)[1]

        assert np.allclose(
            extracted_vals, exp_value, atol=0.001, rtol=0.05, equal_nan=True
        )
Ejemplo n.º 29
0
    def test_integration_statistical(self, dt_id, exp_value, precompute):
        """Test all statistical meta-features simultaneously."""
        precomp = None
        if precompute:
            precomp = GNAME

        X, y = load_xy(dt_id)
        model = MFE(groups=[GNAME], summary="mean")
        fitted = model.fit(X.values, y.values, precomp_groups=precomp)
        extracted_vals = fitted.extract()[1]

        assert np.allclose(
            extracted_vals, exp_value, atol=0.001, rtol=0.05, equal_nan=True
        )
Ejemplo n.º 30
0
    def test_extract_metafeature_names_unsupervised_02(self, groups, summary):
        """Names listed after an unsupervised fit must match the extraction."""
        X, _ = utils.load_xy(0)

        model = MFE(groups=groups, summary=summary)

        fitted = model.fit(X.values)
        names_extracted = tuple(fitted.extract(suppress_warnings=True)[0])

        # Note: by default, .extract_metafeature_names should check whether
        # 'y' was fitted or not if .fit was called before. Therefore, here,
        # supervised=True is expected to be ignored and behave like
        # supervised=False.
        names_supervised = model.extract_metafeature_names(supervised=True)
        names_unsupervised = model.extract_metafeature_names(supervised=False)

        assert names_extracted == names_supervised
        assert names_supervised == names_unsupervised