예제 #1
0
 def test_extract_with_confidence_output_dictionary_unsupervised(self):
     """Check the dict output of ``extract_with_confidence`` without target.

     The extractor is fitted with the attributes only, and the result is
     requested as a ``dict``, which is expected to hold exactly three
     entries.
     """
     attributes, _ = load_xy(2)
     fitted = MFE(groups="general").fit(attributes.values)
     extract_kwargs = {"out_type": dict}
     output = fitted.extract_with_confidence(
         3, arguments_extract=extract_kwargs)
     assert isinstance(output, dict)
     assert len(output) == 3
예제 #2
0
    def test_none_cancor(self):
        """Check that empty canonical correlation inputs give only NaN values.

        Each selected feature receives empty ``can_cors`` and
        ``can_cor_eigvals`` arrays as custom arguments, and every extracted
        value is then expected to be NaN.
        """
        X, y = load_xy(0)

        selected = ["w_lambda", "p_trace", "lh_trace", "roy_root"]

        # The very same argument dictionary is shared by all features.
        empty_cancor_args = {
            "can_cors": np.array([]),
            "can_cor_eigvals": np.array([]),
        }
        per_feature_args = dict.fromkeys(selected, empty_cancor_args)

        extractor = MFE(groups=[GNAME], features=selected)
        extractor.fit(X.values, y.values, precomp_groups=None)

        extracted = extractor.extract(**per_feature_args,
                                      suppress_warnings=True)
        vals = extracted[1]

        all_nan = np.full(len(vals), np.nan)
        assert np.allclose(vals, all_nan, equal_nan=True)
예제 #3
0
    def test_output_lengths_2(self, dt_id, measure_time):
        """Check that the three outputs of a timed extraction match in length.

        The extraction result is unpacked into exactly three sequences, all
        of which must have the same number of items.
        """
        X, y = load_xy(dt_id)
        fitted = MFE(measure_time=measure_time).fit(X=X.values, y=y.values)
        first, second, third = fitted.extract()

        assert len(first) == len(second) == len(third)
예제 #4
0
    def test_verbosity_3(self, verbosity, msg_expected, capsys):
        """Check that something is printed whenever a message is expected.

        Nothing is asserted about the captured output when no message is
        expected.
        """
        X, y = load_xy(0)

        fitted = MFE().fit(X=X.values, y=y.values)
        fitted.extract(verbose=verbosity)

        stdout_text = capsys.readouterr().out
        if msg_expected:
            assert stdout_text
예제 #5
0
    def test_error_transform_num(self):
        """Check that ``transform_num`` rejects invalid ``num_bins`` values.

        An empty string must raise ``TypeError`` and a negative number must
        raise ``ValueError``.
        """
        X, _ = load_xy(0)

        invalid_cases = (
            ("", TypeError),
            (-1, ValueError),
        )

        for bad_num_bins, expected_error in invalid_cases:
            with pytest.raises(expected_error):
                _internal.transform_num(X, num_bins=bad_num_bins)
예제 #6
0
    def test_ft_methods_model_based_02(self, dt_id, ft_name, exp_value,
                                       precompute):
        """Test one model-based meta-feature with custom model hyperparameters.

        The model hyperparameters are overridden through
        ``hypparam_model_dt`` and the extracted value is compared against
        ``exp_value``. An expected NaN is verified with ``np.isnan`` rather
        than with object identity, because a NaN produced by a computation is
        not guaranteed to be the very same object as ``np.nan``.
        """
        precomp_group = GNAME if precompute else None

        X, y = load_xy(dt_id)
        mfe = MFE(
            groups=[GNAME],
            features=[ft_name],
            hypparam_model_dt={
                "max_depth": 5,
                "min_samples_split": 10,
                "criterion": "entropy",
            },
            random_state=1234,
        )

        mfe.fit(X.values, y.values, precomp_groups=precomp_group)

        if precomp_group is None:
            # Note: the precomputation of 'model-based' group is always
            # forced due to the need of the 'dt_model' value
            mfe._precomp_args_ft = {
                "dt_model": mfe._precomp_args_ft.get("dt_model")
            }

        value = mfe.extract()[1]

        # ``is`` only matches the exact ``np.nan`` object, so detect NaN by
        # value on both the expected and the extracted side.
        if isinstance(exp_value, float) and np.isnan(exp_value):
            assert np.isnan(value[0])

        else:
            assert np.allclose(value, exp_value)
예제 #7
0
 def test_extract_with_time_output_dictionary(self):
     """Check the dict output of ``extract`` when total time is measured.

     The returned dictionary is expected to hold exactly three entries.
     """
     X, y = load_xy(2)
     configured = MFE(groups="general", measure_time="total")
     fitted = configured.fit(X.values, y.values)
     output = fitted.extract(out_type=dict)
     assert isinstance(output, dict)
     assert len(output) == 3
예제 #8
0
 def test_scaling_error_1(self):
     """Check that ``fit`` raises ``ValueError`` for an unknown ``rescale``.

     The dataset is loaded outside the ``pytest.raises`` context so that a
     ``ValueError`` raised while loading the data cannot satisfy the test;
     only the ``fit`` call is expected to raise.
     """
     X, y = load_xy(0)
     with pytest.raises(ValueError):
         MFE().fit(X=X.values,
                   y=y.values,
                   rescale="invalid",
                   transform_cat=False)
예제 #9
0
 def test_extract_output_pandas_dataframe(self):
     """Check the ``pandas.DataFrame`` output of ``extract``.

     The frame must have a single row and one column per name returned by
     ``extract_metafeature_names``, in the same order.
     """
     X, y = load_xy(2)
     fitted = MFE(groups="general").fit(X.values, y.values)
     mtf_names = fitted.extract_metafeature_names()
     frame = fitted.extract(out_type=pd.DataFrame)
     assert isinstance(frame, pd.DataFrame)
     assert frame.values.shape == (1, len(mtf_names))
     assert np.array_equal(frame.columns, mtf_names)
예제 #10
0
 def test_extract_with_time_output_pandas_dataframe_unsupervised(self):
     """Check the timed ``pandas.DataFrame`` output without a target.

     With ``measure_time="total"`` the frame must have two rows and one
     column per name returned by ``extract_metafeature_names``, in the same
     order.
     """
     attributes, _ = load_xy(2)
     configured = MFE(measure_time="total", groups="general")
     fitted = configured.fit(attributes.values)
     mtf_names = fitted.extract_metafeature_names()
     frame = fitted.extract(out_type=pd.DataFrame)
     assert isinstance(frame, pd.DataFrame)
     assert frame.values.shape == (2, len(mtf_names))
     assert np.array_equal(frame.columns, mtf_names)
예제 #11
0
    def test_verbosity_2(self, capsys):
        """Check that nothing is printed to stdout when ``verbose=0``."""
        X, y = load_xy(0)

        fitted = MFE().fit(X=X.values, y=y.values)
        fitted.extract(verbose=0)

        stdout_text = capsys.readouterr().out

        assert not stdout_text
예제 #12
0
    def test_integration_general(self, dt_id, exp_value, precompute):
        """Test all meta-features of the ``GNAME`` group summarized by mean."""
        precomp_group = None
        if precompute:
            precomp_group = GNAME

        X, y = load_xy(dt_id)
        extractor = MFE(groups=[GNAME], summary="mean")
        fitted = extractor.fit(X.values, y.values,
                               precomp_groups=precomp_group)
        observed = fitted.extract()[1]

        assert np.allclose(observed, exp_value, equal_nan=True)
예제 #13
0
    def test_threshold_attr_conc(self):
        """Test ``attr_conc`` when ``max_attr_num`` is passed as an argument.

        The result is compared against reference values with a relative
        tolerance of 20%.
        """
        X, y = load_xy(1)
        extractor = MFE(features="attr_conc", random_state=1234)
        fitted = extractor.fit(X.values, y.values, precomp_groups=False)

        attr_conc_args = {"max_attr_num": 25}
        observed = fitted.extract(attr_conc=attr_conc_args)[1]

        reference = [0.01682327, 0.04715381]
        assert np.allclose(observed, reference, rtol=0.2)
예제 #14
0
    def test_verbosity_with_confidence(self, verbosity, msg_expected, capsys):
        """Check stdout of ``extract_with_confidence`` for a verbosity level.

        Output must be present exactly when a message is expected.
        """
        X, y = load_xy(2)

        fitted = MFE().fit(X.values, y.values)
        fitted.extract_with_confidence(verbose=verbosity)

        stdout_text = capsys.readouterr().out
        assert bool(msg_expected) == bool(stdout_text)
예제 #15
0
    def test_silhouette_subsampling(self, precompute):
        """Test the ``sil`` feature when ``sample_frac`` is set to 0.5."""
        X, y = load_xy(0)

        precomp_group = None
        if precompute:
            precomp_group = GNAME

        extractor = MFE(groups="clustering", features="sil",
                        random_state=1234)
        fitted = extractor.fit(X.values, y.values,
                               precomp_groups=precomp_group)

        sil_args = {"sample_frac": 0.5}
        observed = fitted.extract(sil=sil_args)[1]

        reference = -0.07137712254830314
        assert np.allclose(observed, reference)
예제 #16
0
    def test_integration_infotheo(self, dt_id, exp_value, precompute):
        """Function to test all info-theory meta-features.

        The extracted values, summarized by mean, must match ``exp_value``
        within the given absolute and relative tolerances (NaN values are
        considered equal to each other).
        """
        precomp_group = GNAME if precompute else None
        X, y = load_xy(dt_id)
        mfe = MFE(groups=[GNAME], summary="mean").fit(
            X.values, y.values, precomp_groups=precomp_group
        )
        value = mfe.extract()[1]

        # The comparison result must be asserted; otherwise it is silently
        # discarded and the test can never fail on wrong values.
        assert np.allclose(
            value, exp_value, atol=0.001, rtol=0.05, equal_nan=True
        )
예제 #17
0
    def test_verbosity_from_model(self, verbosity, msg_expected, capsys):
        """Check stdout of ``extract_from_model`` for a verbosity level.

        A decision tree classifier is fitted and handed to the extractor.
        Output must be present exactly when a message is expected.
        """
        X, y = load_xy(2)

        tree = sklearn.tree.DecisionTreeClassifier()
        fitted_tree = tree.fit(X.values, y.values)

        MFE().extract_from_model(fitted_tree, verbose=verbosity)

        stdout_text = capsys.readouterr().out
        assert bool(msg_expected) == bool(stdout_text)
예제 #18
0
    def test_integration_clustering(self, dt_id, exp_value, precompute):
        """Test all clustering meta-features together, summarized by mean."""
        precomp_group = None
        if precompute:
            precomp_group = GNAME

        X, y = load_xy(dt_id)
        extractor = MFE(groups=[GNAME], summary="mean")
        fitted = extractor.fit(X.values, y.values,
                               precomp_groups=precomp_group)
        observed = fitted.extract()[1]

        assert np.allclose(observed, exp_value, equal_nan=True)
예제 #19
0
    def test_output_lengths_2(self, dt_id, scaler, exp_mean, exp_var, exp_min,
                              exp_max):
        """Check column statistics of the numeric data after rescaling.

        The data stored under ``_custom_args_ft["N"]`` after fitting with the
        given ``rescale`` option must match the expected per-column mean,
        variance, minimum and maximum.
        """
        X, y = load_xy(dt_id)
        fitted = MFE().fit(
            X=X.values, y=y.values, rescale=scaler, transform_cat=False)

        scaled = fitted._custom_args_ft["N"]

        assert np.allclose(scaled.mean(axis=0), exp_mean)
        assert np.allclose(scaled.var(axis=0), exp_var)
        assert np.allclose(scaled.min(axis=0), exp_min)
        assert np.allclose(scaled.max(axis=0), exp_max)
예제 #20
0
    def test_integration_model_based(self, dt_id, exp_value, precompute):
        """Test all model-based meta-features together, summarized by mean."""
        precomp_group = None
        if precompute:
            precomp_group = GNAME

        X, y = load_xy(dt_id)

        extractor = MFE(groups=[GNAME], summary="mean", random_state=1234)
        extractor.fit(X.values, y.values, precomp_groups=precomp_group)

        observed = extractor.extract()[1]

        assert np.allclose(observed, exp_value, equal_nan=True)
예제 #21
0
    def test_integration_complexity(self, dt_id, exp_value, precompute):
        """Test all complexity meta-features together, summarized by mean.

        Values are compared with a relative tolerance of 2.5%.
        """
        precomp_group = None
        if precompute:
            precomp_group = GNAME

        X, y = load_xy(dt_id)

        extractor = MFE(groups=[GNAME], summary="mean", random_state=1234)
        extractor.fit(X.values, y.values, precomp_groups=precomp_group)

        observed = extractor.extract()[1]

        assert np.allclose(observed, exp_value, equal_nan=True, rtol=0.025)
예제 #22
0
    def test_verbose(self, capsys):
        """Check how many lines are printed by a verbose extraction.

        The captured stdout of ``extract(verbose=True)`` for five selected
        features is expected to contain exactly 21 newline characters.
        """
        X, y = load_xy(0)

        selected = ["freq_class", "mean", "class_conc", "one_nn", "nodes"]
        fitted = MFE(features=selected).fit(X=X.values, y=y.values)
        fitted.extract(verbose=True)

        stdout_text = capsys.readouterr().out
        printed_lines = stdout_text.count("\n")

        # Expected number of messages in verbose mode of mtf extraction
        assert printed_lines == 21
예제 #23
0
    def test_ft_methods_general(self, dt_id, ft_name, exp_value, precompute):
        """Function to test each meta-feature belongs to general group.

        An expected NaN is verified with ``np.isnan`` rather than with object
        identity, because a NaN produced by a computation is not guaranteed
        to be the very same object as ``np.nan``.
        """
        precomp_group = GNAME if precompute else None
        X, y = load_xy(dt_id)
        mfe = MFE(groups=[GNAME], features=[ft_name]).fit(
            X.values, y.values, precomp_groups=precomp_group
        )
        value = mfe.extract()[1]

        # ``is`` only matches the exact ``np.nan`` object, so detect NaN by
        # value on both the expected and the extracted side.
        if isinstance(exp_value, float) and np.isnan(exp_value):
            assert np.isnan(value[0])

        else:
            assert np.allclose(value, exp_value)
예제 #24
0
    def test_roy_largest_root(self, dt_id, exp_value, precompute, criterion):
        """Test the ``roy_root`` feature for a given ``criterion`` argument."""
        precomp_group = None
        if precompute:
            precomp_group = GNAME

        X, y = load_xy(dt_id)
        extractor = MFE(groups=[GNAME], features="roy_root")
        fitted = extractor.fit(X.values, y.values,
                               precomp_groups=precomp_group)

        roy_root_args = {"criterion": criterion}
        observed = fitted.extract(roy_root=roy_root_args)[1]

        tolerances = {"atol": 0.001, "rtol": 0.05, "equal_nan": True}
        assert np.allclose(observed, exp_value, **tolerances)
예제 #25
0
    def test_ft_methods_itemset(self, dt_id, ft_name, exp_value, precompute):
        """Function to test each meta-feature belongs to itemset group.

        An expected NaN is verified with ``np.isnan`` rather than with object
        identity, because a NaN produced by a computation is not guaranteed
        to be the very same object as ``np.nan``.
        """
        precomp_group = GNAME if precompute else None

        X, y = load_xy(dt_id)
        mfe = MFE(groups=[GNAME], features=[ft_name], random_state=1234)

        mfe.fit(X.values, y.values, precomp_groups=precomp_group)

        value = mfe.extract()[1]

        # ``is`` only matches the exact ``np.nan`` object, so detect NaN by
        # value on both the expected and the extracted side.
        if isinstance(exp_value, float) and np.isnan(exp_value):
            assert np.isnan(value[0])
        else:
            assert np.allclose(value, exp_value, equal_nan=True)
예제 #26
0
    def test_integration_statistical(self, dt_id, exp_value, precompute):
        """Test all statistical meta-features together, summarized by mean."""
        precomp_group = None
        if precompute:
            precomp_group = GNAME

        X, y = load_xy(dt_id)
        extractor = MFE(groups=[GNAME], summary="mean")
        fitted = extractor.fit(X.values, y.values,
                               precomp_groups=precomp_group)
        observed = fitted.extract()[1]

        tolerances = {"atol": 0.001, "rtol": 0.05, "equal_nan": True}
        assert np.allclose(observed, exp_value, **tolerances)
예제 #27
0
    def test_ft_method_relative(self, dt_id, summary, precompute, sample_size,
                                exp_value):
        """Check relative landmarking, with and without subsampling."""
        precomp_group = None
        if precompute:
            precomp_group = "relative"

        X, y = load_xy(dt_id)

        settings = {
            "groups": ["relative"],
            "summary": summary,
            "sample_size": sample_size,
            "random_state": 1234,
        }
        extractor = MFE(**settings)
        extractor.fit(X.values, y.values, precomp_groups=precomp_group)

        _names, observed = extractor.extract()

        assert np.allclose(observed, exp_value)
예제 #28
0
    def test_ft_methods_statistical(self, dt_id, ft_name, exp_value,
                                    precompute):
        """Test a single meta-feature of the statistical group."""
        precomp_group = None
        if precompute:
            precomp_group = GNAME

        X, y = load_xy(dt_id)
        extractor = MFE(groups=[GNAME], features=[ft_name])
        fitted = extractor.fit(X.values, y.values,
                               precomp_groups=precomp_group)
        observed = fitted.extract()[1]

        tolerances = {"atol": 0.001, "rtol": 0.05, "equal_nan": True}
        assert np.allclose(observed, exp_value, **tolerances)
예제 #29
0
    def test_normality_tests(self, dt_id, exp_value, precompute, test,
                             failure):
        """Check the normality tests available in the ``nr_norm`` feature.

        The ``test`` and ``failure`` parameters are forwarded to ``nr_norm``
        as its ``method`` and ``failure`` arguments, respectively.
        """
        precomp_group = None
        if precompute:
            precomp_group = GNAME

        X, y = load_xy(dt_id)
        extractor = MFE(groups=[GNAME], features="nr_norm")
        fitted = extractor.fit(X.values, y.values,
                               precomp_groups=precomp_group)

        nr_norm_args = {"failure": failure, "method": test}
        observed = fitted.extract(nr_norm=nr_norm_args)[1]

        tolerances = {"atol": 0.001, "rtol": 0.05, "equal_nan": True}
        assert np.allclose(observed, exp_value, **tolerances)
예제 #30
0
    def test_parse_valid_metafeatures(self, groups):
        """Check that parsing by group yields exactly the valid metafeatures.

        The names returned by ``parse_by_group`` must form the same set as
        the names returned by ``valid_metafeatures`` for the same groups.
        """
        X, y = load_xy(0)

        settings = {
            "groups": "all",
            "summary": None,
            "sample_size": 0.5,
            "random_state": 1234,
        }
        extractor = MFE(**settings)
        extractor.fit(X.values, y.values)

        extracted = extractor.extract()

        expected_names = extractor.valid_metafeatures(groups=groups)
        parsed_names, _ = extractor.parse_by_group(groups, extracted)

        assert set(parsed_names) == set(expected_names)