def test_extract_with_confidence_output_dictionary_unsupervised(self):
    """Check that ``extract_with_confidence`` can return a dictionary.

    The extractor is fitted without a target, and ``out_type=dict`` is
    forwarded through ``arguments_extract``. The result must be a ``dict``
    holding exactly three entries.
    """
    features, _ = load_xy(2)

    model = MFE(groups="general")
    fitted = model.fit(features.values)

    extract_kwargs = {"out_type": dict}
    output = fitted.extract_with_confidence(
        3, arguments_extract=extract_kwargs
    )

    assert isinstance(output, dict)
    assert len(output) == 3
def test_none_cancor(self):
    """Check that empty canonical correlations produce all-NaN values.

    Each selected feature receives empty ``can_cors`` and
    ``can_cor_eigvals`` arrays as custom arguments, and every extracted
    value is expected to be NaN.
    """
    data, target = load_xy(0)

    ft_names = ["w_lambda", "p_trace", "lh_trace", "roy_root"]
    extractor = MFE(groups=[GNAME], features=ft_names)

    empty_cancor_args = {
        "can_cors": np.array([]),
        "can_cor_eigvals": np.array([]),
    }

    extractor.fit(data.values, target.values, precomp_groups=None)

    # Every feature shares the same (empty) custom-argument dictionary.
    per_feature_args = dict.fromkeys(ft_names, empty_cancor_args)
    output = extractor.extract(**per_feature_args, suppress_warnings=True)
    extracted = output[1]

    all_nan = np.full(shape=len(extracted), fill_value=np.nan)
    assert np.allclose(extracted, all_nan, equal_nan=True)
def test_output_lengths_2(self, dt_id, measure_time):
    """Check that timed extraction returns three equally long sequences."""
    features, target = load_xy(dt_id)

    extractor = MFE(measure_time=measure_time)
    fitted = extractor.fit(X=features.values, y=target.values)

    first, second, third = fitted.extract()

    assert len(first) == len(second) == len(third)
def test_verbosity_3(self, verbosity, msg_expected, capsys):
    """Check that ``extract`` prints output only when it is expected.

    Parameters
    ----------
    verbosity
        Value forwarded to the ``verbose`` argument of ``extract``.
    msg_expected
        Whether any text is expected on standard output.
    capsys
        Pytest fixture used to capture standard output.
    """
    X, y = load_xy(0)

    MFE().fit(X=X.values, y=y.values).extract(verbose=verbosity)

    captured = capsys.readouterr().out

    # Check both directions: output must be present when expected and
    # absent when not expected (the previous one-sided check accepted
    # unexpected output whenever ``msg_expected`` was false).
    assert bool(captured) == bool(msg_expected)
def test_error_transform_num(self):
    """Check that ``transform_num`` rejects invalid ``num_bins`` values."""
    data, _ = load_xy(0)

    # Pairs of (expected exception type, offending ``num_bins`` value).
    invalid_cases = (
        (TypeError, ''),
        (ValueError, -1),
    )

    for expected_error, bad_bins in invalid_cases:
        with pytest.raises(expected_error):
            _internal.transform_num(data, num_bins=bad_bins)
def test_ft_methods_model_based_02(
    self, dt_id, ft_name, exp_value, precompute
):
    """Function to test each meta-feature belongs to model-based group.

    The extractor is built with custom decision-tree hyperparameters
    (``hypparam_model_dt``) and a fixed ``random_state``.

    Parameters
    ----------
    dt_id
        Identifier of the dataset passed to ``load_xy``.
    ft_name
        Name of the single meta-feature to extract.
    exp_value
        Expected value(s); a NaN float means the first extracted value
        must be NaN.
    precompute
        Whether the ``GNAME`` group is requested for precomputation.
    """
    precomp_group = GNAME if precompute else None

    X, y = load_xy(dt_id)
    mfe = MFE(
        groups=[GNAME],
        features=[ft_name],
        hypparam_model_dt={
            "max_depth": 5,
            "min_samples_split": 10,
            "criterion": "entropy",
        },
        random_state=1234,
    )

    mfe.fit(X.values, y.values, precomp_groups=precomp_group)

    if precomp_group is None:
        # Note: the precomputation of 'model-based' group is always
        # forced due to the need of the 'dt_model' value
        mfe._precomp_args_ft = {
            "dt_model": mfe._precomp_args_ft.get("dt_model")
        }

    value = mfe.extract()[1]

    # NaN must be detected by value, not by object identity: a NaN computed
    # at runtime is generally not the ``np.nan`` singleton.
    if isinstance(exp_value, float) and np.isnan(exp_value):
        assert np.isnan(value[0])
    else:
        assert np.allclose(value, exp_value)
def test_extract_with_time_output_dictionary(self):
    """Check dictionary output when ``measure_time="total"`` is set.

    The result must be a ``dict`` holding exactly three entries.
    """
    features, target = load_xy(2)

    model = MFE(groups="general", measure_time="total")
    fitted = model.fit(features.values, target.values)

    output = fitted.extract(out_type=dict)

    assert isinstance(output, dict)
    assert len(output) == 3
def test_scaling_error_1(self):
    """Check that ``fit`` raises ``ValueError`` for an unknown ``rescale``."""
    # Load the data outside the ``raises`` block so that only the ``fit``
    # call can satisfy the expected exception; a ``ValueError`` raised
    # while loading the dataset must not make this test pass.
    X, y = load_xy(0)

    with pytest.raises(ValueError):
        MFE().fit(
            X=X.values, y=y.values, rescale="invalid", transform_cat=False
        )
def test_extract_output_pandas_dataframe(self):
    """Check ``extract`` output when ``out_type`` is ``pd.DataFrame``.

    The frame must have a single row and one column per name returned by
    ``extract_metafeature_names``, in the same order.
    """
    features, target = load_xy(2)

    fitted = MFE(groups="general").fit(features.values, target.values)
    mtf_names = fitted.extract_metafeature_names()

    frame = fitted.extract(out_type=pd.DataFrame)

    assert isinstance(frame, pd.DataFrame)
    assert frame.values.shape == (1, len(mtf_names))
    assert np.array_equal(frame.columns, mtf_names)
def test_extract_with_time_output_pandas_dataframe_unsupervised(self):
    """Check timed, target-free extraction into a ``pd.DataFrame``.

    With ``measure_time="total"`` the frame is expected to have two rows
    and one column per name returned by ``extract_metafeature_names``.
    """
    features, _ = load_xy(2)

    model = MFE(measure_time="total", groups="general")
    fitted = model.fit(features.values)
    mtf_names = fitted.extract_metafeature_names()

    frame = fitted.extract(out_type=pd.DataFrame)

    assert isinstance(frame, pd.DataFrame)
    assert frame.values.shape == (2, len(mtf_names))
    assert np.array_equal(frame.columns, mtf_names)
def test_verbosity_2(self, capsys):
    """Check that ``extract(verbose=0)`` writes nothing to stdout."""
    features, target = load_xy(0)

    fitted = MFE().fit(X=features.values, y=target.values)
    fitted.extract(verbose=0)

    stdout_text = capsys.readouterr().out
    assert not stdout_text
def test_integration_general(self, dt_id, exp_value, precompute):
    """Compare the mean-summarized ``GNAME`` group with expected values."""
    if precompute:
        precomp = GNAME
    else:
        precomp = None

    features, target = load_xy(dt_id)

    model = MFE(groups=[GNAME], summary="mean")
    fitted = model.fit(
        features.values, target.values, precomp_groups=precomp
    )

    extracted = fitted.extract()[1]

    assert np.allclose(extracted, exp_value, equal_nan=True)
def test_threshold_attr_conc(self):
    """Check ``attr_conc`` when ``max_attr_num`` is passed as an argument.

    The extracted values are compared against reference numbers with a
    relative tolerance of 0.2.
    """
    features, target = load_xy(1)

    model = MFE(features="attr_conc", random_state=1234)
    fitted = model.fit(features.values, target.values, precomp_groups=False)

    attr_conc_args = {"max_attr_num": 25}
    extracted = fitted.extract(attr_conc=attr_conc_args)[1]

    reference = [0.01682327, 0.04715381]
    assert np.allclose(extracted, reference, rtol=0.2)
def test_verbosity_with_confidence(self, verbosity, msg_expected, capsys):
    """Check stdout of ``extract_with_confidence`` for a verbosity level.

    Output must be present when ``msg_expected`` is truthy and absent
    otherwise.
    """
    features, target = load_xy(2)

    fitted = MFE().fit(features.values, target.values)
    fitted.extract_with_confidence(verbose=verbosity)

    stdout_text = capsys.readouterr().out

    if msg_expected:
        assert stdout_text
    else:
        assert not stdout_text
def test_silhouette_subsampling(self, precompute):
    """Check the ``sil`` meta-feature extracted with ``sample_frac=0.5``."""
    features, target = load_xy(0)

    if precompute:
        precomp = GNAME
    else:
        precomp = None

    model = MFE(groups="clustering", features="sil", random_state=1234)
    fitted = model.fit(
        features.values, target.values, precomp_groups=precomp
    )

    sil_args = {"sample_frac": 0.5}
    extracted = fitted.extract(sil=sil_args)[1]

    assert np.allclose(extracted, -0.07137712254830314)
def test_integration_infotheo(self, dt_id, exp_value, precompute):
    """Function to test all info-theory meta-features.

    Parameters
    ----------
    dt_id
        Identifier of the dataset passed to ``load_xy``.
    exp_value
        Expected mean-summarized values for the ``GNAME`` group.
    precompute
        Whether the ``GNAME`` group is requested for precomputation.
    """
    precomp_group = GNAME if precompute else None

    X, y = load_xy(dt_id)
    mfe = MFE(groups=[GNAME], summary="mean").fit(
        X.values, y.values, precomp_groups=precomp_group
    )

    value = mfe.extract()[1]

    # The comparison result must be asserted; a bare ``np.allclose`` call
    # discards its result and the test could never fail on wrong values.
    assert np.allclose(
        value, exp_value, atol=0.001, rtol=0.05, equal_nan=True
    )
def test_verbosity_from_model(self, verbosity, msg_expected, capsys):
    """Check stdout of ``extract_from_model`` for a verbosity level.

    Output must be present when ``msg_expected`` is truthy and absent
    otherwise.
    """
    features, target = load_xy(2)

    tree = sklearn.tree.DecisionTreeClassifier()
    fitted_tree = tree.fit(features.values, target.values)

    MFE().extract_from_model(fitted_tree, verbose=verbosity)

    stdout_text = capsys.readouterr().out

    if msg_expected:
        assert stdout_text
    else:
        assert not stdout_text
def test_integration_clustering(self, dt_id, exp_value, precompute):
    """Test all meta-features of the ``GNAME`` group with mean summary."""
    if precompute:
        precomp = GNAME
    else:
        precomp = None

    features, target = load_xy(dt_id)

    model = MFE(groups=[GNAME], summary="mean")
    fitted = model.fit(
        features.values, target.values, precomp_groups=precomp
    )

    extracted = fitted.extract()[1]

    assert np.allclose(extracted, exp_value, equal_nan=True)
def test_output_lengths_2(
    self, dt_id, scaler, exp_mean, exp_var, exp_min, exp_max
):
    """Check column statistics of the numeric data after rescaling.

    After fitting with ``rescale=scaler`` and ``transform_cat=False``, the
    per-column mean, variance, minimum and maximum of the ``"N"`` entry of
    ``_custom_args_ft`` are compared with the expected values.
    """
    # NOTE(review): the method name does not describe what is checked here
    # (rescaling statistics), and another method in this file has the same
    # name; confirm they live in different classes, otherwise one of them
    # shadows the other.
    features, target = load_xy(dt_id)

    fitted = MFE().fit(
        X=features.values,
        y=target.values,
        rescale=scaler,
        transform_cat=False,
    )

    scaled = fitted._custom_args_ft["N"]

    assert np.allclose(scaled.mean(axis=0), exp_mean)
    assert np.allclose(scaled.var(axis=0), exp_var)
    assert np.allclose(scaled.min(axis=0), exp_min)
    assert np.allclose(scaled.max(axis=0), exp_max)
def test_integration_model_based(self, dt_id, exp_value, precompute):
    """Test all meta-features of the ``GNAME`` group with mean summary.

    The extractor uses a fixed ``random_state``.
    """
    if precompute:
        precomp = GNAME
    else:
        precomp = None

    features, target = load_xy(dt_id)

    extractor = MFE(groups=[GNAME], summary="mean", random_state=1234)
    extractor.fit(features.values, target.values, precomp_groups=precomp)

    extracted = extractor.extract()[1]

    assert np.allclose(extracted, exp_value, equal_nan=True)
def test_integration_complexity(self, dt_id, exp_value, precompute):
    """Test all meta-features of the ``GNAME`` group with mean summary.

    Values are compared with a relative tolerance of 0.025.
    """
    if precompute:
        precomp = GNAME
    else:
        precomp = None

    features, target = load_xy(dt_id)

    extractor = MFE(groups=[GNAME], summary="mean", random_state=1234)
    extractor.fit(features.values, target.values, precomp_groups=precomp)

    extracted = extractor.extract()[1]

    assert np.allclose(extracted, exp_value, equal_nan=True, rtol=0.025)
def test_verbose(self, capsys):
    """Check the number of lines printed by ``extract(verbose=True)``."""
    features, target = load_xy(0)

    selected = ["freq_class", "mean", "class_conc", "one_nn", "nodes"]
    fitted = MFE(features=selected).fit(X=features.values, y=target.values)
    fitted.extract(verbose=True)

    stdout_text = capsys.readouterr().out

    # Expected number of messages in verbose mode of mtf extraction
    expected_msg_num = 21
    newline_count = stdout_text.count("\n")

    assert newline_count == expected_msg_num
def test_ft_methods_general(self, dt_id, ft_name, exp_value, precompute):
    """Function to test each meta-feature belongs to general group.

    Parameters
    ----------
    dt_id
        Identifier of the dataset passed to ``load_xy``.
    ft_name
        Name of the single meta-feature to extract.
    exp_value
        Expected value(s); a NaN float means the first extracted value
        must be NaN.
    precompute
        Whether the ``GNAME`` group is requested for precomputation.
    """
    precomp_group = GNAME if precompute else None

    X, y = load_xy(dt_id)
    mfe = MFE(groups=[GNAME], features=[ft_name]).fit(
        X.values, y.values, precomp_groups=precomp_group
    )

    value = mfe.extract()[1]

    # NaN must be detected by value, not by object identity: a NaN computed
    # at runtime is generally not the ``np.nan`` singleton.
    if isinstance(exp_value, float) and np.isnan(exp_value):
        assert np.isnan(value[0])
    else:
        assert np.allclose(value, exp_value)
def test_roy_largest_root(self, dt_id, exp_value, precompute, criterion):
    """Check ``roy_root`` extracted with a given ``criterion`` argument."""
    if precompute:
        precomp = GNAME
    else:
        precomp = None

    features, target = load_xy(dt_id)

    model = MFE(groups=[GNAME], features="roy_root")
    fitted = model.fit(
        features.values, target.values, precomp_groups=precomp
    )

    roy_args = {"criterion": criterion}
    extracted = fitted.extract(roy_root=roy_args)[1]

    assert np.allclose(
        extracted, exp_value, atol=0.001, rtol=0.05, equal_nan=True
    )
def test_ft_methods_itemset(self, dt_id, ft_name, exp_value, precompute):
    """Function to test each meta-feature belongs to itemset group.

    Parameters
    ----------
    dt_id
        Identifier of the dataset passed to ``load_xy``.
    ft_name
        Name of the single meta-feature to extract.
    exp_value
        Expected value(s); a NaN float means the first extracted value
        must be NaN.
    precompute
        Whether the ``GNAME`` group is requested for precomputation.
    """
    precomp_group = GNAME if precompute else None

    X, y = load_xy(dt_id)
    mfe = MFE(groups=[GNAME], features=[ft_name], random_state=1234)
    mfe.fit(X.values, y.values, precomp_groups=precomp_group)

    value = mfe.extract()[1]

    # NaN must be detected by value, not by object identity: a NaN computed
    # at runtime is generally not the ``np.nan`` singleton.
    if isinstance(exp_value, float) and np.isnan(exp_value):
        assert np.isnan(value[0])
    else:
        assert np.allclose(value, exp_value, equal_nan=True)
def test_integration_statistical(self, dt_id, exp_value, precompute):
    """Test all meta-features of the ``GNAME`` group with mean summary.

    Values are compared with ``atol=0.001`` and ``rtol=0.05``.
    """
    if precompute:
        precomp = GNAME
    else:
        precomp = None

    features, target = load_xy(dt_id)

    model = MFE(groups=[GNAME], summary="mean")
    fitted = model.fit(
        features.values, target.values, precomp_groups=precomp
    )

    extracted = fitted.extract()[1]

    assert np.allclose(
        extracted, exp_value, atol=0.001, rtol=0.05, equal_nan=True
    )
def test_ft_method_relative(
    self, dt_id, summary, precompute, sample_size, exp_value
):
    """Check the ``"relative"`` group for a summary and sample size."""
    if precompute:
        precomp = "relative"
    else:
        precomp = None

    features, target = load_xy(dt_id)

    extractor = MFE(
        groups=["relative"],
        summary=summary,
        sample_size=sample_size,
        random_state=1234,
    )
    extractor.fit(features.values, target.values, precomp_groups=precomp)

    _, extracted = extractor.extract()

    assert np.allclose(extracted, exp_value)
def test_ft_methods_statistical(self, dt_id, ft_name, exp_value, precompute):
    """Check a single meta-feature of the ``GNAME`` group.

    Values are compared with ``atol=0.001`` and ``rtol=0.05``.
    """
    if precompute:
        precomp = GNAME
    else:
        precomp = None

    features, target = load_xy(dt_id)

    model = MFE(groups=[GNAME], features=[ft_name])
    fitted = model.fit(
        features.values, target.values, precomp_groups=precomp
    )

    extracted = fitted.extract()[1]

    assert np.allclose(
        extracted, exp_value, atol=0.001, rtol=0.05, equal_nan=True
    )
def test_normality_tests(self, dt_id, exp_value, precompute, test, failure):
    """Check ``nr_norm`` for a given ``method`` and ``failure`` setting."""
    if precompute:
        precomp = GNAME
    else:
        precomp = None

    features, target = load_xy(dt_id)

    model = MFE(groups=[GNAME], features="nr_norm")
    fitted = model.fit(
        features.values, target.values, precomp_groups=precomp
    )

    nr_norm_args = {"failure": failure, "method": test}
    extracted = fitted.extract(nr_norm=nr_norm_args)[1]

    assert np.allclose(
        extracted, exp_value, atol=0.001, rtol=0.05, equal_nan=True
    )
def test_parse_valid_metafeatures(self, groups):
    """Check ``parse_by_group`` names against ``valid_metafeatures``.

    All groups are extracted without summarization. The names parsed for
    ``groups`` must match the valid meta-features for those groups exactly.
    """
    features, target = load_xy(0)

    extractor = MFE(
        groups="all",
        summary=None,
        sample_size=0.5,
        random_state=1234,
    )
    extractor.fit(features.values, target.values)

    extracted = extractor.extract()

    expected_names = extractor.valid_metafeatures(groups=groups)
    parsed_names, _ = extractor.parse_by_group(groups, extracted)

    mismatched = set(parsed_names).symmetric_difference(expected_names)
    assert not mismatched