def extract_mtf_by_group():
    """Extract meta-features one group at a time, ordered by feature name.

    Reads ``mtf_groups``, ``precompute``, ``supervised``, ``X`` and ``y``
    from the enclosing scope. Each group gets its own ``MFE`` instance
    (mean summary, fixed random state); the per-group results are
    concatenated and then sorted by meta-feature name.

    Returns:
        Tuple with the meta-feature values in name-sorted order.
    """
    names = []
    values = []

    for group in mtf_groups:
        extractor = MFE(groups=group, summary="mean", random_state=1234)
        extractor.fit(
            X.values,
            y.values if supervised else None,
            # Precompute only for the group being extracted, if enabled.
            precomp_groups=group if precompute else None,
        )
        group_names, group_values = extractor.extract()
        names.extend(group_names)
        values.extend(group_values)

    # Sort (name, value) pairs by name only, then keep the values.
    ordered_pairs = sorted(zip(names, values), key=lambda pair: pair[0])
    _, ordered_values = zip(*ordered_pairs)

    return ordered_values
def test_extract_with_time_output_dictionary(self):
    """Extraction with total time measurement and ``out_type=dict``.

    The output must be a ``dict`` holding exactly three entries.
    """
    X, y = load_xy(2)

    mfe = MFE(groups="general", measure_time="total")
    mfe.fit(X.values, y.values)

    output = mfe.extract(out_type=dict)

    assert isinstance(output, dict)
    assert len(output) == 3
def extract_from_object(dataset: Union[np.ndarray, list], mfe_params: dict = None) -> Sequence:
    """Fit an ``MFE`` on ``dataset`` and return the extracted values.

    Args:
        dataset: Data passed as the only data argument to ``MFE.fit``.
        mfe_params: Keyword arguments for the ``MFE`` constructor. When
            ``None`` or empty, the module-level default parameters are
            used instead.

    Returns:
        The second element of the ``MFE.extract`` result.
    """
    # Fall back to the defaults for both ``None`` and an empty mapping.
    params = mfe_params if mfe_params else __default_mfe_params

    extractor = MFE(**params)
    extractor.fit(dataset, suppress_warnings=True)

    extracted = extractor.extract(suppress_warnings=True)
    return extracted[1]
def test_extract_with_confidence_output_dictionary_unsupervised(self):
    """Confidence extraction without ``y`` and with ``out_type=dict``.

    The output must be a ``dict`` holding exactly three entries.
    """
    X, _ = load_xy(2)

    mfe = MFE(groups="general")
    mfe.fit(X.values)

    output = mfe.extract_with_confidence(
        3, arguments_extract={"out_type": dict}
    )

    assert isinstance(output, dict)
    assert len(output) == 3
def test_none_cancor(self):
    """All listed features must be NaN when given empty canonical correlations."""
    X, y = load_xy(0)

    feat_names = ["w_lambda", "p_trace", "lh_trace", "roy_root"]

    extractor = MFE(groups=[GNAME], features=feat_names)

    # Same (empty) custom arguments are handed to every feature.
    empty_cancor_args = {
        "can_cors": np.array([]),
        "can_cor_eigvals": np.array([]),
    }
    per_feature_args = dict.fromkeys(feat_names, empty_cancor_args)

    extractor.fit(X.values, y.values, precomp_groups=None)
    result = extractor.extract(**per_feature_args, suppress_warnings=True)[1]

    all_nan = np.full(shape=len(result), fill_value=np.nan)
    assert np.allclose(result, all_nan, equal_nan=True)
def test_extract_with_time_output_pandas_dataframe_unsupervised(self):
    """Timed extraction without ``y`` and with ``out_type=pd.DataFrame``.

    The frame must have two rows and one column per expected
    meta-feature name, in the same order.
    """
    X, _ = load_xy(2)

    mfe = MFE(measure_time="total", groups="general")
    mfe.fit(X.values)

    names = mfe.extract_metafeature_names()
    frame = mfe.extract(out_type=pd.DataFrame)

    assert isinstance(frame, pd.DataFrame)
    assert frame.values.shape == (2, len(names))
    assert np.array_equal(frame.columns, names)
def test_one_hot_encoding_02(self):
    """``one-hot-full`` must yield one column per distinct value of each attribute."""
    X, y = utils.load_xy(1)

    extractor = MFE()
    extractor.fit(X.values, y.values, transform_cat="one-hot-full")

    # Total number of distinct values, summed over all columns of X.
    distinct_per_column = [np.unique(column).size for column in X.values.T]
    expected_width = np.sum(distinct_per_column)

    encoded = extractor._custom_args_ft["N"]
    assert encoded.shape[1] == expected_width
def test_extract_output_pandas_dataframe(self):
    """Extraction with ``out_type=pd.DataFrame``.

    The frame must have a single row and one column per expected
    meta-feature name, in the same order.
    """
    X, y = load_xy(2)

    mfe = MFE(groups="general")
    mfe.fit(X.values, y.values)

    names = mfe.extract_metafeature_names()
    frame = mfe.extract(out_type=pd.DataFrame)

    assert isinstance(frame, pd.DataFrame)
    assert frame.values.shape == (1, len(names))
    assert np.array_equal(frame.columns, names)
def test_one_hot_encoding_03(self):
    """With ``one-hot`` on dataset 2 the column count of ``N`` must equal that of X."""
    X, y = utils.load_xy(2)

    extractor = MFE()
    extractor.fit(X.values, y.values, transform_cat="one-hot")

    expected_width = X.values.shape[1]
    fitted_numeric = extractor._custom_args_ft["N"]

    assert fitted_numeric.shape[1] == expected_width
def test_silhouette_subsampling(self, precompute):
    """Check the ``sil`` value when extracted with ``sample_frac=0.5``."""
    X, y = load_xy(0)

    extractor = MFE(groups="clustering", features="sil", random_state=1234)
    extractor.fit(
        X.values,
        y.values,
        precomp_groups=GNAME if precompute else None,
    )

    sil_value = extractor.extract(sil={"sample_frac": 0.5})[1]

    assert np.allclose(sil_value, -0.07137712254830314)
def test_threshold_attr_conc(self):
    """Check ``attr_conc`` when extracted with ``max_attr_num=25``."""
    X, y = load_xy(1)

    extractor = MFE(features="attr_conc", random_state=1234)
    extractor.fit(X.values, y.values, precomp_groups=False)

    result = extractor.extract(attr_conc={"max_attr_num": 25})[1]
    expected = [0.01682327, 0.04715381]

    # Loose relative tolerance, as in the reference values above.
    assert np.allclose(result, expected, rtol=0.2)
def _get_feats(cls):
    """Return the default meta-feature names with ``.`` replaced by ``_``.

    The names are obtained by fitting a default ``MFE`` on the iris
    dataset and running an extraction.
    """
    from sklearn.datasets import load_iris
    from pymfe.mfe import MFE

    iris = load_iris()

    extractor = MFE()
    extractor.fit(iris.data, iris.target)

    # First element of the extraction result holds the feature names.
    names = extractor.extract()[0]
    return [name.replace(".", "_") for name in names]
def test_one_hot_encoding_04(self):
    """Fitting with ``one-hot`` must raise ``ValueError`` for the augmented data.

    A string-typed column produced by ``np.ones`` is appended to X
    before fitting.
    """
    X, y = utils.load_xy(2)
    extractor = MFE()

    extra_column = np.ones((y.size, 1), dtype=str)
    features = np.hstack((X.values, extra_column))
    targets = y.values

    with pytest.raises(ValueError):
        extractor.fit(X=features, y=targets, transform_cat="one-hot")
def test_integration_general(self, dt_id, exp_value, precompute):
    """Compare all mean-summarized meta-features of group ``GNAME`` with ``exp_value``."""
    X, y = load_xy(dt_id)

    extractor = MFE(groups=[GNAME], summary="mean")
    extractor.fit(
        X.values,
        y.values,
        precomp_groups=GNAME if precompute else None,
    )

    extracted = extractor.extract()[1]

    assert np.allclose(extracted, exp_value, equal_nan=True)
def test_integration_infotheo(self, dt_id, exp_value, precompute):
    """Function to test all info-theory meta-features.

    Extracts every mean-summarized meta-feature of group ``GNAME`` from
    dataset ``dt_id`` and compares the values with ``exp_value``.

    Args:
        dt_id: Identifier passed to ``load_xy`` to select the dataset.
        exp_value: Expected meta-feature values.
        precompute: Whether ``GNAME`` is passed as ``precomp_groups``.
    """
    precomp_group = GNAME if precompute else None
    X, y = load_xy(dt_id)
    mfe = MFE(groups=[GNAME], summary="mean").fit(
        X.values, y.values, precomp_groups=precomp_group
    )
    value = mfe.extract()[1]

    # The comparison result must be asserted; a bare ``np.allclose`` call
    # discards its result and lets the test pass unconditionally.
    assert np.allclose(
        value, exp_value, atol=0.001, rtol=0.05, equal_nan=True
    )
def transform(self, X, y):
    """Extract summarized "general" meta-features from ``X`` and ``y``.

    Args:
        X: Feature data. A ``pd.DataFrame`` is converted to an ``int8``
            numpy array; any other type is passed through unchanged.
        y: Target data. A ``pd.Series`` is converted to an ``int32``
            numpy array; any other type is passed through unchanged.

    Returns:
        Numpy array of the extracted values with NaN replaced by 0.0.
    """
    # NOTE(review): the int8 cast assumes every feature value fits in
    # [-128, 127] -- confirm against the callers' data.
    if isinstance(X, pd.DataFrame):
        X = X.to_numpy(dtype='int8')
    if isinstance(y, pd.Series):
        y = y.to_numpy(dtype='int32')

    mfe = MFE(
        groups=["general"],
        summary=['kurtosis', 'min', 'max', 'median', 'skewness'],
    )
    mfe.fit(X, y)
    ft = mfe.extract()[1]

    # The second positional parameter of ``np.nan_to_num`` is ``copy``,
    # not the NaN replacement, so a bare ``0`` there only disabled the
    # copy. Both arguments are spelled out to keep that behavior while
    # making the intended NaN -> 0.0 replacement explicit. Infinities are
    # still mapped to the largest finite floats (numpy default).
    return np.nan_to_num(np.array(ft), copy=False, nan=0.0)
def test_extract_metafeature_names_unsupervised_01(self, groups, summary):
    """Names predicted before fitting must match those extracted without ``y``."""
    X, _ = utils.load_xy(0)

    extractor = MFE(groups=groups, summary=summary)

    # Queried before any call to ``fit``.
    predicted_names = extractor.extract_metafeature_names(supervised=False)

    extractor.fit(X.values)
    extracted_names = extractor.extract(suppress_warnings=True)[0]

    assert predicted_names == tuple(extracted_names)
def test_integration_clustering(self, dt_id, exp_value, precompute):
    """Test all clustering meta-features (mean summary) against ``exp_value``."""
    X, y = load_xy(dt_id)

    extractor = MFE(groups=[GNAME], summary="mean")
    extractor.fit(
        X.values,
        y.values,
        precomp_groups=GNAME if precompute else None,
    )

    extracted = extractor.extract()[1]

    assert np.allclose(extracted, exp_value, equal_nan=True)
def test_gray_encoding_missing_value(self):
    """Fitting with ``gray`` encoding must raise ``ValueError`` when X holds a NaN."""
    X, y = utils.load_xy(1)
    extractor = MFE()

    # Work on a copy so the loaded data is not modified.
    features = np.copy(X.values)
    targets = y.values
    features[5, 0] = np.nan

    with pytest.raises(ValueError):
        extractor.fit(features, targets, transform_cat="gray")
def extract_all_mtf():
    """Extract the meta-features of all groups at once and return the values.

    Reads ``mtf_groups``, ``supervised``, ``precomp_group``, ``X`` and
    ``y`` from the enclosing scope.
    """
    extractor = MFE(groups=mtf_groups, summary="mean", random_state=1234)
    extractor.fit(
        X.values,
        y.values if supervised else None,
        precomp_groups=precomp_group,
    )

    return extractor.extract()[1]
def test_integration_complexity(self, dt_id, exp_value, precompute):
    """Test all complexity meta-features (mean summary) against ``exp_value``."""
    X, y = load_xy(dt_id)

    extractor = MFE(groups=[GNAME], summary="mean", random_state=1234)
    extractor.fit(
        X.values,
        y.values,
        precomp_groups=GNAME if precompute else None,
    )

    extracted = extractor.extract()[1]

    assert np.allclose(extracted, exp_value, equal_nan=True, rtol=0.025)
def test_verbose(self, capsys):
    """Verbose extraction must print the expected number of lines to stdout."""
    X, y = load_xy(0)

    selected_features = [
        "freq_class",
        "mean",
        "class_conc",
        "one_nn",
        "nodes",
    ]
    extractor = MFE(features=selected_features)
    extractor.fit(X=X.values, y=y.values)

    extractor.extract(verbose=True)
    printed = capsys.readouterr().out

    # Number of newline-terminated messages expected from the verbose
    # meta-feature extraction.
    n_expected_lines = 21

    assert printed.count("\n") == n_expected_lines
def test_integration_model_based(self, dt_id, exp_value, precompute):
    """Test all model-based meta-features (mean summary) against ``exp_value``."""
    X, y = load_xy(dt_id)

    extractor = MFE(groups=[GNAME], summary="mean", random_state=1234)
    extractor.fit(
        X.values,
        y.values,
        precomp_groups=GNAME if precompute else None,
    )

    extracted = extractor.extract()[1]

    assert np.allclose(extracted, exp_value, equal_nan=True)
def test_error__set_data_numeric(self):
    """``_set_data_numeric`` must raise ``TypeError`` in two setups.

    First on a freshly constructed ``MFE``, then on one whose ``X``
    attribute was set to an empty array.
    """
    # Case 1: fresh instance, nothing assigned.
    with pytest.raises(TypeError):
        fresh = MFE()
        fresh._set_data_numeric(True)

    # Case 2: ``X`` assigned an empty array by hand.
    with pytest.raises(TypeError):
        with_empty_x = MFE()
        with_empty_x.X = np.array([])
        with_empty_x._set_data_numeric(True)
def test_extract_with_confidence_random_state1(self):
    """Two runs with the same ``random_state`` must give matching results.

    Both the meta-feature values and the confidence intervals are
    compared.
    """
    X, y = utils.load_xy(2)

    def run_once():
        # Fresh extractor per run, always with the same random state.
        extractor = MFE(features=["mean", "sd"], random_state=16)
        extractor.fit(X=X.values, y=y.values)
        return extractor.extract_with_confidence(sample_num=3)

    _, first_vals, first_conf_int = run_once()
    _, second_vals, second_conf_int = run_once()

    assert np.allclose(first_vals, second_vals)
    assert np.allclose(first_conf_int, second_conf_int)
def test_extract_with_confidence_random_state3(self):
    """Seed numpy globally before two runs that set no ``random_state``.

    The meta-feature values of both runs must match, while at least one
    confidence-interval entry must differ between them.
    """
    X, y = utils.load_xy(2)

    def run_after_seeding():
        # Reset numpy's global seed, then extract without random_state.
        np.random.seed(1234)
        extractor = MFE(features=["mean", "sd"])
        extractor.fit(X=X.values, y=y.values)
        return extractor.extract_with_confidence(sample_num=3)

    _, first_vals, first_conf_int = run_after_seeding()
    _, second_vals, second_conf_int = run_after_seeding()

    assert np.allclose(first_vals, second_vals)
    # NOTE(review): the intervals are expected to differ even though the
    # global seed is identical -- presumably intentional; confirm.
    assert np.any(~np.isclose(first_conf_int, second_conf_int))
def test_ft_methods_general(self, dt_id, ft_name, exp_value, precompute):
    """Function to test each meta-feature belongs to general group.

    Args:
        dt_id: Identifier passed to ``load_xy`` to select the dataset.
        ft_name: Name of the single meta-feature to extract.
        exp_value: Expected value(s); a scalar NaN means the extracted
            value must be NaN as well.
        precompute: Whether ``GNAME`` is passed as ``precomp_groups``.
    """
    precomp_group = GNAME if precompute else None
    X, y = load_xy(dt_id)
    mfe = MFE(groups=[GNAME], features=[ft_name]).fit(
        X.values, y.values, precomp_groups=precomp_group
    )
    value = mfe.extract()[1]

    # Detect NaN by value rather than by identity with ``np.nan``: a NaN
    # that is not the ``np.nan`` singleton (e.g. ``float("nan")`` or a
    # computed ``np.float64`` NaN) would fail an ``is`` check spuriously.
    # The ``isinstance`` guard keeps ``np.isnan`` away from sequences.
    if isinstance(exp_value, float) and np.isnan(exp_value):
        assert np.isnan(value[0])

    else:
        assert np.allclose(value, exp_value)
def test_roy_largest_root(self, dt_id, exp_value, precompute, criterion):
    """Check ``roy_root`` extracted with the given ``criterion`` against ``exp_value``."""
    X, y = load_xy(dt_id)

    extractor = MFE(groups=[GNAME], features="roy_root")
    extractor.fit(
        X.values,
        y.values,
        precomp_groups=GNAME if precompute else None,
    )

    extracted = extractor.extract(roy_root={"criterion": criterion})[1]

    assert np.allclose(
        extracted, exp_value, atol=0.001, rtol=0.05, equal_nan=True
    )
def test_integration_statistical(self, dt_id, exp_value, precompute):
    """Test all statistical meta-features (mean summary) simultaneously."""
    X, y = load_xy(dt_id)

    extractor = MFE(groups=[GNAME], summary="mean")
    extractor.fit(
        X.values,
        y.values,
        precomp_groups=GNAME if precompute else None,
    )

    extracted = extractor.extract()[1]

    assert np.allclose(
        extracted, exp_value, atol=0.001, rtol=0.05, equal_nan=True
    )
def test_extract_metafeature_names_unsupervised_02(self, groups, summary):
    """Names reported after an unsupervised fit must ignore ``supervised``."""
    X, _ = utils.load_xy(0)

    extractor = MFE(groups=groups, summary=summary)
    extractor.fit(X.values)
    extracted_names = tuple(extractor.extract(suppress_warnings=True)[0])

    # Once .fit has been called, .extract_metafeature_names is expected
    # to check whether 'y' was fitted. No 'y' was given here, so
    # supervised=True should be ignored and behave like supervised=False.
    names_flag_true = extractor.extract_metafeature_names(supervised=True)
    names_flag_false = extractor.extract_metafeature_names(supervised=False)

    assert extracted_names == names_flag_true
    assert names_flag_true == names_flag_false