コード例 #1
0
    def test_error__set_data_numeric(self):
        """Check that ``_set_data_numeric`` raises ``TypeError`` on bad ``X``.

        Two cases are exercised: a freshly constructed extractor on which
        the test assigns no ``X``, and an extractor whose ``X`` is set to
        an empty array.
        """
        # Build the extractor outside ``pytest.raises`` so that only the
        # call under test can satisfy the expectation; a ``TypeError``
        # raised during setup must fail the test instead of passing it.
        mfe = MFE()
        with pytest.raises(TypeError):
            mfe._set_data_numeric(True)

        mfe = MFE()
        mfe.X = np.array([])
        with pytest.raises(TypeError):
            mfe._set_data_numeric(True)
コード例 #2
0
ファイル: test_architecture.py プロジェクト: ealcobaca/pymfe
    def test_extract_with_confidence_random_state1(self):
        """Two runs sharing ``random_state=16`` must produce matching output.

        Both the meta-feature values and the confidence intervals returned
        by ``extract_with_confidence`` are compared with ``np.allclose``.
        """
        X, y = utils.load_xy(2)

        def run_once():
            # Fresh extractor per run; both runs use identical settings.
            fitted = MFE(features=["mean", "sd"], random_state=16).fit(
                X=X.values, y=y.values)
            return fitted.extract_with_confidence(sample_num=3)

        _, vals_a, conf_a = run_once()
        _, vals_b, conf_b = run_once()

        assert np.allclose(vals_a, vals_b)
        assert np.allclose(conf_a, conf_b)
コード例 #3
0
ファイル: test_architecture.py プロジェクト: ealcobaca/pymfe
    def test_extract_with_confidence_random_state3(self):
        """Reseed NumPy's global RNG before each of two runs and compare.

        No ``random_state`` is given to ``MFE``. The meta-feature values of
        both runs are expected to match, while at least one entry of the
        confidence intervals is expected to differ.
        """
        X, y = utils.load_xy(2)

        def seeded_run():
            np.random.seed(1234)
            fitted = MFE(features=["mean", "sd"]).fit(
                X=X.values, y=y.values)
            return fitted.extract_with_confidence(sample_num=3)

        _, vals_a, conf_a = seeded_run()
        _, vals_b, conf_b = seeded_run()

        assert np.allclose(vals_a, vals_b)

        # NOTE(review): presumably the global NumPy seed is not what drives
        # the resampling when ``random_state`` is unset -- confirm against
        # the MFE implementation.
        differing = ~np.isclose(conf_a, conf_b)
        assert np.any(differing)
コード例 #4
0
ファイル: test_architecture.py プロジェクト: ealcobaca/pymfe
    def test_extract_from_model(self):
        """Compare ``extract_from_model`` with a regular extraction.

        A decision tree fitted by the test is handed to
        ``extract_from_model``; its names and values must agree with those
        of an extractor restricted to the "model-based" group and fitted on
        the same data with ``transform_num=False``.
        """
        X, y = utils.load_xy(2)

        tree = sklearn.tree.DecisionTreeClassifier(random_state=1234)
        fitted_tree = tree.fit(X.values, y.values)

        names_model, vals_model = MFE(
            random_state=1234).extract_from_model(fitted_tree)

        reference = MFE(groups="model-based", random_state=1234)
        reference.fit(X=X.values, y=y.values, transform_num=False)
        names_ref, vals_ref = reference.extract()

        assert np.all(names_model == names_ref)
        assert np.allclose(vals_model, vals_ref)
コード例 #5
0
 def test_extract_with_confidence_output_dictionary_unsupervised(self):
     """Request dict output from ``extract_with_confidence`` without ``y``.

     The result must be a ``dict`` holding exactly three entries.
     """
     X, _ = load_xy(2)
     fitted = MFE(groups="general").fit(X.values)
     extract_kwargs = {"out_type": dict}
     result = fitted.extract_with_confidence(
         3, arguments_extract=extract_kwargs)
     assert isinstance(result, dict)
     assert len(result) == 3
コード例 #6
0
 def test_extract_with_time_output_dictionary(self):
     """Request dict output from ``extract`` with ``measure_time="total"``.

     The result must be a ``dict`` holding exactly three entries.
     """
     X, y = load_xy(2)
     timed = MFE(groups="general", measure_time="total")
     fitted = timed.fit(X.values, y.values)
     result = fitted.extract(out_type=dict)
     assert isinstance(result, dict)
     assert len(result) == 3
コード例 #7
0
def extract_from_object(dataset: Union[np.ndarray, list], mfe_params: dict = None) -> Sequence:
    """Fit an ``MFE`` extractor on ``dataset`` and return the extracted values.

    Parameters
    ----------
    dataset : np.ndarray or list
        Data passed to ``MFE.fit`` (no target is supplied).
    mfe_params : dict, optional
        Keyword arguments for the ``MFE`` constructor. When ``None`` or
        empty, the module-level ``__default_mfe_params`` are used instead.

    Returns
    -------
    Sequence
        The element at index 1 of the ``MFE.extract`` result.
    """
    use_defaults = mfe_params is None or len(mfe_params) == 0
    params = __default_mfe_params if use_defaults else mfe_params

    extractor = MFE(**params)
    extractor.fit(dataset, suppress_warnings=True)
    extracted = extractor.extract(suppress_warnings=True)
    return extracted[1]
コード例 #8
0
    def test_extract_with_confidence_invalid3(self):
        """A ``confidence`` of 1.0001 must be rejected with ``ValueError``."""
        X, y = utils.load_xy(2)

        with pytest.raises(ValueError):
            fitted = MFE().fit(X.values, y.values)
            fitted.extract_with_confidence(confidence=1.0001)
コード例 #9
0
    def test_extract_with_confidence(self, confidence):
        """Check how many sampled values fall inside each interval.

        For every meta-feature, the proportion of its sampled values lying
        within the returned confidence interval (bounds included) must be
        at least ``confidence - 0.05``.
        """
        X, y = utils.load_xy(2)

        extractor = MFE(
            groups="all",
            features=["mean", "best_node", "sil"],
            random_state=1234,
        )
        fitted = extractor.fit(X=X.values, y=y.values, precomp_groups=None)
        mtf_names, mtf_vals, mtf_conf_int = fitted.extract_with_confidence(
            sample_num=64,
            return_avg_val=False,
            confidence=confidence,
            verbose=0,
        )

        coverage = np.zeros(len(mtf_names), dtype=float)

        for idx, samples in enumerate(mtf_vals):
            lower, upper = mtf_conf_int[idx, :]
            inside = np.logical_and(lower <= samples, samples <= upper)
            coverage[idx] = np.sum(inside) / len(samples)

        assert np.all(confidence - 0.05 <= coverage)
コード例 #10
0
    def test_none_cancor(self):
        """Feed empty canonical-correlation arrays to four meta-features.

        Every extracted value is expected to be NaN when ``can_cors`` and
        ``can_cor_eigvals`` are both empty arrays.
        """
        X, y = load_xy(0)

        feats = ["w_lambda", "p_trace", "lh_trace", "roy_root"]

        mfe = MFE(groups=[GNAME], features=feats)

        empty_cancor = {
            "can_cors": np.array([]),
            "can_cor_eigvals": np.array([]),
        }

        mfe.fit(X.values, y.values, precomp_groups=None)

        # Each feature receives the same custom-argument dictionary.
        per_feat_args = dict.fromkeys(feats, empty_cancor)
        vals = mfe.extract(**per_feat_args, suppress_warnings=True)[1]

        all_nan = np.full(shape=len(vals), fill_value=np.nan)
        assert np.allclose(vals, all_nan, equal_nan=True)
コード例 #11
0
    def test_verbosity_3(self, verbosity, msg_expected, capsys):
        """When a message is expected, ``extract`` must write to stdout.

        Nothing is asserted when ``msg_expected`` is falsy.
        """
        X, y = load_xy(0)

        fitted = MFE().fit(X=X.values, y=y.values)
        fitted.extract(verbose=verbosity)

        stdout_text = capsys.readouterr().out
        if msg_expected:
            assert stdout_text
コード例 #12
0
ファイル: test_system_testing.py プロジェクト: paulasb/pymfe
        def extract_mtf_by_group():
            """Extract every group of ``mtf_groups`` separately and merge.

            Relies on ``mtf_groups``, ``precompute``, ``supervised``, ``X``
            and ``y`` from the enclosing scope. Returns the values of all
            groups as a tuple ordered by meta-feature name.
            """
            names, values = [], []

            for group in mtf_groups:
                extractor = MFE(
                    groups=group, summary="mean", random_state=1234
                )
                fitted = extractor.fit(
                    X.values,
                    y.values if supervised else None,
                    precomp_groups=group if precompute else None,
                )

                group_names, group_vals = fitted.extract()
                names.extend(group_names)
                values.extend(group_vals)

            # Sort the (name, value) pairs by name, then keep the values.
            by_name = sorted(zip(names, values), key=lambda pair: pair[0])
            _, sorted_values = zip(*by_name)

            return sorted_values
コード例 #13
0
ファイル: test_architecture.py プロジェクト: ealcobaca/pymfe
    def test_extract_from_model_invalid4(self):
        """``extract_from_model`` with ``groups="general"`` must fail.

        A ``ValueError`` is expected.
        """
        X, y = utils.load_xy(2)

        tree = sklearn.tree.DecisionTreeClassifier()
        fitted_tree = tree.fit(X, y)

        with pytest.raises(ValueError):
            extractor = MFE(groups="general")
            extractor.extract_from_model(fitted_tree)
コード例 #14
0
ファイル: test_architecture.py プロジェクト: ealcobaca/pymfe
    def test_extract_from_model_invalid1(self):
        """Passing a ``DecisionTreeRegressor`` must raise ``TypeError``."""
        X, y = utils.load_xy(2)

        regressor = sklearn.tree.DecisionTreeRegressor()
        fitted_regressor = regressor.fit(X.values, y.values)

        with pytest.raises(TypeError):
            extractor = MFE()
            extractor.extract_from_model(fitted_regressor)
コード例 #15
0
    def test_output_lengths_2(self, dt_id, measure_time):
        """With time measurement on, ``extract`` yields three equal lengths.

        The three returned sequences must all have the same number of
        elements.
        """
        X, y = load_xy(dt_id)
        fitted = MFE(measure_time=measure_time).fit(X=X.values, y=y.values)
        first, second, third = fitted.extract()

        assert len(first) == len(second) == len(third)
コード例 #16
0
    def test_ft_methods_model_based_02(self, dt_id, ft_name, exp_value,
                                       precompute):
        """Check one model-based meta-feature against its expected value.

        The decision tree used by the extractor is configured through
        ``hypparam_model_dt``. When ``exp_value`` is a NaN float, the first
        extracted value must be NaN; otherwise the extracted values must be
        close to ``exp_value``.
        """
        precomp_group = GNAME if precompute else None

        X, y = load_xy(dt_id)
        mfe = MFE(
            groups=[GNAME],
            features=[ft_name],
            hypparam_model_dt={
                "max_depth": 5,
                "min_samples_split": 10,
                "criterion": "entropy",
            },
            random_state=1234,
        )

        mfe.fit(X.values, y.values, precomp_groups=precomp_group)

        if precomp_group is None:
            # Note: the precomputation of 'model-based' group is always
            # forced due to the need of the 'dt_model' value
            mfe._precomp_args_ft = {
                "dt_model": mfe._precomp_args_ft.get("dt_model")
            }

        value = mfe.extract()[1]

        # NaN is detected by value rather than by identity with the
        # ``np.nan`` object: an identity check only succeeds when the very
        # same singleton is returned, not for any other NaN float.
        expects_nan = isinstance(exp_value, float) and np.isnan(exp_value)

        if expects_nan:
            assert np.isnan(value[0])

        else:
            assert np.allclose(value, exp_value)
コード例 #17
0
 def test_scaling_error_1(self):
     """An unknown ``rescale`` option must make ``fit`` raise ``ValueError``."""
     # Load the data and build the extractor outside ``pytest.raises`` so
     # that a ``ValueError`` raised during setup cannot satisfy the
     # expectation.
     X, y = load_xy(0)
     mfe = MFE()
     with pytest.raises(ValueError):
         mfe.fit(X=X.values,
                 y=y.values,
                 rescale="invalid",
                 transform_cat=False)
コード例 #18
0
ファイル: test_architecture.py プロジェクト: ealcobaca/pymfe
    def test_extract_from_model_invalid2(self):
        """Passing ``dt_model`` through ``arguments_fit`` must fail.

        A ``KeyError`` is expected.
        """
        X, y = utils.load_xy(2)

        tree = sklearn.tree.DecisionTreeClassifier(random_state=1234)
        fitted_tree = tree.fit(X.values, y.values)

        with pytest.raises(KeyError):
            fit_kwargs = {"dt_model": fitted_tree}
            MFE().extract_from_model(fitted_tree, arguments_fit=fit_kwargs)
コード例 #19
0
ファイル: test_architecture.py プロジェクト: ealcobaca/pymfe
    def test_one_hot_encoding_03(self):
        """With ``transform_cat="one-hot"`` the column count must be kept.

        The numeric data stored under ``_custom_args_ft["N"]`` must have as
        many columns as the input ``X`` of this dataset.
        """
        X, y = utils.load_xy(2)
        extractor = MFE()
        extractor.fit(X.values, y.values, transform_cat="one-hot")

        expected_cols = X.values.shape[1]
        numeric_data = extractor._custom_args_ft["N"]

        assert numeric_data.shape[1] == expected_cols
コード例 #20
0
ファイル: test_architecture.py プロジェクト: ealcobaca/pymfe
    def test_one_hot_encoding_02(self):
        """``"one-hot-full"`` must yield one column per distinct value.

        The expected column count is the sum, over all input attributes,
        of the number of unique values in each attribute.
        """
        X, y = utils.load_xy(1)
        extractor = MFE()
        extractor.fit(X.values, y.values, transform_cat="one-hot-full")

        unique_counts = [np.unique(column).size for column in X.values.T]
        expected_cols = np.sum(unique_counts)

        numeric_data = extractor._custom_args_ft["N"]
        assert numeric_data.shape[1] == expected_cols
コード例 #21
0
 def test_extract_output_pandas_dataframe(self):
     """``extract(out_type=pd.DataFrame)`` must return a one-row frame.

     The columns must equal the names reported by
     ``extract_metafeature_names``.
     """
     X, y = load_xy(2)
     fitted = MFE(groups="general").fit(X.values, y.values)
     expected_names = fitted.extract_metafeature_names()
     frame = fitted.extract(out_type=pd.DataFrame)
     assert isinstance(frame, pd.DataFrame)
     assert frame.values.shape == (1, len(expected_names))
     assert np.array_equal(frame.columns, expected_names)
コード例 #22
0
    def test_verbosity_2(self, capsys):
        """``extract(verbose=0)`` must not write anything to stdout."""
        X, y = load_xy(0)

        fitted = MFE().fit(X=X.values, y=y.values)
        fitted.extract(verbose=0)

        stdout_text = capsys.readouterr().out

        assert not stdout_text
コード例 #23
0
 def test_extract_with_time_output_pandas_dataframe_unsupervised(self):
     """Timed, target-free extraction must return a two-row frame.

     The columns must equal the names reported by
     ``extract_metafeature_names``.
     """
     X, _ = load_xy(2)
     timed = MFE(measure_time="total", groups="general")
     fitted = timed.fit(X.values)
     expected_names = fitted.extract_metafeature_names()
     frame = fitted.extract(out_type=pd.DataFrame)
     assert isinstance(frame, pd.DataFrame)
     assert frame.values.shape == (2, len(expected_names))
     assert np.array_equal(frame.columns, expected_names)
コード例 #24
0
def _extract_metafeatures(mfe, files, scores_data):
    """Return one meta-feature dict (plus ``best_clf``) per parquet file.

    Each file is read, its "class" column is dropped, and ``mfe`` is fitted
    on the remaining values without a target. The ``best_clf`` entry is the
    ``argmax`` of the ``scores_data`` row indexed by the file's name.
    """
    rows = []
    for fname in tqdm(files):
        df = pd.read_parquet(fname)
        # Only unsupervised meta-features are evaluated for now, so the
        # class column is dropped and no target is given to ``fit``.
        X = df.drop(columns=["class"]).values
        mfe.fit(X)
        ft = dict(zip(*mfe.extract()))
        ft["best_clf"] = scores_data.loc[fname.name].argmax()
        rows.append(ft)
    return rows


def main():
    """Extract meta-features with pyMFE and evaluate a LightGBM classifier.

    Meta-features are extracted from the parquet files under the ``train``
    and ``valid`` sub-directories of ``args.data_path``. An
    ``LGBMClassifier`` is trained to predict ``best_clf`` and its
    micro-averaged recall and precision on the validation set are logged
    to Weights & Biases. With ``args.save_mfe`` set, both meta-feature
    tables are also written to CSV.
    """
    args = parse_args()
    wandb.init(project='DeepMetaLearning', name='classical', config=args)
    warnings.filterwarnings("ignore", category=RuntimeWarning)
    warnings.filterwarnings("ignore", category=UserWarning)
    mfe = MFE(random_state=args.seed)
    data_path = pathlib.Path(args.data_path)

    print("Extracting meta-features for train files")
    train_files = list((data_path / 'train').glob('*.parquet'))
    scores_data = pd.read_csv("augment_data.csv", index_col="filename")
    train_rows = _extract_metafeatures(mfe, train_files, scores_data)

    print("Extracting meta-features for validation files")
    valid_files = list((data_path / 'valid').glob('*.parquet'))
    valid_rows = _extract_metafeatures(mfe, valid_files, scores_data)

    train_df = pd.DataFrame(train_rows)
    valid_df = pd.DataFrame(valid_rows)
    if args.save_mfe:
        train_df.to_csv("mfe.train.csv", index=False)
        # The validation table goes to the test file; writing the training
        # table here would make both CSV files identical.
        valid_df.to_csv("mfe.test.csv", index=False)

    drop_columns = ["best_clf"]
    xtrain = train_df.drop(columns=drop_columns).values
    xtest = valid_df.drop(columns=drop_columns).values
    ytrain = train_df[drop_columns]
    ytrue = valid_df[drop_columns]
    lg = LGBMClassifier(random_state=args.seed, objective='multiclass')
    lg.fit(xtrain, ytrain)
    yhat = lg.predict(xtest)

    recall = metrics.recall_score(ytrue, yhat, average="micro")
    precis = metrics.precision_score(ytrue, yhat, average="micro")
    wandb.log({"recall": recall})
    wandb.log({"precision": precis})
コード例 #25
0
    def test_integration_general(self, dt_id, exp_value, precompute):
        """Extract the whole ``GNAME`` group and compare with ``exp_value``.

        Values are summarized with "mean" and compared with
        ``np.allclose``, treating NaNs in matching positions as equal.
        """
        X, y = load_xy(dt_id)
        precomp_group = GNAME if precompute else None

        extractor = MFE(groups=[GNAME], summary="mean")
        fitted = extractor.fit(
            X.values, y.values, precomp_groups=precomp_group
        )
        extracted = fitted.extract()
        value = extracted[1]

        assert np.allclose(value, exp_value, equal_nan=True)
コード例 #26
0
    def test_verbosity_with_confidence(self, verbosity, msg_expected, capsys):
        """Stdout must be non-empty exactly when a message is expected."""
        X, y = load_xy(2)

        fitted = MFE().fit(X.values, y.values)
        fitted.extract_with_confidence(verbose=verbosity)

        stdout_text = capsys.readouterr().out
        # Output present if and only if ``msg_expected`` is truthy.
        assert bool(msg_expected) == bool(stdout_text)
コード例 #27
0
ファイル: test_architecture.py プロジェクト: ealcobaca/pymfe
    def test_default_alias_groups(self):
        """The "default" alias must leave ``valid_groups`` unchanged.

        Whether given as a string, as a single-item list, or next to
        another group name, ``valid_groups()`` must report exactly the
        entries of ``_internal.VALID_GROUPS``.
        """
        group_specs = ("default", ["default"], ["general", "default"])

        for spec in group_specs:
            model = MFE(groups=spec)
            res = model.valid_groups()
            assert len(res) == len(_internal.VALID_GROUPS)
            assert not set(res).symmetric_difference(_internal.VALID_GROUPS)
コード例 #28
0
ファイル: Metadata.py プロジェクト: Sanyam07/curumin_mtl
 def _get_feats(cls):
     """Return pymfe meta-feature names with "." replaced by "_".

     The names come from a default ``MFE`` extractor fitted on the iris
     dataset shipped with scikit-learn.
     """
     from sklearn.datasets import load_iris
     from pymfe.mfe import MFE
     iris = load_iris()
     extractor = MFE()
     extractor.fit(iris.data, iris.target)
     names = extractor.extract()[0]
     return [name.replace(".", "_") for name in names]
コード例 #29
0
    def test_threshold_attr_conc(self):
        """Extract ``attr_conc`` with ``max_attr_num=25`` and check values.

        The result is compared to reference values with a relative
        tolerance of 0.2.
        """
        X, y = load_xy(1)
        extractor = MFE(features="attr_conc", random_state=1234)
        fitted = extractor.fit(X.values, y.values, precomp_groups=False)

        attr_conc_args = {"max_attr_num": 25}
        value = fitted.extract(attr_conc=attr_conc_args)[1]

        expected = [0.01682327, 0.04715381]
        assert np.allclose(value, expected, rtol=0.2)
コード例 #30
0
ファイル: test_architecture.py プロジェクト: ealcobaca/pymfe
    def test_one_hot_encoding_04(self):
        """One-hot ``fit`` must raise ``ValueError`` on the augmented data.

        A string-typed column filled by ``np.ones`` is appended to ``X``
        before calling ``fit`` with ``transform_cat="one-hot"``.
        """
        X, y = utils.load_xy(2)
        mfe = MFE()

        extra_column = np.ones((y.size, 1), dtype=str)
        X = np.hstack((X.values, extra_column))
        y = y.values

        with pytest.raises(ValueError):
            mfe.fit(X=X, y=y, transform_cat="one-hot")