Example #1
0
 def test_list_metafeatures(self):
     """list_metafeatures() must hand out a copy, not its internal list.

     Clears the returned list and checks that a subsequent call is
     unaffected; restores the list before failing so later tests that
     share the class-level state are not poisoned.
     """
     returned = Metafeatures.list_metafeatures()
     snapshot = list(returned)
     returned.clear()
     if Metafeatures.list_metafeatures() != snapshot:
         # Put the shared list back before reporting the failure.
         returned.extend(snapshot)
         self.fail('Metafeature list has been mutated')
Example #2
0
    def test_numeric_targets(self):
        """Verify Metafeatures().compute() handles a numeric target column.

        Replaces each dataset's target with random floats, marks the
        target column as NUMERIC, and expects every target-dependent
        metafeature to report the NUMERIC_TARGETS sentinel value.
        """
        failures = {}
        this_test = inspect.stack()[0][3]
        for filename, dataset in self.datasets.items():
            # Copy so the shared fixture's column_types is not mutated.
            col_types = dataset["column_types"].copy()
            col_types[dataset["Y"].name] = consts.NUMERIC
            random_target = pd.Series(
                np.random.rand(dataset["Y"].shape[0]),
                name=dataset["Y"].name)
            computed = Metafeatures().compute(
                X=dataset["X"],
                Y=random_target,
                seed=CORRECTNESS_SEED,
                column_types=col_types)
            expected = dataset["known_metafeatures"]
            # Target-dependent metafeatures cannot be computed for a
            # numeric target; they should all carry the sentinel value.
            for mf_id in Metafeatures.list_metafeatures(
                    consts.MetafeatureGroup.TARGET_DEPENDENT.value):
                expected[mf_id] = {
                    consts.VALUE_KEY: consts.NUMERIC_TARGETS,
                    consts.COMPUTE_TIME_KEY: 0.
                }

            failures.update(self._perform_checks([
                (self._check_correctness,
                 [computed, expected, filename]),
                (self._check_compare_metafeature_lists,
                 [computed, expected, filename]),
            ]))

        self._report_test_failures(failures, this_test)
Example #3
0
    def test_exclude_metafeature_groups(self):
        """Excluded metafeature groups must not appear in compute() output.

        Picks a random subset of groups, excludes them, checks the
        remaining results for correctness, and fails if any metafeature
        from an excluded group was still computed.
        """
        GROUP_SAMPLE_SIZE = 3
        failures = {}
        this_test = inspect.stack()[0][3]
        for filename, dataset in self.datasets.items():
            excluded = random.sample(
                [group.value for group in consts.MetafeatureGroup],
                GROUP_SAMPLE_SIZE)
            computed = Metafeatures().compute(
                X=dataset["X"],
                Y=dataset["Y"],
                column_types=dataset["column_types"],
                seed=CORRECTNESS_SEED,
                exclude_groups=excluded,
            )
            expected = dataset["known_metafeatures"]
            failures.update(self._perform_checks([
                (self._check_correctness,
                 [computed, expected, filename]),
            ]))

            # Any overlap between the excluded ids and the computed ids
            # means the exclusion was not honored.
            excluded_ids = {
                mf_id
                for grp in excluded
                for mf_id in Metafeatures.list_metafeatures(grp)
            }
            if excluded_ids & set(computed.keys()):
                self.fail('Metafeatures computed an excluded metafeature')
        self._report_test_failures(failures, this_test)
def get_list_metafeatures(list_X, list_y, type_metafeatures, total=7084):
    """Compute one group of metafeatures for every (X, y) dataset pair.

    Args:
        list_X: iterable of feature matrices (each convertible to a
            pandas DataFrame).
        list_y: iterable of target vectors, aligned with ``list_X``;
            each is treated as a categorical target.
        type_metafeatures: metafeature group name passed to
            ``Metafeatures.list_metafeatures(group=...)``.
        total: expected number of datasets, used only for the tqdm
            progress bar (default 7084 preserves the original behavior).

    Returns:
        A DataFrame with one row per dataset, NaNs filled with 0,
        indexed by the module-level ``list_files`` names.
    """
    metafeatures = Metafeatures()
    per_dataset = []

    # BUG FIX: the original iterated `list_Y` (capital Y), which is not a
    # parameter and raised NameError; the intended name is `list_y`.
    for X, y in tqdm(zip(list_X, list_y), total=total):
        mfs = metafeatures.compute(
            pd.DataFrame(X),
            Y=pd.Series(y, dtype="category"),
            metafeature_ids=metafeatures.list_metafeatures(
                group=type_metafeatures),
            exclude=None,
            seed=0,  # fixed seed for reproducible metafeature values
            timeout=60,
        )
        per_dataset.append(pd.DataFrame(mfs).reset_index(drop=True))

    df_metafeatures = pd.concat(per_dataset).fillna(0)
    # NOTE(review): `list_files` is a module-level global assumed to be
    # aligned 1:1 with list_X/list_y — TODO confirm against the caller.
    df_metafeatures["index"] = list_files
    df_metafeatures.set_index("index", inplace=True)
    return df_metafeatures