Example #1
0
 def test_scale(self):
     """_scale should normalise the metafeatures relative to the target dataset."""
     kND = KNearestDatasets()
     metafeatures = pd.DataFrame([self.anneal, self.krvskp])
     metafeatures, other = kND._scale(metafeatures, self.labor)
     # ``pandas.util.testing`` was deprecated in pandas 1.0 and removed in
     # 2.0; ``pandas.testing`` is the public location of the same helper.
     from pandas.testing import assert_series_equal
     # Series.equal does not work properly with floats...
     assert_series_equal(metafeatures.iloc[0],
                         pd.Series({"number_of_instances": 0.267919719656,
                                   "number_of_classes": 1,
                                   "number_of_features": 1}))
Example #2
0
    def test_fit_l1_distance(self):
        """fit() records the best configuration per dataset and keeps the metafeatures."""
        model = KNearestDatasets()

        model.fit(pd.DataFrame([self.anneal, self.krvskp, self.labor]),
                  self.runs)
        # One winning configuration index per training dataset.
        expected_best = {232: 0, 233: 1, 234: 2}
        for task_id, configuration in expected_best.items():
            self.assertEqual(model.best_configuration_per_dataset[task_id],
                             configuration)
        self.assertTrue((model.metafeatures == pd.DataFrame(
            [self.anneal, self.krvskp, self.labor])).all().all())
 def test_random_metric(self):
     """A random metric must produce a different distance on every query."""
     model = KNearestDatasets(logger=self.logger,
                              metric=get_random_metric(random_state=1))
     model.fit(pd.DataFrame([self.krvskp, self.labor]),
               self.runs.loc[:, [233, 234]])
     # Query the single nearest neighbour repeatedly; with a random metric
     # all 20 reported distances must be distinct.
     distances = [model.kBestSuggestions(self.anneal, 1)[0][1]
                  for _ in range(20)]
     self.assertEqual(len(np.unique(distances)), 20)
Example #4
0
    def _learn(self, exclude_double_configurations=True):
        """Lazily fit a KNearestDatasets model and return ranked suggestions.

        Parameters
        ----------
        exclude_double_configurations : bool
            Forwarded to ``kBestSuggestions``; when True, configurations that
            were already suggested for another neighbour are skipped.

        Returns
        -------
        The result of ``KNearestDatasets.kBestSuggestions`` for the target
        dataset's metafeatures with ``k=-1`` (all neighbours).
        """
        dataset_metafeatures, all_other_metafeatures = \
            self._split_metafeature_array()

        # Remove metafeatures which could not be calculated for the target
        # dataset.
        keep = [idx for idx in dataset_metafeatures.index
                if np.isfinite(dataset_metafeatures.loc[idx])]

        dataset_metafeatures = dataset_metafeatures.loc[keep]
        all_other_metafeatures = all_other_metafeatures.loc[:, keep]

        # Do mean imputation of all other metafeatures
        all_other_metafeatures = all_other_metafeatures.fillna(
            all_other_metafeatures.mean())

        if self.kND is None:
            # In case that we learn our distance function, get the parameters
            # for the random forest.
            rf_params = (ast.literal_eval(self.distance_kwargs)
                         if self.distance_kwargs else None)

            # To keep the distance the same in every iteration, we create a new
            # random state
            random_state = sklearn.utils.check_random_state(self.seed)
            kND = KNearestDatasets(metric=self.distance,
                                   random_state=random_state,
                                   logger=self.logger,
                                   metric_params=rf_params)

            runs = {}
            # TODO move this code to the metabase
            for task_id in all_other_metafeatures.index:
                try:
                    runs[task_id] = self.meta_base.get_runs(task_id)
                except KeyError:
                    # TODO should I really except this?
                    self.logger.info("Could not find runs for instance %s" %
                                     task_id)
                    # NOTE(review): an empty Series without an explicit dtype
                    # defaults to ``object`` in modern pandas — confirm whether
                    # ``dtype=float`` is intended here.
                    runs[task_id] = pd.Series([], name=task_id)
            runs = pd.DataFrame(runs)

            kND.fit(all_other_metafeatures, runs)
            self.kND = kND
        return self.kND.kBestSuggestions(
            dataset_metafeatures,
            k=-1,
            exclude_double_configurations=exclude_double_configurations,
        )
Example #5
0
    def test_kBestSuggestions(self):
        """kBestSuggestions returns (dataset_id, distance, configuration) triples."""
        kND = KNearestDatasets()
        kND.fit(pd.DataFrame([self.krvskp, self.labor]),
                self.runs.loc[:, [233, 234]])
        neighbor = kND.kBestSuggestions(self.anneal, 1)
        # Compare the float distances approximately rather than with exact
        # equality — exact float comparison is brittle across platforms.
        np.testing.assert_array_almost_equal(
            [(233, 1.8229893712531495, 1)],
            neighbor,
        )
        neighbors = kND.kBestSuggestions(self.anneal, 2)
        np.testing.assert_array_almost_equal(
            [(233, 1.8229893712531495, 1),
             (234, 2.2679197196559415, 2)],
            neighbors,
        )
        # k=-1 means "all neighbours"; with two datasets it equals k=2.
        neighbors = kND.kBestSuggestions(self.anneal, -1)
        np.testing.assert_array_almost_equal(
            [(233, 1.8229893712531495, 1),
             (234, 2.2679197196559415, 2)],
            neighbors,
        )

        # k must be a positive integer or -1.
        self.assertRaises(ValueError, kND.kBestSuggestions, self.anneal, 0)
        self.assertRaises(ValueError, kND.kBestSuggestions, self.anneal, -2)
Example #6
0
    def test_kBestSuggestions(self):
        """Suggestions come back ordered by distance; k is range-validated."""
        model = KNearestDatasets()
        model.fit(pd.DataFrame([self.krvskp, self.labor]),
                  self.runs.loc[:, [233, 234]])

        # Expected (dataset_id, distance, best_configuration) triples.
        first = (233, 3.8320802803440586, 1)
        second = (234, 4.367919719655942, 2)

        np.testing.assert_array_almost_equal(
            [first],
            model.kBestSuggestions(self.anneal, 1),
        )
        # k=2 and k=-1 (all neighbours) must agree when only two datasets exist.
        for k in (2, -1):
            np.testing.assert_array_almost_equal(
                [first, second],
                model.kBestSuggestions(self.anneal, k),
            )

        # k == 0 and k < -1 are rejected.
        for bad_k in (0, -2):
            self.assertRaises(ValueError, model.kBestSuggestions,
                              self.anneal, bad_k)