Esempio n. 1
0
    def test_kNearestDatasets(self):
        """Check the ids and distances returned by ``kNearestDatasets``.

        The model is fitted on the ``krvskp`` and ``labor`` metafeatures
        (run columns 233 and 234) and queried with ``anneal`` for
        ``k`` = 1, 2 and -1. ``k`` = 0 and ``k`` = -2 must raise
        ``ValueError``.
        """
        model = KNearestDatasets()
        fitted_metafeatures = pd.DataFrame([self.krvskp, self.labor])
        model.fit(fitted_metafeatures, self.runs.loc[:, [233, 234]])

        closest = 3.8320802803440586
        runner_up = 4.367919719655942
        # (k, expected dataset ids, expected distances); this test expects
        # k=-1 to give the same result as asking for both fitted datasets.
        cases = (
            (1, [233], [closest]),
            (2, [233, 234], [closest, runner_up]),
            (-1, [233, 234], [closest, runner_up]),
        )
        for k, expected_ids, expected_distances in cases:
            # Without return_distance only the ids come back.
            ids_only = model.kNearestDatasets(self.anneal, k)
            self.assertEqual(expected_ids, ids_only)

            ids, distances = model.kNearestDatasets(
                self.anneal, k, return_distance=True)
            self.assertEqual(expected_ids, ids)
            np.testing.assert_array_almost_equal(expected_distances,
                                                 distances)

        for invalid_k in (0, -2):
            self.assertRaises(ValueError, model.kNearestDatasets,
                              self.anneal, invalid_k)
Esempio n. 2
0
    def test_kNearestDatasets(self):
        """Verify neighbor ids and distances for ``k`` = 1, 2 and -1.

        ``krvskp`` and ``labor`` are the fitted datasets (run columns 233
        and 234); ``anneal`` is the query. Invalid ``k`` values (0, -2)
        are expected to raise ``ValueError``.
        """
        knn = KNearestDatasets()
        knn.fit(pd.DataFrame([self.krvskp, self.labor]),
                self.runs.loc[:, [233, 234]])

        # Each row: k, expected neighbor ids, expected distances.
        scenarios = [
            (1, [233], [1.82298937]),
            (2, [233, 234], [1.822989, 2.267919]),
            (-1, [233, 234], [1.822989, 2.267919]),
        ]
        for k, want_ids, want_distances in scenarios:
            got_ids = knn.kNearestDatasets(self.anneal, k)
            self.assertEqual(want_ids, got_ids)

            got_ids, got_distances = knn.kNearestDatasets(
                self.anneal, k, return_distance=True)
            self.assertEqual(want_ids, got_ids)
            assert_array_almost_equal(want_distances, got_distances)

        for bad_k in (0, -2):
            self.assertRaises(ValueError, knn.kNearestDatasets,
                              self.anneal, bad_k)
Esempio n. 3
0
 def test_random_metric(self):
     """Expect 20 distinct distances from 20 queries with a random metric.

     The model uses ``get_random_metric(random_state=1)`` and is queried
     repeatedly with the same ``anneal`` metafeatures.
     """
     model = KNearestDatasets(metric=get_random_metric(random_state=1))
     model.fit(pd.DataFrame([self.krvskp, self.labor]),
               self.runs.loc[:, [233, 234]])
     # Entry [0][1] is the distance of the single best suggestion.
     observed = [model.kBestSuggestions(self.anneal, 1)[0][1]
                 for _ in range(20)]
     self.assertEqual(len(np.unique(observed)), 20)
Esempio n. 4
0
 def test_random_metric(self):
     """Check that a random metric gives a new distance on every query.

     Twenty identical ``kBestSuggestions`` calls must yield twenty unique
     distance values.
     """
     n_queries = 20
     random_metric = get_random_metric(random_state=1)
     knn = KNearestDatasets(metric=random_metric)
     training_frame = pd.DataFrame([self.krvskp, self.labor])
     knn.fit(training_frame, self.runs.loc[:, [233, 234]])

     seen_distances = []
     for _ in range(n_queries):
         best = knn.kBestSuggestions(self.anneal, 1)[0]
         # The distance is the second field of a suggestion tuple.
         seen_distances.append(best[1])
     self.assertEqual(len(np.unique(seen_distances)), 20)
Esempio n. 5
0
    def test_fit_l1_distance(self):
        """Check the state that ``fit`` stores on a default model.

        After fitting on all three datasets, the best configuration per
        dataset must be 0, 1 and 2 for ids 232, 233 and 234, and the stored
        metafeatures must equal the frame that was passed in.
        """
        model = KNearestDatasets()
        model.fit(pd.DataFrame([self.anneal, self.krvskp, self.labor]),
                  self.runs)

        expected_best = ((232, 0), (233, 1), (234, 2))
        for dataset_id, configuration in expected_best:
            self.assertEqual(
                model.best_configuration_per_dataset[dataset_id],
                configuration)

        # Compare against a freshly built frame, not the object given to fit.
        reference = pd.DataFrame([self.anneal, self.krvskp, self.labor])
        elementwise_equal = model.metafeatures == reference
        self.assertTrue(elementwise_equal.all().all())
Esempio n. 6
0
 def test_scale(self):
     """Check the first scaled metafeature row returned by ``_scale``.

     ``anneal`` and ``krvskp`` form the metafeature frame and ``labor`` is
     passed as the second argument. Only the first row of the scaled frame
     is compared with the expected values.
     """
     # ``pandas.util.testing`` was deprecated in pandas 1.0 and removed in
     # 2.0. Prefer the public ``pandas.testing`` module and fall back to the
     # old location for pandas versions that lack it.
     try:
         from pandas.testing import assert_series_equal
     except ImportError:
         from pandas.util.testing import assert_series_equal

     kND = KNearestDatasets()
     metafeatures = pd.DataFrame([self.anneal, self.krvskp])
     # The second return value is not inspected by this test.
     metafeatures, _ = kND._scale(metafeatures, self.labor)
     # Series.equals does not work properly with floats, so use the
     # tolerance-based comparison helper.
     assert_series_equal(metafeatures.iloc[0],
                         pd.Series({"number_of_instances": 0.267919719656,
                                   "number_of_classes": 1,
                                   "number_of_features": 1}))
Esempio n. 7
0
    def test_kNearestDatasets(self):
        """Exercise ``kNearestDatasets`` with valid and invalid ``k``.

        The model is fitted on ``krvskp`` and ``labor`` (run columns 233 and
        234) and queried with ``anneal``. Valid ``k`` values are checked
        with and without ``return_distance``; ``k`` = 0 and ``k`` = -2 must
        raise ``ValueError``.
        """
        nearest = KNearestDatasets()
        nearest.fit(pd.DataFrame([self.krvskp, self.labor]),
                    self.runs.loc[:, [233, 234]])

        def check(k, expected_ids, expected_distances):
            # Plain call returns the ids only.
            self.assertEqual(expected_ids,
                             nearest.kNearestDatasets(self.anneal, k))
            # With return_distance=True a (ids, distances) pair comes back.
            found_ids, found_distances = nearest.kNearestDatasets(
                self.anneal, k, return_distance=True)
            self.assertEqual(expected_ids, found_ids)
            assert_array_almost_equal(expected_distances, found_distances)

        check(1, [233], [1.82298937])
        check(2, [233, 234], [1.822989, 2.267919])
        # This test expects k=-1 to return both fitted datasets.
        check(-1, [233, 234], [1.822989, 2.267919])

        for rejected_k in (0, -2):
            self.assertRaises(ValueError, nearest.kNearestDatasets,
                              self.anneal, rejected_k)
Esempio n. 8
0
    def _learn(self, exclude_double_configurations=True):
        """Return the best suggestions for the target dataset.

        The target's metafeatures are separated from those of all other
        datasets, metafeatures that are not finite for the target are
        dropped from both, and missing values in the remaining datasets are
        replaced by the column mean. A ``KNearestDatasets`` model is built
        and fitted on first use and cached on ``self.kND``.

        :param exclude_double_configurations: forwarded unchanged to
            ``kBestSuggestions``.
        :return: the result of ``self.kND.kBestSuggestions`` with ``k=-1``.
        """
        target, others = self._split_metafeature_array()

        # Keep only metafeatures that could be calculated for the target
        # dataset.
        finite_names = [name for name in target.index
                        if np.isfinite(target.loc[name])]
        target = target.loc[finite_names]
        others = others.loc[:, finite_names]

        # Mean imputation for all other datasets.
        column_means = others.mean()
        others = others.fillna(column_means)

        if self.kND is None:
            # Parameters for the random forest, used when the distance
            # function is learned.
            rf_params = (ast.literal_eval(self.distance_kwargs)
                         if self.distance_kwargs else None)

            # A new random state keeps the distance the same in every
            # iteration.
            model = KNearestDatasets(
                metric=self.distance,
                random_state=sklearn.utils.check_random_state(self.seed),
                logger=self.logger,
                metric_params=rf_params,
            )

            # TODO move this code to the metabase
            runs_by_task = {}
            for task_id in others.index:
                try:
                    task_runs = self.meta_base.get_runs(task_id)
                except KeyError:
                    # TODO should I really except this?
                    self.logger.info("Could not find runs for instance %s" %
                                     task_id)
                    # Tasks without runs get an empty placeholder series.
                    task_runs = pd.Series([], name=task_id)
                runs_by_task[task_id] = task_runs

            model.fit(others, pd.DataFrame(runs_by_task))
            self.kND = model

        return self.kND.kBestSuggestions(
            target,
            k=-1,
            exclude_double_configurations=exclude_double_configurations,
        )
Esempio n. 9
0
    def _learn(self, exclude_double_configurations=True):
        """Compute the best suggestions for the target dataset.

        Steps: split the metafeatures into the target's and everyone
        else's, drop metafeatures that are not finite for the target,
        mean-impute the rest, then fit a ``KNearestDatasets`` model if
        ``self.kND`` is still ``None``. The fitted model is stored on
        ``self.kND`` and reused on later calls.

        :param exclude_double_configurations: passed through to
            ``kBestSuggestions``.
        :return: whatever ``self.kND.kBestSuggestions`` returns for
            ``k=-1``.
        """
        own_metafeatures, other_metafeatures = \
            self._split_metafeature_array()

        # Discard metafeatures that could not be calculated for the target
        # dataset.
        usable = [feature for feature in own_metafeatures.index
                  if np.isfinite(own_metafeatures.loc[feature])]
        own_metafeatures = own_metafeatures.loc[usable]
        other_metafeatures = other_metafeatures.loc[:, usable]

        # Fill gaps in the other datasets with the per-column mean.
        other_metafeatures = other_metafeatures.fillna(
            other_metafeatures.mean())

        if self.kND is None:
            # Random forest parameters for the learned-distance case.
            if not self.distance_kwargs:
                forest_params = None
            else:
                forest_params = ast.literal_eval(self.distance_kwargs)

            # Use a new random state so the distance stays the same in
            # every iteration.
            rng = sklearn.utils.check_random_state(self.seed)
            neighbors_model = KNearestDatasets(metric=self.distance,
                                               random_state=rng,
                                               metric_params=forest_params)

            # TODO move this code to the metabase
            collected_runs = {}
            for task_id in other_metafeatures.index:
                try:
                    collected_runs[task_id] = self.meta_base.get_runs(task_id)
                except KeyError:
                    # TODO should I really except this?
                    self.logger.warning("Could not find runs for instance %s" % task_id)
                    # Use an empty series when no runs exist for the task.
                    collected_runs[task_id] = pd.Series([], name=task_id)
            run_frame = pd.DataFrame(collected_runs)

            neighbors_model.fit(other_metafeatures, run_frame)
            self.kND = neighbors_model

        return self.kND.kBestSuggestions(
            own_metafeatures,
            k=-1,
            exclude_double_configurations=exclude_double_configurations)
Esempio n. 10
0
    def test_kBestSuggestions(self):
        """Check the suggestions returned by ``kBestSuggestions``.

        The model is fitted on ``krvskp`` and ``labor`` (run columns 233 and
        234) and queried with ``anneal`` for ``k`` = 1, 2 and -1. Each
        suggestion is a 3-tuple whose middle entry is a computed distance.
        ``k`` = 0 and ``k`` = -2 must raise ``ValueError``.
        """
        kND = KNearestDatasets()
        kND.fit(pd.DataFrame([self.krvskp, self.labor]),
                self.runs.loc[:, [233, 234]])

        def assert_suggestions(expected, actual):
            # Distances are computed floats, so exact equality is brittle
            # across platforms and library versions. Compare them with a
            # tolerance and keep the other two fields exact.
            self.assertEqual(len(expected), len(actual))
            for (exp_id, exp_dist, exp_conf), (act_id, act_dist, act_conf) \
                    in zip(expected, actual):
                self.assertEqual(exp_id, act_id)
                self.assertAlmostEqual(exp_dist, act_dist)
                self.assertEqual(exp_conf, act_conf)

        neighbor = kND.kBestSuggestions(self.anneal, 1)
        assert_suggestions([(233, 1.8229893712531495, 1)], neighbor)
        neighbors = kND.kBestSuggestions(self.anneal, 2)
        assert_suggestions([(233, 1.8229893712531495, 1),
                            (234, 2.2679197196559415, 2)], neighbors)
        # This test expects k=-1 to return suggestions for both datasets.
        neighbors = kND.kBestSuggestions(self.anneal, -1)
        assert_suggestions([(233, 1.8229893712531495, 1),
                            (234, 2.2679197196559415, 2)], neighbors)

        self.assertRaises(ValueError, kND.kBestSuggestions, self.anneal, 0)
        self.assertRaises(ValueError, kND.kBestSuggestions, self.anneal, -2)
Esempio n. 11
0
    def test_kBestSuggestions(self):
        """Verify ``kBestSuggestions`` for valid and invalid ``k``.

        ``krvskp`` and ``labor`` are the fitted datasets (run columns 233
        and 234) and ``anneal`` is the query. Suggestions are 3-tuples with
        a computed distance in the middle. ``k`` = 0 and ``k`` = -2 must
        raise ``ValueError``.
        """
        kND = KNearestDatasets()
        kND.fit(pd.DataFrame([self.krvskp, self.labor]),
                self.runs.loc[:, [233, 234]])

        def assert_suggestions(expected, actual):
            # Exact float equality on computed distances is fragile, so the
            # distance is compared with a tolerance. The first and third
            # fields are still compared exactly.
            self.assertEqual(len(expected), len(actual))
            for want, got in zip(expected, actual):
                want_id, want_distance, want_conf = want
                got_id, got_distance, got_conf = got
                self.assertEqual(want_id, got_id)
                self.assertAlmostEqual(want_distance, got_distance)
                self.assertEqual(want_conf, got_conf)

        neighbor = kND.kBestSuggestions(self.anneal, 1)
        assert_suggestions([(233, 1.8229893712531495, 1)],
                           neighbor)
        neighbors = kND.kBestSuggestions(self.anneal, 2)
        assert_suggestions([(233, 1.8229893712531495, 1),
                            (234, 2.2679197196559415, 2)],
                           neighbors)
        # This test expects k=-1 to return suggestions for both datasets.
        neighbors = kND.kBestSuggestions(self.anneal, -1)
        assert_suggestions([(233, 1.8229893712531495, 1),
                            (234, 2.2679197196559415, 2)],
                           neighbors)

        self.assertRaises(ValueError, kND.kBestSuggestions, self.anneal, 0)
        self.assertRaises(ValueError, kND.kBestSuggestions, self.anneal, -2)
Esempio n. 12
0
    def test_kBestSuggestions(self):
        """Check ``kBestSuggestions`` output for ``k`` = 1, 2 and -1.

        The model is fitted on ``krvskp`` and ``labor`` (run columns 233 and
        234) and queried with ``anneal``. Results are compared with a
        tolerance; ``k`` = 0 and ``k`` = -2 must raise ``ValueError``.
        """
        model = KNearestDatasets()
        model.fit(pd.DataFrame([self.krvskp, self.labor]),
                  self.runs.loc[:, [233, 234]])

        first = (233, 3.8320802803440586, 1)
        second = (234, 4.367919719655942, 2)
        # (k, expected suggestions); k=-1 is expected to match k=2 here.
        expectations = (
            (1, [first]),
            (2, [first, second]),
            (-1, [first, second]),
        )
        for k, expected in expectations:
            suggestions = model.kBestSuggestions(self.anneal, k)
            np.testing.assert_array_almost_equal(expected, suggestions)

        for invalid_k in (0, -2):
            self.assertRaises(ValueError, model.kBestSuggestions,
                              self.anneal, invalid_k)