Ejemplo n.º 1
0
    def test_suggestions_type(self):
        """Check the type of the value returned by meta_suggestions().

        The internal methods are assumed to be covered by their own tests,
        so the only thing verified here is the type of the returned object.
        """
        dataset = DataLoader.get_openml_dataset(openml_id=46, problem_type=0)
        # Only the second returned value (the ids) is needed here.
        _, idx = MKDatabaseClient().nearest_datasets(dataset=dataset)

        res = MKDatabaseClient().meta_suggestions(dataset, idx[0])

        # assertIsInstance reports the offending object and expected type on
        # failure, whereas assertTrue(isinstance(...)) only says
        # "False is not true".
        self.assertIsInstance(res, MLSuggestion)
Ejemplo n.º 2
0
    def reduced_search_space(self):
        """Retrieve the reduced search space based on the similar datasets.

        The similar datasets must have been computed beforehand (see the
        error message below for the method to call).

        Returns:
            The result of ``MKDatabaseClient.meta_suggestions`` for this
            dataset, its neighbors and the configured metalearning metric.
            NOTE(review): previously documented as a list of MLSuggestions;
            confirm against meta_suggestions().

        Raises:
            AutoMLError: If no neighbors have been computed yet.

        """
        if self._neighbors is None:
            # Adjacent string literals are used instead of a backslash
            # continuation inside the literal, which used to embed the
            # indentation of the following source line in the message.
            raise AutoMLError("No neighbors available. Call the "
                              "compute_similar_datasets method first")

        mk_dc = MKDatabaseClient()
        return mk_dc.meta_suggestions(dataset=self.dataset,
                                      ids_list=list(self._neighbors),
                                      metric=self.metalearning_metric)
Ejemplo n.º 3
0
    def test_value_distance_metric(self):
        """Test that a different distance metric evaluates correctly.

        The 'cosine' metric is requested and every distance in the first row
        of the result is expected to lie in the closed interval [0, 1].
        """
        dataset = DataLoader.get_openml_dataset(openml_id=46, problem_type=0)
        dists, _ = MKDatabaseClient().nearest_datasets(
            dataset=dataset, distance_metric='cosine')

        # NOTE(review): the original trailing comment here was cut off
        # ("it can be by chance but"). Cosine distance can in general exceed
        # 1 for vectors pointing in opposite directions, so the upper bound
        # presumably relies on the metafeatures involved - confirm.
        for distance in dists[0]:
            # Separate assertions so that a failure shows the offending value
            # and which bound was violated.
            self.assertGreaterEqual(distance, 0)
            self.assertLessEqual(distance, 1)
Ejemplo n.º 4
0
    def compute_similar_datasets(self, k=5, similarity_metric='minkowski'):
        """Look up the datasets most similar to this one and remember them.

        The lookup is delegated to ``MKDatabaseClient.nearest_datasets`` with
        ``weighted=False``. The results are stored on the instance and any
        previously stored search space is discarded.

        Args:
            k (int): The number of similar datasets to retrieve. Defaults to 5.
            similarity_metric: Forwarded to the client as ``distance_metric``.
                Defaults to 'minkowski'.

        Returns:
            tuple: ``(neighbors, metrics)`` where ``neighbors`` is the first
            row of ids and ``metrics`` the first row of similarity values
            returned by the client.

        """
        client = MKDatabaseClient()
        lookup = client.nearest_datasets(
            self.dataset,
            k=k,
            weighted=False,
            distance_metric=similarity_metric,
        )
        metric_rows, id_rows = lookup

        # The neighborhood is about to change, so whatever search space was
        # stored before is stale; it is reset rather than recomputed here.
        self._search_space = None

        # Only the first row of each result is kept.
        self._neighbors = id_rows[0]
        self._neighbors_metrics = metric_rows[0]

        return self._neighbors, self._neighbors_metrics
Ejemplo n.º 5
0
    def test_value_k(self):
        """Test that nearest_datasets() validates the value of k.

        Every k from 1 up to the maximum allowed must succeed, while a k
        greater than the maximum, or equal to 0, must raise ValueError.
        """
        dataset = DataLoader.get_openml_dataset(openml_id=46, problem_type=0)
        # The maximum is taken as the length of the first row of the
        # weighted matrix.
        max_k = len(MKDatabaseClient().metaknwoledge.weighted_matrix()[0])

        # Lower values -> do not fail
        for k in range(1, max_k + 1):
            try:
                MKDatabaseClient().nearest_datasets(dataset=dataset, k=k)
            except Exception as exc:  # pylint: disable=W0703
                # Fail with the offending k and the original exception rather
                # than an uninformative "True is not false".
                self.fail("nearest_datasets raised {!r} for k={}".format(
                    exc, k))

        # Greater than max -> fails
        with self.assertRaises(ValueError):
            MKDatabaseClient().nearest_datasets(dataset=dataset, k=max_k + 1)

        # 0 -> fails
        with self.assertRaises(ValueError):
            MKDatabaseClient().nearest_datasets(dataset=dataset, k=0)
Ejemplo n.º 6
0
 def test_reload(self):
     """Test that reload() returns an MKDatabaseClient instance."""
     client = MKDatabaseClient().reload()
     # assertIsInstance shows the actual object and expected type on failure.
     self.assertIsInstance(client, MKDatabaseClient)