def test_suggestions_type(self):
    """Test that meta_suggestions() returns an MLSuggestion instance.

    The internal methods are considered to be tested elsewhere, so the
    type of the returned value is the only thing checked here.
    """
    dataset = DataLoader.get_openml_dataset(openml_id=46, problem_type=0)
    _, idx = MKDatabaseClient().nearest_datasets(dataset=dataset)
    # Only the first row of the returned ids is forwarded.
    res = MKDatabaseClient().meta_suggestions(dataset, idx[0])
    # assertIsInstance reports the offending type when the check fails,
    # unlike assertTrue(isinstance(...)) which only says "False is not true".
    self.assertIsInstance(res, MLSuggestion)
def reduced_search_space(self):
    """Retrieve the reduced search space based on the similar datasets.

    The similar datasets must have been computed already, i.e.
    ``self._neighbors`` must not be None.

    Returns:
        list: List of MLSuggestions, as produced by
            ``MKDatabaseClient.meta_suggestions`` for the stored
            neighbors and ``self.metalearning_metric``.

    Raises:
        AutoMLError: If no neighbors have been computed yet.
    """
    if self._neighbors is None:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which would leak the source
        # indentation into the message shown to the user.
        raise AutoMLError("No neighbors available. Call the "
                          "compute_similar_datasets method first")

    mk_dc = MKDatabaseClient()
    return mk_dc.meta_suggestions(dataset=self.dataset,
                                  ids_list=list(self._neighbors),
                                  metric=self.metalearning_metric)
def test_value_distance_metric(self):
    """Check that a non-default distance metric evaluates correctly.

    Cosine is used as the alternative metric; every distance in the first
    row of the result must lie in the closed interval [0, 1].
    """
    openml_dataset = DataLoader.get_openml_dataset(openml_id=46,
                                                   problem_type=0)
    client = MKDatabaseClient()
    distances, _ = client.nearest_datasets(dataset=openml_dataset,
                                           distance_metric='cosine')

    # NOTE(review): the original trailing comment here was cut off
    # ("it can be by chance but"); the [0, 1] bound may only hold for this
    # particular data rather than for the metric in general -- confirm.
    for value in distances[0]:
        self.assertTrue(0 <= value <= 1)
def compute_similar_datasets(self, k=5, similarity_metric='minkowski'):
    """Find the datasets most similar to this one via its metafeatures.

    The result is also stored on the instance (``_neighbors`` and
    ``_neighbors_metrics``) and the stored search space is reset.

    Args:
        k (int): The number of similar datasets to retrieve. Defaults to 5.
        similarity_metric (str): Forwarded to the database client as its
            ``distance_metric`` argument. Defaults to 'minkowski'.

    Returns:
        list: List of neighbors ordered by similarity.
        list: List of the metrics for each of the neighbors.
    """
    client = MKDatabaseClient()
    metrics, neighbor_ids = client.nearest_datasets(
        self.dataset,
        k=k,
        weighted=False,
        distance_metric=similarity_metric
    )

    # The neighbors have changed, so any previously stored search space is
    # stale: drop it and let it be recomputed only when it is needed.
    self._search_space = None

    # Only the first row of each result is kept.
    self._neighbors = neighbor_ids[0]
    self._neighbors_metrics = metrics[0]

    return self._neighbors, self._neighbors_metrics
def test_value_k(self):
    """Test that nearest_datasets() only accepts a valid ``k``.

    Every ``k`` from 1 up to the maximum allowed must be accepted, while
    ``k`` greater than the maximum allowed and ``k == 0`` must raise
    ValueError.
    """
    dataset = DataLoader.get_openml_dataset(openml_id=46, problem_type=0)
    # The maximum k is the length of the first row of the weighted matrix.
    # NOTE(review): `metaknwoledge` looks like a misspelling of
    # `metaknowledge`; it is kept as-is because it has to match the
    # attribute exposed by MKDatabaseClient -- confirm before renaming.
    max_k = len(MKDatabaseClient().metaknwoledge.weighted_matrix()[0])

    # Lower values -> do not fail
    for k in range(1, max_k + 1):
        try:
            # The unpacking also checks that a pair is returned.
            _, _ = MKDatabaseClient().nearest_datasets(dataset=dataset, k=k)
        except Exception as error:  # pylint: disable=W0703
            # Report which k broke and why, instead of asserting on a bare
            # boolean flag that discards the underlying exception.
            self.fail("nearest_datasets raised {!r} for valid k={}"
                      .format(error, k))

    # Greater than max -> fails
    with self.assertRaises(ValueError):
        MKDatabaseClient().nearest_datasets(dataset=dataset, k=max_k + 1)

    # 0 -> fails
    with self.assertRaises(ValueError):
        MKDatabaseClient().nearest_datasets(dataset=dataset, k=0)
def test_reload(self):
    """Test that reload() returns a valid MKDatabaseClient instance."""
    client = MKDatabaseClient().reload()
    # assertIsInstance reports the actual type on failure, which
    # assertTrue(isinstance(...)) does not.
    self.assertIsInstance(client, MKDatabaseClient)