def _check_task(self, task):
    """Assert that a task listing entry has the expected structure.

    The entry must be exactly of type ``dict``, contain at least two
    items, hold an ``int`` under ``'did'`` and a string under
    ``'status'`` whose value is one of the accepted status names.
    """
    accepted_statuses = ['in_preparation', 'active', 'deactivated']

    # Container checks first, so key lookups below cannot raise.
    self.assertEqual(type(task), dict)
    self.assertGreaterEqual(len(task), 2)

    self.assertIn('did', task)
    did = task['did']
    self.assertIsInstance(did, int)

    self.assertIn('status', task)
    status = task['status']
    self.assertTrue(is_string(status))
    self.assertIn(status, accepted_statuses)
def _check_dataset(self, dataset):
    """Validate the structure of a single dataset listing entry.

    Asserts, in order, that ``dataset`` is exactly a ``dict`` with at
    least two items, that ``'did'`` is present and an ``int``, and that
    ``'status'`` is present, is a string, and is a recognised status.
    """
    self.assertEqual(type(dataset), dict)
    self.assertGreaterEqual(len(dataset), 2)

    # Identifier field.
    self.assertIn('did', dataset)
    dataset_id = dataset['did']
    self.assertIsInstance(dataset_id, int)

    # Status field.
    self.assertIn('status', dataset)
    dataset_status = dataset['status']
    self.assertTrue(is_string(dataset_status))
    valid_statuses = ['in_preparation', 'active', 'deactivated']
    self.assertIn(dataset_status, valid_statuses)
def test_list_datasets_by_tag(self):
    """Check that listing datasets tagged ``'uci'`` yields valid entries.

    At least five datasets are expected for the tag, and every returned
    entry must pass the structural checks in ``_check_dataset``.
    """
    datasets = openml.datasets.list_datasets_by_tag('uci')
    self.assertGreaterEqual(len(datasets), 5)
    for dataset in datasets:
        # Reuse the shared per-entry validation instead of repeating
        # the same seven assertions inline.
        self._check_dataset(dataset)
def test_get_data(self):
    """Exercise ``get_data`` plainly and with each optional return flag."""
    # Basic usage
    data = self.dataset.get_data()
    self.assertIsInstance(data, np.ndarray)
    self.assertEqual(data.dtype, np.float32)
    self.assertEqual((898, 39), data.shape)

    # One boolean flag is expected per column.
    _, categorical = self.dataset.get_data(
        return_categorical_indicator=True)
    self.assertEqual(len(categorical), 39)
    self.assertTrue(all(isinstance(flag, bool) for flag in categorical))

    # One string name is expected per column.
    _, attribute_names = self.dataset.get_data(
        return_attribute_names=True)
    self.assertEqual(len(attribute_names), 39)
    self.assertTrue(all(is_string(name) for name in attribute_names))
def test_get_dataset(self):
    """Exercise ``get_dataset`` plainly and with each optional return flag."""
    # Basic usage
    array = self.dataset.get_dataset()
    self.assertIsInstance(array, np.ndarray)
    self.assertEqual(array.dtype, np.float32)
    self.assertEqual((898, 39), array.shape)

    # Requesting the categorical indicator returns a second value with
    # one boolean per column.
    _, categorical = self.dataset.get_dataset(
        return_categorical_indicator=True)
    self.assertEqual(len(categorical), 39)
    self.assertTrue(all(isinstance(flag, bool) for flag in categorical))

    # Requesting attribute names returns a second value with one string
    # per column.
    _, attribute_names = self.dataset.get_dataset(
        return_attribute_names=True)
    self.assertEqual(len(attribute_names), 39)
    self.assertTrue(all(is_string(name) for name in attribute_names))
def test_list_datasets(self):
    """Smoke-test the full dataset listing.

    Checks that at least 1087 datasets are returned and that every
    entry passes the structural checks in ``_check_dataset``.
    """
    # We can only perform a smoke test here because we test on dynamic
    # data from the internet...
    datasets = openml.datasets.list_datasets()
    # 1087 as the number of datasets on openml.org
    self.assertGreaterEqual(len(datasets), 1087)
    for dataset in datasets:
        # Reuse the shared per-entry validation instead of repeating
        # the same seven assertions inline.
        self._check_dataset(dataset)
def test_get_sparse_dataset(self):
    """Exercise ``get_dataset`` on the sparse fixture with each return flag."""
    dense = self.sparse_dataset.get_dataset()
    self.assertIsInstance(dense, np.ndarray)
    self.assertEqual(dense.dtype, np.float32)
    self.assertEqual((2, 20001), dense.shape)

    # With the categorical indicator: still an ndarray, plus one boolean
    # per column.
    dense, categorical = self.sparse_dataset.get_dataset(
        return_categorical_indicator=True)
    self.assertIsInstance(dense, np.ndarray)
    self.assertEqual(len(categorical), 20001)
    self.assertTrue(all(isinstance(flag, bool) for flag in categorical))

    # With attribute names: still an ndarray, plus one string per column.
    dense, attribute_names = self.sparse_dataset.get_dataset(
        return_attribute_names=True)
    self.assertIsInstance(dense, np.ndarray)
    self.assertEqual(len(attribute_names), 20001)
    self.assertTrue(all(is_string(name) for name in attribute_names))