Exemple #1
0
 def _check_task(self, task):
     """Assert that ``task`` has the expected listing-entry layout.

     The entry must be exactly a ``dict`` holding at least two items, with
     an ``int`` under ``'did'`` and, under ``'status'``, a value accepted
     by ``is_string`` that is one of the known status names.
     """
     known_statuses = ['in_preparation', 'active', 'deactivated']

     # Container checks.
     self.assertEqual(type(task), dict)
     self.assertGreaterEqual(len(task), 2)

     # Identifier field.
     self.assertIn('did', task)
     did = task['did']
     self.assertIsInstance(did, int)

     # Status field.
     self.assertIn('status', task)
     status = task['status']
     self.assertTrue(is_string(status))
     self.assertIn(status, known_statuses)
Exemple #2
0
 def _check_task(self, task):
     """Verify the structure of a single ``task`` entry.

     Asserts, in order: the entry's type is ``dict``; it has two or more
     items; ``'did'`` is present and an ``int``; ``'status'`` is present,
     passes ``is_string`` and is one of the allowed status names.
     """
     allowed = ['in_preparation', 'active', 'deactivated']

     self.assertEqual(type(task), dict)
     self.assertGreaterEqual(len(task), 2)

     self.assertIn('did', task)
     task_did = task['did']
     self.assertIsInstance(task_did, int)

     self.assertIn('status', task)
     task_status = task['status']
     self.assertTrue(is_string(task_status))
     self.assertIn(task_status, allowed)
 def _check_dataset(self, dataset):
     """Assert that ``dataset`` has the expected listing-entry layout.

     The entry must be exactly a ``dict`` holding at least two items, with
     an ``int`` under ``'did'`` and, under ``'status'``, a value accepted
     by ``is_string`` that is one of the known status names.
     """
     known_statuses = ['in_preparation', 'active', 'deactivated']

     # Container checks.
     self.assertEqual(type(dataset), dict)
     self.assertGreaterEqual(len(dataset), 2)

     # Identifier field.
     self.assertIn('did', dataset)
     did = dataset['did']
     self.assertIsInstance(did, int)

     # Status field.
     self.assertIn('status', dataset)
     status = dataset['status']
     self.assertTrue(is_string(status))
     self.assertIn(status, known_statuses)
Exemple #4
0
 def test_list_datasets_by_tag(self):
     """Check the entries returned by ``list_datasets_by_tag('uci')``.

     Expects at least five entries, each exactly a ``dict`` with two or
     more items, an ``int`` ``'did'`` and a recognised ``'status'``.
     """
     known_statuses = ['in_preparation', 'active', 'deactivated']

     tagged = openml.datasets.list_datasets_by_tag('uci')
     self.assertGreaterEqual(len(tagged), 5)

     for entry in tagged:
         # Container checks.
         self.assertEqual(type(entry), dict)
         self.assertGreaterEqual(len(entry), 2)

         # Identifier field.
         self.assertIn('did', entry)
         did = entry['did']
         self.assertIsInstance(did, int)

         # Status field.
         self.assertIn('status', entry)
         status = entry['status']
         self.assertTrue(is_string(status))
         self.assertIn(status, known_statuses)
 def test_list_datasets_by_tag(self):
     """Validate the listing produced for the ``'uci'`` tag.

     The listing must contain five or more entries; every entry must be
     exactly a ``dict`` with at least two items, an ``int`` ``'did'`` and
     a ``'status'`` that passes ``is_string`` and is an allowed name.
     """
     allowed = ['in_preparation', 'active', 'deactivated']

     uci_datasets = openml.datasets.list_datasets_by_tag('uci')
     self.assertGreaterEqual(len(uci_datasets), 5)

     for item in uci_datasets:
         self.assertEqual(type(item), dict)
         self.assertGreaterEqual(len(item), 2)

         self.assertIn('did', item)
         item_did = item['did']
         self.assertIsInstance(item_did, int)

         self.assertIn('status', item)
         item_status = item['status']
         self.assertTrue(is_string(item_status))
         self.assertIn(item_status, allowed)
 def test_get_data(self):
     """Check ``self.dataset.get_data`` with and without optional extras."""
     n_rows, n_cols = 898, 39

     # Basic usage: the result is checked as a float32 ndarray.
     data = self.dataset.get_data()
     self.assertIsInstance(data, np.ndarray)
     self.assertEqual(data.dtype, np.float32)
     self.assertEqual((n_rows, n_cols), data.shape)

     # Requesting the categorical indicator yields a second value whose
     # entries must all be bools.
     _, is_categorical = self.dataset.get_data(
         return_categorical_indicator=True)
     self.assertEqual(len(is_categorical), n_cols)
     self.assertTrue(all(isinstance(flag, bool) for flag in is_categorical))

     # Requesting attribute names yields a second value whose entries
     # must all pass ``is_string``.
     _, names = self.dataset.get_data(
         return_attribute_names=True)
     self.assertEqual(len(names), n_cols)
     self.assertTrue(all(is_string(name) for name in names))
Exemple #7
0
 def test_get_dataset(self):
     """Check ``self.dataset.get_dataset`` with and without extras."""
     n_rows, n_cols = 898, 39

     # Basic usage: the result is checked as a float32 ndarray.
     data = self.dataset.get_dataset()
     self.assertIsInstance(data, np.ndarray)
     self.assertEqual(data.dtype, np.float32)
     self.assertEqual((n_rows, n_cols), data.shape)

     # Requesting the categorical indicator yields a second value whose
     # entries must all be bools.
     _, is_categorical = self.dataset.get_dataset(
         return_categorical_indicator=True)
     self.assertEqual(len(is_categorical), n_cols)
     self.assertTrue(all(isinstance(flag, bool) for flag in is_categorical))

     # Requesting attribute names yields a second value whose entries
     # must all pass ``is_string``.
     _, names = self.dataset.get_dataset(
         return_attribute_names=True)
     self.assertEqual(len(names), n_cols)
     self.assertTrue(all(is_string(name) for name in names))
Exemple #8
0
 def test_list_datasets(self):
     """Smoke-test ``openml.datasets.list_datasets``.

     Only structural properties are asserted because the listing comes
     from dynamic data on the internet.
     """
     # 1087 as the number of datasets on openml.org
     minimum_expected = 1087
     known_statuses = ['in_preparation', 'active', 'deactivated']

     listing = openml.datasets.list_datasets()
     self.assertGreaterEqual(len(listing), minimum_expected)

     for entry in listing:
         # Container checks.
         self.assertEqual(type(entry), dict)
         self.assertGreaterEqual(len(entry), 2)

         # Identifier field.
         self.assertIn('did', entry)
         did = entry['did']
         self.assertIsInstance(did, int)

         # Status field.
         self.assertIn('status', entry)
         status = entry['status']
         self.assertTrue(is_string(status))
         self.assertIn(status, known_statuses)
 def test_list_datasets(self):
     """Validate the shape of every entry from ``list_datasets``.

     This is a smoke test: the data is fetched from the internet and
     changes over time, so only a lower bound on the count and the
     per-entry layout are asserted.
     """
     # 1087 as the number of datasets on openml.org
     lower_bound = 1087
     allowed = ['in_preparation', 'active', 'deactivated']

     all_datasets = openml.datasets.list_datasets()
     self.assertGreaterEqual(len(all_datasets), lower_bound)

     for item in all_datasets:
         self.assertEqual(type(item), dict)
         self.assertGreaterEqual(len(item), 2)

         self.assertIn('did', item)
         item_did = item['did']
         self.assertIsInstance(item_did, int)

         self.assertIn('status', item)
         item_status = item['status']
         self.assertTrue(is_string(item_status))
         self.assertIn(item_status, allowed)
Exemple #10
0
 def test_get_sparse_dataset(self):
     """Check ``self.sparse_dataset.get_dataset`` with and without extras."""
     n_rows, n_cols = 2, 20001

     # Basic usage: the result is checked as a float32 ndarray.
     data = self.sparse_dataset.get_dataset()
     self.assertIsInstance(data, np.ndarray)
     self.assertEqual(data.dtype, np.float32)
     self.assertEqual((n_rows, n_cols), data.shape)

     # With the categorical indicator: the data is still checked as an
     # ndarray, and every indicator entry must be a bool.
     data, is_categorical = self.sparse_dataset.get_dataset(
         return_categorical_indicator=True)
     self.assertIsInstance(data, np.ndarray)
     self.assertEqual(len(is_categorical), n_cols)
     self.assertTrue(all(isinstance(flag, bool) for flag in is_categorical))

     # With attribute names: the data is still checked as an ndarray, and
     # every name must pass ``is_string``.
     data, names = self.sparse_dataset.get_dataset(
         return_attribute_names=True)
     self.assertIsInstance(data, np.ndarray)
     self.assertEqual(len(names), n_cols)
     self.assertTrue(all(is_string(name) for name in names))