def setUp(self): # Load dataset id 1 __file__ = inspect.getfile(OpenMLDatasetTest) self.directory = os.path.dirname(__file__) self.arff_filename = os.path.join(self.directory, "..", "files", "datasets", "2", "dataset.arff") self.pickle_filename = os.path.join(self.directory, "..", "files", "datasets", "2", "dataset.pkl") self.dataset = OpenMLDataset( 1, "anneal", 2, "Lorem ipsum.", "arff", None, None, None, "2014-04-06 23:19:24", None, "Public", "http://openml.liacs.nl/files/download/2/dataset_2_anneal.ORIG.arff", "class", None, None, None, None, None, None, None, None, None, "939966a711925e333bf4aaadeaa71135", data_file=self.arff_filename) self.sparse_arff_filename = os.path.join( self.directory, "..", "files", "datasets", "-1", "dataset.arff") self.sparse_pickle_filename = os.path.join( self.directory, "..", "files", "datasets", "-1", "dataset.pkl") self.sparse_dataset = OpenMLDataset( -1, "dexter", -1, "Lorem ipsum.", "arff", None, None, None, None, None, "Public", "http://www.cs.ubc.ca/labs/beta/Projects/autoweka/datasets/dexter.zip", "class", None, None, None, None, None, None, None, None, None, None, data_file=self.sparse_arff_filename)
def test_data_status(self): dataset = OpenMLDataset( "%s-UploadTestWithURL" % self._get_sentinel(), "test", "ARFF", version=1, url="https://www.openml.org/data/download/61/dataset_61_iris.arff") dataset.publish() did = dataset.dataset_id # admin key for test server (only adminds can activate datasets. # all users can deactivate their own datasets) openml.config.apikey = 'd488d8afd93b32331cf6ea9d7003d4c3' openml.datasets.status_update(did, 'active') # need to use listing fn, as this is immune to cache result = openml.datasets.list_datasets(data_id=did, status='all') self.assertEqual(len(result), 1) self.assertEqual(result[did]['status'], 'active') openml.datasets.status_update(did, 'deactivated') # need to use listing fn, as this is immune to cache result = openml.datasets.list_datasets(data_id=did, status='all') self.assertEqual(len(result), 1) self.assertEqual(result[did]['status'], 'deactivated') openml.datasets.status_update(did, 'active') # need to use listing fn, as this is immune to cache result = openml.datasets.list_datasets(data_id=did, status='all') self.assertEqual(len(result), 1) self.assertEqual(result[did]['status'], 'active') with self.assertRaises(ValueError): openml.datasets.status_update(did, 'in_preparation') # need to use listing fn, as this is immune to cache result = openml.datasets.list_datasets(data_id=did, status='all') self.assertEqual(len(result), 1) self.assertEqual(result[did]['status'], 'active')
def test_upload_dataset_with_url(self): dataset = OpenMLDataset( name="UploadTestWithURL", version=1, description="test", format="ARFF", url="https://www.openml.org/data/download/61/dataset_61_iris.arff") dataset.publish() self.assertIsInstance(dataset.dataset_id, int)
def setUp(self): # Load dataset id 1 __file__ = inspect.getfile(OpenMLDatasetTest) self.directory = os.path.dirname(__file__) self.arff_filename = os.path.join(self.directory, "..", "files", "datasets", "2", "dataset.arff") self.pickle_filename = os.path.join(self.directory, "..", "files", "datasets", "2", "dataset.pkl") self.dataset = OpenMLDataset( 1, "anneal", 2, "Lorem ipsum.", "arff", None, None, None, "2014-04-06 23:19:24", None, "Public", "http://openml.liacs.nl/files/download/2/dataset_2_anneal.ORIG.arff", "class", None, None, None, None, None, None, None, None, None, "939966a711925e333bf4aaadeaa71135", data_file=self.arff_filename) self.sparse_arff_filename = os.path.join( self.directory, "..", "files", "datasets", "-1", "dataset.arff") self.sparse_pickle_filename = os.path.join( self.directory, "..", "files", "datasets", "-1", "dataset.pkl") self.sparse_dataset = OpenMLDataset( -1, "dexter", -1, "Lorem ipsum.", "arff", None, None, None, None, None, "Public", "http://www.cs.ubc.ca/labs/beta/Projects/autoweka/datasets/dexter.zip", "class", None, None, None, None, None, None, None, None, None, None, data_file=self.sparse_arff_filename)
def test_upload_dataset_with_url(self): dataset = OpenMLDataset( name="UploadTestWithURL", version=1, description="test", format="ARFF", url="http://expdb.cs.kuleuven.be/expdb/data/uci/nominal/iris.arff") dataset.publish() self.assertIsInstance(dataset.dataset_id, int)
def test_upload_dataset_with_url(self): dataset = OpenMLDataset( name="UploadTestWithURL", version=1, description="test", format="ARFF", url="http://expdb.cs.kuleuven.be/expdb/data/uci/nominal/iris.arff") return_code, return_value = dataset.publish() # self.assertTrue("This is a read-only account" in return_value) self.assertEqual(return_code, 200)
def test_upload_dataset_with_url(self): dataset = OpenMLDataset( name="UploadTestWithURL", version=1, description="test", format="ARFF", url="http://expdb.cs.kuleuven.be/expdb/data/uci/nominal/iris.arff") dataset.publish() self.assertIsInstance(dataset.dataset_id, int)
def test_upload_dataset_with_url(self): dataset = OpenMLDataset( name="UploadTestWithURL", version=1, description="test", format="ARFF", url="http://www.cs.umb.edu/~rickb/files/UCI/anneal.arff") dataset.publish() self.assertIsInstance(dataset.dataset_id, int)
def test_publish_dataset(self): dataset = openml.datasets.get_dataset(3) file_path = os.path.join(openml.config.get_cache_directory(), "datasets", "3", "dataset.arff") dataset = OpenMLDataset( name="anneal", version=1, description="test", format="ARFF", licence="public", default_target_attribute="class", data_file=file_path) dataset.publish() self.assertIsInstance(dataset.dataset_id, int)
def test_publish_dataset(self): dataset = openml.datasets.get_dataset(3) file_path = os.path.join(openml.config.get_cache_directory(), "datasets", "3", "dataset.arff") dataset = OpenMLDataset( name="anneal", version=1, description="test", format="ARFF", licence="public", default_target_attribute="class", data_file=file_path) dataset.publish() self.assertIsInstance(dataset.dataset_id, int)
def test_publish_dataset(self): dataset = openml.datasets.get_dataset(3) file_path = os.path.join(openml.config.get_cache_directory(), "datasets", "3", "dataset.arff") dataset = OpenMLDataset( name="anneal", version=1, description="test", format="ARFF", licence="public", default_target_attribute="class", data_file=file_path) return_code, return_value = dataset.publish() # self.assertTrue("This is a read-only account" in return_value) self.assertEqual(return_code, 200)
def test_upload_dataset_with_url(self): dataset = OpenMLDataset( "%s-UploadTestWithURL" % self._get_sentinel(), "test", data_format="arff", version=1, url="https://www.openml.org/data/download/61/dataset_61_iris.arff", ) dataset.publish() self.assertIsInstance(dataset.dataset_id, int)
def test_publish_dataset(self): # lazy loading not possible as we need the arff-file. openml.datasets.get_dataset(3) file_path = os.path.join(openml.config.get_cache_directory(), "datasets", "3", "dataset.arff") dataset = OpenMLDataset( "anneal", "test", data_format="arff", version=1, licence="public", default_target_attribute="class", data_file=file_path, ) dataset.publish() self.assertIsInstance(dataset.dataset_id, int)
class OpenMLDatasetTest(unittest.TestCase): def setUp(self): # Load dataset id 1 __file__ = inspect.getfile(OpenMLDatasetTest) self.directory = os.path.dirname(__file__) self.arff_filename = os.path.join(self.directory, "..", "files", "datasets", "2", "dataset.arff") self.pickle_filename = os.path.join(self.directory, "..", "files", "datasets", "2", "dataset.pkl") self.dataset = OpenMLDataset( 1, "anneal", 2, "Lorem ipsum.", "arff", None, None, None, "2014-04-06 23:19:24", None, "Public", "http://openml.liacs.nl/files/download/2/dataset_2_anneal.ORIG.arff", "class", None, None, None, None, None, None, None, None, None, "939966a711925e333bf4aaadeaa71135", data_file=self.arff_filename) self.sparse_arff_filename = os.path.join( self.directory, "..", "files", "datasets", "-1", "dataset.arff") self.sparse_pickle_filename = os.path.join( self.directory, "..", "files", "datasets", "-1", "dataset.pkl") self.sparse_dataset = OpenMLDataset( -1, "dexter", -1, "Lorem ipsum.", "arff", None, None, None, None, None, "Public", "http://www.cs.ubc.ca/labs/beta/Projects/autoweka/datasets/dexter.zip", "class", None, None, None, None, None, None, None, None, None, None, data_file=self.sparse_arff_filename) def tearDown(self): for file_ in [self.pickle_filename, self.sparse_pickle_filename]: os.remove(file_) ########################################################################## # Pandas @unittest.skip("Does not work right now") def test_get_arff(self): rval = self.dataset.get_arff() self.assertIsInstance(rval, tuple) self.assertIsInstance(rval[0], np.ndarray) self.assertTrue(hasattr(rval[1], '__dict__')) self.assertEqual(rval[0].shape, (898, )) def test_get_data(self): # Basic usage rval = self.dataset.get_data() self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual((898, 39), rval.shape) rval, categorical = self.dataset.get_data( return_categorical_indicator=True) self.assertEqual(len(categorical), 39) self.assertTrue(all([isinstance(cat, bool) for cat in categorical])) rval, attribute_names = self.dataset.get_data( return_attribute_names=True) self.assertEqual(len(attribute_names), 39) self.assertTrue(all([is_string(att) for att in attribute_names])) def test_get_sparse_dataset(self): rval = self.sparse_dataset.get_data() self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual((2, 20001), rval.shape) rval, categorical = self.sparse_dataset.get_data( return_categorical_indicator=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(len(categorical), 20001) self.assertTrue(all([isinstance(cat, bool) for cat in categorical])) rval, attribute_names = self.sparse_dataset.get_data( return_attribute_names=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(len(attribute_names), 20001) self.assertTrue(all([is_string(att) for att in attribute_names])) def test_get_data_with_target(self): X, y = self.dataset.get_data(target="class") self.assertIsInstance(X, np.ndarray) self.assertEqual(X.dtype, np.float32) self.assertIn(y.dtype, [np.int32, np.int64]) self.assertEqual(X.shape, (898, 38)) X, y, attribute_names = self.dataset.get_data( target="class", return_attribute_names=True) self.assertEqual(len(attribute_names), 38) self.assertNotIn("class", attribute_names) self.assertEqual(y.shape, (898, )) def test_get_sparse_dataset_with_target(self): X, y = self.sparse_dataset.get_data(target="class") self.assertIsInstance(X, np.ndarray) self.assertEqual(X.dtype, np.float32) self.assertIsInstance(y, np.ndarray) self.assertIn(y.dtype, [np.int32, np.int64]) self.assertEqual(X.shape, (2, 20000)) X, y, attribute_names = self.sparse_dataset.get_data( target="class", return_attribute_names=True) self.assertIsInstance(X, np.ndarray) self.assertEqual(len(attribute_names), 20000) self.assertNotIn("class", attribute_names) self.assertEqual(y.shape, (2, )) def test_get_data_with_rowid(self): self.dataset.row_id_attribute = "condition" rval, categorical = self.dataset.get_data( include_row_id=True, return_categorical_indicator=True) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (898, 39)) self.assertEqual(len(categorical), 39) rval, categorical = self.dataset.get_data( include_row_id=False, return_categorical_indicator=True) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (898, 38)) self.assertEqual(len(categorical), 38) # TODO this is not yet supported! #rowid = ["condition", "formability"] #self.dataset.row_id_attribute = rowid #rval = self.dataset.get_pandas(include_row_id=False) def test_get_sparse_dataset_with_rowid(self): self.sparse_dataset.row_id_attribute = "a_0" rval, categorical = self.sparse_dataset.get_data( include_row_id=True, return_categorical_indicator=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (2, 20001)) self.assertEqual(len(categorical), 20001) rval, categorical = self.sparse_dataset.get_data( include_row_id=False, return_categorical_indicator=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (2, 20000)) self.assertEqual(len(categorical), 20000) # TODO this is not yet supported! # rowid = ["condition", "formability"] #self.dataset.row_id_attribute = rowid #rval = self.dataset.get_pandas(include_row_id=False) def test_get_data_with_ignore_attributes(self): self.dataset.ignore_attributes = "condition" rval = self.dataset.get_data(include_ignore_attributes=True) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (898, 39)) rval, categorical = self.dataset.get_data( include_ignore_attributes=True, return_categorical_indicator=True) self.assertEqual(len(categorical), 39) rval = self.dataset.get_data(include_ignore_attributes=False) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (898, 38)) rval, categorical = self.dataset.get_data( include_ignore_attributes=False, return_categorical_indicator=True) self.assertEqual(len(categorical), 38) # TODO test multiple ignore attributes! def test_get_sparse_dataset_with_ignore_attributes(self): self.sparse_dataset.ignore_attributes = "a_0" rval = self.sparse_dataset.get_data(include_ignore_attributes=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (2, 20001)) rval, categorical = self.sparse_dataset.get_data( include_ignore_attributes=True, return_categorical_indicator=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(len(categorical), 20001) rval = self.sparse_dataset.get_data(include_ignore_attributes=False) self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (2, 20000)) rval, categorical = self.sparse_dataset.get_data( include_ignore_attributes=False, return_categorical_indicator=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(len(categorical), 20000) # TODO test multiple ignore attributes! def test_get_data_rowid_and_ignore_and_target(self): self.dataset.ignore_attributes = "condition" self.dataset.row_id_attribute = "hardness" X, y = self.dataset.get_data(target="class", include_row_id=False, include_ignore_attributes=False) self.assertEqual(X.dtype, np.float32) self.assertIn(y.dtype, [np.int32, np.int64]) self.assertEqual(X.shape, (898, 36)) X, y, categorical = self.dataset.get_data( target="class", return_categorical_indicator=True) self.assertEqual(len(categorical), 36) self.assertListEqual(categorical, [True] * 3 + [False] + [True] * 2 + [ False] + [True] * 23 + [False] * 3 + [True] * 3) self.assertEqual(y.shape, (898, )) def test_get_sparse_dataset_rowid_and_ignore_and_target(self): self.sparse_dataset.ignore_attributes = "a_0" self.sparse_dataset.row_id_attribute = "a_1" X, y = self.sparse_dataset.get_data( target="class", include_row_id=False, include_ignore_attributes=False) self.assertIsInstance(X, np.ndarray) self.assertEqual(X.dtype, np.float32) self.assertIn(y.dtype, [np.int32, np.int64]) self.assertEqual(X.shape, (2, 19998)) X, y, categorical = self.sparse_dataset.get_data( target="class", return_categorical_indicator=True) self.assertIsInstance(X, np.ndarray) self.assertEqual(len(categorical), 19998) self.assertListEqual(categorical, [False] * 19998) self.assertEqual(y.shape, (2, ))
class OpenMLDatasetTest(unittest.TestCase): def setUp(self): # Load dataset id 1 __file__ = inspect.getfile(OpenMLDatasetTest) self.directory = os.path.dirname(__file__) self.arff_filename = os.path.join(self.directory, "..", "files", "datasets", "2", "dataset.arff") self.pickle_filename = os.path.join(self.directory, "..", "files", "datasets", "2", "dataset.pkl") self.dataset = OpenMLDataset( 1, "anneal", 2, "Lorem ipsum.", "arff", None, None, None, "2014-04-06 23:19:24", None, "Public", "http://openml.liacs.nl/files/download/2/dataset_2_anneal.ORIG.arff", "class", None, None, None, None, None, None, None, None, None, "939966a711925e333bf4aaadeaa71135", data_file=self.arff_filename) self.sparse_arff_filename = os.path.join( self.directory, "..", "files", "datasets", "-1", "dataset.arff") self.sparse_pickle_filename = os.path.join( self.directory, "..", "files", "datasets", "-1", "dataset.pkl") self.sparse_dataset = OpenMLDataset( -1, "dexter", -1, "Lorem ipsum.", "arff", None, None, None, None, None, "Public", "http://www.cs.ubc.ca/labs/beta/Projects/autoweka/datasets/dexter.zip", "class", None, None, None, None, None, None, None, None, None, None, data_file=self.sparse_arff_filename) def tearDown(self): for file_ in [self.pickle_filename, self.sparse_pickle_filename]: os.remove(file_) ########################################################################## # Pandas @unittest.skip("Does not work right now") def test_get_arff(self): rval = self.dataset.get_arff() self.assertIsInstance(rval, tuple) self.assertIsInstance(rval[0], np.ndarray) self.assertTrue(hasattr(rval[1], '__dict__')) self.assertEqual(rval[0].shape, (898, )) def test_get_data(self): # Basic usage rval = self.dataset.get_data() self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual((898, 39), rval.shape) rval, categorical = self.dataset.get_data( return_categorical_indicator=True) self.assertEqual(len(categorical), 39) self.assertTrue(all([isinstance(cat, bool) for cat in categorical])) rval, attribute_names = self.dataset.get_data( return_attribute_names=True) self.assertEqual(len(attribute_names), 39) self.assertTrue(all([is_string(att) for att in attribute_names])) def test_get_sparse_dataset(self): rval = self.sparse_dataset.get_data() self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual((2, 20001), rval.shape) rval, categorical = self.sparse_dataset.get_data( return_categorical_indicator=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(len(categorical), 20001) self.assertTrue(all([isinstance(cat, bool) for cat in categorical])) rval, attribute_names = self.sparse_dataset.get_data( return_attribute_names=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(len(attribute_names), 20001) self.assertTrue(all([is_string(att) for att in attribute_names])) def test_get_data_with_target(self): X, y = self.dataset.get_data(target="class") self.assertIsInstance(X, np.ndarray) self.assertEqual(X.dtype, np.float32) self.assertEqual(y.dtype, np.int64) self.assertEqual(X.shape, (898, 38)) X, y, attribute_names = self.dataset.get_data( target="class", return_attribute_names=True) self.assertEqual(len(attribute_names), 38) self.assertNotIn("class", attribute_names) self.assertEqual(y.shape, (898, )) def test_get_sparse_dataset_with_target(self): X, y = self.sparse_dataset.get_data(target="class") self.assertIsInstance(X, np.ndarray) self.assertEqual(X.dtype, np.float32) self.assertIsInstance(y, np.ndarray) self.assertEqual(y.dtype, np.int64) self.assertEqual(X.shape, (2, 20000)) X, y, attribute_names = self.sparse_dataset.get_data( target="class", return_attribute_names=True) self.assertIsInstance(X, np.ndarray) self.assertEqual(len(attribute_names), 20000) self.assertNotIn("class", attribute_names) self.assertEqual(y.shape, (2, )) def test_get_data_with_rowid(self): self.dataset.row_id_attribute = "condition" rval, categorical = self.dataset.get_data( include_row_id=True, return_categorical_indicator=True) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (898, 39)) self.assertEqual(len(categorical), 39) rval, categorical = self.dataset.get_data( include_row_id=False, return_categorical_indicator=True) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (898, 38)) self.assertEqual(len(categorical), 38) # TODO this is not yet supported! #rowid = ["condition", "formability"] #self.dataset.row_id_attribute = rowid #rval = self.dataset.get_pandas(include_row_id=False) def test_get_sparse_dataset_with_rowid(self): self.sparse_dataset.row_id_attribute = "a_0" rval, categorical = self.sparse_dataset.get_data( include_row_id=True, return_categorical_indicator=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (2, 20001)) self.assertEqual(len(categorical), 20001) rval, categorical = self.sparse_dataset.get_data( include_row_id=False, return_categorical_indicator=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (2, 20000)) self.assertEqual(len(categorical), 20000) # TODO this is not yet supported! # rowid = ["condition", "formability"] #self.dataset.row_id_attribute = rowid #rval = self.dataset.get_pandas(include_row_id=False) def test_get_data_with_ignore_attributes(self): self.dataset.ignore_attributes = "condition" rval = self.dataset.get_data(include_ignore_attributes=True) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (898, 39)) rval, categorical = self.dataset.get_data( include_ignore_attributes=True, return_categorical_indicator=True) self.assertEqual(len(categorical), 39) rval = self.dataset.get_data(include_ignore_attributes=False) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (898, 38)) rval, categorical = self.dataset.get_data( include_ignore_attributes=False, return_categorical_indicator=True) self.assertEqual(len(categorical), 38) # TODO test multiple ignore attributes! def test_get_sparse_dataset_with_ignore_attributes(self): self.sparse_dataset.ignore_attributes = "a_0" rval = self.sparse_dataset.get_data(include_ignore_attributes=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (2, 20001)) rval, categorical = self.sparse_dataset.get_data( include_ignore_attributes=True, return_categorical_indicator=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(len(categorical), 20001) rval = self.sparse_dataset.get_data(include_ignore_attributes=False) self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual(rval.shape, (2, 20000)) rval, categorical = self.sparse_dataset.get_data( include_ignore_attributes=False, return_categorical_indicator=True) self.assertIsInstance(rval, np.ndarray) self.assertEqual(len(categorical), 20000) # TODO test multiple ignore attributes! def test_get_data_rowid_and_ignore_and_target(self): self.dataset.ignore_attributes = "condition" self.dataset.row_id_attribute = "hardness" X, y = self.dataset.get_data(target="class", include_row_id=False, include_ignore_attributes=False) self.assertEqual(X.dtype, np.float32) self.assertEqual(y.dtype, np.int64) self.assertEqual(X.shape, (898, 36)) X, y, categorical = self.dataset.get_data( target="class", return_categorical_indicator=True) self.assertEqual(len(categorical), 36) self.assertListEqual(categorical, [True] * 3 + [False] + [True] * 2 + [ False] + [True] * 23 + [False] * 3 + [True] * 3) self.assertEqual(y.shape, (898, )) def test_get_sparse_dataset_rowid_and_ignore_and_target(self): self.sparse_dataset.ignore_attributes = "a_0" self.sparse_dataset.row_id_attribute = "a_1" X, y = self.sparse_dataset.get_data( target="class", include_row_id=False, include_ignore_attributes=False) self.assertIsInstance(X, np.ndarray) self.assertEqual(X.dtype, np.float32) self.assertEqual(y.dtype, np.int64) self.assertEqual(X.shape, (2, 19998)) X, y, categorical = self.sparse_dataset.get_data( target="class", return_categorical_indicator=True) self.assertIsInstance(X, np.ndarray) self.assertEqual(len(categorical), 19998) self.assertListEqual(categorical, [False] * 19998) self.assertEqual(y.shape, (2, ))