def test_blobs():
    blobs = SG.Blobs(n_samples=50, n_features=2,
                     centers=[[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]],
                     random_state=0)
    X, y = blobs.classification_task()
    tasks.assert_classification(X, y)
    assert_equal(X.shape, (50, 2), "X shape mismatch")
    assert_equal(y.shape, (50,), "y shape mismatch")
    assert_equal(np.unique(y).shape, (3,), "Unexpected number of blobs")
def test_four_regions():
    four_regions = SG.FourRegions(n_samples=100, random_state=0)
    X, y = four_regions.classification_task()
    tasks.assert_classification(X, y, 100)
    assert_equal(X.shape, (100, 2), "X shape mismatch")
    assert_equal(y.shape, (100,), "y shape mismatch")
    assert_equal(np.unique(y).shape, (4,), "Unexpected number of classes")
    assert_equal(sum(y == 0), 22, "Unexpected number of samples in class #0")
    assert_equal(sum(y == 1), 31, "Unexpected number of samples in class #1")
    assert_equal(sum(y == 2), 24, "Unexpected number of samples in class #2")
    assert_equal(sum(y == 3), 23, "Unexpected number of samples in class #3")
def test_madelon():
    madelon = SG.Madelon(n_samples=100, n_features=20, n_informative=5,
                         n_redundant=1, n_repeated=1, n_classes=3,
                         n_clusters_per_class=1, hypercube=False,
                         shift=None, scale=None, weights=[0.1, 0.25],
                         random_state=0)
    X, y = madelon.classification_task()
    tasks.assert_classification(X, y, 100)
    assert_equal(X.shape, (100, 20), "X shape mismatch")
    assert_equal(y.shape, (100,), "y shape mismatch")
    assert_equal(np.unique(y).shape, (3,), "Unexpected number of classes")
    assert_equal(sum(y == 0), 10, "Unexpected number of samples in class #0")
    assert_equal(sum(y == 1), 25, "Unexpected number of samples in class #1")
    assert_equal(sum(y == 2), 65, "Unexpected number of samples in class #2")
def test_several():
    dsetnames = ['MNIST_Basic', 'MNIST_BackgroundImages',
                 'MNIST_BackgroundRandom', 'Rectangles',
                 'RectanglesImages', 'Convex']
    dsetnames.extend(['MNIST_Noise%i' % i for i in range(1, 7)])
    for dsetname in dsetnames:
        aa = dset(dsetname)
        # the meta list should cover every example across all three splits
        assert len(aa.meta) == sum(
            [aa.descr[s] for s in ('n_train', 'n_valid', 'n_test')])
        bb = dset(dsetname)
        assert aa.meta == bb.meta
        tasks.assert_classification(*aa.classification_task())
        tasks.assert_latent_structure(aa.latent_structure_task())
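# test_several builds each dataset through a small dset() factory that is not
# shown in this excerpt. A plausible sketch, assuming it looks the class up by
# name on the larochelle_etal_2007 module used elsewhere in these tests and
# disables downloads (an assumption about the helper, not its actual
# definition):
def dset(name):
    obj = getattr(larochelle_etal_2007, name)()
    obj.DOWNLOAD_IF_MISSING = False  # same guard the CIFAR10/MNIST tests set
    return obj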
def test_assert_classification(self):
    # things that work:
    tasks.assert_classification(rnd('float32', 4, 2), rnd('int8', 4))
    tasks.assert_classification(rnd('float64', 4, 2), rnd('uint64', 4))
    tasks.assert_classification(rnd('float64', 4, 2), rnd('uint64', 4), 4)

    # things that break:
    self.assertRaises(AssertionError, tasks.assert_classification,
                      rnd('int8', 4, 2), rnd('int8', 4))        # X not float
    self.assertRaises(AssertionError, tasks.assert_classification,
                      rnd('float32', 4, 2), rnd('float64', 4))  # y not int
    self.assertRaises(AssertionError, tasks.assert_classification,
                      rnd('float32', 4, 2), rnd('int8', 5))     # y wrong len
    self.assertRaises(AssertionError, tasks.assert_classification,
                      rnd('float32', 4, 2), rnd('int8', 4, 1))  # y wrong rank
    self.assertRaises(AssertionError, tasks.assert_classification,
                      rnd('float32', 4, 2), rnd('int8', 4, 7))  # y wrong rank
    self.assertRaises(AssertionError, tasks.assert_classification,
                      rnd('float32', 4, 2, 2), rnd('int8', 4))  # X wrong rank
    self.assertRaises(AssertionError, tasks.assert_classification,
                      rnd('float64', 4), rnd('int8', 4))        # X wrong rank
    self.assertRaises(AssertionError, tasks.assert_classification,
                      rnd('float64', 4, 3), rnd('int8', 4), 5)  # N mismatch
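# test_assert_classification relies on an `rnd` helper that is not shown in
# this excerpt. A minimal sketch of such a helper (an assumption, not the test
# module's actual definition): build an array of the requested dtype and
# shape; the values are irrelevant, only dtype, rank and length matter here.
import numpy as np

def rnd(dtype, *shape):
    """Return an array of the given dtype and shape for dtype/shape checks."""
    return np.random.RandomState(0).uniform(size=shape).astype(dtype)

# e.g. rnd('float32', 4, 2) -> a (4, 2) float32 design matrix,
#      rnd('int8', 4)       -> a length-4 int8 label vector.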
def test_classification_train_valid_test(self):
    dataset = larochelle_etal_2007.Rectangles()  # smallest one with splits
    assert not hasattr(dataset, 'classification_train_valid_test_task')
    train, valid, test = tasks.classification_train_valid_test(dataset)
    tasks.assert_classification(*train)
    tasks.assert_classification(*valid)
    tasks.assert_classification(*test)
    assert len(train[0]) == dataset.descr['n_train']
    assert len(valid[0]) == dataset.descr['n_valid']
    assert len(test[0]) == dataset.descr['n_test']
    tasks.assert_classification_train_valid_test(train, valid, test)
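# classification_train_valid_test is exercised above only through its
# contract: three (X, y) pairs whose sizes match descr['n_train'],
# descr['n_valid'] and descr['n_test']. A minimal sketch of such a split,
# assuming the examples are stored contiguously in train/valid/test order
# (an assumption, not the tasks module's implementation):
def split_train_valid_test(X, y, n_train, n_valid, n_test):
    assert len(X) == len(y) == n_train + n_valid + n_test
    train = (X[:n_train], y[:n_train])
    valid = (X[n_train:n_train + n_valid], y[n_train:n_train + n_valid])
    test = (X[n_train + n_valid:], y[n_train + n_valid:])
    return train, valid, test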
def test_classification():
    cifar = cifar10.CIFAR10()  # just make sure we can create the class
    cifar.DOWNLOAD_IF_MISSING = False
    X, y = cifar.classification_task()
    tasks.assert_classification(X, y, 60000)
def check_classification_Xy(X, y, N=None):
    assert_classification(X, y, N)
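# check_classification_Xy simply forwards to assert_classification. The
# behaviour pinned down by test_assert_classification above (float rank-2 X,
# integer rank-1 y, matching lengths, optional example count N) could be
# implemented along these lines -- a sketch consistent with those tests, not
# the library's actual code:
import numpy as np

def assert_classification(X, y, N=None):
    X = np.asarray(X)
    y = np.asarray(y)
    assert X.ndim == 2, "X must be a rank-2 design matrix"
    assert X.dtype.kind == 'f', "X must be a float array"
    assert y.ndim == 1, "y must be a rank-1 label vector"
    assert y.dtype.kind in 'iu', "y must be an integer array"
    assert len(X) == len(y), "X and y must have the same number of rows"
    if N is not None:
        assert len(X) == N, "unexpected number of examples"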
    assert aa.meta[10000] == dict(id=10000, label=3, split='valid')
    assert aa.meta[11999] == dict(id=11999, label=3, split='valid')
    assert aa.meta[12000] == dict(id=12000, label=7, split='test')
    assert aa.meta[50000] == dict(id=50000, label=3, split='test')
    assert aa.meta[61989] == dict(id=61989, label=4, split='test')
    assert len(aa.meta) == 62000
    bb = dset(dsetname)
    assert bb.meta == aa.meta
def test_MNIST_classification():
    M = mnist.MNIST()  # just make sure we can create the class
    M.DOWNLOAD_IF_MISSING = False
    X, y = M.classification_task()
    tasks.assert_classification(X, y, 70000)