Example #1
def setup(testobj):
    skip_if_not_available(modules=['nose2'])
    # Not importing unicode_literal because it gives problems
    # If needed, see https://dirkjan.ochtman.nl/writing/2014/07/06/
    # single-source-python-23-doctests.html for a solution
    testobj.globs['absolute_import'] = absolute_import
    testobj.globs['print_function'] = print_function
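Every example on this page guards its body with skip_if_not_available, a helper defined in Fuel's test suite that skips the test when an optional module or dataset is missing. As a rough orientation only, here is a minimal sketch of such a helper; the SkipTest exception and the single-directory config.data_path are assumptions (the latter mirrors the data_path usage in the test_mnist_data_path examples further down the page), and the real helper may differ in signature and checks.

# A minimal sketch, not Fuel's actual implementation: assumes nose-style
# SkipTest and a single config.data_path directory.
import importlib
import os

from nose.plugins.skip import SkipTest

from fuel import config


def skip_if_not_available(modules=None, datasets=None):
    """Skip the calling test when an optional dependency is absent."""
    for module in modules or []:
        try:
            importlib.import_module(module)
        except ImportError:
            raise SkipTest('Missing module: {}'.format(module))
    for dataset in datasets or []:
        # Datasets are looked up as files under the configured data path.
        if not os.path.exists(os.path.join(config.data_path, dataset)):
            raise SkipTest('Missing dataset: {}'.format(dataset))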
Example #2
File: test_mnist.py Project: nagyist/fuel
def test_mnist():
    skip_if_not_available(datasets=['mnist'])
    mnist_train = MNIST('train', start=20000)
    assert len(mnist_train.features) == 40000
    assert len(mnist_train.targets) == 40000
    assert mnist_train.num_examples == 40000
    mnist_test = MNIST('test', sources=('targets', ))
    assert len(mnist_test.targets) == 10000
    assert mnist_test.num_examples == 10000

    first_feature, first_target = mnist_train.get_data(request=[0])
    assert first_feature.shape == (1, 784)
    assert first_feature.dtype.kind == 'f'
    assert first_target.shape == (1, 1)
    assert first_target.dtype is numpy.dtype('uint8')

    first_target, = mnist_test.get_data(request=[0, 1])
    assert first_target.shape == (2, 1)

    binary_mnist = MNIST('test', binary=True, sources=('features', ))
    first_feature, = binary_mnist.get_data(request=[0])
    assert first_feature.dtype.kind == 'b'
    assert_raises(ValueError, MNIST, 'valid')

    mnist_train = cPickle.loads(cPickle.dumps(mnist_train))
    assert len(mnist_train.features) == 40000

    mnist_test_unflattened = MNIST('test', flatten=False)
    assert mnist_test_unflattened.features.shape == (10000, 28, 28)
Example #3
File: __init__.py Project: Afrik/fuel
def setup(testobj):
    skip_if_not_available(modules=['nose2'])
    # Not importing unicode_literal because it gives problems
    # If needed, see https://dirkjan.ochtman.nl/writing/2014/07/06/
    # single-source-python-23-doctests.html for a solution
    testobj.globs['absolute_import'] = absolute_import
    testobj.globs['print_function'] = print_function
Example #4
File: test_mnist.py Project: jfsantos/fuel
def test_mnist():
    skip_if_not_available(datasets=['mnist'])
    mnist_train = MNIST('train', start=20000)
    assert len(mnist_train.features) == 40000
    assert len(mnist_train.targets) == 40000
    assert mnist_train.num_examples == 40000
    mnist_test = MNIST('test', sources=('targets',))
    assert len(mnist_test.targets) == 10000
    assert mnist_test.num_examples == 10000

    first_feature, first_target = mnist_train.get_data(request=[0])
    assert first_feature.shape == (1, 784)
    assert first_feature.dtype.kind == 'f'
    assert first_target.shape == (1, 1)
    assert first_target.dtype is numpy.dtype('uint8')

    first_target, = mnist_test.get_data(request=[0, 1])
    assert first_target.shape == (2, 1)

    binary_mnist = MNIST('test', binary=True, sources=('features',))
    first_feature, = binary_mnist.get_data(request=[0])
    assert first_feature.dtype.kind == 'b'
    assert_raises(ValueError, MNIST, 'valid')

    mnist_train = cPickle.loads(cPickle.dumps(mnist_train))
    assert len(mnist_train.features) == 40000

    mnist_test_unflattened = MNIST('test', flatten=False)
    assert mnist_test_unflattened.features.shape == (10000, 28, 28)
Example #5
File: test_adult.py Project: Afrik/fuel
def test_adult_test():
    skip_if_not_available(datasets=['adult.hdf5'])

    dataset = Adult(('test',), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))

    assert data.shape == (10, 104)
    assert labels.shape == (10, 1)
    known = numpy.array(
        [25., 38., 28., 44., 34., 63., 24., 55., 65., 36.])
    assert_allclose(data[:, 0], known)
    assert dataset.num_examples == 15060
    dataset.close(handle)

    dataset = Adult(('train',), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))

    assert data.shape == (10, 104)
    assert labels.shape == (10, 1)
    known = numpy.array(
        [39., 50., 38., 53., 28., 37., 49., 52., 31., 42.])
    assert_allclose(data[:, 0], known)
    assert dataset.num_examples == 30162
    dataset.close(handle)
Example #6
def test_in_memory():
    skip_if_not_available(datasets=['mnist.hdf5'])
    # Load MNIST and get two batches
    mnist = MNIST('train', load_in_memory=True)
    data_stream = DataStream(mnist,
                             iteration_scheme=SequentialScheme(
                                 examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(256, 512))
    mnist.close(handle)
    assert numpy.all(features == known_features)

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        filename = f.name
        cPickle.dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(512, 768))
    mnist.close(handle)
    assert numpy.all(features == known_features)
Example #7
def test_in_memory():
    skip_if_not_available(datasets=['mnist'])
    # Load MNIST and get two batches
    mnist = MNIST('train')
    data_stream = DataStream(mnist,
                             iteration_scheme=SequentialScheme(
                                 num_examples=mnist.num_examples,
                                 batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    assert numpy.all(features == mnist.features[256:512])

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        filename = f.name
        pickle_dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    assert numpy.all(features == mnist.features[512:768])
Example #8
def test_in_memory():
    skip_if_not_available(datasets=['mnist.hdf5'])
    # Load MNIST and get two batches
    mnist = MNIST(('train',), load_in_memory=True)
    data_stream = DataStream(mnist, iteration_scheme=SequentialScheme(
        examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(256, 512))
    mnist.close(handle)
    assert numpy.all(features == known_features)

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        filename = f.name
        cPickle.dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(512, 768))
    mnist.close(handle)
    assert numpy.all(features == known_features)
Example #9
def test_read_devkit():
    skip_if_not_available(datasets=[DEVKIT_ARCHIVE])
    synsets, raw_valid_gt = read_devkit(find_in_data_path(DEVKIT_ARCHIVE))
    # synset sanity tests appear in test_read_metadata_mat_file
    assert raw_valid_gt.min() == 1
    assert raw_valid_gt.max() == 1000
    assert raw_valid_gt.dtype.kind == 'i'
    assert raw_valid_gt.shape == (50000,)
Example #10
def test_adult_axes():
    skip_if_not_available(datasets=['adult.hdf5'])

    dataset = Adult(('test', ), load_in_memory=False)
    assert_equal(dataset.axis_labels['features'], ('batch', 'feature'))

    dataset = Adult(('train', ), load_in_memory=False)
    assert_equal(dataset.axis_labels['features'], ('batch', 'feature'))
Example #11
def test_match_column_specs():
    skip_if_not_available(modules=['pandas'])
    experiments = some_experiments()
    specs = ['0:col0', '*1']
    df = plot.match_column_specs(experiments, specs)

    assert isinstance(df, DataFrame)
    assert list(df.columns) == ['0:col0', '0:col1', '1:col1']
Example #12
def test_match_column_specs():
    skip_if_not_available(modules=['pandas'])
    experiments = some_experiments()
    specs = ['0:col0', '*1']
    df = plot.match_column_specs(experiments, specs)

    assert isinstance(df, DataFrame)
    assert list(df.columns) == ['0:col0', '0:col1', '1:col1']
Example #13
def test_binarized_mnist_test():
    skip_if_not_available(datasets=['binarized_mnist'])

    mnist_test = BinarizedMNIST('test')
    handle = mnist_test.open()
    data = mnist_test.get_data(handle, slice(0, 10000))[0]
    assert data.shape == (10000, 1, 28, 28)
    assert mnist_test.num_examples == 10000
    mnist_test.close(handle)
Example #14
def test_binarized_mnist_no_split():
    skip_if_not_available(datasets=['binarized_mnist'])

    dataset = BinarizedMNIST()
    handle = dataset.open()
    data = dataset.get_data(handle, slice(0, 70000))[0]
    assert data.shape == (70000, 1, 28, 28)
    assert dataset.num_examples == 70000
    dataset.close(handle)
Example #15
def test_match_column_specs():
    skip_if_not_available(modules=["pandas"])
    experiments = some_experiments()
    specs = ["0:col0", "*1"]
    df = plot.match_column_specs(experiments, specs)

    assert isinstance(df, DataFrame)
    assert list(df.columns) == ["0:col0", "0:col1", "1:col1"]
    assert list(df.index) == [0, 1, 2, 3]
Example #16
def test_mnist():
    skip_if_not_available(modules=['bokeh'])
    with tempfile.NamedTemporaryFile() as f:
        mnist_test(f.name, 1)
        with open(f.name, "rb") as source:
            main_loop = cPickle.load(source)
        main_loop.find_extension("FinishAfter").set_conditions(
            after_n_epochs=2)
        main_loop.run()
        assert main_loop.log.status.epochs_done == 2
Example #17
def test_prepare_metadata():
    skip_if_not_available(datasets=[DEVKIT_ARCHIVE, TEST_IMAGES_TAR])
    devkit_path = find_in_data_path(DEVKIT_ARCHIVE)
    n_train, v_gt, n_test, wnid_map = prepare_metadata(devkit_path)
    assert n_train == 1281167
    assert len(v_gt) == 50000
    assert n_test == 100000
    assert sorted(wnid_map.values()) == list(range(1000))
    assert all(isinstance(k, six.string_types) and len(k) == 9
               for k in wnid_map)
Example #18
File: test_adult.py Project: Afrik/fuel
def test_adult_axes():
    skip_if_not_available(datasets=['adult.hdf5'])

    dataset = Adult(('test',), load_in_memory=False)
    assert_equal(dataset.axis_labels['features'],
                 ('batch', 'feature'))

    dataset = Adult(('train',), load_in_memory=False)
    assert_equal(dataset.axis_labels['features'],
                 ('batch', 'feature'))
Example #19
def test_mnist():
    skip_if_not_available(modules=['bokeh'])
    with tempfile.NamedTemporaryFile() as f:
        mnist_test(f.name, 1, True)
        with open(f.name, "rb") as source:
            main_loop = cPickle.load(source)
        main_loop.find_extension("FinishAfter").set_conditions(
            after_n_epochs=2)
        main_loop.run()
        assert main_loop.log.status['epochs_done'] == 2
Example #20
def test_reverse_words():
    skip_if_not_available(modules=['bokeh'])
    old_limit = blocks.config.recursion_limit
    blocks.config.recursion_limit = 100000
    with tempfile.NamedTemporaryFile() as f_save,\
            tempfile.NamedTemporaryFile() as f_data:
        with open(f_data.name, 'wt') as data:
            for i in range(10):
                print("A line.", file=data)
        reverse_words_test("train", f_save.name, 1, [f_data.name])
    blocks.config.recursion_limit = old_limit
Example #21
def test_reverse_words():
    skip_if_not_available(modules=['bokeh'])
    old_limit = config.recursion_limit
    config.recursion_limit = 100000
    with tempfile.NamedTemporaryFile() as f_save,\
            tempfile.NamedTemporaryFile() as f_data:
        with open(f_data.name, 'wt') as data:
            for i in range(10):
                print("A line.", file=data)
        reverse_words_test("train", f_save.name, 1, [f_data.name])
    config.recursion_limit = old_limit
Example #22
def test_binarized_mnist_train():
    skip_if_not_available(datasets=['binarized_mnist.hdf5'])

    dataset = BinarizedMNIST('train', load_in_memory=False)
    handle = dataset.open()
    data, = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert hashlib.md5(data).hexdigest() == '0922fefc9a9d097e3b086b89107fafce'
    assert dataset.num_examples == 50000
    dataset.close(handle)
Example #23
def test_binarized_mnist_valid():
    skip_if_not_available(datasets=['binarized_mnist.hdf5'])

    dataset = BinarizedMNIST('valid', load_in_memory=False)
    handle = dataset.open()
    data, = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert hashlib.md5(data).hexdigest() == '65e8099613162b3110a7618037011617'
    assert dataset.num_examples == 10000
    dataset.close(handle)
Example #24
def test_binarized_mnist_test():
    skip_if_not_available(datasets=['binarized_mnist.hdf5'])

    dataset = BinarizedMNIST('test', load_in_memory=False)
    handle = dataset.open()
    data, = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert hashlib.md5(data).hexdigest() == '0fa539ed8cb008880a61be77f744f06a'
    assert dataset.num_examples == 10000
    dataset.close(handle)
Example #25
def test_prepare_metadata():
    skip_if_not_available(datasets=[DEVKIT_ARCHIVE, TEST_GROUNDTRUTH])
    devkit_path = find_in_data_path(DEVKIT_ARCHIVE)
    test_gt_path = find_in_data_path(TEST_GROUNDTRUTH)
    n_train, v_gt, t_gt, wnid_map = prepare_metadata(devkit_path,
                                                     test_gt_path)
    assert n_train == 1261406
    assert len(v_gt) == 50000
    assert len(t_gt) == 150000
    assert sorted(wnid_map.values()) == list(range(1000))
    assert all(isinstance(k, six.string_types) and len(k) == 9
               for k in wnid_map)
Example #26
def test_iris_all():
    skip_if_not_available(datasets=['iris.hdf5'])

    dataset = Iris(('all', ), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'float32'
    assert data.shape == (10, 4)
    assert labels.shape == (10, 1)
    known = numpy.array([5.1, 3.5, 1.4, 0.2])
    assert_allclose(data[0], known)
    assert labels[0][0] == 0
    assert dataset.num_examples == 150
    dataset.close(handle)
Example #27
File: test_iris.py Project: shabanian/fuel
def test_iris_all():
    skip_if_not_available(datasets=['iris.hdf5'])

    dataset = Iris(('all',), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == config.floatX
    assert data.shape == (10, 4)
    assert labels.shape == (10, 1)
    known = numpy.array([5.1, 3.5, 1.4, 0.2])
    assert_allclose(data[0], known)
    assert labels[0][0] == 0
    assert dataset.num_examples == 150
    dataset.close(handle)
Example #28
def test_read_metadata_mat_file():
    skip_if_not_available(datasets=[DEVKIT_ARCHIVE])
    with tarfile.open(find_in_data_path(DEVKIT_ARCHIVE)) as tar:
        meta_mat = tar.extractfile(DEVKIT_META_PATH)
        synsets = read_metadata_mat_file(meta_mat)
    assert (synsets['ILSVRC2012_ID'] ==
            numpy.arange(1, len(synsets) + 1)).all()
    assert synsets['num_train_images'][1000:].sum() == 0
    assert (synsets['num_train_images'][:1000] > 0).all()
    assert synsets.ndim == 1
    assert synsets['wordnet_height'].min() == 0
    assert synsets['wordnet_height'].max() == 19
    assert synsets['WNID'].dtype == numpy.dtype('S9')
    assert (synsets['num_children'][:1000] == 0).all()
    assert (synsets['children'][:1000] == -1).all()
Example #29
def test_interpolate():
    """Ensure that DataFrame.interpolate(method='nearest') has the
    desired properties.

    It is used by blocks-plot and should:

    * interpolate missing/NaN datapoints between valid ones
    * not replace any NaN before/after the first/last finite datapoint
    """
    skip_if_not_available(modules=['pandas'])
    y = [nan, nan, 2., 3., nan, 5, nan, nan]
    df = DataFrame(y)
    df_ = df.interpolate(method='nearest')[0]

    assert all(isfinite(df_[2:6]))
    assert all(~isfinite(df_[0:2]))
    assert all(~isfinite(df_[6:8]))
Example #30
def test_caltech101_silhouettes16():
    skip_if_not_available(datasets=['caltech101_silhouettes16.hdf5'])
    for which_set, size, num_examples in (
            ('train', 16, 4082), ('valid', 16, 2257), ('test', 16, 2302)):
        ds = CalTech101Silhouettes(which_sets=[which_set], size=size,
                                   load_in_memory=False)

        assert ds.num_examples == num_examples

        handle = ds.open()
        features, targets = ds.get_data(handle, slice(0, 10))

        assert features.shape == (10, 1, size, size)
        assert targets.shape == (10, 1)

        assert features.dtype == numpy.uint8
        assert targets.dtype == numpy.uint8
Example #31
def test_caltech101_silhouettes16():
    skip_if_not_available(datasets=['caltech101_silhouettes16.hdf5'])
    for which_set, size, num_examples in (('train', 16, 4082),
                                          ('valid', 16, 2257),
                                          ('test', 16, 2302)):
        ds = CalTech101Silhouettes(which_sets=[which_set],
                                   size=size,
                                   load_in_memory=False)

        assert ds.num_examples == num_examples

        handle = ds.open()
        features, targets = ds.get_data(handle, slice(0, 10))

        assert features.shape == (10, 1, size, size)
        assert targets.shape == (10, 1)

        assert features.dtype == numpy.uint8
        assert targets.dtype == numpy.uint8
Example #32
def test_mnist_test():
    skip_if_not_available(datasets=["mnist.hdf5"])

    dataset = MNIST(("test",), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == "uint8"
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([0, 0, 0, 0, 0, 0, 84, 185, 159, 151, 60, 36, 0, 0,
                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    assert_allclose(data[0][0][7], known)
    assert labels[0][0] == 7
    assert dataset.num_examples == 10000
    dataset.close(handle)

    stream = DataStream.default_stream(
        dataset, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX
Example #33
def test_mnist_train():
    skip_if_not_available(datasets=['mnist.hdf5'])

    dataset = MNIST('train', load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253,
                         253, 253, 253, 253, 225, 172, 253, 242, 195,  64, 0,
                         0, 0, 0])
    assert_allclose(data[0][0][6], known)
    assert labels[0][0] == 5
    assert dataset.num_examples == 60000
    dataset.close(handle)

    stream = DataStream.default_stream(
        dataset, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX
Example #34
def test_mnist():
    skip_if_not_available(datasets=['binarized_mnist'])
    mnist_train = BinarizedMNIST('train')
    assert len(mnist_train.features) == 50000
    assert mnist_train.num_examples == 50000
    mnist_valid = BinarizedMNIST('valid')
    assert len(mnist_valid.features) == 10000
    assert mnist_valid.num_examples == 10000
    mnist_test = BinarizedMNIST('test')
    assert len(mnist_test.features) == 10000
    assert mnist_test.num_examples == 10000

    first_feature, = mnist_train.get_data(request=[0])
    assert first_feature.shape == (1, 784)
    assert first_feature.dtype.kind == 'f'

    assert_raises(ValueError, BinarizedMNIST, 'dummy')

    mnist_test = cPickle.loads(cPickle.dumps(mnist_test))
    assert len(mnist_test.features) == 10000

    mnist_test_unflattened = BinarizedMNIST('test', flatten=False)
    assert mnist_test_unflattened.features.shape == (10000, 28, 28)
Example #35
def test_in_memory():
    skip_if_not_available(datasets=['mnist'])
    # Load MNIST and get two batches
    mnist = MNIST('train')
    data_stream = DataStream(mnist, iteration_scheme=SequentialScheme(
        num_examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    assert numpy.all(features == mnist.features[256:512])

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        filename = f.name
        cPickle.dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    assert numpy.all(features == mnist.features[512:768])
Example #36
def test_adult_test():
    skip_if_not_available(datasets=['adult.hdf5'])

    dataset = Adult(('test', ), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))

    assert data.shape == (10, 104)
    assert labels.shape == (10, 1)
    known = numpy.array([25., 38., 28., 44., 34., 63., 24., 55., 65., 36.])
    assert_allclose(data[:, 0], known)
    assert dataset.num_examples == 15060
    dataset.close(handle)

    dataset = Adult(('train', ), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))

    assert data.shape == (10, 104)
    assert labels.shape == (10, 1)
    known = numpy.array([39., 50., 38., 53., 28., 37., 49., 52., 31., 42.])
    assert_allclose(data[:, 0], known)
    assert dataset.num_examples == 30162
    dataset.close(handle)
Example #37
def test_read_metadata_mat_file():
    skip_if_not_available(datasets=[DEVKIT_ARCHIVE])
    with tarfile.open(find_in_data_path(DEVKIT_ARCHIVE)) as tar:
        meta_mat = tar.extractfile(DEVKIT_META_PATH)
        synsets, cost_mat = read_metadata_mat_file(meta_mat)
    assert (synsets['ILSVRC2010_ID'] ==
            numpy.arange(1, len(synsets) + 1)).all()
    assert synsets['num_train_images'][1000:].sum() == 0
    assert (synsets['num_train_images'][:1000] > 0).all()
    assert synsets.ndim == 1
    assert synsets['wordnet_height'].min() == 0
    assert synsets['wordnet_height'].max() == 19
    assert synsets['WNID'].dtype == numpy.dtype('S9')
    assert (synsets['num_children'][:1000] == 0).all()
    assert (synsets['children'][:1000] == -1).all()

    # Assert the basics about the cost matrix.
    assert cost_mat.shape == (1000, 1000)
    assert cost_mat.dtype == 'uint8'
    assert cost_mat.min() == 0
    assert cost_mat.max() == 18
    assert (cost_mat == cost_mat.T).all()
    # Assert that the diagonal is 0.
    assert (cost_mat.flat[::1001] == 0).all()
Example #38
def test_mnist_test():
    skip_if_not_available(datasets=['mnist.hdf5'])

    dataset = MNIST('test', load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([
        0, 0, 0, 0, 0, 0, 84, 185, 159, 151, 60, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0
    ])
    assert_allclose(data[0][0][7], known)
    assert labels[0][0] == 7
    assert dataset.num_examples == 10000
    dataset.close(handle)

    stream = DataStream.default_stream(dataset,
                                       iteration_scheme=SequentialScheme(
                                           10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX
Example #39
def test_mnist():
    skip_if_not_available(datasets=['mnist'])
    mnist_train = MNIST('train', start=20000)
    assert len(mnist_train.features) == 40000
    assert len(mnist_train.targets) == 40000
    assert mnist_train.num_examples == 40000
    mnist_test = MNIST('test', sources=('targets', ))
    assert len(mnist_test.features) == 10000
    assert len(mnist_test.targets) == 10000
    assert mnist_test.num_examples == 10000

    first_feature, first_target = mnist_train.get_data(request=[0])
    assert first_feature.shape == (1, 784)
    assert first_feature.dtype is numpy.dtype(theano.config.floatX)
    assert first_target.shape == (1, 1)
    assert first_target.dtype is numpy.dtype('uint8')

    first_target, = mnist_test.get_data(request=[0, 1])
    assert first_target.shape == (2, 1)

    binary_mnist = MNIST('test', binary=True, sources=('features', ))
    first_feature, = binary_mnist.get_data(request=[0])
    assert first_feature.dtype.kind == 'b'
    assert_raises(ValueError, MNIST, 'valid')
Example #40
def test_mnist_invalid_split():
    skip_if_not_available(datasets=["mnist.hdf5"])

    assert_raises(ValueError, MNIST, ("dummy",))
Example #41
def test_mnist_axes():
    skip_if_not_available(datasets=["mnist.hdf5"])

    dataset = MNIST(("train",), load_in_memory=False)
    assert_equal(dataset.axis_labels["features"],
                 ("batch", "channel", "height", "width"))
Example #42
def test_binarized_mnist_axes():
    skip_if_not_available(datasets=['binarized_mnist.hdf5'])

    dataset = BinarizedMNIST('train', load_in_memory=False)
    assert_equal(dataset.axis_labels['features'],
                 ('batch', 'channel', 'height', 'width'))
Example #43
def test_iris_axes():
    skip_if_not_available(datasets=['iris.hdf5'])

    dataset = Iris(('all', ), load_in_memory=False)
    assert_equal(dataset.axis_labels['features'], ('batch', 'feature'))
Example #44
def test_iris_invalid_split():
    skip_if_not_available(datasets=['iris.hdf5'])

    assert_raises(ValueError, Iris, ('dummy', ))
Example #45
def test_mnist_data_path():
    skip_if_not_available(datasets=['mnist.hdf5'])

    assert MNIST('train').data_path == os.path.join(config.data_path,
                                                    'mnist.hdf5')
Example #46
def test_mnist_invalid_split():
    skip_if_not_available(datasets=['mnist.hdf5'])

    assert_raises(ValueError, MNIST, 'dummy')
Example #47
def test_mnist_data_path():
    skip_if_not_available(datasets=['mnist.hdf5'])

    assert MNIST('train').data_path == os.path.join(config.data_path,
                                                    'mnist.hdf5')
Example #48
def test_print_column_summary():
    skip_if_not_available(modules=['pandas'])
    experiments = some_experiments()
    plot.print_column_summary(experiments)
Example #49
def test_adult_invalid_split():
    skip_if_not_available(datasets=['adult.hdf5'])

    assert_raises(ValueError, Adult, ('dummy', ))