Example 1
def test_hash_object_dtype():
    """ Make sure that ndarrays with dtype `object' hash correctly."""

    a = np.array([np.arange(i) for i in range(6)], dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)

    assert hash(a) == hash(b)
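
The `hash` used in this example is presumably joblib's hashing helper (`joblib.hash`, the public wrapper around `joblib.hashing.hash`) rather than the Python builtin, since plain ndarrays are not hashable with builtin `hash`. A minimal sketch of the same check through the public API, assuming only that numpy and joblib are installed:

import numpy as np
import joblib

# Two object-dtype arrays built from the same ragged contents.
a = np.array([np.arange(i) for i in range(6)], dtype=object)
b = np.array([np.arange(i) for i in range(6)], dtype=object)

# joblib.hash returns a hex digest string; equal contents give equal digests.
assert joblib.hash(a) == joblib.hash(b)
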
Example 2
def test_numpy_persistence():
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress in (False, True, 0, 3):
        # We use 'a.T' to have a non-C-contiguous array.
        for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))

            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)

            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            # Check that only one file was created
            nose.tools.assert_equal(filenames[0], this_filename)
            # Check that this file does exist
            nose.tools.assert_true(
                os.path.exists(os.path.join(env['dir'], filenames[0])))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            np.testing.assert_array_equal(np.array(obj), np.array(obj_))

        # Now test with array subclasses
        for obj in (np.matrix(np.zeros(10)),
                    np.memmap(filename + str(random.randint(0, 1000)) + 'mmap',
                              mode='w+', shape=4, dtype=np.float)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)

            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

            np.testing.assert_array_equal(obj_, obj)

        # Test with an object containing multiple numpy arrays
        obj = ComplexTestObject()
        filenames = numpy_pickle.dump(obj, this_filename,
                                      compress=compress)
        # All is cached in one file
        nose.tools.assert_equal(len(filenames), 1)

        obj_loaded = numpy_pickle.load(this_filename)
        nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
        np.testing.assert_array_equal(obj_loaded.array_float, obj.array_float)
        np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
        np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)
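
The test above drives `numpy_pickle.dump`/`numpy_pickle.load` directly. A minimal round-trip sketch using the public `joblib.dump`/`joblib.load` wrappers, assuming a recent joblib where a compressed dump produces a single file (the temporary directory and variable names are illustrative only):

import os
import tempfile

import numpy as np
import joblib

a = np.random.RandomState(0).random_sample((10, 2))

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, 'arrays.pkl')
    # dump() returns the list of files written; with the single-file pickle
    # format used by recent joblib versions this should be just [path].
    filenames = joblib.dump((a, a.T), path, compress=3)
    loaded = joblib.load(path)
    np.testing.assert_array_equal(loaded[0], a)
    np.testing.assert_array_equal(loaded[1], a.T)
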
Example 3
def test_hash_object_dtype():
    """ Make sure that ndarrays with dtype `object' hash correctly."""

    a = np.array([np.arange(i) for i in range(6)], dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)

    nose.tools.assert_equal(hash(a),
                            hash(b))
Example 4
def test_numpy_persistence():
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress, cache_size in ((0, 0), (1, 0), (1, 10)):
        # We use 'a.T' to have a non-C-contiguous array.
        for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            # Check that one file was created per array, plus one for the main pickle
            if not compress:
                nose.tools.assert_equal(len(filenames), len(obj) + 1)
            # Check that these files do exist
            for file in filenames:
                nose.tools.assert_true(
                    os.path.exists(os.path.join(env['dir'], file)))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            nose.tools.assert_true(np.all(np.array(obj) ==
                                                np.array(obj_)))

        # Now test with array subclasses
        for obj in (
                    np.matrix(np.zeros(10)),
                    np.core.multiarray._reconstruct(np.memmap, (), np.float)
                   ):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap
                        and hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

    # Finally smoke test the warning in case of compress + mmap_mode
    this_filename = filename + str(random.randint(0, 1000))
    numpy_pickle.dump(a, this_filename, compress=1)
    numpy_pickle.load(this_filename, mmap_mode='r')
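
The final two lines smoke-test the warning emitted when a compressed file is loaded with `mmap_mode` set. A hedged sketch of that interaction through the public API: the data has to be decompressed into memory, so the result is a plain ndarray rather than a memmap (the exact warning text varies between joblib versions):

import os
import tempfile
import warnings

import numpy as np
import joblib

a = np.arange(10.0)

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, 'compressed.pkl')
    joblib.dump(a, path, compress=1)
    with warnings.catch_warnings(record=True):
        warnings.simplefilter('always')
        # mmap_mode cannot apply to a compressed file; joblib warns and
        # falls back to a regular in-memory load.
        loaded = joblib.load(path, mmap_mode='r')

np.testing.assert_array_equal(loaded, a)
assert not isinstance(loaded, np.memmap)
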
Example 5
def test_joblib_pickle_across_python_versions():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the *.gz and *.pkl (compressed and non compressed
    # pickles) in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    data_filenames = glob.glob(os.path.join(test_data_dir, '*.gz'))
    data_filenames += glob.glob(os.path.join(test_data_dir, '*.pkl'))

    for fname in data_filenames:
        _check_pickle(fname, expected_list)
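
`_check_pickle` is a helper from joblib's test suite that is not shown here. A hedged sketch of what such a helper might do, mirroring the comparison loop used in Example 6 (the helper name and structure below are illustrative, not joblib's actual implementation):

import numpy as np
import joblib

def check_legacy_pickle(filename, expected_list):
    # Load a pickle generated by an older python/joblib/numpy combination
    # and compare each item against the expected values.
    result_list = joblib.load(filename)
    for result, expected in zip(result_list, expected_list):
        if isinstance(expected, np.ndarray):
            assert result.dtype == expected.dtype
            np.testing.assert_equal(result, expected)
        else:
            assert result == expected
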
Example 6
def test_compressed_pickle_dump_and_load():
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('>i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.arange(5, dtype=np.dtype('>f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        dumped_filenames = numpy_pickle.dump(expected_list, fname, compress=1)
        assert len(dumped_filenames) == 1
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                assert result.dtype == expected.dtype
                np.testing.assert_equal(result, expected)
            else:
                assert result == expected
    finally:
        os.remove(fname)
Example 7
def test_joblib_pickle_across_python_versions():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the compressed and non compressed
    # pickles in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    pickle_extensions = ('.pkl', '.gz', '.gzip', '.bz2', '.xz', '.lzma')
    pickle_filenames = [os.path.join(test_data_dir, fn)
                        for fn in os.listdir(test_data_dir)
                        if any(fn.endswith(ext) for ext in pickle_extensions)]

    for fname in pickle_filenames:
        _check_pickle(fname, expected_list)
Example 8
def test_compressed_pickle_dump_and_load():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        try:
            dumped_filenames = numpy_pickle.dump(
                expected_list, fname, compress=1,
                cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)
Example 9
def test_joblib_decompression_format_support():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    extensions = ('.gz', '.gzip', '.bz2', '.xz', '.lzma')
    compress_filenames_list = [
        glob.glob(os.path.join(test_data_dir, '*' + ext))
        for ext in extensions]
    compress_filenames = sum(compress_filenames_list, [])

    for fname in compress_filenames:
        _check_compression_format(fname, expected_list)
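
A related convenience worth noting (hedged, for recent joblib versions): at dump time the compressor can be selected from the target file extension instead of an explicit `compress` level, and `load` detects the compression format automatically when reading. A minimal sketch:

import os
import tempfile

import numpy as np
import joblib

data = np.arange(256, dtype=np.uint8)

with tempfile.TemporaryDirectory() as tmp:
    gz_path = os.path.join(tmp, 'data.pkl.gz')
    joblib.dump(data, gz_path)   # gzip/zlib compression inferred from '.gz'
    np.testing.assert_array_equal(joblib.load(gz_path), data)
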
Example 10
def test_memmap_persistence_mixed_dtypes():
    # Loading data structures that have sub-arrays with dtype=object
    # should not prevent memory mapping of fixed-size dtype sub-arrays.
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    b = np.array([1, 'b'], dtype=object)
    construct = (a, b)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(construct, filename)
    a_clone, b_clone = numpy_pickle.load(filename, mmap_mode='r')

    # the floating point array has been memory mapped
    assert isinstance(a_clone, np.memmap)

    # the object-dtype array has been loaded in memory
    assert not isinstance(b_clone, np.memmap)
Example 11
def test_memmaping_pool_for_large_arrays():
    """Check that large arrays are not copied in memory"""
    assert_array_equal = np.testing.assert_array_equal

    # Check that the tempfolder is empty
    assert_equal(os.listdir(TEMP_FOLDER), [])

    # Build array reducers that automatically dump large array content
    # to filesystem-backed memmap instances to avoid memory explosion
    p = MemmapingPool(3, max_nbytes=40, temp_folder=TEMP_FOLDER)
    try:
        # The temporary folder for the pool is not provisioned in advance
        assert_equal(os.listdir(TEMP_FOLDER), [])
        assert_false(os.path.exists(p._temp_folder))

        small = np.ones(5, dtype=np.float32)
        assert_equal(small.nbytes, 20)
        p.map(check_array, [(small, i, 1.0) for i in range(small.shape[0])])

        # Memory has been copied, the pool filesystem folder is unused
        assert_equal(os.listdir(TEMP_FOLDER), [])

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert_equal(large.nbytes, 800)
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # The data has been dumped in a temp folder for subprocess to share it
        # without per-child memory copies
        assert_true(os.path.isdir(p._temp_folder))
        dumped_filenames = os.listdir(p._temp_folder)
        assert_equal(len(dumped_filenames), 2)

        # Check that memory mapping is not triggered for arrays with
        # dtype='object'
        objects = np.array(['abc'] * 100, dtype='object')
        results = p.map(has_shareable_memory, [objects])
        assert_false(results[0])

    finally:
        # check FS garbage upon pool termination
        p.terminate()
        assert_false(os.path.exists(p._temp_folder))
        del p
Example 12
def test_memmapping_pool_for_large_arrays(factory, tmpdir):
    """Check that large arrays are not copied in memory"""

    # Check that the tempfolder is empty
    assert os.listdir(tmpdir.strpath) == []

    # Build array reducers that automatically dump large array content
    # to filesystem-backed memmap instances to avoid memory explosion
    p = factory(3, max_nbytes=40, temp_folder=tmpdir.strpath, verbose=2)
    try:
        # The temporary folder for the pool is not provisioned in advance
        assert os.listdir(tmpdir.strpath) == []
        assert not os.path.exists(p._temp_folder)

        small = np.ones(5, dtype=np.float32)
        assert small.nbytes == 20
        p.map(check_array, [(small, i, 1.0) for i in range(small.shape[0])])

        # Memory has been copied, the pool filesystem folder is unused
        assert os.listdir(tmpdir.strpath) == []

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert large.nbytes == 800
        p.map(check_array, [(large, i, 1.0) for i in range(large.shape[0])])

        # The data has been dumped in a temp folder for subprocess to share it
        # without per-child memory copies
        assert os.path.isdir(p._temp_folder)
        dumped_filenames = os.listdir(p._temp_folder)
        assert len(dumped_filenames) == 1

        # Check that memory mapping is not triggered for arrays with
        # dtype='object'
        objects = np.array(['abc'] * 100, dtype='object')
        results = p.map(has_shareable_memory, [objects])
        assert not results[0]

    finally:
        # check FS garbage upon pool termination
        p.terminate()
        assert not os.path.exists(p._temp_folder)
        del p
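
The pool-level behaviour exercised above is also reachable through the public `Parallel` API: the `max_nbytes` threshold controls when input arrays are dumped to a shared temporary folder and handed to worker processes as memmaps. A hedged sketch under the default loky backend (the threshold value and worker count here are illustrative):

import numpy as np
from joblib import Parallel, delayed

def looks_like_memmap(x):
    # Workers should receive large inputs as np.memmap instances.
    return isinstance(x, np.memmap)

if __name__ == '__main__':
    large = np.ones(100_000)   # well above the 1 kB threshold below
    results = Parallel(n_jobs=2, max_nbytes='1K')(
        delayed(looks_like_memmap)(large) for _ in range(2))
    print(results)   # expected: [True, True] when memmapping kicks in
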
Example 13
def test_joblib_pickle_across_python_versions():
    expected_list = [np.arange(5, dtype=np.int64),
                     np.arange(5, dtype=np.float64),
                     np.array([1, 'abc', {'a': 1, 'b': 2}]),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the *.gz and *.pkl (compressed and non compressed
    # pickles) in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    data_filenames = glob.glob(os.path.join(test_data_dir, '*.gz'))
    data_filenames += glob.glob(os.path.join(test_data_dir, '*.pkl'))

    for fname in data_filenames:
        _check_pickle(fname, expected_list)
Example 14
def test_0d_and_1d_array_hashing_is_different():
    assert hash(np.array(0)) != hash(np.array([0]))
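
As in Example 1, `hash` here is presumably joblib's hashing helper. The array shape takes part in the digest, so a 0-d array and a one-element 1-d array hash differently even though their values compare equal; a minimal sketch with the public `joblib.hash`:

import numpy as np
import joblib

assert joblib.hash(np.array(0)) != joblib.hash(np.array([0]))
# ...even though the two arrays compare equal element-wise.
assert np.array(0) == np.array([0])
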
Example 15
class ComplexTestObject(object):
    # Helper class instantiated at the end of Example 2 above.
    def __init__(self):
        self.array_float = np.arange(100, dtype='float64')
        self.array_int = np.ones(100, dtype='int32')
        self.array_obj = np.array(['a', 10, 20.0], dtype='object')
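
This is the `ComplexTestObject` helper that Example 2 dumps and reloads. A hedged round-trip sketch reusing the class defined just above (the temporary path is illustrative):

import os
import tempfile

import numpy as np
import joblib

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, 'complex_obj.pkl')
    obj = ComplexTestObject()          # class defined just above
    joblib.dump(obj, path)
    loaded = joblib.load(path)
    np.testing.assert_array_equal(loaded.array_float, obj.array_float)
    np.testing.assert_array_equal(loaded.array_int, obj.array_int)
    np.testing.assert_array_equal(loaded.array_obj, obj.array_obj)
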
Example 16
def test_0d_array():
    hash(np.array(0))
Example 17
def test_0d_array():
    hash(np.array(0))
Example 18
def test_0d_and_1d_array_hashing_is_different():
    assert_not_equal(hash(np.array(0)), hash(np.array([0])))
Example 19
def test_0d_and_1d_array_hashing_is_different():
    assert hash(np.array(0)) != hash(np.array([0]))