# Assumed imports for these test snippets: an illustrative sketch matching
# the joblib versions the snippets come from, not copied verbatim from the
# test suite. `env` (a dict of test paths), `TEMP_FOLDER` and the
# `factory` / `tmpdir` arguments are provided by the suite's setup
# fixtures. Note that `hash` below is joblib's hashing function, not the
# builtin.
import glob
import os
import random
import sys
import tempfile

import nose.tools
from nose.tools import (assert_equal, assert_false, assert_not_equal,
                        assert_true)
import numpy as np

from joblib import numpy_pickle
from joblib.hashing import hash
from joblib.pool import MemmapingPool, has_shareable_memory
from joblib.test import data


def test_hash_object_dtype():
    """ Make sure that ndarrays with dtype `object' hash correctly."""
    a = np.array([np.arange(i) for i in range(6)], dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)
    assert hash(a) == hash(b)

def test_numpy_persistence():
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress in (False, True, 0, 3):
        # We use 'a.T' to have a non C-contiguous array.
        for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            # Check that only one file was created
            nose.tools.assert_equal(filenames[0], this_filename)
            # Check that this file does exist
            nose.tools.assert_true(
                os.path.exists(os.path.join(env['dir'], filenames[0])))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            np.testing.assert_array_equal(np.array(obj), np.array(obj_))

        # Now test with array subclasses
        for obj in (np.matrix(np.zeros(10)),
                    np.memmap(filename + str(random.randint(0, 1000)) +
                              'mmap',
                              mode='w+', shape=4, dtype=np.float64)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))
                np.testing.assert_array_equal(obj_, obj)

        # Test with an object containing multiple numpy arrays
        obj = ComplexTestObject()
        filenames = numpy_pickle.dump(obj, this_filename,
                                      compress=compress)
        # All is cached in one file
        nose.tools.assert_equal(len(filenames), 1)

        obj_loaded = numpy_pickle.load(this_filename)
        nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
        np.testing.assert_array_equal(obj_loaded.array_float,
                                      obj.array_float)
        np.testing.assert_array_equal(obj_loaded.array_int,
                                      obj.array_int)
        np.testing.assert_array_equal(obj_loaded.array_obj,
                                      obj.array_obj)

def test_hash_object_dtype():
    """ Make sure that ndarrays with dtype `object' hash correctly."""
    a = np.array([np.arange(i) for i in range(6)], dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)
    nose.tools.assert_equal(hash(a), hash(b))

def test_numpy_persistence():
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress, cache_size in ((0, 0), (1, 0), (1, 10)):
        # We use 'a.T' to have a non C-contiguous array.
        for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            # Check that one file was created per array
            if not compress:
                nose.tools.assert_equal(len(filenames), len(obj) + 1)
            # Check that these files do exist
            for file in filenames:
                nose.tools.assert_true(
                    os.path.exists(os.path.join(env['dir'], file)))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            nose.tools.assert_true(
                np.all(np.array(obj) == np.array(obj_)))

        # Now test with array subclasses
        for obj in (np.matrix(np.zeros(10)),
                    np.core.multiarray._reconstruct(np.memmap, (),
                                                    np.float64)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

    # Finally smoke test the warning in case of compress + mmap_mode
    this_filename = filename + str(random.randint(0, 1000))
    numpy_pickle.dump(a, this_filename, compress=1)
    numpy_pickle.load(this_filename, mmap_mode='r')

def test_joblib_pickle_across_python_versions():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian '
                            'machines')

    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the *.gz and *.pkl (compressed and non compressed
    # pickles) in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    data_filenames = glob.glob(os.path.join(test_data_dir, '*.gz'))
    data_filenames += glob.glob(os.path.join(test_data_dir, '*.pkl'))

    for fname in data_filenames:
        _check_pickle(fname, expected_list)

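# `_check_pickle`, called by the cross-version tests in this section, is
# defined elsewhere in the joblib test suite. A minimal sketch of its
# contract, assuming it only needs to load the legacy pickle and compare
# against the expected values (the real helper also handles per-version
# quirks):
def _check_pickle(filename, expected_list):
    """Load a legacy pickle file and compare its contents to expected_list."""
    result_list = numpy_pickle.load(filename)
    for result, expected in zip(result_list, expected_list):
        if isinstance(expected, np.ndarray):
            # Compare dtypes (including endianness) as well as values.
            nose.tools.assert_equal(result.dtype, expected.dtype)
            np.testing.assert_equal(result, expected)
        else:
            nose.tools.assert_equal(result, expected)
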
def test_compressed_pickle_dump_and_load():
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('>i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.arange(5, dtype=np.dtype('>f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        dumped_filenames = numpy_pickle.dump(expected_list, fname,
                                             compress=1)
        assert len(dumped_filenames) == 1
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                assert result.dtype == expected.dtype
                np.testing.assert_equal(result, expected)
            else:
                assert result == expected
    finally:
        os.remove(fname)

def test_joblib_pickle_across_python_versions():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the compressed and non compressed
    # pickles in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    pickle_extensions = ('.pkl', '.gz', '.gzip', '.bz2', '.xz', '.lzma')
    pickle_filenames = [os.path.join(test_data_dir, fn)
                        for fn in os.listdir(test_data_dir)
                        if any(fn.endswith(ext)
                               for ext in pickle_extensions)]

    for fname in pickle_filenames:
        _check_pickle(fname, expected_list)

def test_compressed_pickle_dump_and_load():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian '
                            'machines')

    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        try:
            dumped_filenames = numpy_pickle.dump(
                expected_list, fname, compress=1,
                cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)

def test_joblib_decompression_format_support():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    extensions = ('.gz', '.gzip', '.bz2', '.xz', '.lzma')
    # Glob with a '*' wildcard so that files ending with each extension
    # are matched (a bare '.gz' pattern would match nothing).
    compress_filenames_list = [glob.glob(os.path.join(test_data_dir,
                                                      '*' + ext))
                               for ext in extensions]
    compress_filenames = sum(compress_filenames_list, [])

    for fname in compress_filenames:
        _check_compression_format(fname, expected_list)

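# `_check_compression_format` is likewise defined outside this excerpt. A
# plausible minimal sketch, assuming it only needs to verify that each
# supported compression format decompresses to the expected values:
def _check_compression_format(filename, expected_list):
    """Check that a compressed pickle loads correctly whatever its format."""
    result_list = numpy_pickle.load(filename)
    for result, expected in zip(result_list, expected_list):
        if isinstance(expected, np.ndarray):
            np.testing.assert_equal(result, expected)
        else:
            nose.tools.assert_equal(result, expected)
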
def test_memmap_persistence_mixed_dtypes():
    # loading datastructures that have sub-arrays with dtype=object
    # should not prevent memmapping of fixed size dtype sub-arrays.
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    b = np.array([1, 'b'], dtype=object)
    construct = (a, b)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(construct, filename)
    a_clone, b_clone = numpy_pickle.load(filename, mmap_mode='r')

    # the floating point array has been memory mapped
    assert isinstance(a_clone, np.memmap)

    # the object-dtype array has been loaded in memory
    assert not isinstance(b_clone, np.memmap)

def test_memmaping_pool_for_large_arrays():
    """Check that large arrays are not copied in memory"""
    assert_array_equal = np.testing.assert_array_equal

    # Check that the tempfolder is empty
    assert_equal(os.listdir(TEMP_FOLDER), [])

    # Build an array reducers that automatically dump large array content
    # to filesystem backed memmap instances to avoid memory explosion
    p = MemmapingPool(3, max_nbytes=40, temp_folder=TEMP_FOLDER)
    try:
        # The temporary folder for the pool is not provisioned in advance
        assert_equal(os.listdir(TEMP_FOLDER), [])
        assert_false(os.path.exists(p._temp_folder))

        small = np.ones(5, dtype=np.float32)
        assert_equal(small.nbytes, 20)
        p.map(check_array,
              [(small, i, 1.0) for i in range(small.shape[0])])

        # Memory has been copied, the pool filesystem folder is unused
        assert_equal(os.listdir(TEMP_FOLDER), [])

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert_equal(large.nbytes, 800)
        p.map(check_array,
              [(large, i, 1.0) for i in range(large.shape[0])])

        # The data has been dumped in a temp folder for subprocess to share it
        # without per-child memory copies
        assert_true(os.path.isdir(p._temp_folder))
        dumped_filenames = os.listdir(p._temp_folder)
        assert_equal(len(dumped_filenames), 2)

        # Check that memory mapping is not triggered for arrays with
        # dtype='object'
        objects = np.array(['abc'] * 100, dtype='object')
        results = p.map(has_shareable_memory, [objects])
        assert_false(results[0])
    finally:
        # check FS garbage upon pool termination
        p.terminate()
        assert_false(os.path.exists(p._temp_folder))
    del p

def test_memmapping_pool_for_large_arrays(factory, tmpdir):
    """Check that large arrays are not copied in memory"""
    # Check that the tempfolder is empty
    assert os.listdir(tmpdir.strpath) == []

    # Build an array reducers that automatically dump large array content
    # to filesystem backed memmap instances to avoid memory explosion
    p = factory(3, max_nbytes=40, temp_folder=tmpdir.strpath, verbose=2)
    try:
        # The temporary folder for the pool is not provisioned in advance
        assert os.listdir(tmpdir.strpath) == []
        assert not os.path.exists(p._temp_folder)

        small = np.ones(5, dtype=np.float32)
        assert small.nbytes == 20
        p.map(check_array,
              [(small, i, 1.0) for i in range(small.shape[0])])

        # Memory has been copied, the pool filesystem folder is unused
        assert os.listdir(tmpdir.strpath) == []

        # Try with a file larger than the memmap threshold of 40 bytes
        large = np.ones(100, dtype=np.float64)
        assert large.nbytes == 800
        p.map(check_array,
              [(large, i, 1.0) for i in range(large.shape[0])])

        # The data has been dumped in a temp folder for subprocess to share it
        # without per-child memory copies
        assert os.path.isdir(p._temp_folder)
        dumped_filenames = os.listdir(p._temp_folder)
        assert len(dumped_filenames) == 1

        # Check that memory mapping is not triggered for arrays with
        # dtype='object'
        objects = np.array(['abc'] * 100, dtype='object')
        results = p.map(has_shareable_memory, [objects])
        assert not results[0]
    finally:
        # check FS garbage upon pool termination
        p.terminate()
        assert not os.path.exists(p._temp_folder)
    del p

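# `check_array` and `has_shareable_memory`, used by the two pool tests
# above, are helpers defined elsewhere in the joblib test suite
# (`has_shareable_memory` is a joblib utility, historically importable from
# joblib.pool, that reports whether an array is backed by shared memmap
# storage). A minimal sketch of `check_array`, assuming the
# (data, position, expected) tuple layout used in the p.map calls above:
def check_array(args):
    """Check that a worker process sees the expected value at a position."""
    data, position, expected = args
    np.testing.assert_equal(data[position], expected)
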
def test_joblib_pickle_across_python_versions():
    expected_list = [np.arange(5, dtype=np.int64),
                     np.arange(5, dtype=np.float64),
                     np.array([1, 'abc', {'a': 1, 'b': 2}]),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the *.gz and *.pkl (compressed and non compressed
    # pickles) in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    data_filenames = glob.glob(os.path.join(test_data_dir, '*.gz'))
    data_filenames += glob.glob(os.path.join(test_data_dir, '*.pkl'))

    for fname in data_filenames:
        _check_pickle(fname, expected_list)

def test_0d_and_1d_array_hashing_is_different():
    assert hash(np.array(0)) != hash(np.array([0]))

# Object holding several numpy arrays as attributes, used by
# test_numpy_persistence above.
class ComplexTestObject:
    def __init__(self):
        self.array_float = np.arange(100, dtype='float64')
        self.array_int = np.ones(100, dtype='int32')
        self.array_obj = np.array(['a', 10, 20.0], dtype='object')

def test_0d_array():
    # Smoke test: hashing a 0d array should not raise.
    hash(np.array(0))

def test_0d_and_1d_array_hashing_is_different():
    assert_not_equal(hash(np.array(0)), hash(np.array([0])))