def test_numpy_persistence():
    # Round-trip containers of arrays, ndarray subclasses and a composite
    # object through numpy_pickle.dump / numpy_pickle.load for several
    # values of `compress`, checking each dump reports exactly one file.
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress in (False, True, 0, 3):
        # We use 'a.T' to have a non C-contiguous array.
        for obj in ((a,), (a.T,), (a, a), [a, a, a]):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            # Check that only one file was created
            nose.tools.assert_equal(filenames[0], this_filename)
            # Check that this file does exist
            nose.tools.assert_true(
                os.path.exists(os.path.join(env['dir'], filenames[0])))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            np.testing.assert_array_equal(np.array(obj), np.array(obj_))

        # Now test with array subclasses.
        # `np.float` was only an alias of the builtin `float`; the alias is
        # gone from recent NumPy releases, so use the builtin directly.
        mmap_filename = filename + str(random.randint(0, 1000)) + 'mmap'
        for obj in (np.matrix(np.zeros(10)),
                    np.memmap(mmap_filename, mode='w+', shape=4,
                              dtype=float)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)

            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

            np.testing.assert_array_equal(obj_, obj)

        # Test with an object containing multiple numpy arrays.
        # The last file name of the loop above is deliberately reused.
        obj = ComplexTestObject()
        filenames = numpy_pickle.dump(obj, this_filename,
                                      compress=compress)
        # All is cached in one file
        nose.tools.assert_equal(len(filenames), 1)

        obj_loaded = numpy_pickle.load(this_filename)
        nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
        np.testing.assert_array_equal(obj_loaded.array_float,
                                      obj.array_float)
        np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
        np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)
def test_memmap_with_big_offset(tmpdir):
    # Dump two uint8 arrays, each mmap.ALLOCATIONGRANULARITY bytes long,
    # reload them with mmap_mode='r', pass the loaded list through
    # Parallel(n_jobs=2) and check that the second array is a memmap whose
    # offset exceeds the granularity and that the data comes back intact.
    granularity = mmap.ALLOCATIONGRANULARITY
    target = tmpdir.join('test.mmap').strpath
    arrays = [
        np.zeros(granularity, dtype='uint8'),
        np.ones(granularity, dtype='uint8'),
    ]

    dump(arrays, target)
    mapped = load(target, mmap_mode='r')

    # A single task: exactly one result is expected back.
    tasks = (delayed(identity)(mapped) for _ in [0])
    (roundtripped,) = Parallel(n_jobs=2)(tasks)

    second = mapped[1]
    assert isinstance(second, np.memmap)
    assert second.offset > granularity
    np.testing.assert_array_equal(arrays, roundtripped)
def test_pickle_highest_protocol(tmpdir):
    # A numpy array dumped with pickle.HIGHEST_PROTOCOL must reload with
    # the same values.
    # see https://github.com/joblib/joblib/issues/362
    expected = np.zeros(10)
    target = tmpdir.join('test.pkl').strpath

    numpy_pickle.dump(expected, target, protocol=pickle.HIGHEST_PROTOCOL)
    reloaded = numpy_pickle.load(target)

    np.testing.assert_array_equal(reloaded, expected)
def test_pickle_highest_protocol():
    # A numpy array dumped with pickle.HIGHEST_PROTOCOL must reload with
    # the same values.
    # see https://github.com/joblib/joblib/issues/362
    suffix = str(random.randint(0, 1000))
    target = env['filename'] + suffix
    expected = np.zeros(10)

    numpy_pickle.dump(expected, target, protocol=pickle.HIGHEST_PROTOCOL)
    reloaded = numpy_pickle.load(target)

    np.testing.assert_array_equal(reloaded, expected)
def test_numpy_arrays_use_different_memory(mmap_mode):
    # Each task overwrites its own (10, 10) float64 array with the task
    # index; the i-th returned array must then contain only the value i.
    def fill(target, fill_value):
        target[:] = fill_value
        return target

    inputs = [np.zeros((10, 10), dtype='float64') for _ in range(10)]

    runner = Parallel(mmap_mode=mmap_mode, max_nbytes=0, n_jobs=2)
    outputs = runner(
        delayed(fill)(block, position)
        for position, block in enumerate(inputs)
    )

    for expected, filled in enumerate(outputs):
        np.testing.assert_array_equal(filled, expected)
def test_load_memmap_with_big_offset(tmpdir):
    # Test that numpy memmap offset is set correctly if greater than
    # mmap.ALLOCATIONGRANULARITY, see
    # https://github.com/joblib/joblib/issues/451 and
    # https://github.com/numpy/numpy/pull/8443 for more details.
    granularity = mmap.ALLOCATIONGRANULARITY
    target = tmpdir.join('test.mmap').strpath
    arrays = [
        np.zeros(granularity, dtype='uint8'),
        np.ones(granularity, dtype='uint8'),
    ]

    numpy_pickle.dump(arrays, target)
    loaded = numpy_pickle.load(target, mmap_mode='r')

    # The second array is the one expected to sit past the granularity.
    second = loaded[1]
    assert isinstance(second, np.memmap)
    assert second.offset > granularity
    np.testing.assert_array_equal(arrays, loaded)
def test_numpy_persistence(tmpdir, compress):
    # Round-trip containers of arrays, ndarray subclasses and a composite
    # object through numpy_pickle.dump / numpy_pickle.load for the given
    # `compress` value, checking each dump reports exactly one file.
    filename = tmpdir.join('test.pkl').strpath
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    # We use 'a.T' to have a non C-contiguous array.
    for obj in ((a,), (a.T,), (a, a), [a, a, a]):
        filenames = numpy_pickle.dump(obj, filename, compress=compress)

        # All is cached in one file
        assert len(filenames) == 1
        # Check that only one file was created
        assert filenames[0] == filename
        # Check that this file does exist
        assert os.path.exists(filenames[0])

        # Unpickle the object
        obj_ = numpy_pickle.load(filename)
        # Check that the items are indeed arrays
        for item in obj_:
            assert isinstance(item, np.ndarray)
        # And finally, check that all the values are equal.
        np.testing.assert_array_equal(np.array(obj), np.array(obj_))

    # Now test with array subclasses.
    # `np.float` was only an alias of the builtin `float`; the alias is
    # gone from recent NumPy releases, so use the builtin directly.
    for obj in (np.matrix(np.zeros(10)),
                np.memmap(filename + 'mmap', mode='w+', shape=4,
                          dtype=float)):
        filenames = numpy_pickle.dump(obj, filename, compress=compress)
        # All is cached in one file
        assert len(filenames) == 1

        obj_ = numpy_pickle.load(filename)
        if (type(obj) is not np.memmap and
                hasattr(obj, '__array_prepare__')):
            # We don't reconstruct memmaps
            assert isinstance(obj_, type(obj))

        np.testing.assert_array_equal(obj_, obj)

    # Test with an object containing multiple numpy arrays
    obj = ComplexTestObject()
    filenames = numpy_pickle.dump(obj, filename, compress=compress)
    # All is cached in one file
    assert len(filenames) == 1

    obj_loaded = numpy_pickle.load(filename)
    assert isinstance(obj_loaded, type(obj))
    np.testing.assert_array_equal(obj_loaded.array_float, obj.array_float)
    np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
    np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)
def test_numpy_persistence():
    # Round-trip containers of arrays and ndarray subclasses through
    # numpy_pickle.dump / numpy_pickle.load for several
    # (compress, cache_size) combinations, then smoke test loading a
    # compressed dump with mmap_mode='r'.
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress, cache_size in ((0, 0), (1, 0), (1, 10)):
        # We use 'a.T' to have a non C-contiguous array.
        for obj in ((a,), (a.T,), (a, a), [a, a, a]):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            # Check that one file was created per array (plus one more,
            # hence the `+ 1`); only checked without compression.
            if not compress:
                nose.tools.assert_equal(len(filenames), len(obj) + 1)
            # Check that these files do exist
            for fname in filenames:
                nose.tools.assert_true(
                    os.path.exists(os.path.join(env['dir'], fname)))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            nose.tools.assert_true(np.all(np.array(obj) ==
                                          np.array(obj_)))

        # Now test with array subclasses.
        # `np.float` was only an alias of the builtin `float`; the alias is
        # gone from recent NumPy releases, so use the builtin directly.
        # NOTE(review): `np.core.multiarray._reconstruct` is a private NumPy
        # helper -- confirm it is still reachable on the NumPy versions this
        # test must support.
        for obj in (
                np.matrix(np.zeros(10)),
                np.core.multiarray._reconstruct(np.memmap, (), float),
        ):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

    # Finally smoke test the warning in case of compress + mmap_mode
    this_filename = filename + str(random.randint(0, 1000))
    numpy_pickle.dump(a, this_filename, compress=1)
    numpy_pickle.load(this_filename, mmap_mode='r')
def test_numpy_persistence():
    # Dump and reload containers of arrays, ndarray subclasses and a
    # composite object with numpy_pickle for several `compress` values;
    # every dump must report exactly one file and reload equal data.
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress in (False, True, 0, 3):
        # We use 'a.T' to have a non C-contiguous array.
        for obj in ((a, ), (a.T, ), (a, a), [a, a, a]):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))

            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)

            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            # Check that only one file was created
            nose.tools.assert_equal(filenames[0], this_filename)
            # Check that this file does exist
            nose.tools.assert_true(
                os.path.exists(os.path.join(env['dir'], filenames[0])))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            np.testing.assert_array_equal(np.array(obj), np.array(obj_))

        # Now test with array subclasses.
        # The removed `np.float` alias simply meant the builtin `float`,
        # so passing `float` keeps the dtype and works on current NumPy.
        mmap_filename = filename + str(random.randint(0, 1000)) + 'mmap'
        subclass_objs = (
            np.matrix(np.zeros(10)),
            np.memmap(mmap_filename, mode='w+', shape=4, dtype=float),
        )
        for obj in subclass_objs:
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)

            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

            np.testing.assert_array_equal(obj_, obj)

        # Test with an object containing multiple numpy arrays.
        # `this_filename` still holds the last name used in the loop above.
        obj = ComplexTestObject()
        filenames = numpy_pickle.dump(obj, this_filename,
                                      compress=compress)
        # All is cached in one file
        nose.tools.assert_equal(len(filenames), 1)

        obj_loaded = numpy_pickle.load(this_filename)
        nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
        np.testing.assert_array_equal(obj_loaded.array_float,
                                      obj.array_float)
        np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
        np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)