def test_compress_level_error(wrong_compress):
    # Verify that passing an invalid compress argument raises an error.
    exception_msg = ('Non valid compress level given: '
                     '"{0}"'.format(wrong_compress))
    with raises(ValueError) as excinfo:
        numpy_pickle.dump('dummy', 'foo', compress=wrong_compress)
    excinfo.match(exception_msg)

def cache_value(value, filename, decimal=7):
    """Helper function for checking that a value hasn't changed between
    two invocations.

    First call: write value in a file.
    Second call: check that what was written is identical to the value
    provided in the second call.

    TODO: only numpy arrays are compared; other values still have to be
    compared.

    Parameters
    ==========
    value: arbitrary Python value
        This could include numpy objects. Uses persistence from joblib
        to achieve high efficiency.
    """
    from joblib.numpy_pickle import dump, load
    base_dir = os.path.split(filename)[0]
    if not os.path.isdir(base_dir):
        os.makedirs(base_dir)
    if os.path.isfile(filename):
        cached = load(filename)
        np.testing.assert_almost_equal(cached, value, decimal=decimal)
    else:
        dump(value, filename)

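# A minimal usage sketch for cache_value (illustrative, not part of the
# original suite); it assumes the module-level numpy import (np) used by
# the surrounding code, and the target path below is hypothetical.
def demo_cache_value():
    result = np.linspace(0.0, 1.0, 5)
    demo_file = '/tmp/cache_value_demo/result.pkl'  # hypothetical path
    # First invocation: no cached file exists yet, so the value is dumped.
    cache_value(result, demo_file)
    # Second invocation: the file exists, so the (re)computed value is
    # checked against it to 7 decimals (AssertionError on mismatch).
    cache_value(result, demo_file)
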
def test_numpy_persistence_bufferred_array_compression():
    big_array = np.ones((_IO_BUFFER_SIZE + 100), dtype=np.uint8)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(big_array, filename, compress=True)
    arr_reloaded = numpy_pickle.load(filename)

    np.testing.assert_array_equal(big_array, arr_reloaded)

def test_file_handle_persistence_compressed_mmap():
    obj = np.random.random((10, 10))
    filename = env['filename'] + str(random.randint(0, 1000))

    with open(filename, 'wb') as f:
        numpy_pickle.dump(obj, f, compress=('gzip', 3))

    with closing(gzip.GzipFile(filename, 'rb')) as f:
        with warnings.catch_warnings(record=True) as caught_warnings:
            warnings.simplefilter("always")
            numpy_pickle.load(f, mmap_mode='r+')
            nose.tools.assert_equal(len(caught_warnings), 1)
            for warn in caught_warnings:
                nose.tools.assert_equal(warn.category, UserWarning)
                nose.tools.assert_equal(
                    warn.message.args[0],
                    'File "%(filename)s" is compressed '
                    'using "%(compressor)s" which is not '
                    'compatible with mmap_mode '
                    '"%(mmap_mode)s" flag '
                    'passed. mmap_mode option will be '
                    'ignored.' % {'filename': "",
                                  'mmap_mode': 'r+',
                                  'compressor': 'GzipFile'})

def test_joblib_compression_formats():
    compresslevels = (1, 3, 6)
    filename = env['filename'] + str(random.randint(0, 1000))
    objects = (np.ones(shape=(100, 100), dtype='f8'), range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    for compress in compresslevels:
        for cmethod in _COMPRESSORS:
            dump_filename = filename + "." + cmethod
            for obj in objects:
                if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
                    # Lzma module only available for python >= 3.3
                    msg = "{} compression is only available".format(cmethod)
                    assert_raises_regex(NotImplementedError, msg,
                                        numpy_pickle.dump, obj,
                                        dump_filename,
                                        compress=(cmethod, compress))
                else:
                    numpy_pickle.dump(obj, dump_filename,
                                      compress=(cmethod, compress))
                    # Verify the file contains the right magic number
                    with open(dump_filename, 'rb') as f:
                        assert _detect_compressor(f) == cmethod

                    # Verify the reloaded object is correct
                    obj_reloaded = numpy_pickle.load(dump_filename)
                    assert isinstance(obj_reloaded, type(obj))
                    if isinstance(obj, np.ndarray):
                        np.testing.assert_array_equal(obj_reloaded, obj)
                    else:
                        assert obj_reloaded == obj
                    os.remove(dump_filename)

def test_numpy_subclass(tmpdir):
    filename = tmpdir.join('test.pkl').strpath
    a = SubArray((10,))
    numpy_pickle.dump(a, filename)
    c = numpy_pickle.load(filename)
    assert isinstance(c, SubArray)
    np.testing.assert_array_equal(c, a)

def test_compress_tuple_argument():
    compress_tuples = (('zlib', 3),
                       ('gzip', 3))

    # Verify the tuple is correctly taken into account.
    filename = env['filename'] + str(random.randint(0, 1000))
    for compress in compress_tuples:
        numpy_pickle.dump("dummy", filename, compress=compress)
        # Verify the file contains the right magic number
        with open(filename, 'rb') as f:
            assert _detect_compressor(f) == compress[0]

    # Verify setting a wrong compress tuple raises a ValueError.
    assert_raises_regex(ValueError,
                        'Compress argument tuple should contain exactly '
                        '2 elements',
                        numpy_pickle.dump, "dummy", filename,
                        compress=('zlib', 3, 'extra'))

    # Verify a tuple with a wrong compress method raises a ValueError.
    msg = 'Non valid compression method given: "{}"'.format('wrong')
    assert_raises_regex(ValueError, msg,
                        numpy_pickle.dump, "dummy", filename,
                        compress=('wrong', 3))

    # Verify a tuple with a wrong compress level raises a ValueError.
    msg = 'Non valid compress level given: "{}"'.format('wrong')
    assert_raises_regex(ValueError, msg,
                        numpy_pickle.dump, "dummy", filename,
                        compress=('zlib', 'wrong'))

def test_file_handle_persistence():
    objs = [np.random.random((10, 10)),
            "some data",
            np.matrix([0, 1, 2])]
    fobjs = [bz2.BZ2File, gzip.GzipFile]
    if PY3_OR_LATER:
        import lzma
        fobjs += [lzma.LZMAFile]
    filename = env['filename'] + str(random.randint(0, 1000))

    for obj in objs:
        for fobj in fobjs:
            with fobj(filename, 'wb') as f:
                numpy_pickle.dump(obj, f)

            # Using the same decompressor prevents decompressing again
            # internally.
            with fobj(filename, 'rb') as f:
                obj_reloaded = numpy_pickle.load(f)

            # When needed, the correct decompressor should be used when
            # passing a raw file handle.
            with open(filename, 'rb') as f:
                obj_reloaded_2 = numpy_pickle.load(f)

            if isinstance(obj, np.ndarray):
                np.testing.assert_array_equal(obj_reloaded, obj)
                np.testing.assert_array_equal(obj_reloaded_2, obj)
            else:
                assert obj_reloaded == obj
                assert obj_reloaded_2 == obj

    os.remove(filename)

def test_numpy_persistence_bufferred_array_compression(tmpdir):
    big_array = np.ones((_IO_BUFFER_SIZE + 100), dtype=np.uint8)
    filename = tmpdir.join('test.pkl').strpath
    numpy_pickle.dump(big_array, filename, compress=True)
    arr_reloaded = numpy_pickle.load(filename)

    np.testing.assert_array_equal(big_array, arr_reloaded)

def test_compression_using_file_extension():
    # Test that the compression method corresponds to the given filename
    # extension.
    extensions_dict = {
        # valid compressor extensions
        '.z': 'zlib',
        '.gz': 'gzip',
        '.bz2': 'bz2',
        '.lzma': 'lzma',
        '.xz': 'xz',
        # invalid compressor extensions
        '.pkl': 'not-compressed',
        '': 'not-compressed'
    }
    filename = env['filename'] + str(random.randint(0, 1000))
    obj = "object to dump"

    for ext, cmethod in extensions_dict.items():
        dump_fname = filename + ext
        if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
            # Lzma module only available for python >= 3.3
            msg = "{0} compression is only available".format(cmethod)
            assert_raises_regex(NotImplementedError, msg,
                                numpy_pickle.dump, obj, dump_fname)
        else:
            numpy_pickle.dump(obj, dump_fname)
            # Verify the file contains the right magic number
            with open(dump_fname, 'rb') as f:
                nose.tools.assert_equal(_detect_compressor(f), cmethod)

            # Verify the reloaded object is correct
            obj_reloaded = numpy_pickle.load(dump_fname)
            nose.tools.assert_true(isinstance(obj_reloaded, type(obj)))
            nose.tools.assert_equal(obj_reloaded, obj)
            os.remove(dump_fname)

def test_numpy_subclass():
    filename = env['filename']
    a = SubArray((10,))
    numpy_pickle.dump(a, filename)
    c = numpy_pickle.load(filename)
    assert isinstance(c, SubArray)
    np.testing.assert_array_equal(c, a)

def test_numpy_subclass():
    filename = env['filename']
    a = SubArray((10,))
    numpy_pickle.dump(a, filename)
    c = numpy_pickle.load(filename)
    nose.tools.assert_true(isinstance(c, SubArray))
    np.testing.assert_array_equal(c, a)

def test_joblib_compression_formats(tmpdir, compress, cmethod):
    filename = tmpdir.join('test.pkl').strpath
    objects = (np.ones(shape=(100, 100), dtype='f8'), range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    if cmethod in ("lzma", "xz") and lzma is None:
        pytest.skip("lzma support is not available")
    elif cmethod == 'lz4' and with_lz4.args[0]:
        # Skip the test if lz4 is not installed. We use the with_lz4
        # skipif fixture here, whose argument is True when lz4 is not
        # installed.
        pytest.skip("lz4 is not installed.")

    dump_filename = filename + "." + cmethod
    for obj in objects:
        numpy_pickle.dump(obj, dump_filename,
                          compress=(cmethod, compress))
        # Verify the file contains the right magic number
        with open(dump_filename, 'rb') as f:
            assert _detect_compressor(f) == cmethod

        # Verify the reloaded object is correct
        obj_reloaded = numpy_pickle.load(dump_filename)
        assert isinstance(obj_reloaded, type(obj))
        if isinstance(obj, np.ndarray):
            np.testing.assert_array_equal(obj_reloaded, obj)
        else:
            assert obj_reloaded == obj

def test_compress_string_argument(tmpdir, compress_string):
    # Verify the string is correctly taken into account.
    filename = tmpdir.join('test.pkl').strpath
    numpy_pickle.dump("dummy", filename, compress=compress_string)
    # Verify the file contains the right magic number
    with open(filename, 'rb') as f:
        assert _detect_compressor(f) == compress_string

def test_file_handle_persistence(tmpdir):
    objs = [np.random.random((10, 10)),
            "some data",
            np.matrix([0, 1, 2])]
    fobjs = [bz2.BZ2File, gzip.GzipFile]
    if lzma is not None:
        fobjs += [lzma.LZMAFile]
    filename = tmpdir.join('test.pkl').strpath

    for obj in objs:
        for fobj in fobjs:
            with fobj(filename, 'wb') as f:
                numpy_pickle.dump(obj, f)

            # Using the same decompressor prevents decompressing again
            # internally.
            with fobj(filename, 'rb') as f:
                obj_reloaded = numpy_pickle.load(f)

            # When needed, the correct decompressor should be used when
            # passing a raw file handle.
            with open(filename, 'rb') as f:
                obj_reloaded_2 = numpy_pickle.load(f)

            if isinstance(obj, np.ndarray):
                np.testing.assert_array_equal(obj_reloaded, obj)
                np.testing.assert_array_equal(obj_reloaded_2, obj)
            else:
                assert obj_reloaded == obj
                assert obj_reloaded_2 == obj

def test_compressed_pickle_dump_and_load():
    expected_list = [np.arange(5, dtype=np.int64),
                     np.arange(5, dtype=np.float64),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        numpy_pickle.dump(expected_list, fname, compress=1)
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                nose.tools.assert_equal(result.dtype, expected.dtype)
                np.testing.assert_equal(result, expected)
            else:
                nose.tools.assert_equal(result, expected)
    finally:
        os.remove(fname)

def test_compression_using_file_extension():
    # Test that the compression method corresponds to the given filename
    # extension.
    extensions_dict = {
        # valid compressor extensions
        '.z': 'zlib',
        '.gz': 'gzip',
        '.bz2': 'bz2',
        '.lzma': 'lzma',
        '.xz': 'xz',
        # invalid compressor extensions
        '.pkl': 'not-compressed',
        '': 'not-compressed'
    }
    filename = env['filename'] + str(random.randint(0, 1000))
    obj = "object to dump"

    for ext, cmethod in extensions_dict.items():
        dump_fname = filename + ext
        if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
            # Lzma module only available for python >= 3.3
            msg = "{} compression is only available".format(cmethod)
            assert_raises_regex(NotImplementedError, msg,
                                numpy_pickle.dump, obj, dump_fname)
        else:
            numpy_pickle.dump(obj, dump_fname)
            # Verify the file contains the right magic number
            with open(dump_fname, 'rb') as f:
                assert _detect_compressor(f) == cmethod

            # Verify the reloaded object is correct
            obj_reloaded = numpy_pickle.load(dump_fname)
            assert isinstance(obj_reloaded, type(obj))
            assert obj_reloaded == obj
            os.remove(dump_fname)

def test_file_handle_persistence():
    objs = [np.random.random((10, 10)),
            "some data",
            np.matrix([0, 1, 2])]
    fobjs = [open]
    if not PY26:
        fobjs += [bz2.BZ2File, gzip.GzipFile]
    if PY3_OR_LATER:
        import lzma
        fobjs += [lzma.LZMAFile]
    filename = env['filename'] + str(random.randint(0, 1000))

    for obj in objs:
        for fobj in fobjs:
            with fobj(filename, 'wb') as f:
                numpy_pickle.dump(obj, f)

            # Using the same decompressor prevents decompressing again
            # internally.
            with fobj(filename, 'rb') as f:
                obj_reloaded = numpy_pickle.load(f)

            # When needed, the correct decompressor should be used when
            # passing a raw file handle.
            with open(filename, 'rb') as f:
                obj_reloaded_2 = numpy_pickle.load(f)

            if isinstance(obj, np.ndarray):
                np.testing.assert_array_equal(obj_reloaded, obj)
                np.testing.assert_array_equal(obj_reloaded_2, obj)
            else:
                nose.tools.assert_equal(obj_reloaded, obj)
                nose.tools.assert_equal(obj_reloaded_2, obj)

    os.remove(filename)

def test_joblib_compression_formats(tmpdir, compress, cmethod):
    filename = tmpdir.join('test.pkl').strpath
    objects = (np.ones(shape=(100, 100), dtype='f8'), range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    dump_filename = filename + "." + cmethod
    for obj in objects:
        if not PY3_OR_LATER and cmethod in ('lzma', 'xz', 'lz4'):
            # Lzma module only available for python >= 3.3
            msg = "{} compression is only available".format(cmethod)
            error = NotImplementedError
            if cmethod == 'lz4':
                error = ValueError
            with raises(error) as excinfo:
                numpy_pickle.dump(obj, dump_filename,
                                  compress=(cmethod, compress))
            excinfo.match(msg)
        else:
            numpy_pickle.dump(obj, dump_filename,
                              compress=(cmethod, compress))
            # Verify the file contains the right magic number
            with open(dump_filename, 'rb') as f:
                assert _detect_compressor(f) == cmethod

            # Verify the reloaded object is correct
            obj_reloaded = numpy_pickle.load(dump_filename)
            assert isinstance(obj_reloaded, type(obj))
            if isinstance(obj, np.ndarray):
                np.testing.assert_array_equal(obj_reloaded, obj)
            else:
                assert obj_reloaded == obj

def test_numpy_persistence():
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress in (False, True, 0, 3):
        # We use 'a.T' to have a non C-contiguous array.
        for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)

            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            # Check that only one file was created
            nose.tools.assert_equal(filenames[0], this_filename)
            # Check that this file does exist
            nose.tools.assert_true(
                os.path.exists(os.path.join(env['dir'], filenames[0])))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            np.testing.assert_array_equal(np.array(obj), np.array(obj_))

        # Now test with array subclasses
        for obj in (np.matrix(np.zeros(10)),
                    np.memmap(filename + str(random.randint(0, 1000)) +
                              'mmap',
                              mode='w+', shape=4, dtype=np.float)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)

            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

                np.testing.assert_array_equal(obj_, obj)

        # Test with an object containing multiple numpy arrays
        obj = ComplexTestObject()
        filenames = numpy_pickle.dump(obj, this_filename,
                                      compress=compress)
        # All is cached in one file
        nose.tools.assert_equal(len(filenames), 1)

        obj_loaded = numpy_pickle.load(this_filename)
        nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
        np.testing.assert_array_equal(obj_loaded.array_float,
                                      obj.array_float)
        np.testing.assert_array_equal(obj_loaded.array_int,
                                      obj.array_int)
        np.testing.assert_array_equal(obj_loaded.array_obj,
                                      obj.array_obj)

def test_memmap_persistence():
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')
    nose.tools.assert_true(isinstance(b, np.memmap))

def test_memmap_persistence():
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')
    if [int(x) for x in np.__version__.split('.', 2)[:2]] >= [1, 3]:
        nose.tools.assert_true(isinstance(b, np.memmap))

def persist_input(self, args_tuple, kwargs_dict, filtered_args_dict):
    DirectoryJob.persist_input(self, args_tuple, kwargs_dict,
                               filtered_args_dict)
    call_info = dict(func=self.func,
                     version_info=self.func.version_info,
                     args=args_tuple,
                     kwargs=kwargs_dict)
    numpy_pickle.dump(call_info, pjoin(self._work_path, 'input.pkl'))

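# Sketch (not from the original source) of how the persisted call record
# could be read back for inspection; 'work_path' is hypothetical and
# stands in for a job's self._work_path.
def inspect_persisted_input(work_path):
    call_info = numpy_pickle.load(pjoin(work_path, 'input.pkl'))
    # The dict round-trips with the same keys persist_input dumped above.
    return call_info['func'], call_info['args'], call_info['kwargs']
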
def test_standard_types(tmpdir, compress, member):
    # Test pickling and saving with standard types.
    filename = tmpdir.join('test.pkl').strpath
    numpy_pickle.dump(member, filename, compress=compress)
    _member = numpy_pickle.load(filename)
    # We compare the pickled instance to the reloaded one only if it
    # can be compared to a copied one
    if member == copy.deepcopy(member):
        assert member == _member

def test_masked_array_persistence():
    # The special-case pickler fails because saving masked_array is not
    # implemented, but it just delegates to the standard pickler.
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    a = np.ma.masked_greater(a, 0.5)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')
    nose.tools.assert_true(isinstance(b, np.ma.masked_array))

def atomic_pickle(data, path, filename):
    fd, workfile = tempfile.mkstemp(prefix=filename + '-', dir=path)
    try:
        os.close(fd)
        numpy_pickle.dump(data, workfile)
        os.rename(workfile, pjoin(path, filename))
    except:
        if os.path.exists(workfile):
            os.unlink(workfile)
        raise

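# Usage sketch for atomic_pickle (paths are hypothetical). Creating the
# work file with dir=path keeps the final os.rename on one filesystem,
# so the rename is atomic on POSIX: readers of 'results/scores.pkl' see
# either the old or the new complete pickle, never a half-written one.
def demo_atomic_pickle():
    scores = np.random.random(100)
    atomic_pickle(scores, 'results', 'scores.pkl')
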
def test_in_memory_persistence():
    objs = [np.random.random((10, 10)),
            "some data",
            np.matrix([0, 1, 2])]
    for obj in objs:
        f = io.BytesIO()
        numpy_pickle.dump(obj, f)
        obj_reloaded = numpy_pickle.load(f)
        if isinstance(obj, np.ndarray):
            np.testing.assert_array_equal(obj_reloaded, obj)
        else:
            assert obj_reloaded == obj

def test_masked_array_persistence():
    # The special-case pickler fails because saving masked_array is not
    # implemented, but it just delegates to the standard pickler.
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    a = np.ma.masked_greater(a, 0.5)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')
    assert isinstance(b, np.ma.masked_array)

def test_masked_array_persistence(tmpdir):
    # The special-case pickler fails because saving masked_array is not
    # implemented, but it just delegates to the standard pickler.
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    a = np.ma.masked_greater(a, 0.5)
    filename = tmpdir.join('test.pkl').strpath
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')
    assert isinstance(b, np.ma.masked_array)

def test_file_handle_persistence_mmap(tmpdir):
    obj = np.random.random((10, 10))
    filename = tmpdir.join('test.pkl').strpath

    with open(filename, 'wb') as f:
        numpy_pickle.dump(obj, f)

    with open(filename, 'rb') as f:
        obj_reloaded = numpy_pickle.load(f, mmap_mode='r+')

    np.testing.assert_array_equal(obj_reloaded, obj)

def test_lz4_compression_without_lz4(tmpdir):
    # Check that lz4 cannot be used when the dependency is not available.
    fname = tmpdir.join('test.nolz4').strpath
    data = 'test data'
    with raises(ValueError) as excinfo:
        numpy_pickle.dump(data, fname, compress='lz4')
    excinfo.match(LZ4_NOT_INSTALLED_ERROR)

    with raises(ValueError) as excinfo:
        numpy_pickle.dump(data, fname + '.lz4')
    excinfo.match(LZ4_NOT_INSTALLED_ERROR)

def test_file_handle_persistence_mmap():
    obj = np.random.random((10, 10))
    filename = env['filename'] + str(random.randint(0, 1000))

    with open(filename, 'wb') as f:
        numpy_pickle.dump(obj, f)

    with open(filename, 'rb') as f:
        obj_reloaded = numpy_pickle.load(f, mmap_mode='r+')

    np.testing.assert_array_equal(obj_reloaded, obj)

def test_pickle_highest_protocol():
    # Ensure persistence of a numpy array is valid even when using
    # the pickle HIGHEST_PROTOCOL.
    # See https://github.com/joblib/joblib/issues/362
    filename = env['filename'] + str(random.randint(0, 1000))
    test_array = np.zeros(10)

    numpy_pickle.dump(test_array, filename,
                      protocol=pickle.HIGHEST_PROTOCOL)
    array_reloaded = numpy_pickle.load(filename)

    np.testing.assert_array_equal(array_reloaded, test_array)

def test_pathlib():
    try:
        from pathlib import Path
    except ImportError:
        pass
    else:
        filename = env['filename']
        value = 123
        numpy_pickle.dump(value, Path(filename))
        nose.tools.assert_equal(numpy_pickle.load(filename), value)
        numpy_pickle.dump(value, filename)
        nose.tools.assert_equal(numpy_pickle.load(Path(filename)), value)

def test_memmap_with_padding(tmpdir):
    # Test that memmapped arrays returned by numpy.load are correctly
    # aligned.
    fname = tmpdir.join('test.mmap').strpath
    arr = np.random.randn(10)
    numpy_pickle.dump(arr, fname)

    memmap = numpy_pickle.load(fname, mmap_mode='r')
    assert isinstance(memmap, np.memmap)
    np.testing.assert_array_equal(arr, memmap)
    # The buffer address must be 8-byte aligned for float64 access.
    assert memmap.ctypes.data % 8 == 0

def test_pathlib():
    try:
        from pathlib import Path
    except ImportError:
        pass
    else:
        filename = env['filename']
        value = 123
        numpy_pickle.dump(value, Path(filename))
        assert numpy_pickle.load(filename) == value
        numpy_pickle.dump(value, filename)
        assert numpy_pickle.load(Path(filename)) == value

def test_pickle_highest_protocol(tmpdir):
    # Ensure persistence of a numpy array is valid even when using
    # the pickle HIGHEST_PROTOCOL.
    # See https://github.com/joblib/joblib/issues/362
    filename = tmpdir.join('test.pkl').strpath
    test_array = np.zeros(10)

    numpy_pickle.dump(test_array, filename,
                      protocol=pickle.HIGHEST_PROTOCOL)
    array_reloaded = numpy_pickle.load(filename)

    np.testing.assert_array_equal(array_reloaded, test_array)

def test_pathlib(tmpdir):
    try:
        from pathlib import Path
    except ImportError:
        pass
    else:
        filename = tmpdir.join('test.pkl').strpath
        value = 123
        numpy_pickle.dump(value, Path(filename))
        assert numpy_pickle.load(filename) == value
        numpy_pickle.dump(value, filename)
        assert numpy_pickle.load(Path(filename)) == value

def test_load_memmap_with_big_offset(tmpdir):
    # Test that numpy memmap offset is set correctly if greater than
    # mmap.ALLOCATIONGRANULARITY, see
    # https://github.com/joblib/joblib/issues/451 and
    # https://github.com/numpy/numpy/pull/8443 for more details.
    fname = tmpdir.join('test.mmap').strpath
    size = mmap.ALLOCATIONGRANULARITY
    obj = [np.zeros(size, dtype='uint8'), np.ones(size, dtype='uint8')]
    numpy_pickle.dump(obj, fname)
    memmaps = numpy_pickle.load(fname, mmap_mode='r')
    assert isinstance(memmaps[1], np.memmap)
    assert memmaps[1].offset > size
    np.testing.assert_array_equal(obj, memmaps)

def test_standard_types():
    # Test pickling and saving with standard types.
    filename = env['filename']
    for compress in [0, 1]:
        for member in typelist:
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            numpy_pickle.dump(member, this_filename, compress=compress)
            _member = numpy_pickle.load(this_filename)
            # We compare the pickled instance to the reloaded one only
            # if it can be compared to a copied one
            if member == copy.deepcopy(member):
                yield assert_equal, member, _member

def test_non_contiguous_array_pickling():
    filename = env['filename'] + str(random.randint(0, 1000))

    for array in [  # Array that triggers a contiguousness issue with
                    # nditer, see https://github.com/joblib/joblib/pull/352
                    # and https://github.com/joblib/joblib/pull/353
                    np.asfortranarray([[1, 2], [3, 4]])[1:],
                    # Non-contiguous array which works fine with nditer
                    np.ones((10, 50, 20), order='F')[:, :1, :]]:
        assert not array.flags.c_contiguous
        assert not array.flags.f_contiguous
        numpy_pickle.dump(array, filename)
        array_reloaded = numpy_pickle.load(filename)
        np.testing.assert_array_equal(array_reloaded, array)

    os.remove(filename)

def test_numpy_persistence():
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress, cache_size in ((0, 0), (1, 0), (1, 10)):
        # We use 'a.T' to have a non C-contiguous array.
        for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            # Check that one file was created per array
            if not compress:
                nose.tools.assert_equal(len(filenames), len(obj) + 1)
            # Check that these files do exist
            for file in filenames:
                nose.tools.assert_true(
                    os.path.exists(os.path.join(env['dir'], file)))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            nose.tools.assert_true(np.all(np.array(obj) ==
                                          np.array(obj_)))

        # Now test with array subclasses
        for obj in (np.matrix(np.zeros(10)),
                    np.core.multiarray._reconstruct(np.memmap, (),
                                                    np.float)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

    # Finally smoke test the warning in case of compress + mmap_mode
    this_filename = filename + str(random.randint(0, 1000))
    numpy_pickle.dump(a, this_filename, compress=1)
    numpy_pickle.load(this_filename, mmap_mode='r')

def test_compressed_pickle_dump_and_load():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian '
                            'machines')

    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        try:
            dumped_filenames = numpy_pickle.dump(
                expected_list, fname, compress=1,
                cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)

def test_compressed_pickle_dump_and_load():
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('>i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.arange(5, dtype=np.dtype('>f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray; here we
                     # want to verify this type of object is correctly
                     # unpickled across versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        dumped_filenames = numpy_pickle.dump(expected_list, fname,
                                             compress=1)
        assert len(dumped_filenames) == 1
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                assert result.dtype == expected.dtype
                np.testing.assert_equal(result, expected)
            else:
                assert result == expected
    finally:
        os.remove(fname)

def test_file_handle_persistence_in_memory_mmap():
    obj = np.random.random((10, 10))
    buf = io.BytesIO()

    numpy_pickle.dump(obj, buf)

    with warnings.catch_warnings(record=True) as caught_warnings:
        warnings.simplefilter("always")
        numpy_pickle.load(buf, mmap_mode='r+')
        assert len(caught_warnings) == 1
        for warn in caught_warnings:
            assert warn.category == UserWarning
            assert (warn.message.args[0] ==
                    'In memory persistence is not compatible with '
                    'mmap_mode "%(mmap_mode)s" flag passed. mmap_mode '
                    'option will be ignored.' % {'mmap_mode': 'r+'})

def test_memmap_persistence_mixed_dtypes():
    # Loading data structures that have sub-arrays with dtype=object
    # should not prevent memmapping of fixed-size dtype sub-arrays.
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    b = np.array([1, 'b'], dtype=object)
    construct = (a, b)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(construct, filename)
    a_clone, b_clone = numpy_pickle.load(filename, mmap_mode='r')

    # the floating point array has been memory mapped
    assert isinstance(a_clone, np.memmap)

    # the object-dtype array has been loaded in memory
    assert not isinstance(b_clone, np.memmap)

def test_memmap_persistence():
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')

    assert isinstance(b, np.memmap)

    # Test with an object containing multiple numpy arrays
    filename = env['filename'] + str(random.randint(0, 1000))
    obj = ComplexTestObject()
    numpy_pickle.dump(obj, filename)
    obj_loaded = numpy_pickle.load(filename, mmap_mode='r')
    assert isinstance(obj_loaded, type(obj))
    assert isinstance(obj_loaded.array_float, np.memmap)
    assert not obj_loaded.array_float.flags.writeable
    assert isinstance(obj_loaded.array_int, np.memmap)
    assert not obj_loaded.array_int.flags.writeable
    # Memory map not allowed for numpy object arrays
    assert not isinstance(obj_loaded.array_obj, np.memmap)
    np.testing.assert_array_equal(obj_loaded.array_float,
                                  obj.array_float)
    np.testing.assert_array_equal(obj_loaded.array_int,
                                  obj.array_int)
    np.testing.assert_array_equal(obj_loaded.array_obj,
                                  obj.array_obj)

    # Test we can write in memmapped arrays
    obj_loaded = numpy_pickle.load(filename, mmap_mode='r+')
    assert obj_loaded.array_float.flags.writeable
    obj_loaded.array_float[0:10] = 10.0
    assert obj_loaded.array_int.flags.writeable
    obj_loaded.array_int[0:10] = 10

    obj_reloaded = numpy_pickle.load(filename, mmap_mode='r')
    np.testing.assert_array_equal(obj_reloaded.array_float,
                                  obj_loaded.array_float)
    np.testing.assert_array_equal(obj_reloaded.array_int,
                                  obj_loaded.array_int)

    # Test w+ mode is caught and the mode has switched to r+
    numpy_pickle.load(filename, mmap_mode='w+')
    assert obj_loaded.array_int.flags.writeable
    assert obj_loaded.array_int.mode == 'r+'
    assert obj_loaded.array_float.flags.writeable
    assert obj_loaded.array_float.mode == 'r+'

def test_compress_mmap_mode_warning():
    # Test the warning in case of compress + mmap_mode
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    this_filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, this_filename, compress=1)
    with warnings.catch_warnings(record=True) as caught_warnings:
        warnings.simplefilter("always")
        numpy_pickle.load(this_filename, mmap_mode='r+')
        assert len(caught_warnings) == 1
        for warn in caught_warnings:
            assert warn.category == UserWarning
            assert (warn.message.args[0] ==
                    'mmap_mode "%(mmap_mode)s" is not compatible with '
                    'compressed file %(filename)s. "%(mmap_mode)s" flag '
                    'will be ignored.' % {'filename': this_filename,
                                          'mmap_mode': 'r+'})

def test_file_handle_persistence_compressed_mmap():
    obj = np.random.random((10, 10))
    filename = env['filename'] + str(random.randint(0, 1000))

    with open(filename, 'wb') as f:
        numpy_pickle.dump(obj, f, compress=('gzip', 3))

    with closing(gzip.GzipFile(filename, 'rb')) as f:
        with warnings.catch_warnings(record=True) as caught_warnings:
            warnings.simplefilter("always")
            numpy_pickle.load(f, mmap_mode='r+')
            assert len(caught_warnings) == 1
            for warn in caught_warnings:
                assert warn.category == UserWarning
                assert (warn.message.args[0] ==
                        '"%(fileobj)r" is not a raw file, mmap_mode '
                        '"%(mmap_mode)s" flag will be ignored.' %
                        {'fileobj': f, 'mmap_mode': 'r+'})

def test_cache_size_warning():
    # Check deprecation warning raised when cache size is not None
    filename = env['filename'] + str(random.randint(0, 1000))
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))

    for cache_size in (None, 0, 10):
        with warnings.catch_warnings(record=True) as caught_warnings:
            warnings.simplefilter("always")
            numpy_pickle.dump(a, filename, cache_size=cache_size)
            expected_nb_warnings = 1 if cache_size is not None else 0
            assert len(caught_warnings) == expected_nb_warnings
            for warn in caught_warnings:
                assert warn.category == DeprecationWarning
                assert (warn.message.args[0] ==
                        "Please do not set 'cache_size' in joblib.dump, "
                        "this parameter has no effect and will be "
                        "removed. You used 'cache_size={0}'"
                        .format(cache_size))

def test_compressed_pickle_dump_and_load():
    expected_list = [np.arange(5, dtype=np.int64),
                     np.arange(5, dtype=np.float64),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        numpy_pickle.dump(expected_list, f.name, compress=1)
        result_list = numpy_pickle.load(f.name)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                nose.tools.assert_equal(result.dtype, expected.dtype)
                np.testing.assert_equal(result, expected)
            else:
                nose.tools.assert_equal(result, expected)

def test_compress_mmap_mode_warning():
    # Test the warning in case of compress + mmap_mode
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    this_filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, this_filename, compress=1)
    with warnings.catch_warnings(record=True) as caught_warnings:
        warnings.simplefilter("always")
        numpy_pickle.load(this_filename, mmap_mode='r+')
        nose.tools.assert_equal(len(caught_warnings), 1)
        for warn in caught_warnings:
            nose.tools.assert_equal(warn.category, DeprecationWarning)
            nose.tools.assert_equal(
                warn.message.args[0],
                'File "%(filename)s" is compressed using '
                '"%(compressor)s" which is not compatible '
                'with mmap_mode "%(mmap_mode)s" flag '
                'passed.' % {'filename': this_filename,
                             'mmap_mode': 'r+',
                             'compressor': 'zlib'})