def test_file_handle_persistence():
    objs = [np.random.random((10, 10)), "some data", np.matrix([0, 1, 2])]
    fobjs = [bz2.BZ2File, gzip.GzipFile]
    if PY3_OR_LATER:
        import lzma
        fobjs += [lzma.LZMAFile]
    filename = env['filename'] + str(random.randint(0, 1000))

    for obj in objs:
        for fobj in fobjs:
            with fobj(filename, 'wb') as f:
                numpy_pickle.dump(obj, f)

            # Using the same decompressor avoids an extra internal
            # decompression pass.
            with fobj(filename, 'rb') as f:
                obj_reloaded = numpy_pickle.load(f)

            # When a raw file handle is passed, the correct decompressor
            # should be selected automatically.
            with open(filename, 'rb') as f:
                obj_reloaded_2 = numpy_pickle.load(f)

            if isinstance(obj, np.ndarray):
                np.testing.assert_array_equal(obj_reloaded, obj)
                np.testing.assert_array_equal(obj_reloaded_2, obj)
            else:
                assert obj_reloaded == obj
                assert obj_reloaded_2 == obj

            os.remove(filename)
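Many of the tests in this collection rely on a module-level env dict (providing env['filename'] and env['dir']) and a PY3_OR_LATER flag that are not shown here. The following is a minimal sketch of what such fixtures could look like; the names are taken from the tests above but the setup itself is an assumption, not the actual joblib test harness.

import os
import sys
import tempfile

# Assumed module-level fixtures used by the tests in this collection.
PY3_OR_LATER = sys.version_info[0] >= 3

env = {}


def setup_module():
    """Create a temporary directory and a base filename for the tests."""
    env['dir'] = tempfile.mkdtemp(prefix='joblib_test_')
    env['filename'] = os.path.join(env['dir'], 'test.pkl')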
def test_compressed_pickle_python_2_3_compatibility():
    expected_list = [np.arange(5, dtype=np.int64),
                     np.arange(5, dtype=np.float64),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    # These files have been generated with the
    # joblib/test/data/create_numpy_pickle.py script for the relevant
    # python and joblib versions.
    basenames = ['joblib_0.8.4_compressed_pickle_py27.gz',
                 'joblib_0.9.0_compressed_pickle_py27.gz',
                 'joblib_0.8.4_compressed_pickle_py33.gz',
                 'joblib_0.9.0_compressed_pickle_py33.gz',
                 'joblib_0.8.4_compressed_pickle_py34.gz',
                 'joblib_0.9.0_compressed_pickle_py34.gz']
    data_filenames = [os.path.join(test_data_dir, bname)
                      for bname in basenames]

    for fname in data_filenames:
        version_match = re.match(r'.+py(\d)(\d).gz', fname)
        python_version_used_for_writing = tuple(
            [int(each) for each in version_match.groups()])
        python_version_used_for_reading = sys.version_info[:2]

        python_version_to_default_pickle_protocol = {
            (2, 6): 2, (2, 7): 2,
            (3, 0): 3, (3, 1): 3, (3, 2): 3, (3, 3): 3, (3, 4): 4}
        pickle_reading_protocol = python_version_to_default_pickle_protocol[
            python_version_used_for_reading]
        pickle_writing_protocol = python_version_to_default_pickle_protocol[
            python_version_used_for_writing]
        if ('0.8.4' not in fname or
                pickle_reading_protocol >= pickle_writing_protocol):
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        else:
            # For joblib <= 0.8.4, compressed pickles written with
            # python `version = v` cannot be read by python with
            # `version < v` because of differences in the default
            # pickle protocol (2 for python 2, 3 for python 3.3 and 4
            # for python 3.4).
            try:
                numpy_pickle.load(fname)
                raise AssertionError('Numpy pickle loading should '
                                     'have raised a ValueError exception')
            except ValueError as e:
                nose.tools.assert_true(
                    'unsupported pickle protocol' in str(e.args))
def test_numpy_persistence():
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress in (False, True, 0, 3):
        # We use 'a.T' to have a non C-contiguous array.
        for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)

            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            # Check that only one file was created
            nose.tools.assert_equal(filenames[0], this_filename)
            # Check that this file does exist
            nose.tools.assert_true(
                os.path.exists(os.path.join(env['dir'], filenames[0])))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            np.testing.assert_array_equal(np.array(obj), np.array(obj_))

        # Now test with array subclasses
        for obj in (np.matrix(np.zeros(10)),
                    np.memmap(filename + str(random.randint(0, 1000)) + 'mmap',
                              mode='w+', shape=4, dtype=np.float)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)

            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

                np.testing.assert_array_equal(obj_, obj)

        # Test with an object containing multiple numpy arrays
        obj = ComplexTestObject()
        filenames = numpy_pickle.dump(obj, this_filename,
                                      compress=compress)
        # All is cached in one file
        nose.tools.assert_equal(len(filenames), 1)

        obj_loaded = numpy_pickle.load(this_filename)
        nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
        np.testing.assert_array_equal(obj_loaded.array_float, obj.array_float)
        np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
        np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)
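Several tests in this collection round-trip a ComplexTestObject holding multiple numpy arrays; its definition is not included in this excerpt. Below is a minimal sketch consistent with the attributes asserted above (array_float, array_int, array_obj); it is an assumption, not necessarily the class used by the original suite.

import numpy as np


class ComplexTestObject(object):
    """Object containing several numpy arrays, used to exercise pickling."""

    def __init__(self):
        self.array_float = np.arange(100, dtype='float64')
        self.array_int = np.ones(100, dtype='int32')
        # Object-dtype arrays cannot be memory-mapped, which is what the
        # mmap-related tests rely on.
        self.array_obj = np.array(['a', 10, 20.0], dtype='object')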
def _check_pickle(filename, expected_list):
    """Helper function to test joblib pickle content.

    Note: currently only pickles containing an iterable are supported
    by this function.
    """
    version_match = re.match(r'.+py(\d)(\d).+', filename)
    py_version_used_for_writing = int(version_match.group(1))
    py_version_used_for_reading = sys.version_info[0]

    py_version_to_default_pickle_protocol = {2: 2, 3: 3}
    pickle_reading_protocol = py_version_to_default_pickle_protocol.get(
        py_version_used_for_reading, 4)
    pickle_writing_protocol = py_version_to_default_pickle_protocol.get(
        py_version_used_for_writing, 4)
    if pickle_reading_protocol >= pickle_writing_protocol:
        try:
            with warnings.catch_warnings(record=True) as caught_warnings:
                warnings.simplefilter("always")
                result_list = numpy_pickle.load(filename)
                expected_nb_warnings = 1 if ("0.9" in filename or
                                             "0.8.4" in filename) else 0
                nose.tools.assert_equal(len(caught_warnings),
                                        expected_nb_warnings)
            for warn in caught_warnings:
                nose.tools.assert_equal(warn.category, DeprecationWarning)
                nose.tools.assert_equal(warn.message.args[0],
                                        "The file '{0}' has been generated "
                                        "with a joblib version less than "
                                        "0.10. Please regenerate this pickle "
                                        "file.".format(filename))
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        except Exception as exc:
            # When trying to read with python 3 a pickle generated
            # with python 2 we expect a user-friendly error
            if (py_version_used_for_reading == 3 and
                    py_version_used_for_writing == 2):
                nose.tools.assert_true(isinstance(exc, ValueError))
                message = ('You may be trying to read with '
                           'python 3 a joblib pickle generated with python 2.')
                nose.tools.assert_true(message in str(exc))
            else:
                raise
    else:
        # Pickle protocol used for writing is too high. We expect an
        # "unsupported pickle protocol" error message.
        try:
            numpy_pickle.load(filename)
            raise AssertionError('Numpy pickle loading should '
                                 'have raised a ValueError exception')
        except ValueError as e:
            message = 'unsupported pickle protocol: {0}'.format(
                pickle_writing_protocol)
            nose.tools.assert_true(message in str(e.args))
def test_pathlib():
    try:
        from pathlib import Path
    except ImportError:
        pass
    else:
        filename = env['filename']
        value = 123
        numpy_pickle.dump(value, Path(filename))
        nose.tools.assert_equal(numpy_pickle.load(filename), value)
        numpy_pickle.dump(value, filename)
        nose.tools.assert_equal(numpy_pickle.load(Path(filename)), value)
def test_pathlib():
    try:
        from pathlib import Path
    except ImportError:
        pass
    else:
        filename = env['filename']
        value = 123
        numpy_pickle.dump(value, Path(filename))
        assert numpy_pickle.load(filename) == value
        numpy_pickle.dump(value, filename)
        assert numpy_pickle.load(Path(filename)) == value
def test_numpy_persistence():
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress, cache_size in ((0, 0), (1, 0), (1, 10)):
        # We use 'a.T' to have a non C-contiguous array.
        for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            # Check that one file was created per array
            if not compress:
                nose.tools.assert_equal(len(filenames), len(obj) + 1)
            # Check that these files do exist
            for file in filenames:
                nose.tools.assert_true(
                    os.path.exists(os.path.join(env['dir'], file)))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            nose.tools.assert_true(np.all(np.array(obj) == np.array(obj_)))

    # Now test with array subclasses
    for obj in (np.matrix(np.zeros(10)),
                np.core.multiarray._reconstruct(np.memmap, (), np.float)):
        this_filename = filename + str(random.randint(0, 1000))
        filenames = numpy_pickle.dump(obj, this_filename,
                                      compress=compress,
                                      cache_size=cache_size)
        obj_ = numpy_pickle.load(this_filename)
        if (type(obj) is not np.memmap and
                hasattr(obj, '__array_prepare__')):
            # We don't reconstruct memmaps
            nose.tools.assert_true(isinstance(obj_, type(obj)))

    # Finally smoke test the warning in case of compress + mmap_mode
    this_filename = filename + str(random.randint(0, 1000))
    numpy_pickle.dump(a, this_filename, compress=1)
    numpy_pickle.load(this_filename, mmap_mode='r')
def test_compressed_pickle_dump_and_load():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        try:
            dumped_filenames = numpy_pickle.dump(
                expected_list, fname, compress=1, cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)
def cache_value(value, filename, decimal=7):
    """Helper function for checking that a value hasn't changed between
    two invocations.

    First call: writes the value to a file.
    Second call: checks that what was written is identical to the value
    provided in the second call.

    TODO: only numpy arrays are compared, other values still have to be
    compared.

    Parameters
    ==========
    value: arbitrary Python value
        This may include numpy objects. Uses persistence from joblib to
        achieve high efficiency.
    """
    from joblib.numpy_pickle import dump, load
    base_dir = os.path.split(filename)[0]
    if not os.path.isdir(base_dir):
        os.makedirs(base_dir)
    if os.path.isfile(filename):
        cached = load(filename)
        np.testing.assert_almost_equal(cached, value, decimal=decimal)
    else:
        dump(value, filename)
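A minimal usage sketch for cache_value follows; the cache path is hypothetical. The first invocation stores the reference value, and later invocations recompute the value and compare it against the stored copy.

import numpy as np

# Deterministic value whose stability we want to track across runs.
result = np.linalg.inv(np.eye(3) * 2.0)

# First run writes the file; subsequent runs load it and assert that the
# freshly computed value still matches to 7 decimal places.
cache_value(result, '/tmp/regression_cache/inv_eye3.pkl', decimal=7)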
def test_joblib_compression_formats():
    compresslevels = (1, 3, 6)
    filename = env['filename'] + str(random.randint(0, 1000))
    objects = (np.ones(shape=(100, 100), dtype='f8'),
               range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    for compress in compresslevels:
        for cmethod in _COMPRESSORS:
            dump_filename = filename + "." + cmethod
            for obj in objects:
                if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
                    # Lzma module only available for python >= 3.3
                    msg = "{} compression is only available".format(cmethod)
                    assert_raises_regex(NotImplementedError, msg,
                                        numpy_pickle.dump, obj,
                                        dump_filename,
                                        compress=(cmethod, compress))
                else:
                    numpy_pickle.dump(obj, dump_filename,
                                      compress=(cmethod, compress))
                    # Verify the file contains the right magic number
                    with open(dump_filename, 'rb') as f:
                        assert _detect_compressor(f) == cmethod
                    # Verify the reloaded object is correct
                    obj_reloaded = numpy_pickle.load(dump_filename)
                    assert isinstance(obj_reloaded, type(obj))
                    if isinstance(obj, np.ndarray):
                        np.testing.assert_array_equal(obj_reloaded, obj)
                    else:
                        assert obj_reloaded == obj
                    os.remove(dump_filename)
def test_compressed_pickle_dump_and_load():
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('>i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.arange(5, dtype=np.dtype('>f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        dumped_filenames = numpy_pickle.dump(expected_list, fname, compress=1)
        assert len(dumped_filenames) == 1
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                assert result.dtype == expected.dtype
                np.testing.assert_equal(result, expected)
            else:
                assert result == expected
    finally:
        os.remove(fname)
def test_compression_using_file_extension():
    # Test that the compression method corresponds to the given filename
    # extension.
    extensions_dict = {
        # valid compressor extensions
        '.z': 'zlib',
        '.gz': 'gzip',
        '.bz2': 'bz2',
        '.lzma': 'lzma',
        '.xz': 'xz',
        # invalid compressor extensions
        '.pkl': 'not-compressed',
        '': 'not-compressed'
    }
    filename = env['filename'] + str(random.randint(0, 1000))
    obj = "object to dump"

    for ext, cmethod in extensions_dict.items():
        dump_fname = filename + ext
        if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
            # The lzma module is only available for python >= 3.3
            msg = "{} compression is only available".format(cmethod)
            assert_raises_regex(NotImplementedError, msg,
                                numpy_pickle.dump, obj, dump_fname)
        else:
            numpy_pickle.dump(obj, dump_fname)
            # Verify the file contains the right magic number
            with open(dump_fname, 'rb') as f:
                assert _detect_compressor(f) == cmethod
            # Verify the reloaded object is correct
            obj_reloaded = numpy_pickle.load(dump_fname)
            assert isinstance(obj_reloaded, type(obj))
            assert obj_reloaded == obj
            os.remove(dump_fname)
def test_numpy_subclass():
    filename = env['filename']
    a = SubArray((10,))
    numpy_pickle.dump(a, filename)
    c = numpy_pickle.load(filename)
    assert isinstance(c, SubArray)
    np.testing.assert_array_equal(c, a)
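SubArray is not defined in this excerpt. A trivial np.ndarray subclass along the following lines would satisfy the assertions above; it is an illustrative assumption, not the class used by the original suite.

import numpy as np


class SubArray(np.ndarray):
    """Minimal ndarray subclass used to check that subclasses survive a
    dump/load round trip (hypothetical stand-in for the suite's SubArray)."""

    def __new__(cls, shape):
        # Start from a zero-filled base array and view it as the subclass.
        return np.zeros(shape).view(cls)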
def test_numpy_persistence_bufferred_array_compression():
    big_array = np.ones((_IO_BUFFER_SIZE + 100), dtype=np.uint8)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(big_array, filename, compress=True)
    arr_reloaded = numpy_pickle.load(filename)

    np.testing.assert_array_equal(big_array, arr_reloaded)
def test_file_handle_persistence_in_memory_mmap():
    obj = np.random.random((10, 10))
    buf = io.BytesIO()

    numpy_pickle.dump(obj, buf)

    with warnings.catch_warnings(record=True) as caught_warnings:
        warnings.simplefilter("always")
        numpy_pickle.load(buf, mmap_mode='r+')
        assert len(caught_warnings) == 1
        for warn in caught_warnings:
            assert warn.category == UserWarning
            assert (warn.message.args[0] ==
                    'In memory persistence is not compatible with '
                    'mmap_mode "%(mmap_mode)s" flag passed. mmap_mode '
                    'option will be ignored.' % {'mmap_mode': 'r+'})
def test_memmap_persistence():
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')

    nose.tools.assert_true(isinstance(b, np.memmap))
def _check_pickle(filename, expected_list):
    """Helper function to test joblib pickle content.

    Note: currently only pickles containing an iterable are supported
    by this function.
    """
    version_match = re.match(r'.+py(\d)(\d).+', filename)
    py_version_used_for_writing = int(version_match.group(1))
    py_version_used_for_reading = sys.version_info[0]

    py_version_to_default_pickle_protocol = {2: 2, 3: 3}
    pickle_reading_protocol = py_version_to_default_pickle_protocol.get(
        py_version_used_for_reading, 4)
    pickle_writing_protocol = py_version_to_default_pickle_protocol.get(
        py_version_used_for_writing, 4)
    if pickle_reading_protocol >= pickle_writing_protocol:
        try:
            result_list = numpy_pickle.load(filename)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        except Exception as exc:
            # When trying to read with python 3 a pickle generated
            # with python 2 we expect a user-friendly error
            if (py_version_used_for_reading == 3 and
                    py_version_used_for_writing == 2):
                nose.tools.assert_true(isinstance(exc, ValueError))
                message = ('You may be trying to read with '
                           'python 3 a joblib pickle generated with python 2.')
                nose.tools.assert_true(message in str(exc))
            else:
                raise
    else:
        # Pickle protocol used for writing is too high. We expect an
        # "unsupported pickle protocol" error message.
        try:
            numpy_pickle.load(filename)
            raise AssertionError('Numpy pickle loading should '
                                 'have raised a ValueError exception')
        except ValueError as e:
            message = 'unsupported pickle protocol: {0}'.format(
                pickle_writing_protocol)
            nose.tools.assert_true(message in str(e.args))
def test_compress_mmap_mode_warning():
    # Test the warning in case of compress + mmap_mode
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    this_filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, this_filename, compress=1)
    with warnings.catch_warnings(record=True) as caught_warnings:
        warnings.simplefilter("always")
        numpy_pickle.load(this_filename, mmap_mode='r+')
        assert len(caught_warnings) == 1
        for warn in caught_warnings:
            assert warn.category == UserWarning
            assert (warn.message.args[0] ==
                    'mmap_mode "%(mmap_mode)s" is not compatible with '
                    'compressed file %(filename)s. "%(mmap_mode)s" flag will '
                    'be ignored.' % {'filename': this_filename,
                                     'mmap_mode': 'r+'})
def test_memmap_persistence():
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')
    if [int(x) for x in np.__version__.split('.', 2)[:2]] >= [1, 3]:
        nose.tools.assert_true(isinstance(b, np.memmap))
def test_file_handle_persistence_compressed_mmap():
    obj = np.random.random((10, 10))
    filename = env['filename'] + str(random.randint(0, 1000))

    with open(filename, 'wb') as f:
        numpy_pickle.dump(obj, f, compress=('gzip', 3))

    with closing(gzip.GzipFile(filename, 'rb')) as f:
        with warnings.catch_warnings(record=True) as caught_warnings:
            warnings.simplefilter("always")
            numpy_pickle.load(f, mmap_mode='r+')
            assert len(caught_warnings) == 1
            for warn in caught_warnings:
                assert warn.category == UserWarning
                assert (warn.message.args[0] ==
                        '"%(fileobj)r" is not a raw file, mmap_mode '
                        '"%(mmap_mode)s" flag will be ignored.'
                        % {'fileobj': f, 'mmap_mode': 'r+'})
def test_compress_mmap_mode_warning():
    # Test the warning in case of compress + mmap_mode
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    this_filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, this_filename, compress=1)
    with warnings.catch_warnings(record=True) as caught_warnings:
        warnings.simplefilter("always")
        numpy_pickle.load(this_filename, mmap_mode='r+')
        nose.tools.assert_equal(len(caught_warnings), 1)
        for warn in caught_warnings:
            nose.tools.assert_equal(warn.category, DeprecationWarning)
            nose.tools.assert_equal(
                warn.message.args[0],
                'File "%(filename)s" is compressed using '
                '"%(compressor)s" which is not compatible '
                'with mmap_mode "%(mmap_mode)s" flag '
                'passed.' % {'filename': this_filename,
                             'mmap_mode': 'r+',
                             'compressor': 'zlib'})
def gen():
    idx = 0
    n = -1
    output_filename = pjoin(self.job_path, 'output.pkl')
    for _ in self._poll_generator(timeout):
        iter_filename = pjoin(self.job_path, 'output-%d.pkl' % idx)
        if os.path.exists(iter_filename):
            yield numpy_pickle.load(iter_filename)
            idx += 1
        elif n == -1 and os.path.exists(output_filename):
            status, n, exc = numpy_pickle.load(output_filename)
            assert idx <= n, (idx, n)
            if idx == n:
                if status == 'exhausted':
                    return
                elif status == 'exception':
                    raise exc
                else:
                    assert False
def test_masked_array_persistence():
    # The special-case pickler does not handle masked_array saving, so it
    # just delegates to the standard pickler.
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    a = np.ma.masked_greater(a, 0.5)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')
    assert isinstance(b, np.ma.masked_array)
def test_file_handle_persistence_mmap():
    obj = np.random.random((10, 10))
    filename = env['filename'] + str(random.randint(0, 1000))

    with open(filename, 'wb') as f:
        numpy_pickle.dump(obj, f)

    with open(filename, 'rb') as f:
        obj_reloaded = numpy_pickle.load(f, mmap_mode='r+')

    np.testing.assert_array_equal(obj_reloaded, obj)
def before_submit(fut):
    with working_directory(fut.job_path):
        tests.append((eq_, ls(), PRE_SUBMIT_LS))
        input = load('input.pkl')
        tests.append((eq_, input, dict(args=(1, 1),
                                       func=func,
                                       version_info=func.version_info,
                                       kwargs={})))
        tests.append((eq_, 'jobscript for job func\n',
                      filecontents('jobscript')))
def test_in_memory_persistence():
    objs = [np.random.random((10, 10)), "some data", np.matrix([0, 1, 2])]
    for obj in objs:
        f = io.BytesIO()
        numpy_pickle.dump(obj, f)
        obj_reloaded = numpy_pickle.load(f)
        if isinstance(obj, np.ndarray):
            np.testing.assert_array_equal(obj_reloaded, obj)
        else:
            assert obj_reloaded == obj
def test_pickle_highest_protocol():
    # Ensure persistence of a numpy array is valid even when using
    # pickle.HIGHEST_PROTOCOL.
    # See https://github.com/joblib/joblib/issues/362
    filename = env['filename'] + str(random.randint(0, 1000))
    test_array = np.zeros(10)

    numpy_pickle.dump(test_array, filename, protocol=pickle.HIGHEST_PROTOCOL)
    array_reloaded = numpy_pickle.load(filename)

    np.testing.assert_array_equal(array_reloaded, test_array)
def test_standard_types():
    # Test pickling and saving with standard types.
    filename = env['filename']
    for compress in [0, 1]:
        for member in typelist:
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            numpy_pickle.dump(member, this_filename, compress=compress)
            _member = numpy_pickle.load(this_filename)
            # We compare the pickled instance to the reloaded one only if
            # it can be compared to a copied one
            if member == copy.deepcopy(member):
                yield assert_equal, member, _member
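typelist is defined elsewhere in the suite and not shown here. The following is a representative stand-in, assumed only to make the loop above concrete; the real list may contain many more types.

# Hypothetical stand-in for the suite's `typelist`: a handful of picklable
# built-in values covering the common standard types.
typelist = [None, True, 1, 1.0, 1 + 2j,
            'a string', u'a unicode string \xe9', b'some bytes',
            (1, 2, 3), [1, 2, 3], {'a': 1, 'b': 2}, {1, 2, 3}]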
def test_basic():
    "Basic tests"
    tests = []
    PRE_SUBMIT_LS = set(['input.pkl', 'jobscript'])
    COMPUTED_LS = PRE_SUBMIT_LS.union(['output.pkl', 'jobid', 'log'])

    def before_submit(fut):
        with working_directory(fut.job_path):
            tests.append((eq_, ls(), PRE_SUBMIT_LS))
            input = load('input.pkl')
            tests.append((eq_, input, dict(args=(1, 1),
                                           func=func,
                                           version_info=func.version_info,
                                           kwargs={})))
            tests.append((eq_, 'jobscript for job func\n',
                          filecontents('jobscript')))

    executor = MockExecutor(store_path=store_path, logger=logger,
                            before_submit_hook=before_submit)

    # Run a single job, check that it executes, and check input/output
    fut = executor.submit(func, 1, 1)
    yield eq_, fut.is_generator(), False
    yield eq_, executor.submit_count, 1
    yield eq_, fut.result(), 2
    yield ne_, executor.given_work_paths[0], fut.job_path
    with working_directory(fut.job_path):
        output = load('output.pkl')
        yield eq_, output, ('finished', 2)
        yield eq_, ls(), COMPUTED_LS
        yield eq_, len(executor.given_work_paths), 1
        yield eq_, filecontents('jobid'), 'job-0\n'

    # Re-run and check that result is loaded from cache
    fut = executor.submit(func, 1, 1)
    yield eq_, fut.result(), 2
    yield eq_, executor.submit_count, 1

    # Run yet again with different input
    executor.before_submit_hook = lambda x: None
    fut2 = executor.submit(func, 1, 2)
    yield eq_, fut2.result(), 3
    yield eq_, executor.submit_count, 2
    yield ne_, fut2.job_path, fut.job_path

    # Run tests queued by closures
    yield eq_, len(tests), 3
    for x in tests:
        yield x
def test_non_contiguous_array_pickling():
    filename = env['filename'] + str(random.randint(0, 1000))

    for array in [
            # Array that triggers a contiguousness issue with nditer,
            # see https://github.com/joblib/joblib/pull/352 and
            # https://github.com/joblib/joblib/pull/353
            np.asfortranarray([[1, 2], [3, 4]])[1:],
            # Non-contiguous array which works fine with nditer
            np.ones((10, 50, 20), order='F')[:, :1, :]]:
        assert not array.flags.c_contiguous
        assert not array.flags.f_contiguous
        numpy_pickle.dump(array, filename)
        array_reloaded = numpy_pickle.load(filename)
        np.testing.assert_array_equal(array_reloaded, array)
    os.remove(filename)
def test_joblib_compression_formats():
    compresslevels = (1, 3, 6)
    filename = env['filename'] + str(random.randint(0, 1000))
    objects = (np.ones(shape=(100, 100), dtype='f8'),
               range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    for compress in compresslevels:
        for cmethod in _COMPRESSORS:
            dump_filename = filename + "." + cmethod
            for obj in objects:
                if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
                    # Lzma module only available for python >= 3.3
                    msg = "{0} compression is only available".format(cmethod)
                    assert_raises_regex(NotImplementedError, msg,
                                        numpy_pickle.dump, obj,
                                        dump_filename,
                                        compress=(cmethod, compress))
                else:
                    numpy_pickle.dump(obj, dump_filename,
                                      compress=(cmethod, compress))
                    # Verify the file contains the right magic number
                    with open(dump_filename, 'rb') as f:
                        nose.tools.assert_equal(_detect_compressor(f),
                                                cmethod)
                    # Verify the reloaded object is correct
                    obj_reloaded = numpy_pickle.load(dump_filename)
                    nose.tools.assert_true(isinstance(obj_reloaded,
                                                      type(obj)))
                    if isinstance(obj, np.ndarray):
                        np.testing.assert_array_equal(obj_reloaded, obj)
                    else:
                        nose.tools.assert_equal(obj_reloaded, obj)
                    os.remove(dump_filename)
def test_joblib_compression_formats(tmpdir, compress, cmethod):
    filename = tmpdir.join('test.pkl').strpath
    objects = (np.ones(shape=(100, 100), dtype='f8'),
               range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    dump_filename = filename + "." + cmethod
    for obj in objects:
        if not PY3_OR_LATER and cmethod in ('lzma', 'xz', 'lz4'):
            # Lzma module only available for python >= 3.3
            msg = "{} compression is only available".format(cmethod)
            error = NotImplementedError
            if cmethod == 'lz4':
                error = ValueError
            with raises(error) as excinfo:
                numpy_pickle.dump(obj, dump_filename,
                                  compress=(cmethod, compress))
            excinfo.match(msg)
        elif cmethod == 'lz4' and with_lz4.args[0]:
            # Skip the test if lz4 is not installed. We here use the with_lz4
            # skipif fixture whose argument is True when lz4 is not installed
            raise SkipTest("lz4 is not installed.")
        else:
            numpy_pickle.dump(obj, dump_filename,
                              compress=(cmethod, compress))
            # Verify the file contains the right magic number
            with open(dump_filename, 'rb') as f:
                assert _detect_compressor(f) == cmethod

            # Verify the reloaded object is correct
            obj_reloaded = numpy_pickle.load(dump_filename)
            assert isinstance(obj_reloaded, type(obj))
            if isinstance(obj, np.ndarray):
                np.testing.assert_array_equal(obj_reloaded, obj)
            else:
                assert obj_reloaded == obj
def test_load_externally_decompressed_files():
    # Test that BinaryZlibFile generates valid gzip and zlib compressed files.
    obj = "a string to persist"
    filename_raw = env['filename'] + str(random.randint(0, 1000))
    compress_list = (('.z', _zlib_file_decompress),
                     ('.gz', _gzip_file_decompress))

    for extension, decompress in compress_list:
        filename_compressed = filename_raw + extension
        # Use automatic extension detection to compress with the right method.
        numpy_pickle.dump(obj, filename_compressed)

        # Decompress with the corresponding method
        decompress(filename_compressed, filename_raw)

        # Test that the uncompressed pickle can be loaded and
        # that the result is correct.
        obj_reloaded = numpy_pickle.load(filename_raw)
        nose.tools.assert_equal(obj, obj_reloaded)

        # Do some cleanup
        os.remove(filename_raw)
        if os.path.exists(filename_compressed):
            os.remove(filename_compressed)
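The _zlib_file_decompress and _gzip_file_decompress helpers are not shown in this excerpt. Below are sketches of what such helpers might do, assuming (as the test's comment states) that the dumped files are plain zlib and gzip streams that can be decompressed with the standard library.

import gzip
import zlib


def _zlib_file_decompress(source_filename, target_filename):
    """Decompress a zlib-compressed file into target_filename (sketch)."""
    with open(source_filename, 'rb') as fo:
        buf = zlib.decompress(fo.read())
    with open(target_filename, 'wb') as fo:
        fo.write(buf)


def _gzip_file_decompress(source_filename, target_filename):
    """Decompress a gzip-compressed file into target_filename (sketch)."""
    with gzip.open(source_filename, 'rb') as fo:
        buf = fo.read()
    with open(target_filename, 'wb') as fo:
        fo.write(buf)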
def test_memmap_persistence(tmpdir):
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    filename = tmpdir.join('test1.pkl').strpath
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')

    assert isinstance(b, np.memmap)

    # Test with an object containing multiple numpy arrays
    filename = tmpdir.join('test2.pkl').strpath
    obj = ComplexTestObject()
    numpy_pickle.dump(obj, filename)
    obj_loaded = numpy_pickle.load(filename, mmap_mode='r')
    assert isinstance(obj_loaded, type(obj))
    assert isinstance(obj_loaded.array_float, np.memmap)
    assert not obj_loaded.array_float.flags.writeable
    assert isinstance(obj_loaded.array_int, np.memmap)
    assert not obj_loaded.array_int.flags.writeable
    # Memory map not allowed for numpy object arrays
    assert not isinstance(obj_loaded.array_obj, np.memmap)
    np.testing.assert_array_equal(obj_loaded.array_float, obj.array_float)
    np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
    np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)

    # Test we can write in memmapped arrays
    obj_loaded = numpy_pickle.load(filename, mmap_mode='r+')
    assert obj_loaded.array_float.flags.writeable
    obj_loaded.array_float[0:10] = 10.0
    assert obj_loaded.array_int.flags.writeable
    obj_loaded.array_int[0:10] = 10

    obj_reloaded = numpy_pickle.load(filename, mmap_mode='r')
    np.testing.assert_array_equal(obj_reloaded.array_float,
                                  obj_loaded.array_float)
    np.testing.assert_array_equal(obj_reloaded.array_int,
                                  obj_loaded.array_int)

    # Test w+ mode is caught and the mode has switched to r+
    numpy_pickle.load(filename, mmap_mode='w+')
    assert obj_loaded.array_int.flags.writeable
    assert obj_loaded.array_int.mode == 'r+'
    assert obj_loaded.array_float.flags.writeable
    assert obj_loaded.array_float.mode == 'r+'
def test_memmap_persistence():
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')

    nose.tools.assert_true(isinstance(b, np.memmap))

    # Test with an object containing multiple numpy arrays
    filename = env['filename'] + str(random.randint(0, 1000))
    obj = ComplexTestObject()
    numpy_pickle.dump(obj, filename)
    obj_loaded = numpy_pickle.load(filename, mmap_mode='r')
    nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
    nose.tools.assert_true(isinstance(obj_loaded.array_float, np.memmap))
    nose.tools.assert_false(obj_loaded.array_float.flags.writeable)
    nose.tools.assert_true(isinstance(obj_loaded.array_int, np.memmap))
    nose.tools.assert_false(obj_loaded.array_int.flags.writeable)
    # Memory map not allowed for numpy object arrays
    nose.tools.assert_false(isinstance(obj_loaded.array_obj, np.memmap))
    np.testing.assert_array_equal(obj_loaded.array_float, obj.array_float)
    np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
    np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)

    # Test we can write in memmapped arrays
    obj_loaded = numpy_pickle.load(filename, mmap_mode='r+')
    nose.tools.assert_true(obj_loaded.array_float.flags.writeable)
    obj_loaded.array_float[0:10] = 10.0
    nose.tools.assert_true(obj_loaded.array_int.flags.writeable)
    obj_loaded.array_int[0:10] = 10

    obj_reloaded = numpy_pickle.load(filename, mmap_mode='r')
    np.testing.assert_array_equal(obj_reloaded.array_float,
                                  obj_loaded.array_float)
    np.testing.assert_array_equal(obj_reloaded.array_int,
                                  obj_loaded.array_int)

    # Test w+ mode is caught and the mode has switched to r+
    numpy_pickle.load(filename, mmap_mode='w+')
    nose.tools.assert_true(obj_loaded.array_int.flags.writeable)
    nose.tools.assert_equal(obj_loaded.array_int.mode, 'r+')
    nose.tools.assert_true(obj_loaded.array_float.flags.writeable)
    nose.tools.assert_equal(obj_loaded.array_float.mode, 'r+')
def _check_pickle(filename, expected_list):
    """Helper function to test joblib pickle content.

    Note: currently only pickles containing an iterable are supported
    by this function.
    """
    if (not PY3_OR_LATER and (filename.endswith('.xz') or
                              filename.endswith('.lzma'))):
        # lzma is not supported for python versions < 3.3
        nose.tools.assert_raises(NotImplementedError,
                                 numpy_pickle.load, filename)
        return

    version_match = re.match(r'.+py(\d)(\d).+', filename)
    py_version_used_for_writing = int(version_match.group(1))
    py_version_used_for_reading = sys.version_info[0]

    py_version_to_default_pickle_protocol = {2: 2, 3: 3}
    pickle_reading_protocol = py_version_to_default_pickle_protocol.get(
        py_version_used_for_reading, 4)
    pickle_writing_protocol = py_version_to_default_pickle_protocol.get(
        py_version_used_for_writing, 4)
    if pickle_reading_protocol >= pickle_writing_protocol:
        try:
            with warnings.catch_warnings(record=True) as caught_warnings:
                warnings.simplefilter("always")
                result_list = numpy_pickle.load(filename)
                expected_nb_warnings = 1 if ("0.9" in filename or
                                             "0.8.4" in filename) else 0
                nose.tools.assert_equal(len(caught_warnings),
                                        expected_nb_warnings)
            for warn in caught_warnings:
                nose.tools.assert_equal(warn.category, DeprecationWarning)
                nose.tools.assert_equal(
                    warn.message.args[0],
                    "The file '{0}' has been generated "
                    "with a joblib version less than "
                    "0.10. Please regenerate this pickle "
                    "file.".format(filename))
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        except Exception as exc:
            # When trying to read with python 3 a pickle generated
            # with python 2 we expect a user-friendly error
            if (py_version_used_for_reading == 3 and
                    py_version_used_for_writing == 2):
                nose.tools.assert_true(isinstance(exc, ValueError))
                message = ('You may be trying to read with '
                           'python 3 a joblib pickle generated with python 2.')
                nose.tools.assert_true(message in str(exc))
            else:
                raise
    else:
        # Pickle protocol used for writing is too high. We expect an
        # "unsupported pickle protocol" error message.
        try:
            numpy_pickle.load(filename)
            raise AssertionError('Numpy pickle loading should '
                                 'have raised a ValueError exception')
        except ValueError as e:
            message = 'unsupported pickle protocol: {0}'.format(
                pickle_writing_protocol)
            nose.tools.assert_true(message in str(e.args))
def test_numpy_persistence():
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress in (False, True, 0, 3):
        # We use 'a.T' to have a non C-contiguous array.
        for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)

            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            # Check that only one file was created
            nose.tools.assert_equal(filenames[0], this_filename)
            # Check that this file does exist
            nose.tools.assert_true(
                os.path.exists(os.path.join(env['dir'], filenames[0])))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            np.testing.assert_array_equal(np.array(obj), np.array(obj_))

        # Now test with array subclasses
        for obj in (np.matrix(np.zeros(10)),
                    np.memmap(filename + str(random.randint(0, 1000)) + 'mmap',
                              mode='w+', shape=4, dtype=np.float)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)

            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

                np.testing.assert_array_equal(obj_, obj)

        # Test with an object containing multiple numpy arrays
        obj = ComplexTestObject()
        filenames = numpy_pickle.dump(obj, this_filename,
                                      compress=compress)
        # All is cached in one file
        nose.tools.assert_equal(len(filenames), 1)

        obj_loaded = numpy_pickle.load(this_filename)
        nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
        np.testing.assert_array_equal(obj_loaded.array_float, obj.array_float)
        np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
        np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)
def test_compressed_pickle_python_2_3_compatibility():
    expected_list = [np.arange(5, dtype=np.int64),
                     np.arange(5, dtype=np.float64),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    # These files have been generated with the
    # joblib/test/data/create_numpy_pickle.py script for the relevant
    # python and joblib versions.
    basenames = ['joblib_0.8.4_compressed_pickle_py27.gz',
                 'joblib_0.9.0_compressed_pickle_py27.gz',
                 'joblib_0.8.4_compressed_pickle_py33.gz',
                 'joblib_0.9.0_compressed_pickle_py33.gz',
                 'joblib_0.8.4_compressed_pickle_py34.gz',
                 'joblib_0.9.0_compressed_pickle_py34.gz']
    data_filenames = [os.path.join(test_data_dir, bname)
                      for bname in basenames]

    for fname in data_filenames:
        version_match = re.match(r'.+py(\d)(\d).gz', fname)
        py_version_used_for_writing = tuple(
            [int(each) for each in version_match.groups()])
        py_version_used_for_reading = sys.version_info[:2]

        # Use pickle protocol 4 for Python 3.4 and later
        py_version_to_default_pickle_protocol = {
            (2, 6): 2, (2, 7): 2,
            (3, 0): 3, (3, 1): 3, (3, 2): 3, (3, 3): 3}
        pickle_reading_protocol = py_version_to_default_pickle_protocol.get(
            py_version_used_for_reading, 4)
        pickle_writing_protocol = py_version_to_default_pickle_protocol.get(
            py_version_used_for_writing, 4)
        if ('0.8.4' not in fname or
                pickle_reading_protocol >= pickle_writing_protocol):
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        else:
            # For joblib <= 0.8.4, compressed pickles written with
            # python `version = v` cannot be read by python with
            # `version < v` because of differences in the default
            # pickle protocol (2 for python 2, 3 for python 3.3 and 4
            # for python 3.4).
            try:
                numpy_pickle.load(fname)
                raise AssertionError('Numpy pickle loading should '
                                     'have raised a ValueError exception')
            except ValueError as e:
                nose.tools.assert_true(
                    'unsupported pickle protocol' in str(e.args))
def load(dataset):
    print("Loading model for " + dataset)
    rf[dataset] = numpy_pickle.load(
        "models/randomforest_" + dataset + ".joblib")
def read_data(data_dir):
    # NOTE: the original snippet is cut off before this function's
    # signature; the name and argument are assumed from the
    # `read_data(test_dir)` call in the main block below.
    for fname in os.listdir(data_dir):
        test_path = os.path.join(data_dir, fname)
        with open(test_path) as f:
            text = f.read()
        yield fname, text


if __name__ == "__main__":
    config = get_config()
    feat = load_object(config.get("NORMAL", "feat"))
    test_dir = "data/test"
    result_dir = "data/submit"
    crf = numpy_pickle.load('data/models/crf.m')
    for fname, text in read_data(test_dir):
        print(fname)
        sents = [text]
        y = crf.predict(feat(sents))
        anns = tagger.seq_to_ind(y[0])
        anns = sorted(anns, key=lambda x: (x[1], x[2]))
        ann_fname = fname.replace(".txt", ".ann")
        save_path = os.path.join(result_dir, ann_fname)
        with open(save_path, 'w') as f:
            for i, (type_, s, e) in enumerate(anns):
                # The original snippet is truncated here; `name=text[s:e]`
                # (the entity surface string) is an assumed completion.
                f.write("T{tid}\t{type_} {start} {end}\t{name}\n".format(
                    tid=i, type_=type_, start=s, end=e, name=text[s:e]))
def load(dataset):
    print("Loading model for " + dataset)
    models[dataset] = numpy_pickle.load("models/SVM_" + dataset + ".joblib")
def _check_pickle(filename, expected_list):
    """Helper function to test joblib pickle content.

    Note: currently only pickles containing an iterable are supported
    by this function.
    """
    version_match = re.match(r'.+py(\d)(\d).+', filename)
    py_version_used_for_writing = int(version_match.group(1))

    py_version_to_default_pickle_protocol = {2: 2, 3: 3}
    pickle_reading_protocol = py_version_to_default_pickle_protocol.get(3, 4)
    pickle_writing_protocol = py_version_to_default_pickle_protocol.get(
        py_version_used_for_writing, 4)
    if pickle_reading_protocol >= pickle_writing_protocol:
        try:
            with warns(None) as warninfo:
                warnings.simplefilter('always')
                warnings.filterwarnings(
                    'ignore', module='numpy',
                    message='The compiler package is deprecated')
                result_list = numpy_pickle.load(filename)
            filename_base = os.path.basename(filename)
            expected_nb_warnings = 1 if ("_0.9" in filename_base or
                                         "_0.8.4" in filename_base) else 0
            assert len(warninfo) == expected_nb_warnings
            for w in warninfo:
                assert w.category == DeprecationWarning
                assert (str(w.message) ==
                        "The file '{0}' has been generated with a joblib "
                        "version less than 0.10. Please regenerate this "
                        "pickle file.".format(filename))
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    assert result.dtype == expected.dtype
                    np.testing.assert_equal(result, expected)
                else:
                    assert result == expected
        except Exception as exc:
            # When trying to read with python 3 a pickle generated
            # with python 2 we expect a user-friendly error
            if py_version_used_for_writing == 2:
                assert isinstance(exc, ValueError)
                message = ('You may be trying to read with '
                           'python 3 a joblib pickle generated with python 2.')
                assert message in str(exc)
            elif filename.endswith('.lz4') and with_lz4.args[0]:
                assert isinstance(exc, ValueError)
                assert LZ4_NOT_INSTALLED_ERROR in str(exc)
            else:
                raise
    else:
        # Pickle protocol used for writing is too high. We expect an
        # "unsupported pickle protocol" error message.
        try:
            numpy_pickle.load(filename)
            raise AssertionError('Numpy pickle loading should '
                                 'have raised a ValueError exception')
        except ValueError as e:
            message = 'unsupported pickle protocol: {0}'.format(
                pickle_writing_protocol)
            assert message in str(e.args)
def test_numpy_subclass():
    filename = env['filename']
    a = SubArray((10,))
    numpy_pickle.dump(a, filename)
    c = numpy_pickle.load(filename)
    nose.tools.assert_true(isinstance(c, SubArray))
def _load_output(self, timeout=None):
    target_file = pjoin(self.job_path, 'output.pkl')
    self._executor.logger.debug('Loading job output: %s', self.job_name)
    return numpy_pickle.load(target_file)