def test_joblib_pickle_across_python_versions():
    """Load every reference pickle shipped in joblib/test/data and compare
    it against the expected objects."""
    # Dtypes carry an explicit little-endian marker ('<') because the
    # reference pickles can be generated on one architecture while the
    # tests run on another one.
    # See https://github.com/joblib/joblib/issues/279.
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a compatibility
        # alias for .tobytes, which was added in numpy 1.9.0.
        np.arange(256, dtype=np.uint8).tostring(),
        # np.matrix is a subclass of np.ndarray; we want to verify
        # this type of object is correctly unpickled among versions.
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        u"C'est l'\xe9t\xe9 !",
    ]

    # The compressed and non compressed pickles under joblib/test/data
    # were generated by the joblib/test/data/create_numpy_pickle.py
    # script for the relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    pickle_extensions = ('.pkl', '.gz', '.gzip', '.bz2', '.xz', '.lzma')
    for entry in os.listdir(test_data_dir):
        # str.endswith accepts a tuple of suffixes directly.
        if entry.endswith(pickle_extensions):
            _check_pickle(os.path.join(test_data_dir, entry), expected_list)
def test_joblib_pickle_across_python_versions():
    """Check the reference *.gz and *.pkl pickles from joblib/test/data
    against the expected objects."""
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    # Explicit little-endian dtypes ('<'): the pickles can be generated
    # on one architecture and the tests run on another one.
    # See https://github.com/joblib/joblib/issues/279.
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a compatibility
        # alias for .tobytes, which was added in numpy 1.9.0.
        np.arange(256, dtype=np.uint8).tostring(),
        u"C'est l'\xe9t\xe9 !",
    ]

    # These pickles were generated by the
    # joblib/test/data/create_numpy_pickle.py script for the relevant
    # python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    data_filenames = [
        fname
        for pattern in ('*.gz', '*.pkl')
        for fname in glob.glob(os.path.join(test_data_dir, pattern))
    ]
    for fname in data_filenames:
        _check_pickle(fname, expected_list)
def test_joblib_pickle_across_python_versions():
    """Load every reference pickle in joblib/test/data and compare it to
    the expected objects."""
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the compressed and non compressed
    # pickles in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    # BUG FIX: the lz4 entry was missing its leading dot ('lz4' instead of
    # '.lz4'), inconsistent with the other entries, so the suffix check
    # matched any filename merely ending in the letters "lz4" rather than
    # files with the '.lz4' extension.
    pickle_extensions = ('.pkl', '.gz', '.gzip', '.bz2', '.xz', '.lzma',
                         '.lz4')
    pickle_filenames = [os.path.join(test_data_dir, fn)
                        for fn in os.listdir(test_data_dir)
                        if any(fn.endswith(ext)
                               for ext in pickle_extensions)]

    for fname in pickle_filenames:
        _check_pickle(fname, expected_list)
def test_joblib_decompression_format_support():
    """Check that each compressed reference pickle in joblib/test/data is
    loadable through the matching decompression format."""
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    extensions = ('.gz', '.gzip', '.bz2', '.xz', '.lzma')
    # BUG FIX: the glob pattern was the bare extension (e.g. '.gz'), which
    # only matches a file literally named '.gz'; prepend '*' so every file
    # carrying the extension is collected.
    compress_filenames = [fname
                          for ext in extensions
                          for fname in glob.glob(
                              os.path.join(test_data_dir, '*' + ext))]

    for fname in compress_filenames:
        _check_compression_format(fname, expected_list)
def test_joblib_pickle_across_python_versions():
    """Check the *.gz and *.pkl reference pickles against the expected
    objects."""
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    # Be explicit about endianness ('<') since the pickles can be
    # generated on one architecture while the tests run on another.
    # See https://github.com/joblib/joblib/issues/279.
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a compatibility alias
        # for .tobytes, which was added in numpy 1.9.0.
        np.arange(256, dtype=np.uint8).tostring(),
        u"C'est l'\xe9t\xe9 !",
    ]

    # The reference pickles were generated by the
    # joblib/test/data/create_numpy_pickle.py script for the relevant
    # python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    for pattern in ('*.gz', '*.pkl'):
        for fname in glob.glob(os.path.join(test_data_dir, pattern)):
            _check_pickle(fname, expected_list)
def test_compressed_pickle_dump_and_load():
    """Dump the reference objects compressed, reload them and compare,
    exercising both cache_size code paths of ``numpy_pickle.dump``."""
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    # Only the temporary file *name* is kept; NamedTemporaryFile deletes
    # the file itself when the context manager exits.
    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        # BUG FIX: initialize before the try block; otherwise a failure
        # inside numpy_pickle.dump would make the finally clause raise
        # NameError and mask the original exception.
        dumped_filenames = []
        try:
            dumped_filenames = numpy_pickle.dump(
                expected_list, fname, compress=1,
                cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)
def test_joblib_decompression_format_support():
    """Check that each compressed reference pickle in joblib/test/data is
    loadable through the matching decompression format."""
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    extensions = ('.gz', '.gzip', '.bz2', '.xz', '.lzma')
    # BUG FIX: the glob pattern was the bare extension (e.g. '.gz'), which
    # only matches a file literally named '.gz'; prepend '*' so every file
    # carrying the extension is collected.
    compress_filenames = [fname
                          for ext in extensions
                          for fname in glob.glob(
                              os.path.join(test_data_dir, '*' + ext))]

    for fname in compress_filenames:
        _check_compression_format(fname, expected_list)
def test_dtype():
    """The hash of an object holding several dtypes must not depend on the
    history of those dtype instances (cater for cache invalidation with
    complex dtypes)."""
    original = np.dtype([('f1', np.uint), ('f2', np.int32)])
    alias = original
    roundtripped = pickle.loads(pickle.dumps(original))
    assert hash([original, roundtripped]) == hash([original, alias])
def test_compressed_pickle_dump_and_load(tmpdir):
    """Round-trip the reference objects through a single compressed pickle
    file and compare the reloaded values."""
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('>i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.arange(5, dtype=np.dtype('>f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     np.arange(256, dtype=np.uint8).tobytes(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
                     u"C'est l'\xe9t\xe9 !"]

    fname = tmpdir.join('temp.pkl.gz').strpath

    dumped_filenames = numpy_pickle.dump(expected_list, fname, compress=1)
    assert len(dumped_filenames) == 1

    for result, expected in zip(numpy_pickle.load(fname), expected_list):
        if not isinstance(expected, np.ndarray):
            assert result == expected
        else:
            assert result.dtype == expected.dtype
            np.testing.assert_equal(result, expected)
def test_compressed_pickle_dump_and_load():
    """Dump the expected objects with compression, reload and compare,
    exercising both cache_size code paths of ``numpy_pickle.dump``."""
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        u"C'est l'\xe9t\xe9 !",
    ]

    # Only the temporary file *name* is kept; NamedTemporaryFile deletes
    # the file itself when the context manager exits.
    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        # BUG FIX: initialize before the try block; otherwise a failure
        # inside numpy_pickle.dump would make the finally clause raise
        # NameError and mask the original exception.
        dumped_filenames = []
        try:
            dumped_filenames = numpy_pickle.dump(expected_list,
                                                 fname,
                                                 compress=1,
                                                 cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)
def test_compressed_pickle_dump_and_load():
    """Round-trip the reference objects through a single compressed pickle
    file and compare the reloaded values."""
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('>i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.arange(5, dtype=np.dtype('>f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a compatibility alias
        # for .tobytes, which was added in numpy 1.9.0.
        np.arange(256, dtype=np.uint8).tostring(),
        # np.matrix is a subclass of np.ndarray; verify such objects are
        # correctly unpickled among versions.
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
        u"C'est l'\xe9t\xe9 !",
    ]

    # Only the temporary file *name* is needed here.
    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name
    try:
        dumped_filenames = numpy_pickle.dump(expected_list, fname,
                                             compress=1)
        assert len(dumped_filenames) == 1
        for result, expected in zip(numpy_pickle.load(fname),
                                    expected_list):
            if not isinstance(expected, np.ndarray):
                assert result == expected
            else:
                assert result.dtype == expected.dtype
                np.testing.assert_equal(result, expected)
    finally:
        os.remove(fname)
def test_numpy_array_byte_order_mismatch_detection():
    """Check detection and fixing of numpy array byte order mismatches.

    Arrays whose byte order differs from the host's must be flagged by
    ``_is_numpy_array_byte_order_mismatch`` and converted back to native
    ('=') order by ``_ensure_native_byte_order``.
    """
    def _check(arrays, arrays_byteorder):
        # Verify detection and conversion for every array in one pass.
        for array in arrays:
            if sys.byteorder == arrays_byteorder:
                assert not _is_numpy_array_byte_order_mismatch(array)
            else:
                assert _is_numpy_array_byte_order_mismatch(array)
            converted = _ensure_native_byte_order(array)
            if converted.dtype.fields:
                for f in converted.dtype.fields.values():
                    # BUG FIX: the original code computed this comparison
                    # without asserting it, so a failed conversion of a
                    # structured dtype went undetected.
                    assert f[0].byteorder == '='
            else:
                assert converted.dtype.byteorder == "="

    # List of numpy arrays with big endian byteorder.
    _check([np.array([(1, 2.0), (3, 4.0)],
                     dtype=[('', '>i8'), ('', '>f8')]),
            np.arange(3, dtype=np.dtype('>i8')),
            np.arange(3, dtype=np.dtype('>f8'))],
           'big')

    # List of numpy arrays with little endian byteorder.
    _check([np.array([(1, 2.0), (3, 4.0)],
                     dtype=[('', '<i8'), ('', '<f8')]),
            np.arange(3, dtype=np.dtype('<i8')),
            np.arange(3, dtype=np.dtype('<f8'))],
           'little')
def test_numpy_dtype_pickling():
    """Hashing of pickled numpy dtypes must be robust to object identity
    and object copies.

    numpy dtype hashing is tricky to get right: see #231, #239, #251,
    #1080, #1082, and the explanatory comments inside
    ``joblib.hashing.NumpyHasher.save``.
    """
    simple_a = np.dtype('f4')
    simple_b = np.dtype('f4')
    # simple dtype objects are interned by numpy
    assert simple_a is simple_b
    assert hash(simple_a) == hash(simple_b)

    simple_copy = pickle.loads(pickle.dumps(simple_a))
    assert simple_copy is not simple_a
    assert hash(simple_a) == hash(simple_copy)

    assert hash([simple_a, simple_a]) == hash([simple_copy, simple_copy])
    assert hash([simple_a, simple_a]) == hash([simple_a, simple_copy])

    fields = [('name', np.str_, 16), ('grades', np.float64, (2,))]
    complex_a = np.dtype(fields)
    complex_b = np.dtype(fields)
    # complex dtype objects are not interned
    assert hash(complex_a) == hash(complex_b)

    complex_copy = pickle.loads(pickle.dumps(complex_a))
    assert complex_copy is not complex_a
    assert hash(complex_a) == hash(complex_copy)

    assert (hash([complex_a, complex_a])
            == hash([complex_copy, complex_copy]))
    assert (hash([complex_a, complex_a])
            == hash([complex_copy, complex_a]))
def test_compressed_pickle_dump_and_load():
    """Dump the reference objects into one compressed pickle, reload them
    and compare with the originals."""
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('>i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.arange(5, dtype=np.dtype('>f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes, which was added
                     # in numpy 1.9.0.
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray; verify such
                     # objects are correctly unpickled among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
                     u"C'est l'\xe9t\xe9 !"]

    # Only the temporary file *name* is needed here.
    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name
    try:
        dumped_filenames = numpy_pickle.dump(expected_list, fname,
                                             compress=1)
        nose.tools.assert_equal(len(dumped_filenames), 1)
        for result, expected in zip(numpy_pickle.load(fname),
                                    expected_list):
            if not isinstance(expected, np.ndarray):
                nose.tools.assert_equal(result, expected)
            else:
                nose.tools.assert_equal(result.dtype, expected.dtype)
                np.testing.assert_equal(result, expected)
    finally:
        os.remove(fname)