Example #1
def test_joblib_pickle_across_python_versions():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # across versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the compressed and non-compressed
    # pickles in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    pickle_extensions = ('.pkl', '.gz', '.gzip', '.bz2', '.xz', '.lzma')
    pickle_filenames = [os.path.join(test_data_dir, fn)
                        for fn in os.listdir(test_data_dir)
                        if any(fn.endswith(ext) for ext in pickle_extensions)]

    for fname in pickle_filenames:
        _check_pickle(fname, expected_list)
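
Note: `_check_pickle` is a helper defined elsewhere in joblib's test module. As a rough sketch of what such a check involves (the real helper also deals with Python/numpy version caveats, so treat this as illustrative only):

import numpy as np
from joblib import numpy_pickle

def _check_pickle(filename, expected_list):
    # Illustrative sketch: load the persisted file and compare
    # item by item against the expected objects.
    result_list = numpy_pickle.load(filename)
    for result, expected in zip(result_list, expected_list):
        if isinstance(expected, np.ndarray):
            assert result.dtype == expected.dtype
            np.testing.assert_equal(result, expected)
        else:
            assert result == expected
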
Example #2
def test_joblib_pickle_across_python_versions():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        u"C'est l'\xe9t\xe9 !"
    ]

    # Testing all the *.gz and *.pkl (compressed and non-compressed
    # pickles) in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    data_filenames = glob.glob(os.path.join(test_data_dir, '*.gz'))
    data_filenames += glob.glob(os.path.join(test_data_dir, '*.pkl'))

    for fname in data_filenames:
        _check_pickle(fname, expected_list)
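
Under pytest, the nose-style byte-order guard above is usually expressed as a skipif marker instead; a minimal sketch with the test body elided:

import sys

import pytest

@pytest.mark.skipif(sys.byteorder != 'little',
                    reason='test pickles were generated on little-endian machines')
def test_joblib_pickle_across_python_versions():
    ...
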
Example #3
def test_joblib_pickle_across_python_versions():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # across versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the compressed and non-compressed
    # pickles in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    pickle_extensions = ('.pkl', '.gz', '.gzip', '.bz2', '.xz', '.lzma', '.lz4')
    pickle_filenames = [os.path.join(test_data_dir, fn)
                        for fn in os.listdir(test_data_dir)
                        if any(fn.endswith(ext) for ext in pickle_extensions)]

    for fname in pickle_filenames:
        _check_pickle(fname, expected_list)
Example #4
def test_joblib_decompression_format_support():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        # np.matrix is a subclass of np.ndarray, here we want
        # to verify this type of object is correctly unpickled
        # across versions.
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        u"C'est l'\xe9t\xe9 !"
    ]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    extensions = ('.gz', '.gzip', '.bz2', '.xz', '.lzma')
    compress_filenames_list = [
        glob.glob(os.path.join(test_data_dir, '*' + ext)) for ext in extensions
    ]
    compress_filenames = sum(compress_filenames_list, [])

    for fname in compress_filenames:
        _check_compression_format(fname, expected_list)
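
`_check_compression_format` is another helper from the same test module. A hypothetical sketch of the idea: read the first bytes of the file and match them against the magic number each compressor writes, then make sure the payload still loads:

import os

from joblib import numpy_pickle

# Magic bytes written at the start of each compressed format
# ('.lzma' shown for default LZMA settings).
_MAGIC_BYTES = {
    '.gz': b'\x1f\x8b',
    '.gzip': b'\x1f\x8b',
    '.bz2': b'BZh',
    '.xz': b'\xfd7zXZ',
    '.lzma': b'\x5d\x00',
}

def _check_compression_format(filename, expected_list):
    ext = os.path.splitext(filename)[1]
    with open(filename, 'rb') as f:
        header = f.read(8)
    assert header.startswith(_MAGIC_BYTES[ext])
    # The compressed payload should still round-trip.
    result_list = numpy_pickle.load(filename)
    assert len(result_list) == len(expected_list)
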
Example #5
def test_joblib_pickle_across_python_versions():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the *.gz and *.pkl (compressed and non-compressed
    # pickles) in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    data_filenames = glob.glob(os.path.join(test_data_dir, '*.gz'))
    data_filenames += glob.glob(os.path.join(test_data_dir, '*.pkl'))

    for fname in data_filenames:
        _check_pickle(fname, expected_list)
Example #6
def test_compressed_pickle_dump_and_load():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        dumped_filenames = []
        try:
            dumped_filenames = numpy_pickle.dump(
                expected_list, fname, compress=1,
                cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)
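
The NamedTemporaryFile block above only borrows a file name: the file is deleted as soon as the with block exits, so the name could in principle be claimed by another process before numpy_pickle.dump reuses it. A less fragile sketch (reserve_temp_pickle is an illustrative name, not a joblib helper):

import os
import tempfile

def reserve_temp_pickle(suffix='.gz'):
    # mkstemp actually creates the file, so the name stays reserved;
    # close our descriptor and let numpy_pickle reopen the path.
    fd, fname = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return fname
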
Example #7
def test_joblib_decompression_format_support():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # across versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    extensions = ('.gz', '.gzip', '.bz2', '.xz', '.lzma')
    compress_filenames_list = [glob.glob(os.path.join(test_data_dir, '*' + ext))
                               for ext in extensions]
    compress_filenames = sum(compress_filenames_list, [])

    for fname in compress_filenames:
        _check_compression_format(fname, expected_list)
Example #8
def test_dtype():
    # Test that we obtain the same hash for objects owning several dtypes,
    # whatever the past of these dtypes. Cater for cache invalidation with
    # complex dtypes.
    a = np.dtype([('f1', np.uint), ('f2', np.int32)])
    b = a
    c = pickle.loads(pickle.dumps(a))
    assert hash([a, c]) == hash([a, b])
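
The hash used here is joblib's content hash (imported in the test module), not the builtin hash, which cannot digest lists. A quick standalone illustration, assuming joblib is installed:

import pickle

import numpy as np
from joblib import hash as joblib_hash

a = np.dtype([('f1', np.uint64), ('f2', np.int32)])
b = pickle.loads(pickle.dumps(a))
# Equal dtypes should hash identically whether they are the same
# object or a pickled copy.
print(joblib_hash([a, a]) == joblib_hash([a, b]))  # expected: True
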
Example #10
def test_compressed_pickle_dump_and_load(tmpdir):
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('>i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.arange(5, dtype=np.dtype('>f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        np.arange(256, dtype=np.uint8).tobytes(),
        # np.matrix is a subclass of np.ndarray, here we want
        # to verify this type of object is correctly unpickled
        # across versions.
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
        u"C'est l'\xe9t\xe9 !"
    ]

    fname = tmpdir.join('temp.pkl.gz').strpath

    dumped_filenames = numpy_pickle.dump(expected_list, fname, compress=1)
    assert len(dumped_filenames) == 1
    result_list = numpy_pickle.load(fname)
    for result, expected in zip(result_list, expected_list):
        if isinstance(expected, np.ndarray):
            assert result.dtype == expected.dtype
            np.testing.assert_equal(result, expected)
        else:
            assert result == expected
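
compress=1 selects the default codec at level 1; joblib's dump also accepts a (method, level) tuple for the other supported compressors. A usage sketch under the same pytest tmpdir fixture, assuming Python's stdlib lzma module is available:

import numpy as np
from joblib import numpy_pickle

def test_lzma_round_trip(tmpdir):
    fname = tmpdir.join('temp.pkl.lzma').strpath
    data = [np.arange(5, dtype='<i8'), u"C'est l'\xe9t\xe9 !"]
    numpy_pickle.dump(data, fname, compress=('lzma', 3))
    loaded = numpy_pickle.load(fname)
    np.testing.assert_equal(loaded[0], data[0])
    assert loaded[1] == data[1]
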
Example #11
def test_compressed_pickle_dump_and_load():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        u"C'est l'\xe9t\xe9 !"
    ]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        dumped_filenames = []
        try:
            dumped_filenames = numpy_pickle.dump(expected_list,
                                                 fname,
                                                 compress=1,
                                                 cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)
Example #12
def test_compressed_pickle_dump_and_load():
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('>i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.arange(5, dtype=np.dtype('>f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # across versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        dumped_filenames = numpy_pickle.dump(expected_list, fname, compress=1)
        assert len(dumped_filenames) == 1
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                assert result.dtype == expected.dtype
                np.testing.assert_equal(result, expected)
            else:
                assert result == expected
    finally:
        os.remove(fname)
Example #13
def test_numpy_array_byte_order_mismatch_detection():
    # List of numpy arrays with big endian byteorder.
    be_arrays = [
        np.array([(1, 2.0), (3, 4.0)], dtype=[('', '>i8'), ('', '>f8')]),
        np.arange(3, dtype=np.dtype('>i8')),
        np.arange(3, dtype=np.dtype('>f8'))
    ]

    # Verify the byteorder mismatch is correctly detected.
    for array in be_arrays:
        if sys.byteorder == 'big':
            assert not _is_numpy_array_byte_order_mismatch(array)
        else:
            assert _is_numpy_array_byte_order_mismatch(array)
        converted = _ensure_native_byte_order(array)
        if converted.dtype.fields:
            for f in converted.dtype.fields.values():
                assert f[0].byteorder == '='
        else:
            assert converted.dtype.byteorder == "="

    # List of numpy arrays with little endian byteorder.
    le_arrays = [
        np.array([(1, 2.0), (3, 4.0)], dtype=[('', '<i8'), ('', '<f8')]),
        np.arange(3, dtype=np.dtype('<i8')),
        np.arange(3, dtype=np.dtype('<f8'))
    ]

    # Verify the byteorder mismatch is correctly detected.
    for array in le_arrays:
        if sys.byteorder == 'little':
            assert not _is_numpy_array_byte_order_mismatch(array)
        else:
            assert _is_numpy_array_byte_order_mismatch(array)
        converted = _ensure_native_byte_order(array)
        if converted.dtype.fields:
            for f in converted.dtype.fields.values():
                assert f[0].byteorder == '='
        else:
            assert converted.dtype.byteorder == "="
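
`_ensure_native_byte_order` is a joblib-internal helper. The underlying numpy operation can be sketched with dtype.newbyteorder; to_native_byte_order below is an illustrative name, not joblib's implementation:

import numpy as np

def to_native_byte_order(array):
    # numpy canonicalizes native dtypes to byteorder '=', so '<' or '>'
    # here means the array really is in a non-native order.
    if array.dtype.byteorder in ('<', '>'):
        return array.astype(array.dtype.newbyteorder('='))
    return array

be = np.arange(3, dtype=np.dtype('>i8'))
assert to_native_byte_order(be).dtype.byteorder == '='
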
Example #14
def test_numpy_dtype_pickling():
    # numpy dtype hashing is tricky to get right: see #231, #239, #251, #1080,
    # #1082, and explanatory comments inside
    # ``joblib.hashing.NumpyHasher.save``.

    # In this test, we make sure that the pickling of numpy dtypes is robust to
    # object identity and object copy.

    dt1 = np.dtype('f4')
    dt2 = np.dtype('f4')

    # simple dtypes objects are interned
    assert dt1 is dt2
    assert hash(dt1) == hash(dt2)

    dt1_roundtripped = pickle.loads(pickle.dumps(dt1))
    assert dt1 is not dt1_roundtripped
    assert hash(dt1) == hash(dt1_roundtripped)

    assert hash([dt1, dt1]) == hash([dt1_roundtripped, dt1_roundtripped])
    assert hash([dt1, dt1]) == hash([dt1, dt1_roundtripped])

    complex_dt1 = np.dtype([('name', np.str_, 16),
                            ('grades', np.float64, (2, ))])
    complex_dt2 = np.dtype([('name', np.str_, 16),
                            ('grades', np.float64, (2, ))])

    # complex dtypes objects are not interned
    assert hash(complex_dt1) == hash(complex_dt2)

    complex_dt1_roundtripped = pickle.loads(pickle.dumps(complex_dt1))
    assert complex_dt1_roundtripped is not complex_dt1
    assert hash(complex_dt1) == hash(complex_dt1_roundtripped)

    assert hash([complex_dt1, complex_dt1]) == hash(
        [complex_dt1_roundtripped, complex_dt1_roundtripped])
    assert hash([complex_dt1,
                 complex_dt1]) == hash([complex_dt1_roundtripped, complex_dt1])
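
The identity cases above matter because pickle memoizes by object id: the second occurrence of the same dtype object is emitted as a memo reference rather than pickled again, so a hasher built naively on pickle.dumps would digest equal lists differently. A minimal demonstration:

import pickle

import numpy as np

dt = np.dtype('f4')
dt_copy = pickle.loads(pickle.dumps(dt))

# [dt, dt] pickles dt once plus a memo reference; [dt, dt_copy]
# pickles two full copies, so the raw streams differ even though
# the lists compare equal.
print(pickle.dumps([dt, dt]) == pickle.dumps([dt, dt_copy]))  # False
print([dt, dt] == [dt, dt_copy])                              # True
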
Example #15
def test_compressed_pickle_dump_and_load():
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('>i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.arange(5, dtype=np.dtype('>f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        # np.matrix is a subclass of np.ndarray, here we want
        # to verify this type of object is correctly unpickled
        # across versions.
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
        u"C'est l'\xe9t\xe9 !"
    ]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        dumped_filenames = numpy_pickle.dump(expected_list, fname, compress=1)
        nose.tools.assert_equal(len(dumped_filenames), 1)
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                nose.tools.assert_equal(result.dtype, expected.dtype)
                np.testing.assert_equal(result, expected)
            else:
                nose.tools.assert_equal(result, expected)
    finally:
        os.remove(fname)