Example #1
def test_compressed_pickle_dump_and_load():
    expected_list = [
        np.arange(5, dtype=np.int64),
        np.arange(5, dtype=np.float64),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        u"C'est l'\xe9t\xe9 !"
    ]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        numpy_pickle.dump(expected_list, fname, compress=1)
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                nose.tools.assert_equal(result.dtype, expected.dtype)
                np.testing.assert_equal(result, expected)
            else:
                nose.tools.assert_equal(result, expected)
    finally:
        os.remove(fname)
Example #2
def test_hash_object_dtype():
    """ Make sure that ndarrays with dtype `object' hash correctly."""

    a = np.array([np.arange(i) for i in range(6)], dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)

    assert hash(a) == hash(b)
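The `hash` in these hashing tests is joblib's content hash, not Python's builtin. A minimal standalone sketch of the property being asserted, assuming `joblib.hash` (the public alias of `joblib.hashing.hash`):

import numpy as np
from joblib import hash as joblib_hash

# Object-dtype arrays cannot expose a flat memory buffer, so joblib
# falls back to pickling their contents; equal contents still yield
# equal digests.
a = np.array([np.arange(i) for i in range(3)], dtype=object)
b = np.array([np.arange(i) for i in range(3)], dtype=object)
assert joblib_hash(a) == joblib_hash(b)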
Example #3
def test_compressed_pickle_dump_and_load():
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('>i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.arange(5, dtype=np.dtype('>f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        dumped_filenames = numpy_pickle.dump(expected_list, fname, compress=1)
        assert len(dumped_filenames) == 1
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                assert result.dtype == expected.dtype
                np.testing.assert_equal(result, expected)
            else:
                assert result == expected
    finally:
        os.remove(fname)
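A small sketch of the `.tostring()`/`.tobytes()` relationship the comments above describe; the guard is there because the deprecated alias was eventually removed (in NumPy 2.0):

import numpy as np

arr = np.arange(4, dtype=np.uint8)
raw = arr.tobytes()  # the modern spelling, added in NumPy 1.9.0
if hasattr(arr, 'tostring'):  # alias still present on NumPy 1.x
    assert arr.tostring() == raw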
Example #4
def test_joblib_pickle_across_python_versions():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the compressed and non compressed
    # pickles in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    pickle_extensions = ('.pkl', '.gz', '.gzip', '.bz2', '.xz', '.lzma')
    pickle_filenames = [os.path.join(test_data_dir, fn)
                        for fn in os.listdir(test_data_dir)
                        if any(fn.endswith(ext) for ext in pickle_extensions)]

    for fname in pickle_filenames:
        _check_pickle(fname, expected_list)
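A quick sketch of the explicit-endianness dtype strings these fixtures pin down: '<' is little endian and '>' is big endian, while NumPy reports the native order as '=':

import numpy as np

le, be = np.dtype('<i8'), np.dtype('>i8')
# On a little-endian host NumPy canonicalizes '<i8' to native order,
# so le.byteorder prints '=' there and be.byteorder stays '>'.
print(le.byteorder, be.byteorder)
assert le.itemsize == be.itemsize == 8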
Example #5
def test_hash_object_dtype():
    """ Make sure that ndarrays with dtype `object' hash correctly."""

    a = np.array([np.arange(i) for i in range(6)], dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)

    nose.tools.assert_equal(hash(a), hash(b))
Example #6
def test_joblib_pickle_across_python_versions():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        u"C'est l'\xe9t\xe9 !"
    ]

    # Testing all the *.gz and *.pkl (compressed and non compressed
    # pickles) in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    data_filenames = glob.glob(os.path.join(test_data_dir, '*.gz'))
    data_filenames += glob.glob(os.path.join(test_data_dir, '*.pkl'))

    for fname in data_filenames:
        _check_pickle(fname, expected_list)
Example #7
def test_joblib_pickle_across_python_versions():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the *.gz and *.pkl (compressed and non compressed
    # pickles) in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    data_filenames = glob.glob(os.path.join(test_data_dir, '*.gz'))
    data_filenames += glob.glob(os.path.join(test_data_dir, '*.pkl'))

    for fname in data_filenames:
        _check_pickle(fname, expected_list)
Example #8
def test_joblib_decompression_format_support():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        # np.matrix is a subclass of np.ndarray, here we want
        # to verify this type of object is correctly unpickled
        # among versions.
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        u"C'est l'\xe9t\xe9 !"
    ]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    extensions = ('.gz', '.gzip', '.bz2', '.xz', '.lzma')
    compress_filenames_list = [
        glob.glob(os.path.join(test_data_dir, '*' + ext)) for ext in extensions
    ]
    compress_filenames = sum(compress_filenames_list, [])

    for fname in compress_filenames:
        _check_compression_format(fname, expected_list)
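The `sum(list_of_lists, [])` call above flattens one level of nesting; a sketch of the equivalent (and linear-time) itertools spelling:

import itertools

nested = [['a.gz'], [], ['b.bz2', 'c.bz2']]  # hypothetical glob results
flat = list(itertools.chain.from_iterable(nested))
assert flat == sum(nested, []) == ['a.gz', 'b.bz2', 'c.bz2']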
Example #9
def test_compressed_pickle_dump_and_load(tmpdir):
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('>i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.arange(5, dtype=np.dtype('>f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        np.arange(256, dtype=np.uint8).tobytes(),
        # np.matrix is a subclass of np.ndarray, here we want
        # to verify this type of object is correctly unpickled
        # among versions.
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
        u"C'est l'\xe9t\xe9 !"
    ]

    fname = tmpdir.join('temp.pkl.gz').strpath

    dumped_filenames = numpy_pickle.dump(expected_list, fname, compress=1)
    assert len(dumped_filenames) == 1
    result_list = numpy_pickle.load(fname)
    for result, expected in zip(result_list, expected_list):
        if isinstance(expected, np.ndarray):
            assert result.dtype == expected.dtype
            np.testing.assert_equal(result, expected)
        else:
            assert result == expected
Example #10
def test_compressed_pickle_dump_and_load():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        dumped_filenames = []
        try:
            dumped_filenames = numpy_pickle.dump(
                expected_list, fname, compress=1,
                cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)
Example #11
def create_objects_to_hash():
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.random.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {
            -3333: rng.randn(3, 5).astype('<f4'),
            0: [
                rng.randint(10, size=20).astype('<i8'),
                rng.randn(10).astype('<f4')
            ]
        },
        # Non regression cases for
        # https://github.com/joblib/joblib/issues/308
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]
    return to_hash_list
Example #12
def test_joblib_pickle_across_python_versions():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the compressed and non compressed
    # pickles in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    pickle_extensions = ('.pkl', '.gz', '.gzip', '.bz2', '.xz', '.lzma', '.lz4')
    pickle_filenames = [os.path.join(test_data_dir, fn)
                        for fn in os.listdir(test_data_dir)
                        if any(fn.endswith(ext) for ext in pickle_extensions)]

    for fname in pickle_filenames:
        _check_pickle(fname, expected_list)
Example #13
def test_hash_object_dtype():
    """ Make sure that ndarrays with dtype `object' hash correctly."""

    a = np.array([np.arange(i) for i in range(6)], dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)

    assert hash(a) == hash(b)
Example #14
def test_joblib_decompression_format_support():
    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     # np.matrix is a subclass of np.ndarray, here we want
                     # to verify this type of object is correctly unpickled
                     # among versions.
                     np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
                     u"C'est l'\xe9t\xe9 !"]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))

    extensions = ('.gz', '.gzip', '.bz2', '.xz', '.lzma')
    compress_filenames_list = [glob.glob(os.path.join(test_data_dir,
                                                      '*' + ext))
                               for ext in extensions]
    compress_filenames = sum(compress_filenames_list, [])

    for fname in compress_filenames:
        _check_compression_format(fname, expected_list)
Example #15
def test_hashes_stay_the_same_with_numpy_objects():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.random.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {
            -3333: rng.randn(3, 5).astype('<f4'),
            0: [
                rng.randint(10, size=20).astype('<i8'),
                rng.randn(10).astype('<f4')
            ]
        },
        # Non regression cases for https://github.com/joblib/joblib/issues/308.
        # Generated with joblib 0.9.4.
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]

    # These expected results have been generated with joblib 0.9.0
    expected_dict = {
        'py2': [
            '80f2387e7752abbda2658aafed49e086',
            '0d700f7f25ea670fd305e4cd93b0e8cd',
            '83a2bdf843e79e4b3e26521db73088b9',
            '63e0efd43c0a9ad92a07e8ce04338dd3',
            '03fef702946b602c852b8b4e60929914',
            '07074691e90d7098a85956367045c81e',
            'd264cf79f353aa7bbfa8349e3df72d8f'
        ],
        'py3': [
            '10a6afc379ca2708acfbaef0ab676eab',
            '988a7114f337f381393025911ebc823b',
            'c6809f4b97e35f2fa0ee8d653cbd025c',
            'b3ad17348e32728a7eb9cda1e7ede438',
            '927b3e6b0b6a037e8e035bda134e0b05',
            '108f6ee98e7db19ea2006ffd208f4bf1',
            'bd48ccaaff28e16e6badee81041b7180'
        ]
    }

    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    expected_list = expected_dict[py_version_str]

    for to_hash, expected in zip(to_hash_list, expected_list):
        assert hash(to_hash) == expected
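A minimal sketch of the stability property this test pins down: joblib's hash is a deterministic hex digest (md5 by default), so two calls on identically laid out input must agree:

import numpy as np
from joblib import hash as joblib_hash

digest = joblib_hash(np.arange(5, dtype='<i8'))
assert digest == joblib_hash(np.arange(5, dtype='<i8'))
print(digest)  # a 32-character hexadecimal string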
Example #16
def test_compressed_pickle_python_2_3_compatibility():
    expected_list = [np.arange(5, dtype=np.int64),
                     np.arange(5, dtype=np.float64),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    # These files have been generated with the
    # joblib/test/data/create_numpy_pickle.py script for the relevant
    # python and joblib versions
    basenames = ['joblib_0.8.4_compressed_pickle_py27.gz',
                 'joblib_0.9.0_compressed_pickle_py27.gz',
                 'joblib_0.8.4_compressed_pickle_py33.gz',
                 'joblib_0.9.0_compressed_pickle_py33.gz',
                 'joblib_0.8.4_compressed_pickle_py34.gz',
                 'joblib_0.9.0_compressed_pickle_py34.gz']
    data_filenames = [os.path.join(test_data_dir, bname)
                      for bname in basenames]

    for fname in data_filenames:
        version_match = re.match(r'.+py(\d)(\d)\.gz', fname)
        python_version_used_for_writing = tuple(
            [int(each) for each in version_match.groups()])
        python_version_used_for_reading = sys.version_info[:2]

        python_version_to_default_pickle_protocol = {
            (2, 6): 2, (2, 7): 2,
            (3, 0): 3, (3, 1): 3, (3, 2): 3, (3, 3): 3, (3, 4): 4}

        pickle_reading_protocol = python_version_to_default_pickle_protocol[
            python_version_used_for_reading]
        pickle_writing_protocol = python_version_to_default_pickle_protocol[
            python_version_used_for_writing]
        if ('0.8.4' not in fname or
                pickle_reading_protocol >=
                pickle_writing_protocol):
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        else:
            # For joblib <= 0.8.4 compressed pickles written with
            # python `version = v` can not be read by python with
            # `version < v' because of differences in the default
            # pickle protocol (2 for python 2, 3 for python 3.3 and 4
            # for python 3.4)
            try:
                numpy_pickle.load(fname)
                raise AssertionError('Numpy pickle loading should '
                                     'have raised a ValueError exception')
            except ValueError as e:
                nose.tools.assert_true(
                    'unsupported pickle protocol' in str(e.args))
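A self-contained sketch of the failure mode checked in the `else` branch: loading a pickle whose PROTO opcode claims a protocol newer than the reader supports raises ValueError:

import pickle

payload = b"\x80\x07N."  # PROTO opcode with a (currently unsupported) protocol 7
try:
    pickle.loads(payload)
    raise AssertionError('expected a ValueError')
except ValueError as e:
    assert 'unsupported pickle protocol' in str(e)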
Example #17
def test_hash_object_dtype():
    """ Make sure that ndarrays with dtype `object' hash correctly."""

    a = np.array([np.arange(i) for i in range(6)], dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)

    nose.tools.assert_equal(hash(a),
                            hash(b))
Example #18
def test_numpy_datetime_array():
    # memoryview is not supported for some dtypes e.g. datetime64
    # see https://github.com/joblib/joblib/issues/188 for more details
    dtypes = ['datetime64[s]', 'timedelta64[D]']

    a_hash = hash(np.arange(10))
    arrays = (np.arange(0, 10, dtype=dtype) for dtype in dtypes)
    for array in arrays:
        assert hash(array) != a_hash
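A sketch of why datetime64 arrays need a dedicated code path in the hasher: they do not support the buffer protocol, so taking a memoryview fails outright:

import numpy as np

arr = np.arange(3, dtype='datetime64[s]')
try:
    memoryview(arr)  # raises: dtype 'M' cannot be exposed as a buffer
except (ValueError, TypeError) as exc:
    print('no buffer support:', exc)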
Example #19
def test_numpy_datetime_array():
    # memoryview is not supported for some dtypes e.g. datetime64
    # see https://github.com/joblib/joblib/issues/188 for more details
    dtypes = ['datetime64[s]', 'timedelta64[D]']

    a_hash = hash(np.arange(10))
    arrays = (np.arange(0, 10, dtype=dtype) for dtype in dtypes)
    for array in arrays:
        nose.tools.assert_not_equal(hash(array), a_hash)
Example #20
def test_hashes_stay_the_same_with_numpy_objects():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.random.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {
            -3333: rng.randn(3, 5).astype('<f4'),
            0: [
                rng.randint(10, size=20).astype('<i8'),
                rng.randn(10).astype('<f4')
            ]
        },
        # Non regression cases for https://github.com/joblib/joblib/issues/308.
        # Generated with joblib 0.9.4.
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]

    # These expected results have been generated with joblib 0.9.0
    expected_dict = {'py2': ['80f2387e7752abbda2658aafed49e086',
                             '0d700f7f25ea670fd305e4cd93b0e8cd',
                             '83a2bdf843e79e4b3e26521db73088b9',
                             '63e0efd43c0a9ad92a07e8ce04338dd3',
                             '03fef702946b602c852b8b4e60929914',
                             '07074691e90d7098a85956367045c81e',
                             'd264cf79f353aa7bbfa8349e3df72d8f'],
                     'py3': ['10a6afc379ca2708acfbaef0ab676eab',
                             '988a7114f337f381393025911ebc823b',
                             'c6809f4b97e35f2fa0ee8d653cbd025c',
                             'b3ad17348e32728a7eb9cda1e7ede438',
                             '927b3e6b0b6a037e8e035bda134e0b05',
                             '108f6ee98e7db19ea2006ffd208f4bf1',
                             'bd48ccaaff28e16e6badee81041b7180']}

    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    expected_list = expected_dict[py_version_str]

    for to_hash, expected in zip(to_hash_list, expected_list):
        yield assert_equal, hash(to_hash), expected
Example #21
def test_hash_numpy_noncontiguous():
    a = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='F')[:, :1, :]
    b = np.ascontiguousarray(a)
    nose.tools.assert_not_equal(hash(a), hash(b))

    c = np.asfortranarray(a)
    nose.tools.assert_not_equal(hash(a), hash(c))
Example #22
def test_hash_numpy_noncontiguous():
    a = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='F')[:, :1, :]
    b = np.ascontiguousarray(a)
    assert hash(a) != hash(b)

    c = np.asfortranarray(a)
    assert hash(a) != hash(c)
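A sketch of the memory-layout difference behind these assertions: the strided view and its contiguous copy hold equal values, and joblib's hash distinguishes them because it takes layout into account:

import numpy as np

a = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='F')[:, :1, :]
b = np.ascontiguousarray(a)
print(a.flags.c_contiguous, b.flags.c_contiguous)  # False True
assert (a == b).all()  # same values, different layout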
Example #23
def test_memmapping_temp_folder_thread_safety():
    # Concurrent calls to Parallel with the loky backend will use the same
    # executor, and thus the same reducers. Make sure that those reducers use
    # different temporary folders depending on which Parallel objects called
    # them, which is necessary to limit potential race conditions during the
    # garbage collection of temporary memmaps.
    array = np.arange(int(1e2))

    temp_dirs_thread_1 = set()
    temp_dirs_thread_2 = set()

    def concurrent_get_filename(array, temp_dirs):
        with Parallel(backend='loky', n_jobs=2, max_nbytes=10) as p:
            for i in range(10):
                [filename] = p(delayed(getattr)(array, 'filename')
                               for _ in range(1))
                temp_dirs.add(os.path.dirname(filename))

    t1 = threading.Thread(target=concurrent_get_filename,
                          args=(array, temp_dirs_thread_1))
    t2 = threading.Thread(target=concurrent_get_filename,
                          args=(array, temp_dirs_thread_2))

    t1.start()
    t2.start()

    t1.join()
    t2.join()

    assert len(temp_dirs_thread_1) == 1
    assert len(temp_dirs_thread_2) == 1

    assert temp_dirs_thread_1 != temp_dirs_thread_2
Example #24
def test_managed_backend_reuse_temp_folder(backend):
    # Test that calls to a managed parallel object reuse the same memmaps.
    array = np.arange(int(1e2))
    with Parallel(n_jobs=2, backend=backend, max_nbytes=10) as p:
        [filename_1] = p(delayed(getattr)(array, 'filename') for _ in range(1))
        [filename_2] = p(delayed(getattr)(array, 'filename') for _ in range(1))
    assert os.path.dirname(filename_2) == os.path.dirname(filename_1)
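A hedged sketch of the memmapping machinery these Parallel tests rely on: with a very small `max_nbytes`, input arrays above the threshold are dumped to a temporary folder and workers receive `np.memmap` views:

import numpy as np
from joblib import Parallel, delayed

array = np.arange(int(1e2))
[result_type] = Parallel(n_jobs=2, max_nbytes=10)(
    delayed(type)(array) for _ in range(1))
print(result_type)  # typically <class 'numpy.memmap'> once memmapping triggers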
Example #25
def test_direct_mmap(tmpdir):
    testfile = str(tmpdir.join('arr.dat'))
    a = np.arange(10, dtype='uint8')
    a.tofile(testfile)

    def _read_array():
        with open(testfile, 'rb') as fd:
            mm = mmap.mmap(fd.fileno(), 0, access=mmap.ACCESS_READ, offset=0)
        return np.ndarray((10, ), dtype=np.uint8, buffer=mm, offset=0)

    def func(x):
        return x**2

    arr = _read_array()

    # this is expected to work and gives the reference
    ref = Parallel(n_jobs=2)(delayed(func)(x) for x in [a])

    # now test that it works with the mmap array
    results = Parallel(n_jobs=2)(delayed(func)(x) for x in [arr])
    np.testing.assert_array_equal(results, ref)

    # also test with a mmap array read in the subprocess
    def worker():
        return _read_array()

    results = Parallel(n_jobs=2)(delayed(worker)() for _ in range(1))
    np.testing.assert_array_equal(results[0], arr)
Example #26
def test_hash_numpy_noncontiguous():
    a = np.asarray(np.arange(6000).reshape((1000, 2, 3)),
                   order='F')[:, :1, :]
    b = np.ascontiguousarray(a)
    nose.tools.assert_not_equal(hash(a), hash(b))

    c = np.asfortranarray(a)
    nose.tools.assert_not_equal(hash(a), hash(c))
Example #27
def test_hash_numpy_noncontiguous():
    a = np.asarray(np.arange(6000).reshape((1000, 2, 3)),
                   order='F')[:, :1, :]
    b = np.ascontiguousarray(a)
    assert hash(a) != hash(b)

    c = np.asfortranarray(a)
    assert hash(a) != hash(c)
Example #28
def test_compressed_pickle_dump_and_load():
    expected_list = [np.arange(5, dtype=np.int64),
                     np.arange(5, dtype=np.float64),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        numpy_pickle.dump(expected_list, f.name, compress=1)
        result_list = numpy_pickle.load(f.name)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                nose.tools.assert_equal(result.dtype, expected.dtype)
                np.testing.assert_equal(result, expected)
            else:
                nose.tools.assert_equal(result, expected)
Example #29
def test_hashes_stay_the_same_with_numpy_objects():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.random.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {
            -3333: rng.randn(3, 5).astype('<f4'),
            0: [
                rng.randint(10, size=20).astype('<i8'),
                rng.randn(10).astype('<f4')
            ]
        },
        # Non regression cases for https://github.com/joblib/joblib/issues/308.
        # Generated with joblib 0.9.4.
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]

    # These expected results have been generated with joblib 0.9.0
    expected_hashes = [
        '10a6afc379ca2708acfbaef0ab676eab',
        '988a7114f337f381393025911ebc823b',
        'c6809f4b97e35f2fa0ee8d653cbd025c',
        'b3ad17348e32728a7eb9cda1e7ede438',
        '927b3e6b0b6a037e8e035bda134e0b05',
        '108f6ee98e7db19ea2006ffd208f4bf1',
        'bd48ccaaff28e16e6badee81041b7180'
    ]

    for to_hash, expected in zip(to_hash_list, expected_hashes):
        assert hash(to_hash) == expected
Example #30
def test_joblib_pickle_across_python_versions():
    expected_list = [np.arange(5, dtype=np.int64),
                     np.arange(5, dtype=np.float64),
                     np.array([1, 'abc', {'a': 1, 'b': 2}]),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    # Testing all the *.gz and *.pkl (compressed and non compressed
    # pickles) in joblib/test/data. These pickles were generated by
    # the joblib/test/data/create_numpy_pickle.py script for the
    # relevant python, joblib and numpy versions.
    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    data_filenames = glob.glob(os.path.join(test_data_dir, '*.gz'))
    data_filenames += glob.glob(os.path.join(test_data_dir, '*.pkl'))

    for fname in data_filenames:
        _check_pickle(fname, expected_list)
Example #31
def test_parallel_isolated_temp_folders(backend):
    # Test that consecutive Parallel call use isolated subfolders, even
    # for the loky backend that reuses its executor instance across calls.
    array = np.arange(int(1e2))
    [filename_1] = Parallel(n_jobs=2, backend=backend,
                            max_nbytes=10)(delayed(getattr)(array, 'filename')
                                           for _ in range(1))
    [filename_2] = Parallel(n_jobs=2, backend=backend,
                            max_nbytes=10)(delayed(getattr)(array, 'filename')
                                           for _ in range(1))
    assert os.path.dirname(filename_2) != os.path.dirname(filename_1)
Example #32
def test_numpy_array_byte_order_mismatch_detection():
    # List of numpy arrays with big endian byteorder.
    be_arrays = [
        np.array([(1, 2.0), (3, 4.0)], dtype=[('', '>i8'), ('', '>f8')]),
        np.arange(3, dtype=np.dtype('>i8')),
        np.arange(3, dtype=np.dtype('>f8'))
    ]

    # Verify the byteorder mismatch is correctly detected.
    for array in be_arrays:
        if sys.byteorder == 'big':
            assert not _is_numpy_array_byte_order_mismatch(array)
        else:
            assert _is_numpy_array_byte_order_mismatch(array)
        converted = _ensure_native_byte_order(array)
        if converted.dtype.fields:
            for f in converted.dtype.fields.values():
                assert f[0].byteorder == '='
        else:
            assert converted.dtype.byteorder == "="

    # List of numpy arrays with little endian byteorder.
    le_arrays = [
        np.array([(1, 2.0), (3, 4.0)], dtype=[('', '<i8'), ('', '<f8')]),
        np.arange(3, dtype=np.dtype('<i8')),
        np.arange(3, dtype=np.dtype('<f8'))
    ]

    # Verify the byteorder mismatch is correctly detected.
    for array in le_arrays:
        if sys.byteorder == 'little':
            assert not _is_numpy_array_byte_order_mismatch(array)
        else:
            assert _is_numpy_array_byte_order_mismatch(array)
        converted = _ensure_native_byte_order(array)
        if converted.dtype.fields:
            for f in converted.dtype.fields.values():
                assert f[0].byteorder == '='
        else:
            assert converted.dtype.byteorder == "="
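A hedged, plain-NumPy sketch of the normalization that `_ensure_native_byte_order` is expected to perform for simple dtypes (the helper itself is a joblib internal):

import sys
import numpy as np

arr = np.arange(3, dtype=np.dtype('>i8'))
native = '>' if sys.byteorder == 'big' else '<'
if arr.dtype.byteorder not in ('=', '|', native):
    # Swap the bytes and reinterpret them under the native-order dtype.
    arr = arr.byteswap().view(arr.dtype.newbyteorder('='))
assert arr.dtype.byteorder in ('=', '|')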
Example #33
def test_compressed_pickle_dump_and_load():
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        u"C'est l'\xe9t\xe9 !"
    ]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        dumped_filenames = []
        try:
            dumped_filenames = numpy_pickle.dump(expected_list,
                                                 fname,
                                                 compress=1,
                                                 cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)
Example #34
def test_compressed_pickle_dump_and_load():
    expected_list = [
        np.arange(5, dtype=np.dtype('<i8')),
        np.arange(5, dtype=np.dtype('>i8')),
        np.arange(5, dtype=np.dtype('<f8')),
        np.arange(5, dtype=np.dtype('>f8')),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        # np.matrix is a subclass of np.ndarray, here we want
        # to verify this type of object is correctly unpickled
        # among versions.
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        np.matrix([0, 1, 2], dtype=np.dtype('>i8')),
        u"C'est l'\xe9t\xe9 !"
    ]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        dumped_filenames = numpy_pickle.dump(expected_list, fname, compress=1)
        nose.tools.assert_equal(len(dumped_filenames), 1)
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                nose.tools.assert_equal(result.dtype, expected.dtype)
                np.testing.assert_equal(result, expected)
            else:
                nose.tools.assert_equal(result, expected)
    finally:
        os.remove(fname)
Example #35
def test_high_dimension_memmap_array_reducing(tmpdir):
    assert_array_equal = np.testing.assert_array_equal

    filename = tmpdir.join('test.mmap').strpath

    # Create a high dimensional memmap
    a = np.memmap(filename,
                  dtype=np.float64,
                  shape=(100, 15, 15, 3),
                  mode='w+')
    a[:] = np.arange(100 * 15 * 15 * 3).reshape(a.shape)

    # Create some slices/indices at various dimensions
    b = a[0:10]
    c = a[:, 5:10]
    d = a[:, :, :, 0]
    e = a[1:3:4]

    # Array reducer with auto dumping disabled
    reducer = ArrayMemmapForwardReducer(None, tmpdir.strpath, 'c', True)

    def reconstruct_array_or_memmap(x):
        cons, args = reducer(x)
        return cons(*args)

    a_reconstructed = reconstruct_array_or_memmap(a)
    assert has_shareable_memory(a_reconstructed)
    assert isinstance(a_reconstructed, np.memmap)
    assert_array_equal(a_reconstructed, a)

    b_reconstructed = reconstruct_array_or_memmap(b)
    assert has_shareable_memory(b_reconstructed)
    assert_array_equal(b_reconstructed, b)

    c_reconstructed = reconstruct_array_or_memmap(c)
    assert has_shareable_memory(c_reconstructed)
    assert_array_equal(c_reconstructed, c)

    d_reconstructed = reconstruct_array_or_memmap(d)
    assert has_shareable_memory(d_reconstructed)
    assert_array_equal(d_reconstructed, d)

    e_reconstructed = reconstruct_array_or_memmap(e)
    assert has_shareable_memory(e_reconstructed)
    assert_array_equal(e_reconstructed, e)
Example #36
def test_pickle_in_socket():
    # test that joblib can pickle in sockets
    test_array = np.arange(10)
    _ADDR = ("localhost", 12345)
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.bind(_ADDR)
    listener.listen(1)

    client = socket.create_connection(_ADDR)
    server, client_addr = listener.accept()

    with server.makefile("wb") as sf:
        numpy_pickle.dump(test_array, sf)

    with client.makefile("rb") as cf:
        array_reloaded = numpy_pickle.load(cf)

    np.testing.assert_array_equal(array_reloaded, test_array)
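The socket test works because `numpy_pickle.dump`/`load` accept file-like objects, not just paths; a minimal in-memory sketch of the same round trip:

import io
import numpy as np
from joblib import numpy_pickle

buf = io.BytesIO()
numpy_pickle.dump(np.arange(10), buf)
buf.seek(0)
np.testing.assert_array_equal(numpy_pickle.load(buf), np.arange(10))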
Example #37
def test_high_dimension_memmap_array_reducing():
    assert_array_equal = np.testing.assert_array_equal

    filename = os.path.join(TEMP_FOLDER, 'test.mmap')

    # Create a high dimensional memmap
    a = np.memmap(filename,
                  dtype=np.float64,
                  shape=(100, 15, 15, 3),
                  mode='w+')
    a[:] = np.arange(100 * 15 * 15 * 3).reshape(a.shape)

    # Create some slices/indices at various dimensions
    b = a[0:10]
    c = a[:, 5:10]
    d = a[:, :, :, 0]
    e = a[1:3:4]

    def reconstruct_memmap(x):
        cons, args = reduce_memmap(x)
        res = cons(*args)
        return res

    a_reconstructed = reconstruct_memmap(a)
    assert_true(has_shareable_memory(a_reconstructed))
    assert_true(isinstance(a_reconstructed, np.memmap))
    assert_array_equal(a_reconstructed, a)

    b_reconstructed = reconstruct_memmap(b)
    assert_true(has_shareable_memory(b_reconstructed))
    assert_array_equal(b_reconstructed, b)

    c_reconstructed = reconstruct_memmap(c)
    assert_true(has_shareable_memory(c_reconstructed))
    assert_array_equal(c_reconstructed, c)

    d_reconstructed = reconstruct_memmap(d)
    assert_true(has_shareable_memory(d_reconstructed))
    assert_array_equal(d_reconstructed, d)

    e_reconstructed = reconstruct_memmap(e)
    assert_true(has_shareable_memory(e_reconstructed))
    assert_array_equal(e_reconstructed, e)
Example #38
def test_high_dimension_memmap_array_reducing(tmpdir):
    assert_array_equal = np.testing.assert_array_equal

    filename = tmpdir.join('test.mmap').strpath

    # Create a high dimensional memmap
    a = np.memmap(filename, dtype=np.float64, shape=(100, 15, 15, 3),
                  mode='w+')
    a[:] = np.arange(100 * 15 * 15 * 3).reshape(a.shape)

    # Create some slices/indices at various dimensions
    b = a[0:10]
    c = a[:, 5:10]
    d = a[:, :, :, 0]
    e = a[1:3:4]

    def reconstruct_memmap(x):
        cons, args = reduce_memmap(x)
        res = cons(*args)
        return res

    a_reconstructed = reconstruct_memmap(a)
    assert has_shareable_memory(a_reconstructed)
    assert isinstance(a_reconstructed, np.memmap)
    assert_array_equal(a_reconstructed, a)

    b_reconstructed = reconstruct_memmap(b)
    assert has_shareable_memory(b_reconstructed)
    assert_array_equal(b_reconstructed, b)

    c_reconstructed = reconstruct_memmap(c)
    assert has_shareable_memory(c_reconstructed)
    assert_array_equal(c_reconstructed, c)

    d_reconstructed = reconstruct_memmap(d)
    assert has_shareable_memory(d_reconstructed)
    assert_array_equal(d_reconstructed, d)

    e_reconstructed = reconstruct_memmap(e)
    assert has_shareable_memory(e_reconstructed)
    assert_array_equal(e_reconstructed, e)
Example #39
def test_pickle_in_socket():
    # test that joblib can pickle in sockets
    if not PY3_OR_LATER:
        raise SkipTest("Cannot peek or seek in socket in python 2.")

    test_array = np.arange(10)
    _ADDR = ("localhost", 12345)
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.bind(_ADDR)
    listener.listen(1)

    client = socket.create_connection(_ADDR)
    server, client_addr = listener.accept()

    with server.makefile("wb") as sf:
        numpy_pickle.dump(test_array, sf)

    with client.makefile("rb") as cf:
        array_reloaded = numpy_pickle.load(cf)

    np.testing.assert_array_equal(array_reloaded, test_array)
Example #40
def test_pickle_in_socket():
    # test that joblib can pickle in sockets
    if not PY3_OR_LATER:
        raise SkipTest("Cannot peek or seek in socket in python 2.")

    test_array = np.arange(10)
    _ADDR = ("localhost", 12345)
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.bind(_ADDR)
    listener.listen(1)

    client = socket.create_connection(_ADDR)
    server, client_addr = listener.accept()

    with server.makefile("wb") as sf:
        numpy_pickle.dump(test_array, sf)

    with client.makefile("rb") as cf:
        array_reloaded = numpy_pickle.load(cf)

    np.testing.assert_array_equal(array_reloaded, test_array)
Example #41
def test_workaround_against_bad_memmap_with_copied_buffers(tmpdir):
    """Check that memmaps with a bad buffer are returned as regular arrays

    Unary operations and ufuncs on memmap instances return a new memmap
    instance with an in-memory buffer (probably a numpy bug).
    """
    assert_array_equal = np.testing.assert_array_equal

    p = MemmapingPool(3, max_nbytes=10, temp_folder=tmpdir.strpath)
    try:
        # Send a complex, large-ish view on an array that will be converted to
        # a memmap in the worker process
        a = np.asarray(np.arange(6000).reshape((1000, 2, 3)),
                       order='F')[:, :1, :]

        # Call a non-inplace multiply operation on the worker and memmap and
        # send it back to the parent.
        b = p.apply_async(_worker_multiply, args=(a, 3)).get()
        assert not has_shareable_memory(b)
        assert_array_equal(b, 3 * a)
    finally:
        p.terminate()
        del p
Example #42
def test_workaround_against_bad_memmap_with_copied_buffers(factory, tmpdir):
    """Check that memmaps with a bad buffer are returned as regular arrays

    Unary operations and ufuncs on memmap instances return a new memmap
    instance with an in-memory buffer (probably a numpy bug).
    """
    assert_array_equal = np.testing.assert_array_equal

    p = factory(3, max_nbytes=10, temp_folder=tmpdir.strpath)
    try:
        # Send a complex, large-ish view on an array that will be converted to
        # a memmap in the worker process
        a = np.asarray(np.arange(6000).reshape((1000, 2, 3)),
                       order='F')[:, :1, :]

        # Call a non-inplace multiply operation on the worker and memmap and
        # send it back to the parent.
        b = p.apply_async(_worker_multiply, args=(a, 3)).get()
        assert not has_shareable_memory(b)
        assert_array_equal(b, 3 * a)
    finally:
        p.terminate()
        del p
Example #43
def __init__(self):
    self.array_float = np.arange(100, dtype='float64')
    self.array_int = np.ones(100, dtype='int32')
    self.array_obj = np.array(['a', 10, 20.0], dtype='object')
Example #44
def test_numpy_datetime_array(dtype):
    # memoryview is not supported for some dtypes e.g. datetime64
    # see https://github.com/joblib/joblib/issues/188 for more details
    a_hash = hash(np.arange(10))
    array = np.arange(0, 10, dtype=dtype)
    assert hash(array) != a_hash
Example #45
def __init__(self):
    self.array_float = np.arange(100, dtype='float64')
    self.array_int = np.ones(100, dtype='int32')
    self.array_obj = np.array(['a', 10, 20.0], dtype='object')
Example #46
def test_memmap_based_array_reducing(tmpdir):
    """Check that it is possible to reduce a memmap backed array"""
    assert_array_equal = np.testing.assert_array_equal
    filename = tmpdir.join('test.mmap').strpath

    # Create a backing file larger than what the array a below will use
    buffer = np.memmap(filename, dtype=np.float64, shape=500, mode='w+')

    # Fill the original buffer with negative markers to detect over- or
    # underflow in case of test failures
    buffer[:] = -1.0 * np.arange(buffer.shape[0], dtype=buffer.dtype)
    buffer.flush()

    # Memmap a 3D fortran array onto an offset subsection of the previous
    # buffer
    a = np.memmap(filename, dtype=np.float64, shape=(3, 5, 4),
                  mode='r+', order='F', offset=4)
    a[:] = np.arange(60).reshape(a.shape)

    # Build various views that share the buffer with the original memmap

    # b is a memmap-sliced view on a memmap instance
    b = a[1:-1, 2:-1, 2:4]

    # c and d are array views
    c = np.asarray(b)
    d = c.T

    # Array reducer with auto dumping disabled
    reducer = ArrayMemmapReducer(None, tmpdir.strpath, 'c')

    def reconstruct_array(x):
        cons, args = reducer(x)
        return cons(*args)

    def reconstruct_memmap(x):
        cons, args = reduce_memmap(x)
        return cons(*args)

    # Reconstruct original memmap
    a_reconstructed = reconstruct_memmap(a)
    assert has_shareable_memory(a_reconstructed)
    assert isinstance(a_reconstructed, np.memmap)
    assert_array_equal(a_reconstructed, a)

    # Reconstruct strided memmap view
    b_reconstructed = reconstruct_memmap(b)
    assert has_shareable_memory(b_reconstructed)
    assert_array_equal(b_reconstructed, b)

    # Reconstruct arrays views on memmap base
    c_reconstructed = reconstruct_array(c)
    assert not isinstance(c_reconstructed, np.memmap)
    assert has_shareable_memory(c_reconstructed)
    assert_array_equal(c_reconstructed, c)

    d_reconstructed = reconstruct_array(d)
    assert not isinstance(d_reconstructed, np.memmap)
    assert has_shareable_memory(d_reconstructed)
    assert_array_equal(d_reconstructed, d)

    # Test graceful degradation on fake memmap instances with in-memory
    # buffers
    a3 = a * 3
    assert not has_shareable_memory(a3)
    a3_reconstructed = reconstruct_memmap(a3)
    assert not has_shareable_memory(a3_reconstructed)
    assert not isinstance(a3_reconstructed, np.memmap)
    assert_array_equal(a3_reconstructed, a * 3)

    # Test graceful degradation on arrays derived from fake memmap instances
    b3 = np.asarray(a3)
    assert not has_shareable_memory(b3)

    b3_reconstructed = reconstruct_array(b3)
    assert isinstance(b3_reconstructed, np.ndarray)
    assert not has_shareable_memory(b3_reconstructed)
    assert_array_equal(b3_reconstructed, b3)
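A plain-NumPy sketch (no joblib internals) of the offset memmap layout reconstructed above: a fortran-ordered view mapped 4 bytes into a larger backing file, exposing its offset and filename for pickling:

import os
import tempfile
import numpy as np

fname = os.path.join(tempfile.mkdtemp(), 'buf.mmap')
buffer = np.memmap(fname, dtype=np.float64, shape=500, mode='w+')
buffer.flush()
a = np.memmap(fname, dtype=np.float64, shape=(3, 5, 4), mode='r+',
              order='F', offset=4)
print(a.offset, a.filename)  # the byte offset and backing file path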
Example #47
def test_compressed_pickle_python_2_3_compatibility():
    expected_list = [
        np.arange(5, dtype=np.int64),
        np.arange(5, dtype=np.float64),
        # .tostring actually returns bytes and is a
        # compatibility alias for .tobytes which was
        # added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        u"C'est l'\xe9t\xe9 !"
    ]

    test_data_dir = os.path.dirname(os.path.abspath(data.__file__))
    # These files have been generated with the
    # joblib/test/data/create_numpy_pickle.py script for the relevant
    # python and joblib versions
    basenames = [
        'joblib_0.8.4_compressed_pickle_py27.gz',
        'joblib_0.9.0_compressed_pickle_py27.gz',
        'joblib_0.8.4_compressed_pickle_py33.gz',
        'joblib_0.9.0_compressed_pickle_py33.gz',
        'joblib_0.8.4_compressed_pickle_py34.gz',
        'joblib_0.9.0_compressed_pickle_py34.gz'
    ]
    data_filenames = [
        os.path.join(test_data_dir, bname) for bname in basenames
    ]

    for fname in data_filenames:
        version_match = re.match(r'.+py(\d)(\d)\.gz', fname)
        py_version_used_for_writing = tuple(
            [int(each) for each in version_match.groups()])
        py_version_used_for_reading = sys.version_info[:2]

        # Use Pickle protocol 4 for Python 3.4 and later
        py_version_to_default_pickle_protocol = {
            (2, 6): 2,
            (2, 7): 2,
            (3, 0): 3,
            (3, 1): 3,
            (3, 2): 3,
            (3, 3): 3
        }
        pickle_reading_protocol = py_version_to_default_pickle_protocol.get(
            py_version_used_for_reading, 4)
        pickle_writing_protocol = py_version_to_default_pickle_protocol.get(
            py_version_used_for_writing, 4)
        if ('0.8.4' not in fname
                or pickle_reading_protocol >= pickle_writing_protocol):
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        else:
            # For joblib <= 0.8.4 compressed pickles written with
            # python `version = v` can not be read by python with
            # `version < v' because of differences in the default
            # pickle protocol (2 for python 2, 3 for python 3.3 and 4
            # for python 3.4)
            try:
                numpy_pickle.load(fname)
                raise AssertionError('Numpy pickle loading should '
                                     'have raised a ValueError exception')
            except ValueError as e:
                nose.tools.assert_true(
                    'unsupported pickle protocol' in str(e.args))
Example #48
def test_memmap_based_array_reducing(tmpdir):
    """Check that it is possible to reduce a memmap backed array"""
    assert_array_equal = np.testing.assert_array_equal
    filename = tmpdir.join('test.mmap').strpath

    # Create a backing file larger than what the array a below will use
    buffer = np.memmap(filename, dtype=np.float64, shape=500, mode='w+')

    # Fill the original buffer with negative markers to detect over- or
    # underflow in case of test failures
    buffer[:] = -1.0 * np.arange(buffer.shape[0], dtype=buffer.dtype)
    buffer.flush()

    # Memmap a 3D fortran array onto an offset subsection of the previous
    # buffer
    a = np.memmap(filename,
                  dtype=np.float64,
                  shape=(3, 5, 4),
                  mode='r+',
                  order='F',
                  offset=4)
    a[:] = np.arange(60).reshape(a.shape)

    # Build various views that share the buffer with the original memmap

    # b is a memmap-sliced view on a memmap instance
    b = a[1:-1, 2:-1, 2:4]

    # c and d are array views
    c = np.asarray(b)
    d = c.T

    # Array reducer with auto dumping disabled
    reducer = ArrayMemmapReducer(None, tmpdir.strpath, 'c')

    def reconstruct_array(x):
        cons, args = reducer(x)
        return cons(*args)

    def reconstruct_memmap(x):
        cons, args = reduce_memmap(x)
        return cons(*args)

    # Reconstruct original memmap
    a_reconstructed = reconstruct_memmap(a)
    assert has_shareable_memory(a_reconstructed)
    assert isinstance(a_reconstructed, np.memmap)
    assert_array_equal(a_reconstructed, a)

    # Reconstruct strided memmap view
    b_reconstructed = reconstruct_memmap(b)
    assert has_shareable_memory(b_reconstructed)
    assert_array_equal(b_reconstructed, b)

    # Reconstruct arrays views on memmap base
    c_reconstructed = reconstruct_array(c)
    assert not isinstance(c_reconstructed, np.memmap)
    assert has_shareable_memory(c_reconstructed)
    assert_array_equal(c_reconstructed, c)

    d_reconstructed = reconstruct_array(d)
    assert not isinstance(d_reconstructed, np.memmap)
    assert has_shareable_memory(d_reconstructed)
    assert_array_equal(d_reconstructed, d)

    # Test graceful degradation on fake memmap instances with in-memory
    # buffers
    a3 = a * 3
    assert not has_shareable_memory(a3)
    a3_reconstructed = reconstruct_memmap(a3)
    assert not has_shareable_memory(a3_reconstructed)
    assert not isinstance(a3_reconstructed, np.memmap)
    assert_array_equal(a3_reconstructed, a * 3)

    # Test graceful degradation on arrays derived from fake memmap instances
    b3 = np.asarray(a3)
    assert not has_shareable_memory(b3)

    b3_reconstructed = reconstruct_array(b3)
    assert isinstance(b3_reconstructed, np.ndarray)
    assert not has_shareable_memory(b3_reconstructed)
    assert_array_equal(b3_reconstructed, b3)
Example #49
def test_numpy_datetime_array(dtype):
    # memoryview is not supported for some dtypes e.g. datetime64
    # see https://github.com/joblib/joblib/issues/188 for more details
    a_hash = hash(np.arange(10))
    array = np.arange(0, 10, dtype=dtype)
    assert hash(array) != a_hash