Beispiel #1
0
def test_compress_level_error(wrong_compress):
    """Check that ``dump`` rejects an invalid ``compress`` argument.

    The call must raise ``ValueError`` and the error text must match the
    "Non valid compress level" message built from ``wrong_compress``.
    """
    expected_pattern = 'Non valid compress level given: "{0}"'.format(
        wrong_compress)
    with raises(ValueError) as excinfo:
        numpy_pickle.dump('dummy', 'foo', compress=wrong_compress)
    excinfo.match(expected_pattern)
def cache_value(value, filename, decimal=7):
    """Helper function for checking that a value hasn't changed between
    two invocations.

    First call: write value to a file.
    Second call: check that what was written is identical to the value
        provided in the second call.
        TODO: only numpy arrays are compared, other values still have to
        be compared.

    Parameters
    ==========
    value: arbitrary Python value
        this could include numpy objects. Uses persistence from joblib to
        achieve high efficiency.
    filename: str
        Path of the cache file. Its parent directory is created when it
        does not exist yet; a bare file name (no directory part) is
        accepted and refers to the current working directory.
    decimal: int, optional
        Precision forwarded to ``np.testing.assert_almost_equal`` when the
        cached value is compared with ``value``.

    Raises
    ======
    AssertionError
        Propagated from ``np.testing.assert_almost_equal`` when the cached
        value and ``value`` differ.

    """
    from joblib.numpy_pickle import dump, load
    base_dir = os.path.dirname(filename)
    # A bare file name has an empty directory part and os.makedirs('')
    # raises, so only create the directory when there is one to create.
    if base_dir and not os.path.isdir(base_dir):
        os.makedirs(base_dir)

    if os.path.isfile(filename):
        cached = load(filename)
        np.testing.assert_almost_equal(cached, value, decimal=decimal)
    else:
        dump(value, filename)
Beispiel #3
0
def test_numpy_persistence_bufferred_array_compression():
    """Round-trip a compressed uint8 array of _IO_BUFFER_SIZE + 100 items."""
    n_items = _IO_BUFFER_SIZE + 100
    original = np.ones(n_items, dtype=np.uint8)
    target = env['filename'] + str(random.randint(0, 1000))

    numpy_pickle.dump(original, target, compress=True)
    reloaded = numpy_pickle.load(target)

    np.testing.assert_array_equal(original, reloaded)
def test_file_handle_persistence_compressed_mmap():
    """Loading a gzip handle with ``mmap_mode`` must emit one UserWarning.

    The array is dumped with gzip compression through a raw file handle,
    then reloaded through a ``GzipFile`` with ``mmap_mode='r+'``; exactly
    one warning with the expected message has to be recorded.
    """
    data = np.random.random((10, 10))
    path = env['filename'] + str(random.randint(0, 1000))

    with open(path, 'wb') as raw_handle:
        numpy_pickle.dump(data, raw_handle, compress=('gzip', 3))

    # Message expected from the loader; the file name is empty in it.
    expected_message = ('File "%(filename)s" is compressed '
                        'using "%(compressor)s" which is not '
                        'compatible with mmap_mode '
                        '"%(mmap_mode)s" flag '
                        'passed. mmap_mode option will be '
                        'ignored.' % {
                            'filename': "",
                            'mmap_mode': 'r+',
                            'compressor': 'GzipFile'
                        })

    with closing(gzip.GzipFile(path, 'rb')) as gz_handle:
        with warnings.catch_warnings(record=True) as recorded:
            warnings.simplefilter("always")
            numpy_pickle.load(gz_handle, mmap_mode='r+')
            nose.tools.assert_equal(len(recorded), 1)
            for entry in recorded:
                nose.tools.assert_equal(entry.category, UserWarning)
                nose.tools.assert_equal(entry.message.args[0],
                                        expected_message)
Beispiel #5
0
def test_joblib_compression_formats():
    """Dump/load several objects with every compressor and several levels.

    For each compressor in ``_COMPRESSORS`` and each level in (1, 3, 6) the
    dumped file must be detected as using that compressor and the reloaded
    object must equal the original. When ``PY3_OR_LATER`` is false, 'xz'
    and 'lzma' are expected to raise ``NotImplementedError`` instead.
    """
    base_name = env['filename'] + str(random.randint(0, 1000))
    samples = (np.ones(shape=(100, 100), dtype='f8'),
               range(10),
               {'a': 1, 2: 'b'}, [], (), {}, 0, 1.0)

    for level in (1, 3, 6):
        for cmethod in _COMPRESSORS:
            target = base_name + "." + cmethod
            # Lzma module only available for python >= 3.3
            lzma_missing = not PY3_OR_LATER and cmethod in ('xz', 'lzma')
            for sample in samples:
                if lzma_missing:
                    pattern = "{} compression is only available".format(
                        cmethod)
                    assert_raises_regex(NotImplementedError, pattern,
                                        numpy_pickle.dump, sample, target,
                                        compress=(cmethod, level))
                    continue

                numpy_pickle.dump(sample, target, compress=(cmethod, level))
                # The magic number in the file must match the compressor.
                with open(target, 'rb') as handle:
                    assert _detect_compressor(handle) == cmethod
                # The reloaded object must be of the same type and equal.
                reloaded = numpy_pickle.load(target)
                assert isinstance(reloaded, type(sample))
                if isinstance(sample, np.ndarray):
                    np.testing.assert_array_equal(reloaded, sample)
                else:
                    assert reloaded == sample
                os.remove(target)
Beispiel #6
0
def test_numpy_subclass(tmpdir):
    """A dumped ``SubArray`` must be reloaded as a ``SubArray`` with equal data."""
    target = tmpdir.join('test.pkl').strpath
    original = SubArray((10, ))
    numpy_pickle.dump(original, target)
    reloaded = numpy_pickle.load(target)
    assert isinstance(reloaded, SubArray)
    np.testing.assert_array_equal(reloaded, original)
Beispiel #7
0
def test_numpy_persistence_bufferred_array_compression():
    """Round-trip a compressed uint8 array of _IO_BUFFER_SIZE + 100 items."""
    n_items = _IO_BUFFER_SIZE + 100
    original = np.ones(n_items, dtype=np.uint8)
    target = env['filename'] + str(random.randint(0, 1000))

    numpy_pickle.dump(original, target, compress=True)
    reloaded = numpy_pickle.load(target)

    np.testing.assert_array_equal(original, reloaded)
Beispiel #8
0
def test_compress_tuple_argument():
    """Check handling of ``compress`` given as a ``(method, level)`` tuple.

    Valid tuples must produce a file detected with the requested method;
    a tuple of the wrong length, an unknown method or a non valid level
    must each raise ``ValueError`` with the matching message.
    """
    target = env['filename'] + str(random.randint(0, 1000))

    # Valid tuples: the requested method must be the one detected on disk.
    for method, level in (('zlib', 3), ('gzip', 3)):
        numpy_pickle.dump("dummy", target, compress=(method, level))
        with open(target, 'rb') as handle:
            assert _detect_compressor(handle) == method

    # A tuple that does not hold exactly two items is rejected.
    assert_raises_regex(ValueError,
                        'Compress argument tuple should contain exactly '
                        '2 elements',
                        numpy_pickle.dump, "dummy", target,
                        compress=('zlib', 3, 'extra'))

    # An unknown compression method is rejected.
    bad_method_msg = 'Non valid compression method given: "{}"'.format(
        'wrong')
    assert_raises_regex(ValueError, bad_method_msg,
                        numpy_pickle.dump, "dummy", target,
                        compress=('wrong', 3))

    # A compression level that is not valid is rejected.
    bad_level_msg = 'Non valid compress level given: "{}"'.format('wrong')
    assert_raises_regex(ValueError, bad_level_msg,
                        numpy_pickle.dump, "dummy", target,
                        compress=('zlib', 'wrong'))
Beispiel #9
0
def test_file_handle_persistence():
    """Dump through compressed file objects and reload in two ways.

    Each object is written through every file-object class, then read back
    once through the same class and once through a plain ``open`` handle;
    both reloaded values must equal the original.
    """
    samples = [np.random.random((10, 10)),
               "some data",
               np.matrix([0, 1, 2])]
    openers = [bz2.BZ2File, gzip.GzipFile]
    if PY3_OR_LATER:
        import lzma
        openers.append(lzma.LZMAFile)
    target = env['filename'] + str(random.randint(0, 1000))

    for sample in samples:
        for opener in openers:
            with opener(target, 'wb') as handle:
                numpy_pickle.dump(sample, handle)

            # Reading through the same decompressor avoids decompressing
            # a second time internally.
            with opener(target, 'rb') as handle:
                via_same_opener = numpy_pickle.load(handle)

            # With a raw file handle the right decompressor has to be
            # picked by the loader itself.
            with open(target, 'rb') as handle:
                via_raw_handle = numpy_pickle.load(handle)

            for reloaded in (via_same_opener, via_raw_handle):
                if isinstance(sample, np.ndarray):
                    np.testing.assert_array_equal(reloaded, sample)
                else:
                    assert reloaded == sample

            os.remove(target)
Beispiel #10
0
def test_numpy_persistence_bufferred_array_compression(tmpdir):
    """Round-trip a compressed uint8 array of _IO_BUFFER_SIZE + 100 items."""
    n_items = _IO_BUFFER_SIZE + 100
    original = np.ones(n_items, dtype=np.uint8)
    target = tmpdir.join('test.pkl').strpath

    numpy_pickle.dump(original, target, compress=True)
    reloaded = numpy_pickle.load(target)

    np.testing.assert_array_equal(original, reloaded)
Beispiel #11
0
def test_compression_using_file_extension():
    """Check that the file extension selects the compression method.

    Each extension is mapped to the compressor name that must be detected
    in the dumped file; '.pkl' and the empty extension must give an
    uncompressed file.
    """
    expected_by_extension = {
        # extensions that select a compressor
        '.z': 'zlib',
        '.gz': 'gzip',
        '.bz2': 'bz2',
        '.lzma': 'lzma',
        '.xz': 'xz',
        # extensions that do not select a compressor
        '.pkl': 'not-compressed',
        '': 'not-compressed'
    }
    base_name = env['filename'] + str(random.randint(0, 1000))
    payload = "object to dump"

    for ext, cmethod in expected_by_extension.items():
        target = base_name + ext
        if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
            # Lzma module only available for python >= 3.3
            pattern = "{0} compression is only available".format(cmethod)
            assert_raises_regex(NotImplementedError, pattern,
                                numpy_pickle.dump, payload, target)
            continue

        numpy_pickle.dump(payload, target)
        # The magic number in the file must match the expected method.
        with open(target, 'rb') as handle:
            nose.tools.assert_equal(_detect_compressor(handle), cmethod)
        # The reloaded object must be of the same type and equal.
        reloaded = numpy_pickle.load(target)
        nose.tools.assert_true(isinstance(reloaded, type(payload)))
        nose.tools.assert_equal(reloaded, payload)
        os.remove(target)
Beispiel #12
0
def test_numpy_subclass():
    """A dumped ``SubArray`` must be reloaded as a ``SubArray`` with equal data."""
    target = env['filename']
    original = SubArray((10,))
    numpy_pickle.dump(original, target)
    reloaded = numpy_pickle.load(target)
    assert isinstance(reloaded, SubArray)
    np.testing.assert_array_equal(reloaded, original)
Beispiel #13
0
def test_numpy_subclass():
    """A dumped ``SubArray`` must be reloaded as a ``SubArray`` with equal data."""
    target = env['filename']
    original = SubArray((10, ))
    numpy_pickle.dump(original, target)
    reloaded = numpy_pickle.load(target)
    is_subarray = isinstance(reloaded, SubArray)
    nose.tools.assert_true(is_subarray)
    np.testing.assert_array_equal(reloaded, original)
Beispiel #14
0
def test_joblib_compression_formats(tmpdir, compress, cmethod):
    """Dump/load several objects with one compressor and one level.

    The test is skipped when ``cmethod`` needs lzma or lz4 and that
    dependency is not available. Otherwise the dumped file must be
    detected as using ``cmethod`` and the reloaded object must equal the
    original.
    """
    base_name = tmpdir.join('test.pkl').strpath
    samples = (
        np.ones(shape=(100, 100), dtype='f8'),
        range(10),
        {'a': 1, 2: 'b'},
        [],
        (),
        {},
        0,
        1.0,
    )

    if cmethod in ("lzma", "xz") and lzma is None:
        pytest.skip("lzma is support not available")
    elif cmethod == 'lz4' and with_lz4.args[0]:
        # with_lz4 is a skipif marker whose argument is True when lz4 is
        # not installed, so a true value means the test cannot run.
        pytest.skip("lz4 is not installed.")

    target = base_name + "." + cmethod
    for sample in samples:
        numpy_pickle.dump(sample, target, compress=(cmethod, compress))
        # The magic number in the file must match the compressor.
        with open(target, 'rb') as handle:
            assert _detect_compressor(handle) == cmethod
        # The reloaded object must be of the same type and equal.
        reloaded = numpy_pickle.load(target)
        assert isinstance(reloaded, type(sample))
        if isinstance(sample, np.ndarray):
            np.testing.assert_array_equal(reloaded, sample)
        else:
            assert reloaded == sample
Beispiel #15
0
def test_compress_string_argument(tmpdir, compress_string):
    """Check that ``compress`` given as a string selects that compressor."""
    target = tmpdir.join('test.pkl').strpath
    numpy_pickle.dump("dummy", target, compress=compress_string)
    # The magic number in the file must match the requested compressor.
    with open(target, 'rb') as handle:
        detected = _detect_compressor(handle)
        assert detected == compress_string
Beispiel #16
0
def test_file_handle_persistence(tmpdir):
    """Dump through compressed file objects and reload in two ways.

    Each object is written through every file-object class, then read back
    once through the same class and once through a plain ``open`` handle;
    both reloaded values must equal the original.
    """
    samples = [np.random.random((10, 10)), "some data", np.matrix([0, 1, 2])]
    openers = [bz2.BZ2File, gzip.GzipFile]
    if lzma is not None:
        openers.append(lzma.LZMAFile)
    target = tmpdir.join('test.pkl').strpath

    for sample in samples:
        for opener in openers:
            with opener(target, 'wb') as handle:
                numpy_pickle.dump(sample, handle)

            # Reading through the same decompressor avoids decompressing
            # a second time internally.
            with opener(target, 'rb') as handle:
                via_same_opener = numpy_pickle.load(handle)

            # With a raw file handle the right decompressor has to be
            # picked by the loader itself.
            with open(target, 'rb') as handle:
                via_raw_handle = numpy_pickle.load(handle)

            for reloaded in (via_same_opener, via_raw_handle):
                if isinstance(sample, np.ndarray):
                    np.testing.assert_array_equal(reloaded, sample)
                else:
                    assert reloaded == sample
Beispiel #17
0
def test_compressed_pickle_dump_and_load():
    """Round-trip a list of arrays, bytes and unicode with ``compress=1``.

    The list is dumped to a '.gz' suffixed file name and reloaded; arrays
    must keep their dtype and values, other items must compare equal. The
    file is removed afterwards.
    """
    byte_source = np.arange(256, dtype=np.uint8)
    # ndarray.tostring is a deprecated alias of ndarray.tobytes (added in
    # NumPy 1.9.0) and is missing from recent NumPy releases. Prefer
    # tobytes and only fall back to tostring on very old NumPy versions.
    to_bytes = getattr(byte_source, 'tobytes', None)
    if to_bytes is None:
        to_bytes = byte_source.tostring

    expected_list = [
        np.arange(5, dtype=np.int64),
        np.arange(5, dtype=np.float64),
        to_bytes(),
        u"C'est l'\xe9t\xe9 !"
    ]

    # Only the generated name is kept: leaving the ``with`` block closes
    # the temporary file before ``dump`` writes to that name.
    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    try:
        numpy_pickle.dump(expected_list, fname, compress=1)
        result_list = numpy_pickle.load(fname)
        for result, expected in zip(result_list, expected_list):
            if isinstance(expected, np.ndarray):
                nose.tools.assert_equal(result.dtype, expected.dtype)
                np.testing.assert_equal(result, expected)
            else:
                nose.tools.assert_equal(result, expected)
    finally:
        os.remove(fname)
Beispiel #18
0
def test_compression_using_file_extension():
    """Check that the file extension selects the compression method.

    Each extension is mapped to the compressor name that must be detected
    in the dumped file; '.pkl' and the empty extension must give an
    uncompressed file.
    """
    expected_by_extension = {
        # extensions that select a compressor
        '.z': 'zlib',
        '.gz': 'gzip',
        '.bz2': 'bz2',
        '.lzma': 'lzma',
        '.xz': 'xz',
        # extensions that do not select a compressor
        '.pkl': 'not-compressed',
        '': 'not-compressed'
    }
    base_name = env['filename'] + str(random.randint(0, 1000))
    payload = "object to dump"

    for ext, cmethod in expected_by_extension.items():
        target = base_name + ext
        if not PY3_OR_LATER and cmethod in ('xz', 'lzma'):
            # Lzma module only available for python >= 3.3
            pattern = "{} compression is only available".format(cmethod)
            assert_raises_regex(NotImplementedError, pattern,
                                numpy_pickle.dump, payload, target)
            continue

        numpy_pickle.dump(payload, target)
        # The magic number in the file must match the expected method.
        with open(target, 'rb') as handle:
            assert _detect_compressor(handle) == cmethod
        # The reloaded object must be of the same type and equal.
        reloaded = numpy_pickle.load(target)
        assert isinstance(reloaded, type(payload))
        assert reloaded == payload
        os.remove(target)
Beispiel #19
0
def test_file_handle_persistence():
    """Dump through several file-object classes and reload in two ways.

    Each object is written through every opener (plain ``open`` plus the
    compressed file classes enabled by ``PY26`` / ``PY3_OR_LATER``), then
    read back once through the same opener and once through a plain
    ``open`` handle; both reloaded values must equal the original.
    """
    samples = [np.random.random((10, 10)), "some data", np.matrix([0, 1, 2])]
    openers = [open]
    if not PY26:
        openers.extend([bz2.BZ2File, gzip.GzipFile])
    if PY3_OR_LATER:
        import lzma
        openers.append(lzma.LZMAFile)
    target = env['filename'] + str(random.randint(0, 1000))

    for sample in samples:
        for opener in openers:
            with opener(target, 'wb') as handle:
                numpy_pickle.dump(sample, handle)

            # Reading through the same decompressor avoids decompressing
            # a second time internally.
            with opener(target, 'rb') as handle:
                via_same_opener = numpy_pickle.load(handle)

            # With a raw file handle the right decompressor has to be
            # picked by the loader itself.
            with open(target, 'rb') as handle:
                via_raw_handle = numpy_pickle.load(handle)

            for reloaded in (via_same_opener, via_raw_handle):
                if isinstance(sample, np.ndarray):
                    np.testing.assert_array_equal(reloaded, sample)
                else:
                    nose.tools.assert_equal(reloaded, sample)

            os.remove(target)
Beispiel #20
0
def test_joblib_compression_formats(tmpdir, compress, cmethod):
    """Dump/load several objects with one compressor and one level.

    When ``PY3_OR_LATER`` is false and ``cmethod`` is 'lzma', 'xz' or
    'lz4', dumping must raise (``ValueError`` for 'lz4',
    ``NotImplementedError`` otherwise). In every other case the dumped
    file must be detected as using ``cmethod`` and the reloaded object
    must equal the original.
    """
    base_name = tmpdir.join('test.pkl').strpath
    samples = (
        np.ones(shape=(100, 100), dtype='f8'),
        range(10),
        {'a': 1, 2: 'b'},
        [],
        (),
        {},
        0,
        1.0,
    )

    target = base_name + "." + cmethod
    for sample in samples:
        if not PY3_OR_LATER and cmethod in ('lzma', 'xz', 'lz4'):
            # Lzma module only available for python >= 3.3
            pattern = "{} compression is only available".format(cmethod)
            expected_error = (ValueError if cmethod == 'lz4'
                              else NotImplementedError)
            with raises(expected_error) as excinfo:
                numpy_pickle.dump(sample, target,
                                  compress=(cmethod, compress))
            excinfo.match(pattern)
            continue

        numpy_pickle.dump(sample, target, compress=(cmethod, compress))
        # The magic number in the file must match the compressor.
        with open(target, 'rb') as handle:
            assert _detect_compressor(handle) == cmethod
        # The reloaded object must be of the same type and equal.
        reloaded = numpy_pickle.load(target)
        assert isinstance(reloaded, type(sample))
        if isinstance(sample, np.ndarray):
            np.testing.assert_array_equal(reloaded, sample)
        else:
            assert reloaded == sample
Beispiel #21
0
def test_numpy_persistence():
    """Round-trip containers of arrays, array subclasses and a complex object.

    For every ``compress`` value in (False, True, 0, 3) the test checks
    that ``dump`` returns exactly one file name, that the file exists and
    that the reloaded data equals what was dumped.
    """
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress in (False, True, 0, 3):
        # We use 'a.T' to have a non C-contiguous array.
        for obj in ((a,), (a.T,), (a, a), [a, a, a]):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))

            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)

            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)
            # Check that only one file was created
            nose.tools.assert_equal(filenames[0], this_filename)
            # Check that this file does exist
            nose.tools.assert_true(
                os.path.exists(os.path.join(env['dir'], filenames[0])))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            np.testing.assert_array_equal(np.array(obj), np.array(obj_))

        # Now test with array subclasses.
        # ``dtype=float`` replaces the ``np.float`` alias, which was the
        # builtin float and no longer exists in recent NumPy releases.
        for obj in (np.matrix(np.zeros(10)),
                    np.memmap(filename + str(random.randint(0, 1000)) + 'mmap',
                              mode='w+', shape=4, dtype=float)):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress)
            # All is cached in one file
            nose.tools.assert_equal(len(filenames), 1)

            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap and
                    hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

            np.testing.assert_array_equal(obj_, obj)

        # Test with an object containing multiple numpy arrays.
        # ``this_filename`` is the name left over from the last iteration
        # of the loop above and is deliberately reused here.
        obj = ComplexTestObject()
        filenames = numpy_pickle.dump(obj, this_filename,
                                      compress=compress)
        # All is cached in one file
        nose.tools.assert_equal(len(filenames), 1)

        obj_loaded = numpy_pickle.load(this_filename)
        nose.tools.assert_true(isinstance(obj_loaded, type(obj)))
        np.testing.assert_array_equal(obj_loaded.array_float, obj.array_float)
        np.testing.assert_array_equal(obj_loaded.array_int, obj.array_int)
        np.testing.assert_array_equal(obj_loaded.array_obj, obj.array_obj)
Beispiel #22
0
def test_memmap_persistence():
    """Loading with ``mmap_mode='r'`` must return an ``np.memmap``."""
    generator = np.random.RandomState(0)
    original = generator.random_sample(10)
    target = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(original, target)
    mapped = numpy_pickle.load(target, mmap_mode='r')

    is_memmap = isinstance(mapped, np.memmap)
    nose.tools.assert_true(is_memmap)
Beispiel #23
0
def test_memmap_persistence():
    """Loading with ``mmap_mode='r'`` must return an ``np.memmap``.

    The type check is only performed when the NumPy (major, minor) version
    is at least 1.3.
    """
    generator = np.random.RandomState(0)
    original = generator.random_sample(10)
    target = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(original, target)
    mapped = numpy_pickle.load(target, mmap_mode='r')

    major_minor = [int(part) for part in np.__version__.split('.', 2)[:2]]
    if major_minor >= [1, 3]:
        nose.tools.assert_true(isinstance(mapped, np.memmap))
Beispiel #24
0
 def persist_input(self, args_tuple, kwargs_dict, filtered_args_dict):
     """Persist the call inputs, then pickle a description of the call.

     After delegating to ``DirectoryJob.persist_input``, a dict holding the
     function, its ``version_info``, the positional and the keyword
     arguments is dumped to 'input.pkl' inside ``self._work_path``.
     """
     DirectoryJob.persist_input(self, args_tuple, kwargs_dict,
                                filtered_args_dict)
     call_description = {
         'func': self.func,
         'version_info': self.func.version_info,
         'args': args_tuple,
         'kwargs': kwargs_dict,
     }
     input_path = pjoin(self._work_path, 'input.pkl')
     numpy_pickle.dump(call_description, input_path)
Beispiel #25
0
def test_memmap_persistence():
    """Loading with ``mmap_mode='r'`` must return an ``np.memmap``."""
    generator = np.random.RandomState(0)
    original = generator.random_sample(10)
    target = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(original, target)
    mapped = numpy_pickle.load(target, mmap_mode='r')

    is_memmap = isinstance(mapped, np.memmap)
    nose.tools.assert_true(is_memmap)
Beispiel #26
0
def test_standard_types(tmpdir, compress, member):
    """Dump and reload ``member`` and compare it with the original.

    The comparison is only made for values that compare equal to their own
    deep copy, i.e. values for which ``==`` is meaningful.
    """
    target = tmpdir.join('test.pkl').strpath
    numpy_pickle.dump(member, target, compress=compress)
    reloaded = numpy_pickle.load(target)
    comparable = member == copy.deepcopy(member)
    if comparable:
        assert member == reloaded
Beispiel #27
0
def test_masked_array_persistence():
    """A masked array must be reloaded as an ``np.ma.masked_array``.

    The special-case pickler fails, because saving masked_array is not
    implemented, but it just delegates to the standard pickler.
    """
    generator = np.random.RandomState(0)
    values = generator.random_sample(10)
    masked = np.ma.masked_greater(values, 0.5)
    target = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(masked, target)
    reloaded = numpy_pickle.load(target, mmap_mode='r')
    nose.tools.assert_true(isinstance(reloaded, np.ma.masked_array))
Beispiel #28
0
def atomic_pickle(data, path, filename):
    """Pickle ``data`` to ``path/filename`` via a temporary file and a rename.

    The data is first dumped to a temporary file created in ``path`` and
    then renamed to its final name. If any step fails, the temporary file
    is removed (when it still exists) and the error propagates.
    """
    handle, scratch_file = tempfile.mkstemp(prefix=filename + '-', dir=path)
    completed = False
    try:
        # Only the name is needed: dump() opens the file by itself.
        os.close(handle)
        numpy_pickle.dump(data, scratch_file)
        os.rename(scratch_file, pjoin(path, filename))
        completed = True
    finally:
        # On failure, do not leave the temporary file behind.
        if not completed and os.path.exists(scratch_file):
            os.unlink(scratch_file)
Beispiel #29
0
def atomic_pickle(data, path, filename):
    """Pickle ``data`` to ``path/filename`` via a temporary file and a rename.

    The data is first dumped to a temporary file created in ``path`` and
    then renamed to its final name. If any step fails, the temporary file
    is removed (when it still exists) and the error propagates.
    """
    handle, scratch_file = tempfile.mkstemp(prefix=filename + '-', dir=path)
    completed = False
    try:
        # Only the name is needed: dump() opens the file by itself.
        os.close(handle)
        numpy_pickle.dump(data, scratch_file)
        os.rename(scratch_file, pjoin(path, filename))
        completed = True
    finally:
        # On failure, do not leave the temporary file behind.
        if not completed and os.path.exists(scratch_file):
            os.unlink(scratch_file)
Beispiel #30
0
def test_in_memory_persistence():
    """Dump to and reload from an in-memory ``io.BytesIO`` buffer."""
    samples = [np.random.random((10, 10)), "some data", np.matrix([0, 1, 2])]
    for sample in samples:
        buffer = io.BytesIO()
        numpy_pickle.dump(sample, buffer)
        reloaded = numpy_pickle.load(buffer)
        if not isinstance(sample, np.ndarray):
            assert reloaded == sample
        else:
            np.testing.assert_array_equal(reloaded, sample)
Beispiel #31
0
def test_masked_array_persistence():
    """A masked array must be reloaded as an ``np.ma.masked_array``.

    The special-case pickler fails, because saving masked_array is not
    implemented, but it just delegates to the standard pickler.
    """
    generator = np.random.RandomState(0)
    values = generator.random_sample(10)
    masked = np.ma.masked_greater(values, 0.5)
    target = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(masked, target)
    reloaded = numpy_pickle.load(target, mmap_mode='r')
    assert isinstance(reloaded, np.ma.masked_array)
Beispiel #32
0
def test_masked_array_persistence(tmpdir):
    """A masked array must be reloaded as an ``np.ma.masked_array``.

    The special-case pickler fails, because saving masked_array is not
    implemented, but it just delegates to the standard pickler.
    """
    generator = np.random.RandomState(0)
    values = generator.random_sample(10)
    masked = np.ma.masked_greater(values, 0.5)
    target = tmpdir.join('test.pkl').strpath
    numpy_pickle.dump(masked, target)
    reloaded = numpy_pickle.load(target, mmap_mode='r')
    assert isinstance(reloaded, np.ma.masked_array)
Beispiel #33
0
def test_file_handle_persistence_mmap(tmpdir):
    """Dump and load through raw file handles, loading with ``mmap_mode='r+'``."""
    original = np.random.random((10, 10))
    target = tmpdir.join('test.pkl').strpath

    with open(target, 'wb') as writer:
        numpy_pickle.dump(original, writer)

    with open(target, 'rb') as reader:
        reloaded = numpy_pickle.load(reader, mmap_mode='r+')

    np.testing.assert_array_equal(reloaded, original)
Beispiel #34
0
def test_lz4_compression_without_lz4(tmpdir):
    """Check that lz4 cannot be used when the dependency is not available.

    Both requesting lz4 through ``compress='lz4'`` and through a '.lz4'
    file extension must raise ``ValueError`` matching
    ``LZ4_NOT_INSTALLED_ERROR``.
    """
    target = tmpdir.join('test.nolz4').strpath
    payload = 'test data'

    # lz4 requested explicitly through the compress argument.
    with raises(ValueError) as explicit_error:
        numpy_pickle.dump(payload, target, compress='lz4')
    explicit_error.match(LZ4_NOT_INSTALLED_ERROR)

    # lz4 requested implicitly through the file extension.
    with raises(ValueError) as extension_error:
        numpy_pickle.dump(payload, target + '.lz4')
    extension_error.match(LZ4_NOT_INSTALLED_ERROR)
Beispiel #35
0
def test_file_handle_persistence_mmap():
    """Dump and load through raw file handles, loading with ``mmap_mode='r+'``."""
    original = np.random.random((10, 10))
    target = env['filename'] + str(random.randint(0, 1000))

    with open(target, 'wb') as writer:
        numpy_pickle.dump(original, writer)

    with open(target, 'rb') as reader:
        reloaded = numpy_pickle.load(reader, mmap_mode='r+')

    np.testing.assert_array_equal(reloaded, original)
Beispiel #36
0
def test_file_handle_persistence_mmap():
    """Dump and load through raw file handles, loading with ``mmap_mode='r+'``."""
    original = np.random.random((10, 10))
    target = env['filename'] + str(random.randint(0, 1000))

    with open(target, 'wb') as writer:
        numpy_pickle.dump(original, writer)

    with open(target, 'rb') as reader:
        reloaded = numpy_pickle.load(reader, mmap_mode='r+')

    np.testing.assert_array_equal(reloaded, original)
Beispiel #37
0
def test_pickle_highest_protocol():
    """Round-trip a numpy array dumped with ``pickle.HIGHEST_PROTOCOL``.

    See https://github.com/joblib/joblib/issues/362
    """
    target = env['filename'] + str(random.randint(0, 1000))
    original = np.zeros(10)

    numpy_pickle.dump(original, target, protocol=pickle.HIGHEST_PROTOCOL)
    reloaded = numpy_pickle.load(target)

    np.testing.assert_array_equal(reloaded, original)
Beispiel #38
0
def test_pathlib():
    """``dump`` and ``load`` must accept ``pathlib.Path`` file names.

    Nothing is checked when ``pathlib`` cannot be imported.
    """
    try:
        from pathlib import Path
    except ImportError:
        return

    target = env['filename']
    payload = 123
    # Dump with a Path, load with a plain string.
    numpy_pickle.dump(payload, Path(target))
    nose.tools.assert_equal(numpy_pickle.load(target), payload)
    # Dump with a plain string, load with a Path.
    numpy_pickle.dump(payload, target)
    nose.tools.assert_equal(numpy_pickle.load(Path(target)), payload)
Beispiel #39
0
def test_in_memory_persistence():
    """Dump to and reload from an in-memory ``io.BytesIO`` buffer."""
    samples = [np.random.random((10, 10)),
               "some data",
               np.matrix([0, 1, 2])]
    for sample in samples:
        buffer = io.BytesIO()
        numpy_pickle.dump(sample, buffer)
        reloaded = numpy_pickle.load(buffer)
        if not isinstance(sample, np.ndarray):
            assert reloaded == sample
        else:
            np.testing.assert_array_equal(reloaded, sample)
Beispiel #40
0
def test_memmap_with_padding(tmpdir):
    """Check that memmapped arrays returned by ``load`` are 8-byte aligned.

    An array is dumped and reloaded with ``mmap_mode='r'``. The result
    must be an ``np.memmap`` equal to the original, and the address of its
    data buffer must be a multiple of 8.
    """
    fname = tmpdir.join('test.mmap').strpath
    arr = np.random.randn(10)
    numpy_pickle.dump(arr, fname)
    memmap = numpy_pickle.load(fname, mmap_mode='r')

    assert isinstance(memmap, np.memmap)
    np.testing.assert_array_equal(arr, memmap)

    # Use the remainder for the alignment check: ``address / 8`` is always
    # a float under true division (and always an int under Python 2
    # integer division), so an isinstance test on it says nothing about
    # alignment.
    assert memmap.ctypes.data % 8 == 0
Beispiel #41
0
def test_pathlib():
    """``dump`` and ``load`` must accept ``pathlib.Path`` file names.

    Nothing is checked when ``pathlib`` cannot be imported.
    """
    try:
        from pathlib import Path
    except ImportError:
        return

    target = env['filename']
    payload = 123
    # Dump with a Path, load with a plain string.
    numpy_pickle.dump(payload, Path(target))
    assert numpy_pickle.load(target) == payload
    # Dump with a plain string, load with a Path.
    numpy_pickle.dump(payload, target)
    assert numpy_pickle.load(Path(target)) == payload
Beispiel #42
0
def test_pickle_highest_protocol(tmpdir):
    """Round-trip a numpy array dumped with ``pickle.HIGHEST_PROTOCOL``.

    See https://github.com/joblib/joblib/issues/362
    """
    target = tmpdir.join('test.pkl').strpath
    original = np.zeros(10)

    numpy_pickle.dump(original, target, protocol=pickle.HIGHEST_PROTOCOL)
    reloaded = numpy_pickle.load(target)

    np.testing.assert_array_equal(reloaded, original)
Beispiel #43
0
def test_pathlib(tmpdir):
    """``dump`` and ``load`` must accept ``pathlib.Path`` file names.

    Nothing is checked when ``pathlib`` cannot be imported.
    """
    try:
        from pathlib import Path
    except ImportError:
        return

    target = tmpdir.join('test.pkl').strpath
    payload = 123
    # Dump with a Path, load with a plain string.
    numpy_pickle.dump(payload, Path(target))
    assert numpy_pickle.load(target) == payload
    # Dump with a plain string, load with a Path.
    numpy_pickle.dump(payload, target)
    assert numpy_pickle.load(Path(target)) == payload
Beispiel #44
0
def test_pickle_highest_protocol():
    """Round-trip a numpy array dumped with ``pickle.HIGHEST_PROTOCOL``.

    See https://github.com/joblib/joblib/issues/362
    """
    target = env['filename'] + str(random.randint(0, 1000))
    original = np.zeros(10)

    numpy_pickle.dump(original, target, protocol=pickle.HIGHEST_PROTOCOL)
    reloaded = numpy_pickle.load(target)

    np.testing.assert_array_equal(reloaded, original)
Beispiel #45
0
def test_pathlib():
    """``dump`` and ``load`` must accept ``pathlib.Path`` file names.

    Nothing is checked when ``pathlib`` cannot be imported.
    """
    try:
        from pathlib import Path
    except ImportError:
        return

    target = env['filename']
    payload = 123
    # Dump with a Path, load with a plain string.
    numpy_pickle.dump(payload, Path(target))
    nose.tools.assert_equal(numpy_pickle.load(target), payload)
    # Dump with a plain string, load with a Path.
    numpy_pickle.dump(payload, target)
    nose.tools.assert_equal(numpy_pickle.load(Path(target)), payload)
def test_pathlib():
    """``dump`` and ``load`` must accept ``pathlib.Path`` file names.

    Nothing is checked when ``pathlib`` cannot be imported.
    """
    try:
        from pathlib import Path
    except ImportError:
        return

    target = env['filename']
    payload = 123
    # Dump with a Path, load with a plain string.
    numpy_pickle.dump(payload, Path(target))
    assert numpy_pickle.load(target) == payload
    # Dump with a plain string, load with a Path.
    numpy_pickle.dump(payload, target)
    assert numpy_pickle.load(Path(target)) == payload
Beispiel #47
0
def test_load_memmap_with_big_offset(tmpdir):
    """Check the memmap offset when it exceeds mmap.ALLOCATIONGRANULARITY.

    Two uint8 arrays of ``mmap.ALLOCATIONGRANULARITY`` items are dumped in
    one list; after loading with ``mmap_mode='r'`` the second one must be
    a memmap whose offset is larger than that size. See
    https://github.com/joblib/joblib/issues/451 and
    https://github.com/numpy/numpy/pull/8443 for more details.
    """
    target = tmpdir.join('test.mmap').strpath
    granularity = mmap.ALLOCATIONGRANULARITY
    arrays = [np.zeros(granularity, dtype='uint8'),
              np.ones(granularity, dtype='uint8')]
    numpy_pickle.dump(arrays, target)
    mapped = numpy_pickle.load(target, mmap_mode='r')

    second = mapped[1]
    assert isinstance(second, np.memmap)
    assert second.offset > granularity
    np.testing.assert_array_equal(arrays, mapped)
Beispiel #48
0
def test_standard_types():
    """Yield equality checks for standard types after a dump/load cycle.

    Every member of ``typelist`` is dumped and reloaded with ``compress``
    set to 0 and to 1. A check is only yielded for members that compare
    equal to their own deep copy.
    """
    base_name = env['filename']
    for level in (0, 1):
        for member in typelist:
            # Use a fresh file name to avoid side effects between tests
            target = base_name + str(random.randint(0, 1000))
            numpy_pickle.dump(member, target, compress=level)
            reloaded = numpy_pickle.load(target)
            comparable = member == copy.deepcopy(member)
            if comparable:
                yield assert_equal, member, reloaded
Beispiel #49
0
def test_non_contiguous_array_pickling():
    """Round-trip arrays that are neither C- nor Fortran-contiguous."""
    target = env['filename'] + str(random.randint(0, 1000))

    # Array that triggers a contiguousness issue with nditer,
    # see https://github.com/joblib/joblib/pull/352 and see
    # https://github.com/joblib/joblib/pull/353
    nditer_issue_array = np.asfortranarray([[1, 2], [3, 4]])[1:]
    # Non contiguous array which works fine with nditer
    nditer_ok_array = np.ones((10, 50, 20), order='F')[:, :1, :]

    for candidate in (nditer_issue_array, nditer_ok_array):
        flags = candidate.flags
        assert not flags.c_contiguous
        assert not flags.f_contiguous

        numpy_pickle.dump(candidate, target)
        reloaded = numpy_pickle.load(target)
        np.testing.assert_array_equal(reloaded, candidate)

        # The same file name is reused by the next iteration.
        os.remove(target)
Beispiel #50
0
def test_numpy_persistence():
    """Round-trip containers of numpy arrays through dump and load.

    For several ``(compress, cache_size)`` combinations the test dumps
    tuples/lists of arrays, checks that every file name returned by
    ``dump`` exists under ``env['dir']``, reloads the object and compares
    it with the original. Array subclasses (``np.matrix`` and an empty
    reconstructed ``np.memmap``) are round-tripped as well, and a final
    smoke test loads a compressed dump with ``mmap_mode='r'``.
    """
    filename = env['filename']
    rnd = np.random.RandomState(0)
    a = rnd.random_sample((10, 2))
    for compress, cache_size in ((0, 0), (1, 0), (1, 10)):
        # We use 'a.T' to have a non C-contiguous array.
        for obj in ((a,), (a.T,), (a, a), [a, a, a]):
            # Change the file name to avoid side effects between tests
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            # Check that one file was created per array
            if not compress:
                nose.tools.assert_equal(len(filenames), len(obj) + 1)
            # Check that these files do exist
            for dumped_name in filenames:
                nose.tools.assert_true(
                    os.path.exists(os.path.join(env['dir'], dumped_name)))

            # Unpickle the object
            obj_ = numpy_pickle.load(this_filename)
            # Check that the items are indeed arrays
            for item in obj_:
                nose.tools.assert_true(isinstance(item, np.ndarray))
            # And finally, check that all the values are equal.
            nose.tools.assert_true(np.all(np.array(obj) ==
                                                np.array(obj_)))

        # Now test with array subclasses
        for obj in (
                    np.matrix(np.zeros(10)),
                    # The builtin ``float`` is what the removed ``np.float``
                    # alias pointed to, so the dtype is unchanged.
                    np.core.multiarray._reconstruct(np.memmap, (), float)
                   ):
            this_filename = filename + str(random.randint(0, 1000))
            filenames = numpy_pickle.dump(obj, this_filename,
                                          compress=compress,
                                          cache_size=cache_size)
            obj_ = numpy_pickle.load(this_filename)
            if (type(obj) is not np.memmap
                        and hasattr(obj, '__array_prepare__')):
                # We don't reconstruct memmaps
                nose.tools.assert_true(isinstance(obj_, type(obj)))

    # Finally smoke test the warning in case of compress + mmap_mode
    this_filename = filename + str(random.randint(0, 1000))
    numpy_pickle.dump(a, this_filename, compress=1)
    numpy_pickle.load(this_filename, mmap_mode='r')
Beispiel #51
0
def test_compressed_pickle_dump_and_load():
    """Round-trip a mixed list through a compressed dump.

    The list holds numpy arrays, bytes and a unicode string. It is dumped
    with ``compress=1`` for two ``cache_size`` values, reloaded, and each
    item is compared with the original (dtype included for arrays). The
    files reported by ``dump`` are removed afterwards.

    Raises
    ------
    nose.SkipTest
        On machines whose byte order is not little-endian.
    """
    # XXX: temporarily disable this test on non little-endian machines
    if sys.byteorder != 'little':
        raise nose.SkipTest('Skipping this test on non little-endian machines')

    expected_list = [np.arange(5, dtype=np.dtype('<i8')),
                     np.arange(5, dtype=np.dtype('<f8')),
                     np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
                     # .tostring actually returns bytes and is a
                     # compatibility alias for .tobytes which was
                     # added in 1.9.0
                     np.arange(256, dtype=np.uint8).tostring(),
                     u"C'est l'\xe9t\xe9 !"]

    # The temporary file is only used to obtain a unique name: it is
    # deleted when the ``with`` block exits, before anything is dumped.
    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as f:
        fname = f.name

    # Need to test both code branches (whether array size is greater
    # or smaller than cache_size)
    for cache_size in [0, 1e9]:
        # Reset on every iteration so that a failing ``dump`` neither hits
        # an unbound name in ``finally`` nor re-removes the files already
        # deleted by the previous iteration; either would mask the real
        # error.
        dumped_filenames = []
        try:
            dumped_filenames = numpy_pickle.dump(
                expected_list, fname, compress=1,
                cache_size=cache_size)
            result_list = numpy_pickle.load(fname)
            for result, expected in zip(result_list, expected_list):
                if isinstance(expected, np.ndarray):
                    nose.tools.assert_equal(result.dtype, expected.dtype)
                    np.testing.assert_equal(result, expected)
                else:
                    nose.tools.assert_equal(result, expected)
        finally:
            for fn in dumped_filenames:
                os.remove(fn)
Beispiel #52
0
def test_compressed_pickle_dump_and_load():
    """Round-trip a mixed list through a single compressed dump file.

    The list mixes little- and big-endian arrays, an object array, bytes,
    ``np.matrix`` instances and a unicode string. ``dump`` must report
    exactly one file, and every reloaded item must equal the original
    (dtype included for arrays).
    """
    int_le, int_be = np.dtype('<i8'), np.dtype('>i8')
    float_le, float_be = np.dtype('<f8'), np.dtype('>f8')

    expected_list = [
        np.arange(5, dtype=int_le),
        np.arange(5, dtype=int_be),
        np.arange(5, dtype=float_le),
        np.arange(5, dtype=float_be),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # .tostring actually returns bytes and is a compatibility alias
        # for .tobytes which was added in 1.9.0
        np.arange(256, dtype=np.uint8).tostring(),
        # np.matrix is a subclass of np.ndarray, here we want to verify
        # this type of object is correctly unpickled among versions.
        np.matrix([0, 1, 2], dtype=int_le),
        np.matrix([0, 1, 2], dtype=int_be),
        u"C'est l'\xe9t\xe9 !",
    ]

    # Only the unique name is kept; the temporary file itself is gone once
    # the ``with`` block exits.
    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as handle:
        fname = handle.name

    try:
        dumped = numpy_pickle.dump(expected_list, fname, compress=1)
        assert len(dumped) == 1
        reloaded = numpy_pickle.load(fname)
        for got, wanted in zip(reloaded, expected_list):
            if not isinstance(wanted, np.ndarray):
                assert got == wanted
                continue
            assert got.dtype == wanted.dtype
            np.testing.assert_equal(got, wanted)
    finally:
        os.remove(fname)
Beispiel #53
0
def test_file_handle_persistence_in_memory_mmap():
    """Check the warning raised when loading from memory with mmap_mode."""
    requested_mode = 'r+'
    expected_message = (
        'In memory persistence is not compatible with '
        'mmap_mode "%(mmap_mode)s" flag passed. mmap_mode '
        'option will be ignored.' % {'mmap_mode': requested_mode})

    data = np.random.random((10, 10))
    memory_file = io.BytesIO()
    numpy_pickle.dump(data, memory_file)

    with warnings.catch_warnings(record=True) as caught_warnings:
        # Record every warning, including repeated ones.
        warnings.simplefilter("always")
        numpy_pickle.load(memory_file, mmap_mode=requested_mode)

        assert len(caught_warnings) == 1
        for caught in caught_warnings:
            assert caught.category == UserWarning
            assert caught.message.args[0] == expected_message
Beispiel #54
0
def test_memmap_persistence_mixed_dtypes():
    """Check memmapping of a tuple mixing float and object arrays.

    Loading datastructures that have sub-arrays with dtype=object should
    not prevent memmapping of the fixed size dtype sub-arrays.
    """
    rng = np.random.RandomState(0)
    float_array = rng.random_sample(10)
    object_array = np.array([1, 'b'], dtype=object)

    target = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump((float_array, object_array), target)
    float_clone, object_clone = numpy_pickle.load(target, mmap_mode='r')

    # The floating point array comes back memory mapped...
    assert isinstance(float_clone, np.memmap)

    # ...whereas the object-dtype array is loaded in memory.
    assert not isinstance(object_clone, np.memmap)
Beispiel #55
0
def test_memmap_persistence():
    """Check loading dumps with the different ``mmap_mode`` values.

    Covers a single array loaded with ``'r'``, a ``ComplexTestObject``
    holding several arrays loaded with ``'r'`` (read-only memmaps, except
    for the object array), writing through ``'r+'`` memmaps, and a load
    requested with ``'w+'`` which is expected to come back as ``'r+'``.
    """
    rnd = np.random.RandomState(0)
    a = rnd.random_sample(10)
    filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(a, filename)
    b = numpy_pickle.load(filename, mmap_mode='r')

    assert isinstance(b, np.memmap)

    # Test with an object containing multiple numpy arrays
    filename = env['filename'] + str(random.randint(0, 1000))
    obj = ComplexTestObject()
    numpy_pickle.dump(obj, filename)
    obj_loaded = numpy_pickle.load(filename, mmap_mode='r')
    assert isinstance(obj_loaded, type(obj))
    assert isinstance(obj_loaded.array_float, np.memmap)
    assert not obj_loaded.array_float.flags.writeable
    assert isinstance(obj_loaded.array_int, np.memmap)
    assert not obj_loaded.array_int.flags.writeable
    # Memory map not allowed for numpy object arrays
    assert not isinstance(obj_loaded.array_obj, np.memmap)
    np.testing.assert_array_equal(obj_loaded.array_float,
                                  obj.array_float)
    np.testing.assert_array_equal(obj_loaded.array_int,
                                  obj.array_int)
    np.testing.assert_array_equal(obj_loaded.array_obj,
                                  obj.array_obj)

    # Test we can write in memmaped arrays
    obj_loaded = numpy_pickle.load(filename, mmap_mode='r+')
    assert obj_loaded.array_float.flags.writeable
    obj_loaded.array_float[0:10] = 10.0
    assert obj_loaded.array_int.flags.writeable
    obj_loaded.array_int[0:10] = 10

    obj_reloaded = numpy_pickle.load(filename, mmap_mode='r')
    np.testing.assert_array_equal(obj_reloaded.array_float,
                                  obj_loaded.array_float)
    np.testing.assert_array_equal(obj_reloaded.array_int,
                                  obj_loaded.array_int)

    # Test w+ mode is caught and the mode has switched to r+.
    # The result must be rebound: otherwise the assertions below would
    # inspect the object previously loaded with 'r+' and prove nothing
    # about the 'w+' request.
    obj_loaded = numpy_pickle.load(filename, mmap_mode='w+')
    assert obj_loaded.array_int.flags.writeable
    assert obj_loaded.array_int.mode == 'r+'
    assert obj_loaded.array_float.flags.writeable
    assert obj_loaded.array_float.mode == 'r+'
Beispiel #56
0
def test_compress_mmap_mode_warning():
    """Check the warning raised when memmapping a compressed dump."""
    requested_mode = 'r+'
    rng = np.random.RandomState(0)
    data = rng.random_sample(10)

    this_filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(data, this_filename, compress=1)

    expected_message = (
        'mmap_mode "%(mmap_mode)s" is not compatible with '
        'compressed file %(filename)s. "%(mmap_mode)s" flag will '
        'be ignored.' % {'filename': this_filename,
                         'mmap_mode': requested_mode})

    with warnings.catch_warnings(record=True) as caught_warnings:
        # Record every warning, including repeated ones.
        warnings.simplefilter("always")
        numpy_pickle.load(this_filename, mmap_mode=requested_mode)

        assert len(caught_warnings) == 1
        for caught in caught_warnings:
            assert caught.category == UserWarning
            assert caught.message.args[0] == expected_message
Beispiel #57
0
def test_file_handle_persistence_compressed_mmap():
    """Check the warning raised when memmapping from a gzip file object.

    The array is dumped gzip-compressed through an open file handle, then
    loaded from a ``gzip.GzipFile`` with ``mmap_mode='r+'``.
    """
    requested_mode = 'r+'
    data = np.random.random((10, 10))
    filename = env['filename'] + str(random.randint(0, 1000))

    with open(filename, 'wb') as raw_handle:
        numpy_pickle.dump(data, raw_handle, compress=('gzip', 3))

    with closing(gzip.GzipFile(filename, 'rb')) as gz_handle:
        with warnings.catch_warnings(record=True) as caught_warnings:
            # Record every warning, including repeated ones.
            warnings.simplefilter("always")
            numpy_pickle.load(gz_handle, mmap_mode=requested_mode)

            assert len(caught_warnings) == 1
            for caught in caught_warnings:
                assert caught.category == UserWarning
                # The message embeds the repr of the file object.
                expected_message = (
                    '"%(fileobj)r" is not a raw file, mmap_mode '
                    '"%(mmap_mode)s" flag will be ignored.'
                    % {'fileobj': gz_handle, 'mmap_mode': requested_mode})
                assert caught.message.args[0] == expected_message
Beispiel #58
0
def test_cache_size_warning():
    """Check the deprecation warning raised when cache_size is not None."""
    target = env['filename'] + str(random.randint(0, 1000))
    rng = np.random.RandomState(0)
    data = rng.random_sample((10, 2))

    for cache_size in (None, 0, 10):
        # Exactly one warning is expected unless cache_size is None.
        if cache_size is None:
            expected_nb_warnings = 0
        else:
            expected_nb_warnings = 1
        expected_message = (
            "Please do not set 'cache_size' in joblib.dump, this "
            "parameter has no effect and will be removed. You "
            "used 'cache_size={0}'".format(cache_size))

        with warnings.catch_warnings(record=True) as caught_warnings:
            # Record every warning, including repeated ones.
            warnings.simplefilter("always")
            numpy_pickle.dump(data, target, cache_size=cache_size)

            assert len(caught_warnings) == expected_nb_warnings
            for caught in caught_warnings:
                assert caught.category == DeprecationWarning
                assert caught.message.args[0] == expected_message
Beispiel #59
0
def test_compressed_pickle_dump_and_load():
    """Round-trip a small mixed list through a compressed dump.

    The list holds two numpy arrays, bytes and a unicode string; every
    reloaded item must equal the original (dtype included for arrays).
    """
    int_array = np.arange(5, dtype=np.int64)
    float_array = np.arange(5, dtype=np.float64)
    # .tostring actually returns bytes and is a compatibility alias for
    # .tobytes which was added in 1.9.0
    raw_bytes = np.arange(256, dtype=np.uint8).tostring()
    expected_list = [int_array, float_array, raw_bytes,
                     u"C'est l'\xe9t\xe9 !"]

    with tempfile.NamedTemporaryFile(suffix='.gz', dir=env['dir']) as handle:
        target = handle.name
        numpy_pickle.dump(expected_list, target, compress=1)
        reloaded = numpy_pickle.load(target)
        for got, wanted in zip(reloaded, expected_list):
            if not isinstance(wanted, np.ndarray):
                nose.tools.assert_equal(got, wanted)
                continue
            nose.tools.assert_equal(got.dtype, wanted.dtype)
            np.testing.assert_equal(got, wanted)
Beispiel #60
0
def test_compress_mmap_mode_warning():
    """Check the warning raised when memmapping a zlib-compressed dump."""
    requested_mode = 'r+'
    rng = np.random.RandomState(0)
    data = rng.random_sample(10)

    this_filename = env['filename'] + str(random.randint(0, 1000))
    numpy_pickle.dump(data, this_filename, compress=1)

    expected_message = (
        'File "%(filename)s" is compressed using '
        '"%(compressor)s" which is not compatible '
        'with mmap_mode "%(mmap_mode)s" flag '
        'passed.' % {'filename': this_filename,
                     'mmap_mode': requested_mode,
                     'compressor': 'zlib'})

    with warnings.catch_warnings(record=True) as caught_warnings:
        # Record every warning, including repeated ones.
        warnings.simplefilter("always")
        numpy_pickle.load(this_filename, mmap_mode=requested_mode)

        nose.tools.assert_equal(len(caught_warnings), 1)
        for caught in caught_warnings:
            nose.tools.assert_equal(caught.category, DeprecationWarning)
            nose.tools.assert_equal(caught.message.args[0],
                                    expected_message)