def test_hashes_are_different_between_c_and_fortran_contiguous_arrays():
    """Check that memory layout influences the hash of an array.

    A C-contiguous array and its Fortran-contiguous counterpart hold the
    same values; the hashing function under test must still tell them
    apart.
    """
    random_state = np.random.RandomState(0)
    c_ordered = random_state.random_sample((10, 10))
    # Same values, column-major memory layout.
    f_ordered = np.asfortranarray(c_ordered)

    c_digest = hash(c_ordered)
    f_digest = hash(f_ordered)
    assert c_digest != f_digest
def test_hash_numpy_noncontiguous():
    """Check that a sliced array hashes differently from contiguous copies.

    ``a`` is a slice of a Fortran-ordered array. Its hash must differ from
    the hash of both its C-contiguous copy and its Fortran-contiguous copy.

    Plain ``assert`` statements are used instead of ``nose.tools`` helpers
    so the test does not depend on nose.
    """
    a = np.asarray(np.arange(6000).reshape((1000, 2, 3)),
                   order='F')[:, :1, :]

    b = np.ascontiguousarray(a)
    assert hash(a) != hash(b)

    c = np.asfortranarray(a)
    assert hash(a) != hash(c)
def test_hash_numpy_noncontiguous():
    """Check that a sliced array hashes differently from contiguous copies.

    The sliced view of a Fortran-ordered array is compared, in turn, with
    its C-contiguous copy and with its Fortran-contiguous copy; neither
    copy may share the view's hash.
    """
    source = np.arange(6000).reshape((1000, 2, 3))
    sliced_view = np.asarray(source, order='F')[:, :1, :]

    # Same order as before: C-contiguous copy first, then Fortran copy.
    for make_contiguous in (np.ascontiguousarray, np.asfortranarray):
        contiguous_copy = make_contiguous(sliced_view)
        assert hash(sliced_view) != hash(contiguous_copy)
def create_objects_to_hash():
    """Build the fixed list of objects used to check hash stability.

    The objects are generated from ``np.random.RandomState(42)``, so the
    list is identical on every call. The random draws happen in the order
    the entries appear below; reordering them would change every value.

    Returns
    -------
    list
        Seven entries: an int array, a tuple of float arrays, a list of
        float arrays, a dict mixing arrays and lists, then a C-contiguous,
        a Fortran-contiguous and a non-contiguous 2D int array.
    """
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {
            -3333: rng.randn(3, 5).astype('<f4'),
            0: [
                rng.randint(10, size=20).astype('<i8'),
                rng.randn(10).astype('<f4'),
            ],
        },
        # Non regression cases for
        # https://github.com/joblib/joblib/issues/308
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]
    return to_hash_list
def test_hashes_stay_the_same_with_numpy_objects():
    """Compare hashes of fixed numpy-based objects with recorded digests.

    Hashes must not change across joblib versions: a change would force
    end users to regenerate their cache from scratch, which potentially
    means lengthy recomputations.
    """
    rng = np.random.RandomState(42)

    # Dtypes are spelled out to avoid architecture-related differences.
    # 'f4' is used rather than 'f8' for float arrays because 'f8' arrays
    # generated by rng.random.randn don't seem to be bit-identical on
    # 32bit and 64bit machines.
    # The draws below must stay in this order: each one advances rng.
    int_array = rng.randint(-1000, high=1000, size=50).astype('<i8')
    float_tuple = tuple(rng.randn(3).astype('<f4') for _ in range(5))
    float_list = [rng.randn(3).astype('<f4') for _ in range(5)]
    nested = {
        -3333: rng.randn(3, 5).astype('<f4'),
        0: [
            rng.randint(10, size=20).astype('<i8'),
            rng.randn(10).astype('<f4'),
        ],
    }

    # Non regression cases for https://github.com/joblib/joblib/issues/308.
    # Generated with joblib 0.9.4.
    c_contiguous = np.arange(100, dtype='<i8').reshape((10, 10))
    f_contiguous = np.asfortranarray(
        np.arange(100, dtype='<i8').reshape((10, 10)))
    non_contiguous = np.arange(100, dtype='<i8').reshape((10, 10))[:, :2]

    to_hash_list = [
        int_array,
        float_tuple,
        float_list,
        nested,
        c_contiguous,
        f_contiguous,
        non_contiguous,
    ]

    # These expected results have been generated with joblib 0.9.0
    expected_dict = {
        'py2': [
            '80f2387e7752abbda2658aafed49e086',
            '0d700f7f25ea670fd305e4cd93b0e8cd',
            '83a2bdf843e79e4b3e26521db73088b9',
            '63e0efd43c0a9ad92a07e8ce04338dd3',
            '03fef702946b602c852b8b4e60929914',
            '07074691e90d7098a85956367045c81e',
            'd264cf79f353aa7bbfa8349e3df72d8f',
        ],
        'py3': [
            '10a6afc379ca2708acfbaef0ab676eab',
            '988a7114f337f381393025911ebc823b',
            'c6809f4b97e35f2fa0ee8d653cbd025c',
            'b3ad17348e32728a7eb9cda1e7ede438',
            '927b3e6b0b6a037e8e035bda134e0b05',
            '108f6ee98e7db19ea2006ffd208f4bf1',
            'bd48ccaaff28e16e6badee81041b7180',
        ],
    }

    if PY3_OR_LATER:
        version_key = 'py3'
    else:
        version_key = 'py2'
    recorded_digests = expected_dict[version_key]

    for candidate, recorded in zip(to_hash_list, recorded_digests):
        assert hash(candidate) == recorded
def test_hashes_stay_the_same_with_numpy_objects():
    """Compare hashes of fixed numpy-based objects with recorded digests.

    We want to make sure that hashes don't change with joblib version.
    For end users, that would mean that they have to regenerate their
    cache from scratch, which potentially means lengthy recomputations.

    The checks are plain ``assert`` statements rather than yielded
    ``(assert_equal, actual, expected)`` tuples: yield-style test
    generators are a nose feature that pytest removed in version 4.0.
    """
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.random.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {
            -3333: rng.randn(3, 5).astype('<f4'),
            0: [
                rng.randint(10, size=20).astype('<i8'),
                rng.randn(10).astype('<f4'),
            ],
        },
        # Non regression cases for https://github.com/joblib/joblib/issues/308.
        # Generated with joblib 0.9.4.
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]

    # These expected results have been generated with joblib 0.9.0
    expected_dict = {
        'py2': [
            '80f2387e7752abbda2658aafed49e086',
            '0d700f7f25ea670fd305e4cd93b0e8cd',
            '83a2bdf843e79e4b3e26521db73088b9',
            '63e0efd43c0a9ad92a07e8ce04338dd3',
            '03fef702946b602c852b8b4e60929914',
            '07074691e90d7098a85956367045c81e',
            'd264cf79f353aa7bbfa8349e3df72d8f',
        ],
        'py3': [
            '10a6afc379ca2708acfbaef0ab676eab',
            '988a7114f337f381393025911ebc823b',
            'c6809f4b97e35f2fa0ee8d653cbd025c',
            'b3ad17348e32728a7eb9cda1e7ede438',
            '927b3e6b0b6a037e8e035bda134e0b05',
            '108f6ee98e7db19ea2006ffd208f4bf1',
            'bd48ccaaff28e16e6badee81041b7180',
        ],
    }

    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    expected_list = expected_dict[py_version_str]

    for to_hash, expected in zip(to_hash_list, expected_list):
        assert hash(to_hash) == expected
def test_non_contiguous_array_pickling(tmpdir):
    """Round-trip non-contiguous arrays through numpy_pickle dump/load.

    Each array is first checked to be neither C- nor Fortran-contiguous,
    then dumped to a file under ``tmpdir``, reloaded, and compared with
    the original.
    """
    filename = tmpdir.join('test.pkl').strpath

    # Array that triggers a contiguousness issue with nditer,
    # see https://github.com/joblib/joblib/pull/352 and see
    # https://github.com/joblib/joblib/pull/353
    nditer_issue_array = np.asfortranarray([[1, 2], [3, 4]])[1:]
    # Non contiguous array which works fine with nditer
    nditer_ok_array = np.ones((10, 50, 20), order='F')[:, :1, :]

    for array in (nditer_issue_array, nditer_ok_array):
        layout = array.flags
        assert not layout.c_contiguous
        assert not layout.f_contiguous

        numpy_pickle.dump(array, filename)
        round_tripped = numpy_pickle.load(filename)
        np.testing.assert_array_equal(round_tripped, array)
def test_non_contiguous_array_pickling():
    """Round-trip non-contiguous arrays through numpy_pickle dump/load.

    Each array is first checked to be neither C- nor Fortran-contiguous,
    then dumped to a scratch file, reloaded, and compared with the
    original. The scratch file is removed in a ``finally`` clause so a
    failing dump, load or comparison does not leave it behind.
    """
    filename = env['filename'] + str(random.randint(0, 1000))

    for array in [
            # Array that triggers a contiguousness issue with nditer,
            # see https://github.com/joblib/joblib/pull/352 and see
            # https://github.com/joblib/joblib/pull/353
            np.asfortranarray([[1, 2], [3, 4]])[1:],
            # Non contiguous array which works fine with nditer
            np.ones((10, 50, 20), order='F')[:, :1, :]]:
        assert not array.flags.c_contiguous
        assert not array.flags.f_contiguous
        try:
            numpy_pickle.dump(array, filename)
            array_reloaded = numpy_pickle.load(filename)
            np.testing.assert_array_equal(array_reloaded, array)
        finally:
            # Guarded so that a dump which never created the file does
            # not hide the original error behind a missing-file error.
            if os.path.exists(filename):
                os.remove(filename)
def test_non_contiguous_array_pickling():
    """Round-trip non-contiguous arrays through numpy_pickle dump/load.

    Each array is first checked to be neither C- nor Fortran-contiguous,
    then dumped to a scratch file, reloaded, and compared with the
    original.

    Plain ``assert`` statements replace the ``nose.tools`` helpers so the
    test does not depend on nose. The scratch file is removed in a
    ``finally`` clause so a failure does not leave it behind.
    """
    filename = env['filename'] + str(random.randint(0, 1000))

    arrays = [
        # Array that triggers a contiguousness issue with nditer,
        # see https://github.com/joblib/joblib/pull/352 and see
        # https://github.com/joblib/joblib/pull/353
        np.asfortranarray([[1, 2], [3, 4]])[1:],
        # Non contiguous array which works fine with nditer
        np.ones((10, 50, 20), order='F')[:, :1, :],
    ]

    for array in arrays:
        assert not array.flags.c_contiguous
        assert not array.flags.f_contiguous
        try:
            numpy_pickle.dump(array, filename)
            array_reloaded = numpy_pickle.load(filename)
            np.testing.assert_array_equal(array_reloaded, array)
        finally:
            # Guarded so that a dump which never created the file does
            # not hide the original error behind a missing-file error.
            if os.path.exists(filename):
                os.remove(filename)
def test_hashes_stay_the_same_with_numpy_objects():
    """Compare hashes of fixed numpy-based objects with recorded digests.

    Hashes must stay stable across joblib versions; otherwise end users
    would have to regenerate their cache from scratch, which potentially
    means lengthy recomputations.
    """
    prng = np.random.RandomState(42)

    def small_floats():
        # One draw of three normal samples, stored as little-endian f4.
        return prng.randn(3).astype('<f4')

    # Dtypes are explicit to avoid architecture-related differences.
    # 'f4' is used rather than 'f8' for float arrays because 'f8' arrays
    # generated by rng.random.randn don't seem to be bit-identical on
    # 32bit and 64bit machines.
    # List entries are evaluated top to bottom, which fixes the order in
    # which prng is consumed.
    to_hash_list = [
        prng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(small_floats() for _ in range(5)),
        [small_floats() for _ in range(5)],
        {
            -3333: prng.randn(3, 5).astype('<f4'),
            0: [
                prng.randint(10, size=20).astype('<i8'),
                prng.randn(10).astype('<f4'),
            ],
        },
        # Non regression cases for https://github.com/joblib/joblib/issues/308.
        # Generated with joblib 0.9.4.
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]

    # These expected results have been generated with joblib 0.9.0
    expected_hashes = [
        '10a6afc379ca2708acfbaef0ab676eab',
        '988a7114f337f381393025911ebc823b',
        'c6809f4b97e35f2fa0ee8d653cbd025c',
        'b3ad17348e32728a7eb9cda1e7ede438',
        '927b3e6b0b6a037e8e035bda134e0b05',
        '108f6ee98e7db19ea2006ffd208f4bf1',
        'bd48ccaaff28e16e6badee81041b7180',
    ]

    for candidate, recorded in zip(to_hash_list, expected_hashes):
        computed = hash(candidate)
        assert computed == recorded