def testTokenize(self):
    # Tokens must be stable across deep copies: equal content, equal token.
    v = (1, 2.3, '456', u'789', b'101112', None, np.ndarray,
         [912, 'uvw'], np.arange(0, 10), np.int64)
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    v = {'a', 'xyz', 'uvw'}
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    v = dict(x='abcd', y=98765)
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    # pandas-related objects, including non-ASCII labels and values
    if pd is not None:
        df = pd.DataFrame([[utils.to_binary('测试'), utils.to_text('数据')]],
                          index=['a'], columns=['中文', 'data'])
        v = [df, df.index, df.columns, df['data']]
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))
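# The assertions above all check a single invariant: a token must survive a
# deepcopy round trip. A minimal helper capturing that pattern could look like
# the sketch below; `_assert_token_deepcopy_stable` is hypothetical and not
# part of utils or of the original tests.
def _assert_token_deepcopy_stable(case, value):
    # deepcopy rebuilds every nested object, so equal tokens here imply the
    # token depends on content, not object identity.
    case.assertEqual(utils.tokenize(value), utils.tokenize(copy.deepcopy(value)))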
def testTokenize(self):
    import shutil
    import tempfile

    class TestEnum(Enum):
        VAL1 = 'val1'

    tempdir = tempfile.mkdtemp('mars_test_utils_')
    try:
        # Write a memmap file, then reopen it twice: the two views share the
        # same on-disk contents and should therefore tokenize identically.
        filename = os.path.join(tempdir, 'test_npa.dat')
        mmp_array = np.memmap(filename, dtype=float, mode='w+', shape=(3, 4))
        mmp_array[:] = np.random.random((3, 4)).astype(float)
        mmp_array.flush()
        del mmp_array

        mmp_array1 = np.memmap(filename, dtype=float, shape=(3, 4))
        mmp_array2 = np.memmap(filename, dtype=float, shape=(3, 4))
        try:
            v = [1, 2.3, '456', u'789', b'101112', None, np.ndarray,
                 [912, 'uvw'], np.arange(0, 10), np.array(10),
                 np.array([b'\x01\x32\xff']), np.int64, TestEnum.VAL1]
            copy_v = copy.deepcopy(v)
            # ext_data is passed identically on both sides so it does not
            # perturb the comparison.
            self.assertEqual(
                utils.tokenize(v + [mmp_array1], ext_data=1234),
                utils.tokenize(copy_v + [mmp_array2], ext_data=1234))
        finally:
            del mmp_array1, mmp_array2
    finally:
        shutil.rmtree(tempdir)

    v = {'a', 'xyz', 'uvw'}
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    v = dict(x='abcd', y=98765)
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    v = dict(x=dict(a=1, b=[1, 2, 3]), y=12345)
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    # pandas-related objects, including non-ASCII labels and values
    if pd is not None:
        df = pd.DataFrame([[utils.to_binary('测试'), utils.to_text('数据')]],
                          index=['a'], columns=['中文', 'data'])
        v = [df, df.index, df.columns, df['data']]
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    # Objects without any tokenization support raise TypeError ...
    non_tokenizable_cls = type('non_tokenizable_cls', (object,), {})
    with self.assertRaises(TypeError):
        utils.tokenize(non_tokenizable_cls())

    # ... unless they opt in through the __mars_tokenize__ protocol.
    class CustomizedTokenize(object):
        def __mars_tokenize__(self):
            return id(type(self)), id(non_tokenizable_cls)

    self.assertEqual(utils.tokenize(CustomizedTokenize()),
                     utils.tokenize(CustomizedTokenize()))
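# A minimal sketch of the content-based property the memmap check above relies
# on, using plain ndarrays instead of files; `_content_token_sketch` is
# hypothetical and not part of the original tests. Two independently created
# arrays with the same dtype, shape and contents should share a token, just as
# the deepcopy assertions imply.
def _content_token_sketch():
    a = np.arange(12, dtype=float).reshape(3, 4)
    b = np.arange(12, dtype=float).reshape(3, 4)
    assert utils.tokenize(a) == utils.tokenize(b)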
def test_tokenize():
    import shutil
    import tempfile

    class TestEnum(Enum):
        VAL1 = 'val1'

    tempdir = tempfile.mkdtemp('mars_test_utils_')
    try:
        # Write a memmap file, then reopen it twice: the two views share the
        # same on-disk contents and should therefore tokenize identically.
        filename = os.path.join(tempdir, 'test_npa.dat')
        mmp_array = np.memmap(filename, dtype=float, mode='w+', shape=(3, 4))
        mmp_array[:] = np.random.random((3, 4)).astype(float)
        mmp_array.flush()
        del mmp_array

        mmp_array1 = np.memmap(filename, dtype=float, shape=(3, 4))
        mmp_array2 = np.memmap(filename, dtype=float, shape=(3, 4))
        try:
            v = [1, 2.3, '456', u'789', b'101112', 2147483649, None,
                 np.ndarray, [912, 'uvw'], np.arange(0, 10), np.array(10),
                 np.array([b'\x01\x32\xff']), np.int64, TestEnum.VAL1]
            copy_v = copy.deepcopy(v)
            assert (utils.tokenize(v + [mmp_array1], ext_data=1234)
                    == utils.tokenize(copy_v + [mmp_array2], ext_data=1234))
        finally:
            del mmp_array1, mmp_array2
    finally:
        shutil.rmtree(tempdir)

    v = {'a', 'xyz', 'uvw'}
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    v = dict(x='abcd', y=98765)
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    v = dict(x=dict(a=1, b=[1, 2, 3]), y=12345)
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    # pandas-related objects, including non-ASCII labels and values
    if pd is not None:
        df = pd.DataFrame([[utils.to_binary('测试'), utils.to_text('数据')]],
                          index=['a'], columns=['中文', 'data'])
        v = [df, df.index, df.columns, df['data'], pd.Categorical(list('ABCD'))]
        assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    # An object whose state cannot be extracted raises TypeError ...
    class NonTokenizableCls:
        def __getstate__(self):
            raise SystemError

    with pytest.raises(TypeError):
        utils.tokenize(NonTokenizableCls())

    # ... unless it opts in through the __mars_tokenize__ protocol.
    class CustomizedTokenize(object):
        def __mars_tokenize__(self):
            return id(type(self)), id(NonTokenizableCls)

    assert utils.tokenize(CustomizedTokenize()) == utils.tokenize(CustomizedTokenize())

    # Plain functions, lambdas and functools.partial objects are tokenizable
    # as well; partials with different arguments get different tokens.
    v = lambda x: x + 1
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    def f(a, b):
        return np.add(a, b)

    assert utils.tokenize(f) == utils.tokenize(copy.deepcopy(f))

    partial_f = partial(f, 1, k=0)
    partial_f2 = partial(f, 1, k=1)
    assert utils.tokenize(partial_f) == utils.tokenize(copy.deepcopy(partial_f))
    assert utils.tokenize(partial_f) != utils.tokenize(partial_f2)
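# Sketch of the __mars_tokenize__ hook exercised above; the class and its
# fields are hypothetical. An object opts into tokenization by returning any
# deterministic, already-tokenizable value from __mars_tokenize__, which
# utils.tokenize then hashes in place of the object itself.
class PointSketch:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __mars_tokenize__(self):
        # Token by value: two equal points yield equal tokens.
        return type(self).__name__, self.x, self.y

# Usage: utils.tokenize(PointSketch(1, 2)) == utils.tokenize(PointSketch(1, 2))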