Example #1
    # Excerpt from a unittest.TestCase method; assumes module-level imports
    # of copy, numpy as np, optional pandas as pd, and the mars utils module.
    def testTokenize(self):
        v = (1, 2.3, '456', u'789', b'101112', None, np.ndarray,
             [912, 'uvw'], np.arange(0, 10), np.int64)
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        v = {'a', 'xyz', 'uvw'}
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        v = dict(x='abcd', y=98765)
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        # pandas-related cases
        if pd is not None:
            df = pd.DataFrame([[utils.to_binary('测试'), utils.to_text('数据')]],
                              index=['a'], columns=['中文', 'data'])
            v = [df, df.index, df.columns, df['data']]
            self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))
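
The property every assertion above exercises is determinism: a value and its deep copy must produce the same token. A minimal sketch of the complementary property, assuming mars.utils.tokenize returns a stable hash string (the excerpt itself never asserts inequality for plain values):

from mars import utils

# equal values tokenize identically ...
assert utils.tokenize([1, 2, 3]) == utils.tokenize([1, 2, 3])
# ... while a changed element changes the token
assert utils.tokenize([1, 2, 3]) != utils.tokenize([1, 2, 4])
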
Example #2
    def testTokenize(self):
        v = (1, 2.3, '456', u'789', b'101112', None, np.ndarray, [912, 'uvw'],
             np.arange(0, 10), np.int64)
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        v = {'a', 'xyz', 'uvw'}
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        v = dict(x='abcd', y=98765)
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))
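
The set and dict cases rely on tokenize canonicalizing unordered collections before hashing: a deep copy may iterate in a different order, yet the tokens must match. A sketch of that assumption (mars is believed to sort dict items before hashing, as dask's tokenize does, but the excerpt does not show this):

from mars import utils

# same items, different insertion order: the tokens should still match
# if tokenize sorts dict items before hashing
assert utils.tokenize({'x': 1, 'y': 2}) == utils.tokenize({'y': 2, 'x': 1})
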
Example #3
    def testTokenize(self):
        import shutil
        import tempfile

        class TestEnum(Enum):
            VAL1 = 'val1'

        tempdir = tempfile.mkdtemp('mars_test_utils_')
        try:
            filename = os.path.join(tempdir, 'test_npa.dat')
            mmp_array = np.memmap(filename,
                                  dtype=float,
                                  mode='w+',
                                  shape=(3, 4))
            mmp_array[:] = np.random.random((3, 4)).astype(float)
            mmp_array.flush()
            del mmp_array

            mmp_array1 = np.memmap(filename, dtype=float, shape=(3, 4))
            mmp_array2 = np.memmap(filename, dtype=float, shape=(3, 4))

            try:
                v = [
                    1, 2.3, '456', u'789', b'101112', None, np.ndarray,
                    [912, 'uvw'],
                    np.arange(0, 10),
                    np.array(10),
                    np.array([b'\x01\x32\xff']), np.int64, TestEnum.VAL1
                ]
                copy_v = copy.deepcopy(v)
                self.assertEqual(
                    utils.tokenize(v + [mmp_array1], ext_data=1234),
                    utils.tokenize(copy_v + [mmp_array2], ext_data=1234))
            finally:
                del mmp_array1, mmp_array2
        finally:
            shutil.rmtree(tempdir)

        v = {'a', 'xyz', 'uvw'}
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        v = dict(x='abcd', y=98765)
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        v = dict(x=dict(a=1, b=[1, 2, 3]), y=12345)
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        # pandas-related cases
        if pd is not None:
            df = pd.DataFrame([[utils.to_binary('测试'),
                                utils.to_text('数据')]],
                              index=['a'],
                              columns=['中文', 'data'])
            v = [df, df.index, df.columns, df['data']]
            self.assertEqual(utils.tokenize(v),
                             utils.tokenize(copy.deepcopy(v)))

        non_tokenizable_cls = type('non_tokenizable_cls', (object,), {})
        with self.assertRaises(TypeError):
            utils.tokenize(non_tokenizable_cls())

        class CustomizedTokenize(object):
            def __mars_tokenize__(self):
                return id(type(self)), id(non_tokenizable_cls)

        self.assertEqual(utils.tokenize(CustomizedTokenize()),
                         utils.tokenize(CustomizedTokenize()))
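
The last two cases document the escape hatch for objects tokenize cannot handle: it raises TypeError unless the class defines __mars_tokenize__, whose return value is tokenized instead. A sketch of delegating to tokenizable state via that hook (Wrapper is a hypothetical class, not part of the test):

from mars import utils

class Wrapper(object):
    def __init__(self, payload):
        self.payload = payload

    def __mars_tokenize__(self):
        # tokenize the class name plus the payload instead of the instance
        return type(self).__name__, self.payload

assert utils.tokenize(Wrapper([1, 2])) == utils.tokenize(Wrapper([1, 2]))
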
Example #4
# Imports assumed by this pytest-style excerpt (not shown in the original
# snippet; module paths are the usual ones for these libraries):
import copy
import os
from enum import Enum
from functools import partial

import numpy as np
import pytest

from mars import utils

try:
    import pandas as pd
except ImportError:
    pd = None


def test_tokenize():
    import shutil
    import tempfile

    class TestEnum(Enum):
        VAL1 = 'val1'

    tempdir = tempfile.mkdtemp('mars_test_utils_')
    try:
        filename = os.path.join(tempdir, 'test_npa.dat')
        mmp_array = np.memmap(filename, dtype=float, mode='w+', shape=(3, 4))
        mmp_array[:] = np.random.random((3, 4)).astype(float)
        mmp_array.flush()
        del mmp_array

        mmp_array1 = np.memmap(filename, dtype=float, shape=(3, 4))
        mmp_array2 = np.memmap(filename, dtype=float, shape=(3, 4))

        try:
            v = [1, 2.3, '456', u'789', b'101112', 2147483649, None, np.ndarray,
                 [912, 'uvw'], np.arange(0, 10), np.array(10), np.array([b'\x01\x32\xff']),
                 np.int64, TestEnum.VAL1]
            copy_v = copy.deepcopy(v)
            assert (utils.tokenize(v + [mmp_array1], ext_data=1234)
                    == utils.tokenize(copy_v + [mmp_array2], ext_data=1234))
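            # ext_data mixes extra state into the hash; both calls must pass
            # the same ext_data for the tokens to match (assumption: keyword
            # arguments to tokenize participate in the token).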
        finally:
            del mmp_array1, mmp_array2
    finally:
        shutil.rmtree(tempdir)

    v = {'a', 'xyz', 'uvw'}
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    v = dict(x='abcd', y=98765)
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    v = dict(x=dict(a=1, b=[1, 2, 3]), y=12345)
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    # pandas-related cases
    if pd is not None:
        df = pd.DataFrame([[utils.to_binary('测试'), utils.to_text('数据')]],
                          index=['a'], columns=['中文', 'data'])
        v = [df, df.index, df.columns, df['data'], pd.Categorical(list('ABCD'))]
        assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    class NonTokenizableCls:
        def __getstate__(self):
            raise SystemError

    with pytest.raises(TypeError):
        utils.tokenize(NonTokenizableCls())

    class CustomizedTokenize(object):
        def __mars_tokenize__(self):
            return id(type(self)), id(NonTokenizableCls)

    assert utils.tokenize(CustomizedTokenize()) == utils.tokenize(CustomizedTokenize())

    v = lambda x: x + 1
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    def f(a, b):
        return np.add(a, b)
    assert utils.tokenize(f) == utils.tokenize(copy.deepcopy(f))

    partial_f = partial(f, 1, k=0)
    partial_f2 = partial(f, 1, k=1)
    assert utils.tokenize(partial_f) == utils.tokenize(copy.deepcopy(partial_f))
    assert utils.tokenize(partial_f) != utils.tokenize(partial_f2)
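
The callable cases rest on tokenize hashing a function's serialized form (its code object plus any bound arguments), which is why partial_f and partial_f2 differ only in k yet produce different tokens. A sketch of the same assumption for two behaviorally different functions:

import numpy as np
from mars import utils

def add(a, b):
    return np.add(a, b)

def sub(a, b):
    return np.subtract(a, b)

# different bytecode should serialize differently, so the tokens differ
assert utils.tokenize(add) != utils.tokenize(sub)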