def run_script(script, data=None, n_workers=1, mode='exec', command_argv=None,
               retry_when_fail=False, odps_params=None, session=None, run_kwargs=None):
    from mars.remote.run_script import _extract_inputs

    # Accept either a file-like object or a path to the script file.
    if hasattr(script, 'read'):
        code = script.read()
    else:
        with open(os.path.abspath(script), 'rb') as f:
            code = f.read()

    inputs = _extract_inputs(data)
    op = RunScript(data=data, code=to_binary(code), world_size=n_workers,
                   retry_when_fail=retry_when_fail, command_args=command_argv)
    op.extra_params['project'] = odps_params['project']
    # Prefer the runtime endpoint injected via the environment over the configured one.
    op.extra_params['endpoint'] = os.environ.get(
        'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
    return op(inputs).execute(session=session,
                              **(run_kwargs or {})).fetch(session=session)
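# A minimal usage sketch for run_script above; everything here is an illustrative
# assumption (script name, input DataFrame, project and endpoint), not part of the
# original source. It shows how the parameters map onto the call: the script is
# read from disk, `data` supplies named inputs extracted by _extract_inputs, and
# odps_params provides the project/endpoint stored in op.extra_params.
def _example_run_script(feature_df):  # hypothetical helper, not in the original code
    return run_script(
        'train.py',                                  # path or file-like object
        data={'feature_df': feature_df},             # named inputs forwarded to the op
        n_workers=2,
        command_argv=['--epochs', '10'],
        odps_params={'project': 'my_project',
                     'endpoint': 'http://service.example.com/api'},
    )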
def testTokenize(self):
    import shutil
    import tempfile

    class TestEnum(Enum):
        VAL1 = 'val1'

    tempdir = tempfile.mkdtemp('mars_test_utils_')
    try:
        filename = os.path.join(tempdir, 'test_npa.dat')
        mmp_array = np.memmap(filename, dtype=float, mode='w+', shape=(3, 4))
        mmp_array[:] = np.random.random((3, 4)).astype(float)
        mmp_array.flush()
        del mmp_array

        mmp_array1 = np.memmap(filename, dtype=float, shape=(3, 4))
        mmp_array2 = np.memmap(filename, dtype=float, shape=(3, 4))
        try:
            v = [1, 2.3, '456', u'789', b'101112', None, np.ndarray,
                 [912, 'uvw'], np.arange(0, 10), np.array(10),
                 np.array([b'\x01\x32\xff']), np.int64, TestEnum.VAL1]
            copy_v = copy.deepcopy(v)
            self.assertEqual(utils.tokenize(v + [mmp_array1], ext_data=1234),
                             utils.tokenize(copy_v + [mmp_array2], ext_data=1234))
        finally:
            del mmp_array1, mmp_array2
    finally:
        shutil.rmtree(tempdir)

    v = {'a', 'xyz', 'uvw'}
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    v = dict(x='abcd', y=98765)
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    v = dict(x=dict(a=1, b=[1, 2, 3]), y=12345)
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    # pandas related
    if pd is not None:
        df = pd.DataFrame([[utils.to_binary('测试'), utils.to_text('数据')]],
                          index=['a'], columns=['中文', 'data'])
        v = [df, df.index, df.columns, df['data']]
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    non_tokenizable_cls = type('non_tokenizable_cls', (object,), {})
    with self.assertRaises(TypeError):
        utils.tokenize(non_tokenizable_cls())

    class CustomizedTokenize(object):
        def __mars_tokenize__(self):
            return id(type(self)), id(non_tokenizable_cls)

    self.assertEqual(utils.tokenize(CustomizedTokenize()),
                     utils.tokenize(CustomizedTokenize()))
def distribute(self, uid):
    # Anything that is not a string uid goes to the default process 0.
    if not isinstance(uid, six.string_types):
        return 0

    id_parts = uid.split(':')
    if len(id_parts) == 2:
        # Hash two-part uids into processes 1..n_process-1 so process 0 stays free.
        allocate_id = int(hashlib.md5(to_binary(uid)).hexdigest(), 16) \
            % (self.n_process - 1) + 1
        return allocate_id
    elif id_parts[0] == 'w':
        # 'w'-prefixed uids carry an explicit process index in their second part.
        return int(id_parts[1])
    else:
        return 0
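# Illustrative sketch (not from the original source) of the bucketing rule that
# distribute() applies to two-part uids: the md5 digest of the uid, taken modulo
# (n_process - 1) and shifted by one, always lands in processes 1..n_process-1,
# leaving process 0 for uids that do not match the pattern.
def _example_bucket(uid, n_process):  # hypothetical helper mirroring the hash branch
    import hashlib
    digest = int(hashlib.md5(uid.encode('utf-8')).hexdigest(), 16)
    return digest % (n_process - 1) + 1
# e.g. _example_bucket('s:some_session', 4) always falls in the range 1..3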
def testAttributeAsDict(self):
    node4 = Node4(a=to_binary('中文'),
                  b=np.random.randint(4, size=(3, 4)),
                  c=np.datetime64(datetime.datetime.now()),
                  d=np.timedelta64(datetime.timedelta(seconds=1234)),
                  e=np.dtype('int'),
                  f={'a': [True, False, False], 'd': [False, None]},
                  h=(1234, to_text('测试'), '属性', None,
                     np.datetime64('1066-10-13'), np.timedelta64(1, 'D'),
                     np.dtype([('x', 'i4'), ('y', 'f4')])),
                  i=(slice(10), slice(0, 2), None, slice(2, 0, -1)),
                  j=Node5(a='aa'),
                  k=[Node5(a='bb'), None],
                  l=Node6(b=3, nid=1))

    pbs = ProtobufSerializeProvider()

    serial = node4.serialize(pbs)
    d_node4 = Node4.deserialize(pbs, serial)

    self.assertEqual(node4.a, d_node4.a)
    self.assertTrue(np.array_equal(node4.b, d_node4.b))
    self.assertEqual(node4.c, d_node4.c)
    self.assertEqual(node4.d, d_node4.d)
    self.assertEqual(node4.e, d_node4.e)
    self.assertEqual(node4.f, d_node4.f)
    self.assertFalse(hasattr(d_node4, 'g'))
    self.assertEqual(node4.h, d_node4.h)
    self.assertEqual(node4.i, d_node4.i)
    self.assertEqual(node4.j.a, d_node4.j.a)
    self.assertEqual(node4.k[0].a, d_node4.k[0].a)
    self.assertIsNone(d_node4.k[1])
    self.assertIsInstance(d_node4.l, Node7)
    self.assertEqual(d_node4.l.b, 3)

    jss = JsonSerializeProvider()

    serial = node4.serialize(jss)
    serial = json.loads(json.dumps(serial), object_hook=OrderedDict)
    d_node4 = Node4.deserialize(jss, serial)

    self.assertEqual(node4.a, d_node4.a)
    self.assertTrue(np.array_equal(node4.b, d_node4.b))
    self.assertEqual(node4.c, d_node4.c)
    self.assertEqual(node4.d, d_node4.d)
    self.assertEqual(node4.e, d_node4.e)
    self.assertEqual(node4.f, d_node4.f)
    self.assertFalse(hasattr(d_node4, 'g'))
    self.assertEqual(node4.h, d_node4.h)
    self.assertEqual(node4.i, d_node4.i)
    self.assertEqual(node4.j.a, d_node4.j.a)
    self.assertEqual(node4.k[0].a, d_node4.k[0].a)
    self.assertIsNone(d_node4.k[1])
    self.assertIsInstance(d_node4.l, Node7)
    self.assertEqual(d_node4.l.b, 3)
def testTokenize(self):
    v = (1, 2.3, '456', u'789', b'101112', None, np.ndarray,
         [912, 'uvw'], np.arange(0, 10), np.int64)
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    v = {'a', 'xyz', 'uvw'}
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    v = dict(x='abcd', y=98765)
    self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

    # pandas related
    if pd is not None:
        df = pd.DataFrame([[utils.to_binary('测试'), utils.to_text('数据')]],
                          index=['a'], columns=['中文', 'data'])
        v = [df, df.index, df.columns, df['data']]
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))
def run_script(script, n_workers=1, mode='exec', command_argv=None,
               odps_params=None, session=None, run_kwargs=None):
    # Accept either a file-like object or a path to the script file.
    if hasattr(script, 'read'):
        code = script.read()
    else:
        with open(os.path.abspath(script), 'rb') as f:
            code = f.read()

    if mode not in ['exec', 'spawn']:
        raise TypeError('Unsupported mode {}'.format(mode))

    op = RunScript(code=to_binary(code), mode=mode, world_size=n_workers,
                   command_args=command_argv)
    op.extra_params['project'] = odps_params['project']
    op.extra_params['endpoint'] = odps_params['endpoint']
    return op().execute(session=session,
                        **(run_kwargs or {})).fetch(session=session)
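# A minimal usage sketch for the simpler run_script variant above; the script name,
# project and endpoint are illustrative assumptions, not part of the original source.
# Only 'exec' and 'spawn' pass the mode check; any other value raises TypeError.
def _example_run_script_spawn():  # hypothetical helper, not in the original code
    return run_script(
        'prepare.py',
        n_workers=1,
        mode='spawn',
        command_argv=['--input', 'odps://my_project/tables/src'],
        odps_params={'project': 'my_project',
                     'endpoint': 'http://service.example.com/api'},
    )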
def testStringConversion(self):
    s = None
    self.assertIsNone(utils.to_binary(s))
    self.assertIsNone(utils.to_str(s))
    self.assertIsNone(utils.to_text(s))

    s = 'abcdefg'
    self.assertIsInstance(utils.to_binary(s), bytes)
    self.assertEqual(utils.to_binary(s), b'abcdefg')
    self.assertIsInstance(utils.to_str(s), str)
    self.assertEqual(utils.to_str(s), 'abcdefg')
    self.assertIsInstance(utils.to_text(s), unicode)
    self.assertEqual(utils.to_text(s), u'abcdefg')

    ustr = type('ustr', (str,), {})
    self.assertIsInstance(utils.to_str(ustr(s)), str)
    self.assertEqual(utils.to_str(ustr(s)), 'abcdefg')

    s = b'abcdefg'
    self.assertIsInstance(utils.to_binary(s), bytes)
    self.assertEqual(utils.to_binary(s), b'abcdefg')
    self.assertIsInstance(utils.to_str(s), str)
    self.assertEqual(utils.to_str(s), 'abcdefg')
    self.assertIsInstance(utils.to_text(s), unicode)
    self.assertEqual(utils.to_text(s), u'abcdefg')

    ubytes = type('ubytes', (bytes,), {})
    self.assertIsInstance(utils.to_binary(ubytes(s)), bytes)
    self.assertEqual(utils.to_binary(ubytes(s)), b'abcdefg')

    s = u'abcdefg'
    self.assertIsInstance(utils.to_binary(s), bytes)
    self.assertEqual(utils.to_binary(s), b'abcdefg')
    self.assertIsInstance(utils.to_str(s), str)
    self.assertEqual(utils.to_str(s), 'abcdefg')
    self.assertIsInstance(utils.to_text(s), unicode)
    self.assertEqual(utils.to_text(s), u'abcdefg')

    uunicode = type('uunicode', (unicode,), {})
    self.assertIsInstance(utils.to_text(uunicode(s)), unicode)
    self.assertEqual(utils.to_text(uunicode(s)), u'abcdefg')

    with self.assertRaises(TypeError):
        utils.to_binary(utils)
    with self.assertRaises(TypeError):
        utils.to_str(utils)
    with self.assertRaises(TypeError):
        utils.to_text(utils)
def testAttributeAsDict(self):
    other_data = {}
    if pd:
        df = pd.DataFrame(
            {
                'a': [1, 2, 3],
                'b': [to_text('测试'), to_binary('属性'), 'c']
            },
            index=[[0, 0, 1], ['测试', '属性', '测试']])
        other_data['m'] = df.columns
        other_data['mm'] = df.index
        other_data['n'] = df['b']
        other_data['o'] = df
        other_data['p'] = [df.columns, df.index, df['a'], df]

    node4 = Node4(a=to_binary('中文'),
                  b=np.random.randint(4, size=(3, 4)),
                  c=np.datetime64(datetime.datetime.now()),
                  d=np.timedelta64(datetime.timedelta(seconds=1234)),
                  e=np.dtype('int'),
                  f={
                      'a': [True, False, False],
                      'd': [False, None]
                  },
                  h=(1234, to_text('测试'), '属性', None,
                     np.datetime64('1066-10-13'), np.timedelta64(1, 'D'),
                     np.dtype([('x', 'i4'), ('y', 'f4')])),
                  i=(slice(10), slice(0, 2), None, slice(2, 0, -1)),
                  j=Node5(a='aa'),
                  k=[Node5(a='bb'), None],
                  l=Node6(b=3, nid=1),
                  **other_data)

    pbs = ProtobufSerializeProvider()

    serial = node4.serialize(pbs)
    d_node4 = Node4.deserialize(pbs, serial)

    self.assertEqual(node4.a, d_node4.a)
    np.testing.assert_array_equal(node4.b, d_node4.b)
    self.assertEqual(node4.c, d_node4.c)
    self.assertEqual(node4.d, d_node4.d)
    self.assertEqual(node4.e, d_node4.e)
    self.assertEqual(node4.f, d_node4.f)
    self.assertFalse(hasattr(d_node4, 'g'))
    self.assertEqual(node4.h, d_node4.h)
    self.assertEqual(node4.i, d_node4.i)
    self.assertEqual(node4.j.a, d_node4.j.a)
    self.assertEqual(node4.k[0].a, d_node4.k[0].a)
    self.assertIsNone(d_node4.k[1])
    self.assertIsInstance(d_node4.l, Node7)
    self.assertEqual(d_node4.l.b, 3)
    if pd:
        pd.testing.assert_index_equal(node4.m, d_node4.m)
        pd.testing.assert_index_equal(node4.mm, d_node4.mm)
        pd.testing.assert_series_equal(node4.n, d_node4.n)
        pd.testing.assert_frame_equal(node4.o, d_node4.o)
        pd.testing.assert_index_equal(node4.p[0], d_node4.p[0])
        pd.testing.assert_index_equal(node4.p[1], d_node4.p[1])
        pd.testing.assert_series_equal(node4.p[2], d_node4.p[2])
        pd.testing.assert_frame_equal(node4.p[3], d_node4.p[3])

    jss = JsonSerializeProvider()

    serial = node4.serialize(jss)
    serial = json.loads(json.dumps(serial), object_hook=OrderedDict)
    d_node4 = Node4.deserialize(jss, serial)

    self.assertEqual(node4.a, d_node4.a)
    np.testing.assert_array_equal(node4.b, d_node4.b)
    self.assertEqual(node4.c, d_node4.c)
    self.assertEqual(node4.d, d_node4.d)
    self.assertEqual(node4.e, d_node4.e)
    self.assertEqual(node4.f, d_node4.f)
    self.assertFalse(hasattr(d_node4, 'g'))
    self.assertEqual(node4.h, d_node4.h)
    self.assertEqual(node4.i, d_node4.i)
    self.assertEqual(node4.j.a, d_node4.j.a)
    self.assertEqual(node4.k[0].a, d_node4.k[0].a)
    self.assertIsNone(d_node4.k[1])
    self.assertIsInstance(d_node4.l, Node7)
    self.assertEqual(d_node4.l.b, 3)
    if pd:
        pd.testing.assert_index_equal(node4.m, d_node4.m)
        pd.testing.assert_index_equal(node4.mm, d_node4.mm)
        pd.testing.assert_series_equal(node4.n, d_node4.n)
        pd.testing.assert_frame_equal(node4.o, d_node4.o)
        pd.testing.assert_index_equal(node4.p[0], d_node4.p[0])
        pd.testing.assert_index_equal(node4.p[1], d_node4.p[1])
        pd.testing.assert_series_equal(node4.p[2], d_node4.p[2])
        pd.testing.assert_frame_equal(node4.p[3], d_node4.p[3])
def test_tokenize():
    import shutil
    import tempfile

    class TestEnum(Enum):
        VAL1 = 'val1'

    tempdir = tempfile.mkdtemp('mars_test_utils_')
    try:
        filename = os.path.join(tempdir, 'test_npa.dat')
        mmp_array = np.memmap(filename, dtype=float, mode='w+', shape=(3, 4))
        mmp_array[:] = np.random.random((3, 4)).astype(float)
        mmp_array.flush()
        del mmp_array

        mmp_array1 = np.memmap(filename, dtype=float, shape=(3, 4))
        mmp_array2 = np.memmap(filename, dtype=float, shape=(3, 4))
        try:
            v = [1, 2.3, '456', u'789', b'101112', 2147483649, None, np.ndarray,
                 [912, 'uvw'], np.arange(0, 10), np.array(10),
                 np.array([b'\x01\x32\xff']), np.int64, TestEnum.VAL1]
            copy_v = copy.deepcopy(v)
            assert (utils.tokenize(v + [mmp_array1], ext_data=1234)
                    == utils.tokenize(copy_v + [mmp_array2], ext_data=1234))
        finally:
            del mmp_array1, mmp_array2
    finally:
        shutil.rmtree(tempdir)

    v = {'a', 'xyz', 'uvw'}
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    v = dict(x='abcd', y=98765)
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    v = dict(x=dict(a=1, b=[1, 2, 3]), y=12345)
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    # pandas related
    if pd is not None:
        df = pd.DataFrame([[utils.to_binary('测试'), utils.to_text('数据')]],
                          index=['a'], columns=['中文', 'data'])
        v = [df, df.index, df.columns, df['data'], pd.Categorical(list('ABCD'))]
        assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    class NonTokenizableCls:
        def __getstate__(self):
            raise SystemError

    with pytest.raises(TypeError):
        utils.tokenize(NonTokenizableCls())

    class CustomizedTokenize(object):
        def __mars_tokenize__(self):
            return id(type(self)), id(NonTokenizableCls)

    assert utils.tokenize(CustomizedTokenize()) == utils.tokenize(CustomizedTokenize())

    v = lambda x: x + 1
    assert utils.tokenize(v) == utils.tokenize(copy.deepcopy(v))

    def f(a, b):
        return np.add(a, b)

    assert utils.tokenize(f) == utils.tokenize(copy.deepcopy(f))

    partial_f = partial(f, 1, k=0)
    partial_f2 = partial(f, 1, k=1)
    assert utils.tokenize(partial_f) == utils.tokenize(copy.deepcopy(partial_f))
    assert utils.tokenize(partial_f) != utils.tokenize(partial_f2)
def test_string_conversion():
    s = None
    assert utils.to_binary(s) is None
    assert utils.to_str(s) is None
    assert utils.to_text(s) is None

    s = 'abcdefg'
    assert isinstance(utils.to_binary(s), bytes)
    assert utils.to_binary(s) == b'abcdefg'
    assert isinstance(utils.to_str(s), str)
    assert utils.to_str(s) == 'abcdefg'
    assert isinstance(utils.to_text(s), str)
    assert utils.to_text(s) == u'abcdefg'

    ustr = type('ustr', (str,), {})
    assert isinstance(utils.to_str(ustr(s)), str)
    assert utils.to_str(ustr(s)) == 'abcdefg'

    s = b'abcdefg'
    assert isinstance(utils.to_binary(s), bytes)
    assert utils.to_binary(s) == b'abcdefg'
    assert isinstance(utils.to_str(s), str)
    assert utils.to_str(s) == 'abcdefg'
    assert isinstance(utils.to_text(s), str)
    assert utils.to_text(s) == u'abcdefg'

    ubytes = type('ubytes', (bytes,), {})
    assert isinstance(utils.to_binary(ubytes(s)), bytes)
    assert utils.to_binary(ubytes(s)) == b'abcdefg'

    s = u'abcdefg'
    assert isinstance(utils.to_binary(s), bytes)
    assert utils.to_binary(s) == b'abcdefg'
    assert isinstance(utils.to_str(s), str)
    assert utils.to_str(s) == 'abcdefg'
    assert isinstance(utils.to_text(s), str)
    assert utils.to_text(s) == u'abcdefg'

    uunicode = type('uunicode', (str,), {})
    assert isinstance(utils.to_text(uunicode(s)), str)
    assert utils.to_text(uunicode(s)) == u'abcdefg'

    with pytest.raises(TypeError):
        utils.to_binary(utils)
    with pytest.raises(TypeError):
        utils.to_str(utils)
    with pytest.raises(TypeError):
        utils.to_text(utils)
def testAttributeAsDict(self):
    other_data = {}
    if pd:
        df = pd.DataFrame(
            {
                'a': [1, 2, 3],
                'b': [to_text('测试'), to_binary('属性'), 'c']
            },
            index=[[0, 0, 1], ['测试', '属性', '测试']])
        other_data['w'] = df.columns
        other_data['ww'] = df.index
        other_data['x'] = df['b']
        other_data['y'] = df
        other_data['z'] = [df.columns, df.index, df['a'], df]

    node4 = Node4(a=to_binary('中文'),
                  b=np.random.randint(4, size=(3, 4)),
                  c=np.datetime64(datetime.datetime.now()),
                  d=np.timedelta64(datetime.timedelta(seconds=1234)),
                  e=np.dtype('int'),
                  f={
                      'a': [True, False, False],
                      'd': [False, None]
                  },
                  h=(1234, to_text('测试'), '属性', None,
                     np.datetime64('1066-10-13'), np.timedelta64(1, 'D'),
                     np.dtype([('x', 'i4'), ('y', 'f4')])),
                  i=(slice(10), slice(0, 2), None, slice(2, 0, -1),
                     slice('a', 'b'),
                     slice(datetime.datetime.now(), datetime.datetime.now())),
                  j=Node5(a='aa', b=slice(1, 100, 3)),
                  k=[Node5(a='bb', b=slice(200, -1, -4)), None],
                  l=Node6(b=3, nid=1),
                  m=Node6(b=4, nid=2),
                  n=[Node5(a='cc', b=slice(100, -2, -5)), None],
                  **other_data)

    pbs = ProtobufSerializeProvider()

    serial = node4.serialize(pbs)
    d_node4 = Node4.deserialize(pbs, serial)

    self.assertEqual(node4.a, d_node4.a)
    np.testing.assert_array_equal(node4.b, d_node4.b)
    self.assertEqual(node4.c, d_node4.c)
    self.assertEqual(node4.d, d_node4.d)
    self.assertEqual(node4.e, d_node4.e)
    self.assertEqual(node4.f, d_node4.f)
    self.assertFalse(hasattr(d_node4, 'g'))
    self.assertEqual(node4.h, d_node4.h)
    self.assertEqual(node4.i, d_node4.i)
    self.assertEqual(node4.j.a, d_node4.j.a)
    self.assertEqual(node4.j.b, d_node4.j.b)
    self.assertEqual(node4.k[0].a, d_node4.k[0].a)
    self.assertEqual(node4.k[0].b, d_node4.k[0].b)
    self.assertIsNone(d_node4.k[1])
    self.assertIsInstance(d_node4.l, Node7)
    self.assertEqual(node4.l.b, d_node4.l.b)
    self.assertIsInstance(d_node4.m, Node7)
    self.assertEqual(node4.m.b, d_node4.m.b)
    self.assertIsInstance(d_node4.n[0], Node5)
    self.assertEqual(node4.n[0].a, d_node4.n[0].a)
    self.assertEqual(node4.n[0].b, d_node4.n[0].b)
    self.assertIsNone(d_node4.n[1])
    if pd:
        pd.testing.assert_index_equal(node4.w, d_node4.w)
        pd.testing.assert_index_equal(node4.ww, d_node4.ww)
        pd.testing.assert_series_equal(node4.x, d_node4.x)
        pd.testing.assert_frame_equal(node4.y, d_node4.y)
        pd.testing.assert_index_equal(node4.z[0], d_node4.z[0])
        pd.testing.assert_index_equal(node4.z[1], d_node4.z[1])
        pd.testing.assert_series_equal(node4.z[2], d_node4.z[2])
        pd.testing.assert_frame_equal(node4.z[3], d_node4.z[3])

    with self.assertRaises(TypeError):
        node42 = Node4(j=Node6())
        node42.serialize(pbs)

    with self.assertRaises(TypeError):
        node6 = Node6(nid=0)
        node7 = Node7(nid=1, r=node6)
        node7.serialize(pbs)

    with self.assertRaises(TypeError):
        node6 = Node6(nid=0)
        node7 = Node7(nid=1, rl=[node6])
        node7.serialize(pbs)

    node61 = Node6(nid=0)
    node62 = Node6(nid=0, r=node61)
    serial = node62.serialize(pbs)
    d_node62 = Node6.deserialize(pbs, serial)
    self.assertIsInstance(d_node62.r, Node6)

    node61 = Node6(nid=0)
    node62 = Node6(nid=0, rl=[node61])
    serial = node62.serialize(pbs)
    d_node62 = Node6.deserialize(pbs, serial)
    self.assertIsInstance(d_node62.rl[0], Node6)

    jss = JsonSerializeProvider()

    serial = node4.serialize(jss)
    serial = json.loads(json.dumps(serial), object_hook=OrderedDict)
    d_node4 = Node4.deserialize(jss, serial)

    self.assertEqual(node4.a, d_node4.a)
    np.testing.assert_array_equal(node4.b, d_node4.b)
    self.assertEqual(node4.c, d_node4.c)
    self.assertEqual(node4.d, d_node4.d)
    self.assertEqual(node4.e, d_node4.e)
    self.assertEqual(node4.f, d_node4.f)
    self.assertFalse(hasattr(d_node4, 'g'))
    self.assertEqual(node4.h, d_node4.h)
    self.assertEqual(node4.i, d_node4.i)
    self.assertEqual(node4.j.a, d_node4.j.a)
    self.assertEqual(node4.k[0].a, d_node4.k[0].a)
    self.assertIsNone(d_node4.k[1])
    self.assertIsInstance(d_node4.l, Node7)
    self.assertEqual(node4.l.b, d_node4.l.b)
    self.assertIsInstance(d_node4.m, Node7)
    self.assertEqual(node4.m.b, d_node4.m.b)
    self.assertIsInstance(d_node4.n[0], Node5)
    self.assertEqual(node4.n[0].a, d_node4.n[0].a)
    self.assertEqual(node4.n[0].b, d_node4.n[0].b)
    self.assertIsNone(d_node4.n[1])
    if pd:
        pd.testing.assert_index_equal(node4.w, d_node4.w)
        pd.testing.assert_index_equal(node4.ww, d_node4.ww)
        pd.testing.assert_series_equal(node4.x, d_node4.x)
        pd.testing.assert_frame_equal(node4.y, d_node4.y)
        pd.testing.assert_index_equal(node4.z[0], d_node4.z[0])
        pd.testing.assert_index_equal(node4.z[1], d_node4.z[1])
        pd.testing.assert_series_equal(node4.z[2], d_node4.z[2])
        pd.testing.assert_frame_equal(node4.z[3], d_node4.z[3])

    with self.assertRaises(TypeError):
        node42 = Node4(j=Node6())
        node42.serialize(jss)

    with self.assertRaises(TypeError):
        node6 = Node6()
        node7 = Node7(r=node6)
        node7.serialize(jss)

    with self.assertRaises(TypeError):
        node6 = Node6(nid=0)
        node7 = Node7(nid=1, rl=[node6])
        node7.serialize(jss)

    node61 = Node6()
    node62 = Node6(r=node61)
    serial = node62.serialize(jss)
    d_node62 = Node6.deserialize(jss, serial)
    self.assertIsInstance(d_node62.r, Node6)

    node61 = Node6(nid=0)
    node62 = Node6(nid=0, rl=[node61])
    serial = node62.serialize(jss)
    d_node62 = Node6.deserialize(jss, serial)
    self.assertIsInstance(d_node62.rl[0], Node6)