def testRunWithoutFetch(self):
    """``run(..., fetch=False)`` returns ``None``; a later ``execute`` still yields the data."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster:
        sess = cluster.session
        tensor = mt.ones((10, 20)) + 1

        # Running without fetching must not hand back any data.
        outcome = sess.run(tensor, fetch=False)
        self.assertIsNone(outcome)

        fetched = tensor.execute(session=sess)
        np.testing.assert_array_equal(fetched, np.ones((10, 20)) + 1)
def testExecutableTuple(self):
    """Executing an SVD result under a default web session gives factors that rebuild the input."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     web=True) as cluster:
        web_url = 'http://' + cluster._web_endpoint
        with new_session(web_url).as_default():
            ones = mt.ones((20, 10), chunk_size=10)
            left, singular, right = (mt.linalg.svd(ones)).execute()

            # U * diag(s) * V should reproduce the all-ones matrix.
            rebuilt = left.dot(np.diag(singular).dot(right))
            np.testing.assert_allclose(rebuilt, np.ones((20, 10)))
def testGraphFail(self):
    """Running a tensor built from ``SerializeMustFailOperand`` raises ``SystemError``."""
    failing_op = SerializeMustFailOperand(f=3)
    doomed = failing_op.new_tensor(None, (3, 3))

    # A combined ``with`` is equivalent to nesting the two context managers.
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster, \
            self.assertRaises(SystemError):
        cluster.session.run(doomed)
def testGraphFail(self, *_):
    """Running a tensor built from ``SerializeMustFailOperand`` raises ``ExecutionFailed``."""
    failing_op = SerializeMustFailOperand(f=3)
    doomed = failing_op.new_tensor(None, (3, 3))

    # A combined ``with`` is equivalent to nesting the two context managers.
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster, \
            self.assertRaises(ExecutionFailed):
        cluster.session.run(doomed, timeout=_exec_timeout)
def testFetchDataFrameSlices(self, *_):
    """Fetch ``iloc`` slices of an executed DataFrame via cluster and web sessions.

    For each session the DataFrame is run once, then every slice fetched
    through the session is compared with the same slice of the full result.
    The ``repr`` of an executed DataFrame / Series is also compared with the
    ``repr`` of the pandas object it was built from.
    """
    # (indexer, comparison) pairs, checked in order; the scalar row index
    # produces a Series and therefore needs the Series comparison.
    slice_checks = [
        (slice(None, 2), pd.testing.assert_frame_equal),
        ((slice(2, 8), slice(2, 8)), pd.testing.assert_frame_equal),
        ((slice(None), slice(2, None)), pd.testing.assert_frame_equal),
        ((slice(None), slice(-5, None)), pd.testing.assert_frame_equal),
        (4, pd.testing.assert_series_equal),
        (slice(6, 9), pd.testing.assert_frame_equal),
    ]

    def check_slices(sess, frame):
        # Run once, then compare every fetched slice with the full result.
        full = sess.run(frame)
        for indexer, assert_equal in slice_checks:
            fetched = sess.fetch(frame.iloc[indexer])
            assert_equal(full.iloc[indexer], fetched)

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        session = cluster.session

        a = mt.random.rand(10, 10, chunk_size=3)
        df = md.DataFrame(a)
        check_slices(session, df)

        # test repr
        pdf = pd.DataFrame(np.random.randint(1000, size=(80, 10)))
        df2 = md.DataFrame(pdf, chunk_size=41)
        self.assertEqual(repr(df2.execute(session=session)), repr(pdf))

        ps = pdf[0]
        s = md.Series(ps, chunk_size=41)
        self.assertEqual(repr(s.execute(session=session)), repr(ps))

        # Use the session as a context manager so it is closed instead of
        # being left open until the cluster is torn down.
        with new_session('http://' + cluster._web_endpoint) as web_session:
            check_slices(web_session, df)
def testIndexTensorExecute(self, *_):
    """Index assignment, slicing and fancy indexing return the expected values on a cluster."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster:
        main_session = cluster.session

        # Scalar assignment into a block: the first split and the block agree.
        arr = mt.random.rand(10, 5)
        block = slice(0, 5), slice(0, 5)
        arr[block] = 2
        parts = mt.split(arr, 2)
        first, second = main_session.run(parts[0], arr[block],
                                         timeout=_exec_timeout)
        np.testing.assert_array_equal(first, second)
        np.testing.assert_array_equal(first, np.ones((5, 5)) * 2)

        # Tensor assignment into a block.
        with new_session(cluster.endpoint) as session2:
            arr = mt.random.rand(10, 5)
            block = slice(0, 5), slice(0, 5)
            arr[block] = mt.ones((5, 5)) * 2
            fetched = session2.run(arr[block], timeout=_exec_timeout)
            np.testing.assert_array_equal(fetched, np.ones((5, 5)) * 2)

        # Two slices submitted together with their source tensor.
        with new_session(cluster.endpoint) as session3:
            arr = mt.random.rand(100, 5)
            head = arr[:10]
            mid = arr[10:20]
            first, second, whole = session3.run(head, mid, arr,
                                                timeout=_exec_timeout)
            np.testing.assert_array_equal(first, whole[:10])
            np.testing.assert_array_equal(second, whole[10:20])

        # Consecutive assignments into different row ranges.
        with new_session(cluster.endpoint) as session4:
            arr = mt.random.rand(100, 5)
            arr[:10] = mt.ones((10, 5))
            arr[10:20] = 2
            fetched = session4.run(arr, timeout=_exec_timeout)
            np.testing.assert_array_equal(fetched[:10], np.ones((10, 5)))
            np.testing.assert_array_equal(fetched[10:20], np.ones((10, 5)) * 2)

        # Fancy indexing with tensor indices, run with compose=False.
        with new_session(cluster.endpoint) as session5:
            raw = np.random.rand(10, 10)
            arr = mt.tensor(raw, chunk_size=(5, 4))
            picked = arr[arr.argmin(axis=1), mt.tensor(np.arange(10))]
            fetched = session5.run(picked, timeout=_exec_timeout, compose=False)
            np.testing.assert_array_equal(
                fetched, raw[raw.argmin(axis=1), np.arange(10)])
def testRunWithoutCompose(self, *_):
    """The same expression gives equal results with the default run and with ``compose=False``."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=False) as cluster:
        sess = cluster.session

        composed_expr = (mt.ones((10, 10), chunk_size=3) + 1) * 2
        composed = sess.run(composed_expr, timeout=_exec_timeout)

        plain_expr = (mt.ones((10, 10), chunk_size=4) + 1) * 2
        plain = sess.run(plain_expr, compose=False, timeout=_exec_timeout)

        np.testing.assert_array_equal(composed, plain)
def testMutableTensorCtor(self):
    """Create, look up, write and seal a named mutable tensor under local, cluster and web sessions."""
    def check_with_session(session):
        from mars.tensor.core import mutable_tensor

        # Looking up a name that has not been created must fail.
        with self.assertRaises(ValueError):
            mutable_tensor("test")

        # With shape/dtype the tensor is created; with the bare name it is fetched.
        created = mutable_tensor("test", (4, 5), dtype='int32', chunk_size=3)
        looked_up = mutable_tensor("test")

        # Both handles describe the same tensor.
        self.assertEqual(created.shape, looked_up.shape)
        self.assertEqual(created.dtype, looked_up.dtype)

        # Only a LocalSession is expected to hand back the very same instance.
        if isinstance(session._sess, LocalSession):
            self.assertIs(created, looked_up)
        else:
            self.assertIsNot(created, looked_up)

        expected = np.zeros((4, 5), dtype='int32')
        expected[1:4, 2] = 8
        expected[2:4] = np.arange(10).reshape(2, 5)

        looked_up[1:4, 2] = 8
        looked_up[2:4] = np.arange(10).reshape(2, 5)

        # All writes went through `looked_up`, so that is the handle to seal:
        # each handle keeps its own local buffer, and sealing the other one
        # might not give the right result. Once sealed, a second seal fails.
        sealed = looked_up.seal()
        with self.assertRaises(ValueError):
            created.seal()

        # check value
        np.testing.assert_array_equal(session.fetch(sealed), expected)

    with new_session().as_default() as session:
        check_with_session(session)

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        session = cluster.session.as_default()
        check_with_session(session)

        web_url = 'http://' + cluster._web_endpoint
        with new_session(web_url).as_default() as web_session:
            check_with_session(web_session)
def testEagerMode(self):
    """Exercise eager mode with the cluster's default session and with a web session.

    Inside ``option_context({'eager_mode': True})`` tensors are compared
    directly with numpy data; outside it, ``fetch`` on a freshly built tensor
    is expected to raise ``ValueError`` and ``execute`` is needed.
    """
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        self.assertIsInstance(Session.default_or_local()._sess,
                              LocalClusterSession)

        with option_context({'eager_mode': True}):
            values = np.random.rand(10, 10)

            tensor = mt.tensor(values, chunk_size=3)
            np.testing.assert_array_equal(tensor, values)

            shifted = tensor + 1
            shifted_expected = values + 1
            np.testing.assert_array_equal(shifted, shifted_expected)

            squared = shifted.dot(shifted)
            squared_expected = shifted_expected.dot(shifted_expected)
            np.testing.assert_array_almost_equal(squared, squared_expected)

        # Outside the eager-mode context nothing has been run yet.
        tensor = mt.ones((10, 10), chunk_size=3)
        with self.assertRaises(ValueError):
            tensor.fetch()

        product = tensor.dot(tensor)
        np.testing.assert_array_equal(product.execute(), np.ones((10, 10)) * 10)

        web_url = 'http://' + cluster._web_endpoint
        with new_session(web_url).as_default():
            self.assertIsInstance(Session.default_or_local()._sess, WebSession)

            with option_context({'eager_mode': True}):
                values = np.random.rand(10, 10)

                tensor = mt.tensor(values, chunk_size=3)
                np.testing.assert_array_equal(tensor, values)

                shifted = tensor + 1
                shifted_expected = values + 1
                np.testing.assert_array_equal(shifted, shifted_expected)

                squared = shifted.dot(shifted)
                squared_expected = shifted_expected.dot(shifted_expected)
                np.testing.assert_array_almost_equal(squared, squared_expected)

            tensor = mt.ones((10, 10), chunk_size=3)
            with self.assertRaises(ValueError):
                tensor.fetch()

            product = tensor.dot(tensor)
            np.testing.assert_array_equal(product.execute(),
                                          np.ones((10, 10)) * 10)
def testCudaCluster(self, *_):
    """A DataFrame moved with ``to_gpu`` and run on the cluster equals the pandas source."""
    from mars.dataframe.datasource.dataframe import from_pandas as from_pandas_df

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        sess = cluster.session

        # Descending index 20..1 over a 20x30 random frame.
        source = pd.DataFrame(np.random.rand(20, 30),
                              index=np.arange(20, 0, -1))
        mars_frame = from_pandas_df(source, chunk_size=(13, 21))
        gpu_frame = mars_frame.to_gpu()

        fetched = sess.run(gpu_frame)
        pd.testing.assert_frame_equal(source, fetched)
def testMultipleOutputTensorExecute(self, *_):
    """Run multi-output SVD graphs in full, fused, and with only part of the outputs submitted."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster:
        sess = cluster.session

        # Submit the input together with all three SVD outputs.
        data = mt.random.rand(20, 5, chunk_size=5)
        expr = mt.linalg.svd(data)
        out = sess.run((data, ) + expr, timeout=_exec_timeout)

        U, s, V = out[1:]
        np.testing.assert_allclose(out[0], U.dot(np.diag(s).dot(V)))

        raw = np.random.rand(20, 5)

        # to test the fuse, the graph should be fused
        data = mt.array(raw)
        U, s, V = mt.linalg.svd(data)
        expr = U.dot(mt.diag(s).dot(V))
        out = expr.execute()
        np.testing.assert_allclose(raw, out)

        # test submit part of svd outputs
        data = mt.array(raw)
        U, s, V = mt.linalg.svd(data)

        with new_session(cluster.endpoint) as session2:
            U_got, s_got = session2.run(U, s, timeout=_exec_timeout)
            U_want, s_want, _ = np.linalg.svd(raw, full_matrices=False)

            np.testing.assert_allclose(U_got, U_want)
            np.testing.assert_allclose(s_got, s_want)

        with new_session(cluster.endpoint) as session2:
            U_got, s_got = session2.run(U + 1, s + 1, timeout=_exec_timeout)
            U_want, s_want, _ = np.linalg.svd(raw, full_matrices=False)

            np.testing.assert_allclose(U_got, U_want + 1)
            np.testing.assert_allclose(s_got, s_want + 1)

        with new_session(cluster.endpoint) as session2:
            data = mt.array(raw)
            _, s, _ = mt.linalg.svd(data)
            # Drop the remaining reference to the unused outputs before running.
            del _

            s_got = session2.run(s, timeout=_exec_timeout)
            s_only = np.linalg.svd(raw, full_matrices=False)[1]
            np.testing.assert_allclose(s_got, s_only)
def testSparse(self):
    """Multiplying two empty sparse tensors runs on the cluster without raising."""
    import scipy.sparse as sps

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        sess = cluster.session

        # calculate sparse with no element in matrix
        square = sps.csr_matrix((10000, 10000))
        column = sps.csr_matrix((10000, 1))

        lhs = mt.tensor(square)
        rhs = mt.tensor(column)
        sess.run(lhs * rhs)
def testLocalClusterWithWeb(self):
    """A ones tensor runs correctly through the cluster session and then through a web session."""
    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     web=True) as cluster:
        with cluster.session as sess:
            ones = mt.ones((3, 3), chunk_size=2)
            fetched = sess.run(ones)
            np.testing.assert_array_equal(fetched, np.ones((3, 3)))

        web_url = 'http://' + cluster._web_endpoint
        with new_session(web_url) as sess:
            ones = mt.ones((3, 3), chunk_size=2)
            fetched = sess.run(ones)
            np.testing.assert_array_equal(fetched, np.ones((3, 3)))
def testTileContextInLocalCluster(self):
    """A ``TileWithContextOperand`` tensor over executed data equals ``raw * raw.nbytes``."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', modules=[__name__],
                     web=True) as cluster:
        sess = cluster.session

        raw = np.random.rand(10, 20)
        source = mt.tensor(raw)
        # The input is run first, before the dependent operand is built.
        sess.run(source)

        scaled = TileWithContextOperand().new_tensor([source],
                                                     shape=source.shape)
        fetched = sess.run(scaled)
        np.testing.assert_array_equal(raw * raw.nbytes, fetched)
def testBoolIndexingExecute(self, *_):
    """A boolean-indexed tensor has an unknown shape until run, then a concrete one.

    NOTE(review): ``MarsAPI.get_tensor_nsplits`` is presumably patched by a
    decorator outside this block (hence ``*_``) -- confirm.
    """
    MarsAPI.get_tensor_nsplits.return_value = ((4, 4, 4, 4), )

    with new_cluster(scheduler_n_process=2, worker_n_process=2) as cluster:
        base = mt.random.rand(8, 8, chunk_size=4)
        base[2:6, 2:6] = mt.ones((4, 4)) * 2
        selected = base[base > 1]

        # Before execution the length of the selection is not known.
        self.assertEqual(selected.shape, (np.nan, ))

        cluster.session.run(selected, fetch=False)
        self.assertEqual(selected.shape, (16, ))

        reshaped = selected.reshape((4, 4))
        self.assertEqual(reshaped.shape, (4, 4))
def testMutableTensorString(self):
    """Write string and structured-with-string values into mutable tensors, seal, and fetch them."""
    def check_with_session(session):
        from mars.tensor.core import mutable_tensor

        words = ['a', 'bb', 'cccc', 'dddddddd']

        # simple dtype.
        mut1 = mutable_tensor("test", (4, ), dtype='<U16', chunk_size=3)
        for pos, word in enumerate(words):
            mut1[pos] = word
        arr1 = mut1.seal()

        expected = np.empty((4, ), dtype='<U16')
        for pos, word in enumerate(words):
            expected[pos] = word
        np.testing.assert_array_equal(session.fetch(arr1), expected)

        # structured array that contains string
        dtype = np.dtype([('x', np.int32), ('y', '<U16')])
        records = list(enumerate(words))  # (0, 'a'), (1, 'bb'), ...

        mut2 = mutable_tensor("test", (4, ), dtype=dtype, chunk_size=3)
        for pos, record in enumerate(records):
            mut2[pos] = record
        arr2 = mut2.seal()

        expected = np.empty((4, ), dtype=dtype)
        for pos, record in enumerate(records):
            expected[pos] = record
        np.testing.assert_array_equal(session.fetch(arr2), expected)

    with new_session().as_default() as session:
        check_with_session(session)

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        session = cluster.session.as_default()
        check_with_session(session)

        web_url = 'http://' + cluster._web_endpoint
        with new_session(web_url).as_default() as web_session:
            check_with_session(web_session)
def testFetchDataFrameSlices(self, *_):
    """Fetch ``iloc`` slices of an executed DataFrame via cluster and web sessions.

    For each session the DataFrame is run once, then every slice fetched
    through the session is compared with the same slice of the full result.
    """
    # (indexer, comparison) pairs, checked in order; the scalar row index
    # produces a Series and therefore needs the Series comparison.
    slice_checks = [
        (slice(None, 2), pd.testing.assert_frame_equal),
        ((slice(2, 8), slice(2, 8)), pd.testing.assert_frame_equal),
        ((slice(None), slice(2, None)), pd.testing.assert_frame_equal),
        ((slice(None), slice(-5, None)), pd.testing.assert_frame_equal),
        (4, pd.testing.assert_series_equal),
        (slice(6, 9), pd.testing.assert_frame_equal),
    ]

    def check_slices(sess, frame):
        # Run once, then compare every fetched slice with the full result.
        full = sess.run(frame)
        for indexer, assert_equal in slice_checks:
            fetched = sess.fetch(frame.iloc[indexer])
            assert_equal(full.iloc[indexer], fetched)

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        session = cluster.session

        a = mt.random.rand(10, 10, chunk_size=3)
        df = md.DataFrame(a)
        check_slices(session, df)

        # Use the session as a context manager so it is closed instead of
        # being left open until the cluster is torn down.
        with new_session('http://' + cluster._web_endpoint) as web_session:
            check_slices(web_session, df)
def testMutableTensorFillValue(self):
    """Mutable tensors created with ``fill_value`` match ``np.full`` after the same writes."""
    def check_with_session(session):
        from mars.tensor.core import mutable_tensor

        rows = np.arange(10).reshape(2, 5)

        # simple dtype.
        expected = np.full((4, 5), fill_value=123.456, dtype='double')
        expected[1:4, 2] = 8
        expected[2:4] = rows

        mut1 = mutable_tensor("test", (4, 5), dtype='double',
                              fill_value=123.456, chunk_size=3)
        mut1[1:4, 2] = 8
        mut1[2:4] = np.arange(10).reshape(2, 5)
        arr1 = mut1.seal()

        np.testing.assert_array_equal(session.fetch(arr1), expected)

        # structured dtype, but the `fill_value` cannot be tuple (consistent with np.full).
        dtype = np.dtype([('x', np.int32), ('y', np.double)])

        expected = np.full((4, 5), fill_value=123.456, dtype=dtype)
        expected[1:4, 2] = (1, 2.)
        expected[2:4] = rows

        mut2 = mutable_tensor("test", (4, 5), dtype=dtype,
                              fill_value=123.456, chunk_size=3)
        mut2[1:4, 2] = (1, 2.)
        mut2[2:4] = np.arange(10).reshape(2, 5)
        arr2 = mut2.seal()

        np.testing.assert_array_equal(session.fetch(arr2), expected)

    with new_session().as_default() as session:
        check_with_session(session)

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        session = cluster.session.as_default()
        check_with_session(session)

        web_url = 'http://' + cluster._web_endpoint
        with new_session(web_url).as_default() as web_session:
            check_with_session(web_session)
def testFetchSlices(self, *_):
    """Fetch slices of an executed 3-d tensor via cluster and web sessions.

    For each session the tensor is run once, then every slice fetched through
    the session is compared with the same slice of the full result. The
    ``repr`` of an executed tensor is also compared with the ``repr`` of the
    numpy array it was built from.
    """
    # Slices compared for every session, in order.
    common_indexers = [
        slice(None, 2),
        (slice(2, 8), slice(2, 8)),
        (slice(None), slice(2, None)),
        (slice(None), slice(2, None), slice(-5, None)),
    ]

    def check_slices(sess, tensor, row):
        # Run once, then compare every fetched slice with the full result;
        # `row` is the single leading index checked last.
        full = sess.run(tensor)
        for indexer in common_indexers + [row]:
            fetched = sess.fetch(tensor[indexer])
            np.testing.assert_array_equal(full[indexer], fetched)

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        session = cluster.session

        a = mt.random.rand(10, 10, 10, chunk_size=3)
        check_slices(session, a, 0)

        # test repr
        with np.printoptions(threshold=100):
            raw = np.random.randint(1000, size=(3, 4, 6))
            b = mt.tensor(raw, chunk_size=3)
            self.assertEqual(repr(b.execute(session=session)), repr(raw))

        # Use the session as a context manager so it is closed instead of
        # being left open until the cluster is torn down.
        with new_session('http://' + cluster._web_endpoint) as web_session:
            check_slices(web_session, a, 4)
def testSingleOutputTensorExecute(self):
    """Scalar reductions execute through the cluster session, which is also the default session."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2) as cluster:
        self.assertIs(cluster.session, Session.default_or_local())

        # Sum of 10 values drawn from [0, 1) stays below 10.
        samples = mt.random.rand(10)
        total = samples.sum()
        value = total.execute()
        self.assertTrue(np.isscalar(value))
        self.assertLess(value, 10)

        # The same bound after scaling: 10 * 4 - 1 = 39.
        samples = mt.random.rand(10)
        total = samples.sum() * 4 - 1
        value = total.execute()
        self.assertLess(value, 39)
def testTiledTensor(self, *_):
    """Tensors that had ``tiles()`` called on them can still be run on the cluster."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster:
        sess = cluster.session
        ones = mt.ones((10, 10), chunk_size=3)

        # Tile the result of an expression, then run it.
        expr = ones.dot(ones)
        expr = expr.tiles()
        fetched = sess.run(expr, timeout=_exec_timeout)
        np.testing.assert_array_equal(fetched, np.ones((10, 10)) * 10)

        # Tile the input first, then build an expression on top of it.
        ones = ones.tiles()
        expr = ones + 1
        fetched = sess.run(expr, timeout=_exec_timeout)
        np.testing.assert_array_equal(fetched, np.ones((10, 10)) + 1)
def testClusterSession(self):
    """Attach to sessions by id over the cluster and web endpoints; unknown or closed ids are rejected."""
    def assert_session_missing(endpoint, session_id):
        # Attaching to `session_id` must raise ValueError with the exact message.
        with self.assertRaises(ValueError) as caught:
            new_session(endpoint, session_id=session_id)
        message = "The session with id = %s doesn't exist" % session_id
        self.assertEqual(caught.exception.args[0], message)

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        sess1 = cluster.session
        sess2 = new_session(cluster.endpoint, session_id=sess1.session_id)

        # Distinct session objects that share one session id.
        self.assertNotEqual(sess1, sess2)
        self.assertEqual(sess1.session_id, sess2.session_id)

        assert_session_missing(cluster.endpoint, str(uuid.uuid4()))

        sess1.close()
        assert_session_missing(cluster.endpoint, sess1.session_id)

        web_url = 'http://' + cluster._web_endpoint
        web_sess1 = new_session(web_url)
        web_sess2 = new_session(web_url, session_id=web_sess1.session_id)

        self.assertNotEqual(web_sess1, web_sess2)
        self.assertEqual(web_sess1.session_id, web_sess2.session_id)

        assert_session_missing(web_url, str(uuid.uuid4()))

        web_sess1.close()
        assert_session_missing(web_url, web_sess1.session_id)
def testRerunTensor(self):
    """Running the same tensor twice in one session returns equal results."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2) as cluster:
        sess = cluster.session

        tensor = mt.ones((10, 10)) + 1
        first_run = sess.run(tensor)
        np.testing.assert_array_equal(first_run, np.ones((10, 10)) + 1)

        second_run = sess.run(tensor)
        np.testing.assert_array_equal(first_run, second_run)

        with new_session(cluster.endpoint) as session2:
            tensor = mt.random.rand(10, 10)
            random_first = session2.run(tensor)

            # Re-submit the random tensor alongside a new one; its values must not change.
            ones = mt.ones((10, 10))
            random_second, ones_result = session2.run(tensor, ones)

            np.testing.assert_array_equal(random_first, random_second)
            np.testing.assert_array_equal(ones_result, np.ones((10, 10)))
def testRemoteFunctionInLocalCluster(self):
    """Spawn plain Python functions with ``mr.spawn`` and run them on the cluster.

    Covers chained spawns, a nested spawn, and a spawned function that reads
    ``Session.default`` and builds a tensor while it runs.
    """
    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     shared_memory='20M', modules=[__name__],
                     web=True) as cluster:
        session = cluster.session

        def add_one(x):
            return x + 1

        def multiply(x, y):
            return x * y

        # (3 + 1) * (4 + 1) == 20
        four = mr.spawn(add_one, 3)
        five = mr.spawn(add_one, 4)
        product = mr.spawn(multiply, (four, five))

        outcome = session.run(product, timeout=_exec_timeout)
        self.assertEqual(outcome, 20)

        # (2 + 1) + 1 == 4
        nested = mr.spawn(add_one, mr.spawn(add_one, 2))

        outcome = session.run(nested, timeout=_exec_timeout)
        self.assertEqual(outcome, 4)

        session2 = new_session(cluster.endpoint)
        expect_session_id = session2.session_id

        def sum_in_session():
            current = Session.default
            assert isinstance(current._sess, ClusterSession)
            assert current._sess.session_id == expect_session_id

            ones = mt.ones((3, 2))
            return ones.sum().to_numpy()

        # The default session keeps its id across a cloudpickle round trip.
        restored = cloudpickle.loads(cloudpickle.dumps(Session.default))
        self.assertEqual(restored.session_id, session.session_id)
        self.assertIsInstance(serialize_function(sum_in_session), bytes)

        in_session = mr.spawn(sum_in_session, retry_when_fail=False)

        outcome = session2.run(in_session, timeout=_exec_timeout)
        self.assertEqual(outcome, 6)
def testTileContextInLocalCluster(self):
    """An operand whose ``tile`` reads chunk metadata from the context multiplies by the chunk size."""
    class FakeOp(TensorAbs):
        _op_type_ = 9870102948
        _multiplier = Int64Field('multiplier')

        @classmethod
        def tile(cls, op):
            source = op.inputs[0]
            target = op.outputs[0]

            context = get_context()
            self.assertEqual(context.running_mode, RunningMode.local_cluster)

            # Look up the size of the first input chunk through the context.
            inp_chunk = source.chunks[0]
            metas = context.get_chunk_metas([inp_chunk.key])
            inp_size = metas[0].chunk_size

            # The chunk-level op carries that size as its multiplier.
            chunk_op = op.copy().reset_key()
            chunk_op._multiplier = inp_size
            chunk = chunk_op.new_chunk([inp_chunk], shape=inp_chunk.shape)

            new_op = op.copy()
            return new_op.new_tensors(op.inputs,
                                      shape=target.shape,
                                      order=target.order,
                                      nsplits=source.nsplits,
                                      chunks=[chunk])

        @classmethod
        def execute(cls, ctx, op):
            in_key = op.inputs[0].key
            out_key = op.outputs[0].key
            ctx[out_key] = ctx[in_key] * op._multiplier

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        sess = cluster.session

        raw = np.random.rand(10, 20)
        source_tensor = mt.tensor(raw)
        # The input is run first, before the dependent operand is built.
        sess.run(source_tensor)

        scaled = FakeOp().new_tensor([source_tensor],
                                     shape=source_tensor.shape)
        fetched = sess.run(scaled)
        np.testing.assert_array_equal(raw * raw.nbytes, fetched)
def testLearnInLocalCluster(self, *_):
    """Mars ``NearestNeighbors`` and ``roc_curve``/``auc`` agree with scikit-learn on a cluster."""
    from mars.learn.neighbors import NearestNeighbors
    from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors
    from mars.learn.metrics import roc_curve, auc
    from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     shared_memory='20M') as cluster:
        # --- nearest neighbours ---
        rs = np.random.RandomState(0)
        raw_X = rs.rand(10, 5)
        raw_Y = rs.rand(8, 5)

        X = mt.tensor(raw_X, chunk_size=7)
        Y = mt.tensor(raw_Y, chunk_size=(5, 3))

        mars_nn = NearestNeighbors(n_neighbors=3)
        mars_nn.fit(X)
        ret = mars_nn.kneighbors(Y, session=cluster.session)

        sk_nn = SkNearestNeighbors(n_neighbors=3)
        sk_nn.fit(raw_X)
        expected = sk_nn.kneighbors(raw_Y)

        fetched = [part.fetch() for part in ret]
        np.testing.assert_almost_equal(fetched[0], expected[0])
        np.testing.assert_almost_equal(fetched[1], expected[1])

        # --- ROC curve / AUC ---
        rs = np.random.RandomState(0)
        raw = pd.DataFrame({
            'a': rs.randint(0, 10, (10, )),
            'b': rs.rand(10)
        })

        df = md.DataFrame(raw)
        y = df['a'].to_tensor().astype('int')
        pred = df['b'].to_tensor().astype('float')
        fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2)
        m = auc(fpr, tpr)

        sk_fpr, sk_tpr, sk_thresholds = sklearn_roc_curve(
            raw['a'].to_numpy().astype('int'),
            raw['b'].to_numpy().astype('float'),
            pos_label=2)
        expect_m = sklearn_auc(sk_fpr, sk_tpr)

        self.assertAlmostEqual(m.fetch(), expect_m)
def testRerunTensor(self, *_):
    """Running the same tensor twice in one session returns equal results."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster:
        sess = cluster.session

        tensor = mt.ones((10, 10)) + 1
        first_run = sess.run(tensor, timeout=_exec_timeout)
        np.testing.assert_array_equal(first_run, np.ones((10, 10)) + 1)

        second_run = sess.run(tensor, timeout=_exec_timeout)
        np.testing.assert_array_equal(first_run, second_run)

        with new_session(cluster.endpoint) as session2:
            tensor = mt.random.rand(10, 10)
            random_first = session2.run(tensor, timeout=_exec_timeout)

            # Re-submit the random tensor alongside a new one; its values must not change.
            ones = mt.ones((10, 10))
            random_second, ones_result = session2.run(tensor, ones,
                                                      timeout=_exec_timeout)

            np.testing.assert_array_equal(random_first, random_second)
            np.testing.assert_array_equal(ones_result, np.ones((10, 10)))
def testIndexTensorExecute(self):
    """Index assignment and slicing return the expected values on a cluster."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster:
        main_session = cluster.session

        # Scalar assignment into a block: the first split and the block agree.
        arr = mt.random.rand(10, 5)
        block = slice(0, 5), slice(0, 5)
        arr[block] = 2
        parts = mt.split(arr, 2)
        first, second = main_session.run(parts[0], arr[block])
        np.testing.assert_array_equal(first, second)
        np.testing.assert_array_equal(first, np.ones((5, 5)) * 2)

        # Tensor assignment into a block.
        with new_session(cluster.endpoint) as session2:
            arr = mt.random.rand(10, 5)
            block = slice(0, 5), slice(0, 5)
            arr[block] = mt.ones((5, 5)) * 2
            fetched = session2.run(arr[block])
            np.testing.assert_array_equal(fetched, np.ones((5, 5)) * 2)

        # Two slices submitted together with their source tensor.
        with new_session(cluster.endpoint) as session3:
            arr = mt.random.rand(100, 5)
            head = arr[:10]
            mid = arr[10:20]
            first, second, whole = session3.run(head, mid, arr)
            np.testing.assert_array_equal(first, whole[:10])
            np.testing.assert_array_equal(second, whole[10:20])

        # Consecutive assignments into different row ranges.
        with new_session(cluster.endpoint) as session4:
            arr = mt.random.rand(100, 5)
            arr[:10] = mt.ones((10, 5))
            arr[10:20] = 2
            fetched = session4.run(arr)
            np.testing.assert_array_equal(fetched[:10], np.ones((10, 5)))
            np.testing.assert_array_equal(fetched[10:20], np.ones((10, 5)) * 2)
def testMutableTensorSeal(self):
    """A sealed mutable tensor keeps its chunk properties and values and supports further operations."""
    def check_with_session(session):
        expected = np.zeros((4, 5), dtype='int32')
        expected[1:4, 2] = 8
        expected[2:4] = np.arange(10).reshape(2, 5)
        expected[1] = np.arange(5)

        mut = session.create_mutable_tensor("test", (4, 5), dtype='int32',
                                            chunk_size=3)
        mut[1:4, 2] = 8
        mut[2:4] = np.arange(10).reshape(2, 5)
        mut[1] = np.arange(5)

        arr = mut.seal()

        # check chunk properties
        for mut_chunk, sealed_chunk in zip(mut.chunks, arr.chunks):
            for attr in ('key', 'index', 'shape', 'dtype'):
                self.assertEqual(getattr(mut_chunk, attr),
                                 getattr(sealed_chunk, attr))

        # check value
        np.testing.assert_array_equal(session.fetch(arr), expected)

        # check operations on the sealed tensor
        np.testing.assert_array_equal(session.run(arr + 1), expected + 1)
        np.testing.assert_array_equal(session.run(arr + arr),
                                      expected + expected)
        np.testing.assert_array_equal(session.run(arr.sum()), expected.sum())

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        session = cluster.session.as_default()
        check_with_session(session)

        web_url = 'http://' + cluster._web_endpoint
        with new_session(web_url).as_default() as web_session:
            check_with_session(web_session)
def testStoreHDF5ForLocalCluster(self):
    """Store a tensor with ``mt.tohdf5`` on the cluster and read it back with h5py."""
    with new_cluster(worker_n_process=2, shared_memory='20M',
                     web=True) as cluster:
        sess = cluster.session

        raw = np.random.RandomState(0).rand(10, 20)
        tensor = mt.tensor(raw, chunk_size=11)
        dataset = 'test_dataset'

        with tempfile.TemporaryDirectory() as tmp_dir:
            # The timestamp is part of the file name.
            stamp = int(time.time())
            filename = os.path.join(tmp_dir, 'test_read_{}.hdf5'.format(stamp))

            store_op = mt.tohdf5(filename, tensor, dataset=dataset)
            sess.run(store_op, timeout=_exec_timeout)

            with h5py.File(filename, 'r') as stored:
                loaded = np.asarray(stored[dataset])
                np.testing.assert_array_equal(loaded, raw)