def testDataSource(self):
    from mars.tensor.base.broadcast_to import TensorBroadcastTo

    data = np.random.random((10, 3))
    t = tensor(data, chunk_size=2)
    self.assertFalse(t.op.gpu)
    t = t.tiles()
    self.assertTrue((t.chunks[0].op.data == data[:2, :2]).all())
    self.assertTrue((t.chunks[1].op.data == data[:2, 2:3]).all())
    self.assertTrue((t.chunks[2].op.data == data[2:4, :2]).all())
    self.assertTrue((t.chunks[3].op.data == data[2:4, 2:3]).all())

    self.assertEqual(t.key, tensor(data, chunk_size=2).tiles().key)
    self.assertNotEqual(t.key, tensor(data, chunk_size=3).tiles().key)
    self.assertNotEqual(
        t.key, tensor(np.random.random((10, 3)), chunk_size=2).tiles().key)

    t = tensor(data, chunk_size=2, gpu=True)
    t = t.tiles()
    self.assertTrue(t.op.gpu)
    self.assertTrue(t.chunks[0].op.gpu)

    t = full((2, 2), 2, dtype='f4')
    self.assertFalse(t.op.gpu)
    self.assertEqual(t.shape, (2, 2))
    self.assertEqual(t.dtype, np.float32)

    t = full((2, 2), [1.0, 2.0], dtype='f4')
    self.assertEqual(t.shape, (2, 2))
    self.assertEqual(t.dtype, np.float32)
    self.assertIsInstance(t.op, TensorBroadcastTo)

    with self.assertRaises(ValueError):
        full((2, 2), [1.0, 2.0, 3.0], dtype='f4')
def testFaissNearestNeighborsExecution(self):
    rs = np.random.RandomState(0)
    raw_X = rs.rand(10, 5)
    raw_Y = rs.rand(8, 5)

    # test faiss execution
    X = mt.tensor(raw_X, chunk_size=7)
    Y = mt.tensor(raw_Y, chunk_size=(5, 3))

    nn = NearestNeighbors(n_neighbors=3, algorithm='faiss', metric='l2')
    nn.fit(X)
    ret = nn.kneighbors(Y)

    snn = SkNearestNeighbors(n_neighbors=3, algorithm='auto', metric='l2')
    snn.fit(raw_X)
    expected = snn.kneighbors(raw_Y)

    result = [r.fetch() for r in ret]
    np.testing.assert_almost_equal(result[0], expected[0], decimal=6)
    np.testing.assert_almost_equal(result[1], expected[1])

    # test return_distance=False
    ret = nn.kneighbors(Y, return_distance=False)
    result = ret.fetch()
    np.testing.assert_almost_equal(result, expected[1])

    # test y is x
    ret = nn.kneighbors()
    expected = snn.kneighbors()

    result = [r.fetch() for r in ret]
    np.testing.assert_almost_equal(result[0], expected[0], decimal=5)
    np.testing.assert_almost_equal(result[1], expected[1])
def testCopyAndView(self):
    data = np.random.rand(10, 20)
    a = tensor(data, chunk_size=6)
    b = a.view()
    b[:5] = 10

    npa = data.copy()
    npb = npa.view()
    npb[:5] = 10

    # a view shares data, so writing to b is also visible in a
    np.testing.assert_array_equal(b.execute(), npb)
    np.testing.assert_array_equal(a.execute(), npa)

    data = np.random.rand(10, 20)
    a = tensor(data, chunk_size=6)
    b = a.copy()
    b[:5] = 10

    npa = data.copy()
    npb = npa.copy()
    npb[:5] = 10

    # a copy owns its data, so writing to b leaves a unchanged
    np.testing.assert_array_equal(b.execute(), npb)
    np.testing.assert_array_equal(a.execute(), npa)
def testDataFrameCreate(self):
    sess = new_session()
    tensor = mt.ones((2, 2))
    df = md.DataFrame(tensor)
    df_result = sess.run(df)
    df2 = md.DataFrame(df)
    df2 = sess.run(df2)
    np.testing.assert_equal(df_result.values, np.ones((2, 2)))
    pd.testing.assert_frame_equal(df_result, df2)

    raw_a = np.random.rand(10)
    raw_b = np.random.randint(1000, size=10)
    df = md.DataFrame({
        'a': mt.tensor(raw_a),
        'b': mt.tensor(raw_b)
    }, columns=['b', 'a'])
    df_result = sess.run(df)
    pd.testing.assert_frame_equal(
        df_result, pd.DataFrame({
            'a': raw_a,
            'b': raw_b
        }, columns=['b', 'a']))
def testGraphFail(self, *_):
    op = SerializeMustFailOperand(f=3)
    tensor = op.new_tensor(None, (3, 3))

    try:
        raise ValueError
    except:  # noqa: E722
        exc = sys.exc_info()[1]

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', modules=[__name__],
                     options={'scheduler.retry_num': 1}) as cluster:
        with self.assertRaises(ExecutionFailed):
            try:
                cluster.session.run(tensor, timeout=_exec_timeout)
            except ExecutionFailed as ex:
                self.assertIsInstance(ex.__cause__, TypeError)
                raise

        data = mt.tensor(np.random.rand(10, 20))
        data2 = TileFailOperand(_exc_serial=pickle.dumps(exc)) \
            .new_tensor([data], shape=data.shape)
        with self.assertRaises(ExecutionFailed):
            try:
                cluster.session.run(data2)
            except ExecutionFailed as ex:
                self.assertIsInstance(ex.__cause__, ValueError)
                raise

        data = mt.tensor(np.random.rand(20, 10))
        data2 = ExecFailOperand(_exc_serial=pickle.dumps(exc)) \
            .new_tensor([data], shape=data.shape)
        with self.assertRaises(ExecutionFailed):
            try:
                cluster.session.run(data2)
            except ExecutionFailed as ex:
                self.assertIsInstance(ex.__cause__, ValueError)
                raise
def testViewDataOnSlice(self):
    data = np.random.rand(10, 20)
    a = tensor(data, chunk_size=6)
    b = a[:5, 5:10]
    b[:3, :3] = 3

    npa = data.copy()
    npb = npa[:5, 5:10]
    npb[:3, :3] = 3

    # basic slicing returns a view, so the assignment propagates back to a,
    # matching NumPy semantics
    np.testing.assert_array_equal(b.execute(), npb)
    np.testing.assert_array_equal(a.execute(), npa)

    data = np.random.rand(10, 20)
    a = tensor(data, chunk_size=6)
    b = a[:7]
    b += 1

    npa = data.copy()
    npb = npa[:7]
    npb += 1

    np.testing.assert_array_equal(b.execute(), npb)
    np.testing.assert_array_equal(a.execute(), npa)
def testInsert(self):
    rs = np.random.RandomState(0)
    raw = pd.DataFrame(rs.rand(10, 4), columns=['A', 'B', 'C', 'D'])

    with self.assertRaises(ValueError):
        tensor = mt.tensor(rs.rand(10, 10), chunk_size=4)
        df = md.DataFrame(raw.copy(deep=True), chunk_size=3)
        df.insert(4, 'E', tensor)

    df = md.DataFrame(raw.copy(deep=True), chunk_size=3)
    df.insert(4, 'E', 0)
    raw_dup = raw.copy(deep=True)
    raw_dup.insert(4, 'E', 0)
    pd.testing.assert_frame_equal(
        self.executor.execute_dataframe(df, concat=True)[0], raw_dup)

    raw_tensor = rs.rand(10)
    tensor = mt.tensor(raw_tensor, chunk_size=4)
    df = md.DataFrame(raw.copy(deep=True), chunk_size=3)
    df.insert(4, 'E', tensor)
    raw_dup = raw.copy(deep=True)
    raw_dup.insert(4, 'E', raw_tensor)
    pd.testing.assert_frame_equal(
        self.executor.execute_dataframe(df, concat=True)[0], raw_dup)
def testNorm(self):
    data = np.random.rand(9, 6)
    a = mt.tensor(data, chunk_size=(2, 6))

    for ord in (None, 'nuc', np.inf, -np.inf, 0, 1, -1, 2, -2):
        for axis in (0, 1, (0, 1)):
            for keepdims in (True, False):
                try:
                    res = mt.linalg.norm(a, ord=ord, axis=axis, keepdims=keepdims)
                    expect_shape = np.linalg.norm(data, ord=ord, axis=axis,
                                                  keepdims=keepdims).shape
                    self.assertEqual(res.shape, expect_shape)
                    self.assertEqual(calc_shape(res), expect_shape)
                except ValueError:
                    continue
def testSeriesFromTensor(self):
    data = np.random.rand(10)
    series = md.Series(mt.tensor(data), name='a')
    pd.testing.assert_series_equal(self.executor.execute_dataframe(series, concat=True)[0],
                                   pd.Series(data, name='a'))

    series = md.Series(mt.tensor(data, chunk_size=3))
    pd.testing.assert_series_equal(self.executor.execute_dataframe(series, concat=True)[0],
                                   pd.Series(data))

    series = md.Series(mt.ones((10,), chunk_size=4))
    pd.testing.assert_series_equal(self.executor.execute_dataframe(series, concat=True)[0],
                                   pd.Series(np.ones(10,)))

    index_data = np.random.rand(10)
    series = md.Series(mt.tensor(data, chunk_size=3), name='a',
                       index=mt.tensor(index_data, chunk_size=4))
    pd.testing.assert_series_equal(self.executor.execute_dataframe(series, concat=True)[0],
                                   pd.Series(data, name='a', index=index_data))

    series = md.Series(mt.tensor(data, chunk_size=3), name='a',
                       index=md.date_range('2020-1-1', periods=10))
    pd.testing.assert_series_equal(self.executor.execute_dataframe(series, concat=True)[0],
                                   pd.Series(data, name='a',
                                             index=pd.date_range('2020-1-1', periods=10)))
def testFetchSlices(self, *_):
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        session = cluster.session

        a = mt.random.rand(10, 10, 10, chunk_size=3)
        r = session.run(a)

        r_slice1 = session.fetch(a[:2])
        np.testing.assert_array_equal(r[:2], r_slice1)

        r_slice2 = session.fetch(a[2:8, 2:8])
        np.testing.assert_array_equal(r[2:8, 2:8], r_slice2)

        r_slice3 = session.fetch(a[:, 2:])
        np.testing.assert_array_equal(r[:, 2:], r_slice3)

        r_slice4 = session.fetch(a[:, 2:, -5:])
        np.testing.assert_array_equal(r[:, 2:, -5:], r_slice4)

        r_slice5 = session.fetch(a[0])
        np.testing.assert_array_equal(r[0], r_slice5)

        # test repr
        with np.printoptions(threshold=100):
            raw = np.random.randint(1000, size=(3, 4, 6))
            b = mt.tensor(raw, chunk_size=3)
            self.assertEqual(repr(b.execute(session=session)), repr(raw))

        web_session = new_session('http://' + cluster._web_endpoint)
        r = web_session.run(a)

        r_slice1 = web_session.fetch(a[:2])
        np.testing.assert_array_equal(r[:2], r_slice1)

        r_slice2 = web_session.fetch(a[2:8, 2:8])
        np.testing.assert_array_equal(r[2:8, 2:8], r_slice2)

        r_slice3 = web_session.fetch(a[:, 2:])
        np.testing.assert_array_equal(r[:, 2:], r_slice3)

        r_slice4 = web_session.fetch(a[:, 2:, -5:])
        np.testing.assert_array_equal(r[:, 2:, -5:], r_slice4)

        r_slice5 = web_session.fetch(a[4])
        np.testing.assert_array_equal(r[4], r_slice5)
def testDistributedContext(self):
    self.start_processes(etcd=False)
    session_id = uuid.uuid1()
    actor_client = new_client()
    rs = np.random.RandomState(0)

    context = DistributedContext(
        scheduler_address=self.scheduler_endpoints[0], session_id=session_id)

    session_ref = actor_client.actor_ref(
        self.session_manager_ref.create_session(session_id))

    raw1 = rs.rand(10, 10)
    a = mt.tensor(raw1, chunk_size=4)

    graph = a.build_graph()
    targets = [a.key]
    graph_key = uuid.uuid1()
    session_ref.submit_tileable_graph(
        json.dumps(graph.to_json()), graph_key,
        target_tileables=targets, names=['test'])

    state = self.wait_for_termination(actor_client, session_ref, graph_key)
    self.assertEqual(state, GraphState.SUCCEEDED)

    tileable_key = context.get_tileable_key_by_name('test')
    self.assertEqual(a.key, tileable_key)

    nsplits = context.get_tileable_metas([a.key], filter_fields=['nsplits'])[0][0]
    self.assertEqual(((4, 4, 2), (4, 4, 2)), nsplits)

    r = context.get_tileable_data(a.key)
    np.testing.assert_array_equal(raw1, r)

    indexes = [slice(3, 9), slice(0, 7)]
    r = context.get_tileable_data(a.key, indexes)
    np.testing.assert_array_equal(raw1[tuple(indexes)], r)

    indexes = [[1, 2, 4, 5], slice(None, None, None)]
    r = context.get_tileable_data(a.key, indexes)
    np.testing.assert_array_equal(raw1[tuple(indexes)], r)

    indexed = a[[0, 1, 2, 9], [0, 0, 4, 4]]
    r = context.get_tileable_data(a.key, indexed.op.indexes)
    np.testing.assert_array_equal(raw1[[0, 1, 2, 9], [0, 0, 4, 4]], r)
def test_cmdline_run(supervisor_args, worker_args, use_web_addr):
    new_isolation()
    sv_proc = w_procs = None
    try:
        env = os.environ.copy()
        env['MARS_CPU_TOTAL'] = '2'

        sv_args = _reload_args(supervisor_args)
        sv_proc = subprocess.Popen(sv_args, env=env)

        oscar_port = _get_labelled_port('supervisor', create=False)
        if not oscar_port:
            oscar_ep = _wait_supervisor_ready(sv_proc)
        else:
            oscar_ep = f'127.0.0.1:{oscar_port}'

        if use_web_addr:
            host = oscar_ep.rsplit(':', 1)[0]
            api_ep = f'http://{host}:{_get_labelled_port("web", create=False)}'
        else:
            api_ep = oscar_ep

        w_procs = [subprocess.Popen(_reload_args(worker_args), env=env)
                   for _ in range(2)]
        _wait_worker_ready(oscar_ep, w_procs)

        new_session(api_ep)
        data = np.random.rand(10, 10)
        res = mt.tensor(data, chunk_size=5).sum().execute().fetch()
        np.testing.assert_almost_equal(res, data.sum())
    finally:
        ep_file_name = OscarCommandRunner._build_endpoint_file_path(pid=sv_proc.pid)
        try:
            os.unlink(ep_file_name)
        except OSError:
            pass

        w_procs = w_procs or []
        for proc in w_procs + [sv_proc]:
            if not proc:
                continue
            proc.terminate()
            try:
                proc.wait(3)
            except subprocess.TimeoutExpired:
                kill_process_tree(proc.pid)

        stop_isolation()
async def test_fault_inject_subtask_processor(fault_cluster, fault_and_exception):
    fault_type, fault_count, first_run_raises = fault_and_exception
    name = await create_fault_injection_manager(
        session_id=fault_cluster.session.session_id,
        address=fault_cluster.session.address,
        fault_count=fault_count,
        fault_type=fault_type)
    extra_config = {ExtraConfigKey.FAULT_INJECTION_MANAGER_NAME: name}

    raw = np.random.RandomState(0).rand(10, 10)
    a = mt.tensor(raw, chunk_size=5)
    b = a + 1

    with first_run_raises:
        b.execute(extra_config=extra_config)
def testFlat(self):
    data = np.random.rand(10, 20)
    a = tensor(data, chunk_size=4)
    fl = a.flat
    fl[1:10] = 10
    b = fl[10:20]
    b[0:4] = 20

    # replay the same flat-iterator operations with NumPy for comparison
    npa = data.copy()
    npfl = npa.flat
    npfl[1:10] = 10
    npb = npfl[10:20]
    npb[0:4] = 20

    np.testing.assert_array_equal(b.execute(), npb)
    np.testing.assert_array_equal(a.execute(), npa)
def testDistributedRunPyTorchScript(self):
    service_ep = 'http://127.0.0.1:' + self.web_port
    scheduler_ep = '127.0.0.1:' + self.scheduler_port
    with new_session(service_ep) as sess:
        raw = np.random.rand(100, 200)
        data = mt.tensor(raw, chunk_size=40)
        data.execute(name='data', session=sess)

        with DistributedContext(scheduler_address=scheduler_ep,
                                session_id=sess.session_id):
            dataset = MarsDataset('data')
            self.assertEqual(len(dataset), 100)

            sample = [2, 5, 7, 9, 10]
            r1 = dataset[sample][0]
            np.testing.assert_array_equal(raw[sample], r1)
def testTileContextInLocalCluster(self):
    from mars.serialize import dataserializer

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', modules=[__name__], web=True) as cluster:
        session = cluster.session

        raw = np.random.rand(10, 20)
        data_bytes = dataserializer.serialize(raw).total_bytes
        data = mt.tensor(raw)
        session.run(data)

        data2 = TileWithContextOperand().new_tensor([data], shape=data.shape)

        result = session.run(data2)
        np.testing.assert_array_equal(raw * data_bytes, result)
def test_params():
    raw = np.random.rand(10, 10)
    a = mt.tensor(raw)
    a = a[a[0] < 0.5]
    a = tile(a)
    c = a.chunks[0]

    assert any(np.isnan(s) for s in c.params['shape'])
    c.params = c.get_params_from_data(raw[raw[0] < 0.5])
    assert not any(np.isnan(s) for s in c.params['shape'])

    params = c.params.copy()
    params.pop('index', None)
    a.params = params
    assert np.prod(a.shape) > 0
    a.refresh_params()
def testManhattanDistancesExecution(self):
    raw_x = np.random.rand(20, 5)
    raw_y = np.random.rand(21, 5)

    x1 = mt.tensor(raw_x, chunk_size=30)
    y1 = mt.tensor(raw_y, chunk_size=30)

    x2 = mt.tensor(raw_x, chunk_size=11)
    y2 = mt.tensor(raw_y, chunk_size=12)

    raw_sparse_x = sps.random(20, 5, density=0.4, format='csr', random_state=0)
    raw_sparse_y = sps.random(21, 5, density=0.3, format='csr', random_state=0)

    x3 = mt.tensor(raw_sparse_x, chunk_size=30)
    y3 = mt.tensor(raw_sparse_y, chunk_size=30)

    x4 = mt.tensor(raw_sparse_x, chunk_size=11)
    y4 = mt.tensor(raw_sparse_y, chunk_size=12)

    for x, y, is_sparse in [(x1, y1, False),
                            (x2, y2, False),
                            (x3, y3, True),
                            (x4, y4, True)]:
        if is_sparse:
            rx, ry = raw_sparse_x, raw_sparse_y
        else:
            rx, ry = raw_x, raw_y

        sv = [True, False] if not is_sparse else [True]

        for sum_over_features in sv:
            d = manhattan_distances(x, y, sum_over_features)

            result = self.executor.execute_tensor(d, concat=True)[0]
            expected = sk_manhattan_distances(rx, ry, sum_over_features)
            np.testing.assert_almost_equal(result, expected)

            d = manhattan_distances(x, sum_over_features=sum_over_features)

            result = self.executor.execute_tensor(d, concat=True)[0]
            expected = sk_manhattan_distances(rx, sum_over_features=sum_over_features)
            np.testing.assert_almost_equal(result, expected)
def test_inv():
    a = mt.random.randint(1, 10, (20, 20), chunk_size=8)
    a_inv = tile(mt.linalg.inv(a))
    assert a_inv.shape == (20, 20)

    # test 1 chunk
    a = mt.random.randint(1, 10, (20, 20), chunk_size=20)
    a_inv = tile(mt.linalg.inv(a))
    assert a_inv.shape == (20, 20)
    assert len(a_inv.chunks) == 1
    assert isinstance(a_inv.chunks[0].op, TensorInv)

    a = mt.random.randint(1, 10, (20, 20), chunk_size=11)
    a_inv = tile(mt.linalg.inv(a))
    assert a_inv.shape == (20, 20)
    assert a_inv.nsplits == ((11, 9), (11, 9))

    b = a.T.dot(a)
    b_inv = tile(mt.linalg.inv(b))
    assert b_inv.shape == (20, 20)

    # test sparse
    data = sps.csr_matrix(np.random.randint(1, 10, (20, 20)))
    a = mt.tensor(data, chunk_size=10)
    a_inv = tile(mt.linalg.inv(a))
    assert a_inv.shape == (20, 20)
    assert a_inv.op.sparse is True
    assert isinstance(a_inv, SparseTensor)
    assert all(c.is_sparse() for c in a_inv.chunks)

    b = a.T.dot(a)
    b_inv = tile(mt.linalg.inv(b))
    assert b_inv.shape == (20, 20)
    assert b_inv.op.sparse is True
    assert isinstance(b_inv, SparseTensor)
    assert all(c.is_sparse() for c in b_inv.chunks)

    b_inv = tile(mt.linalg.inv(b, sparse=False))
    assert b_inv.op.sparse is False
    assert not all(c.is_sparse() for c in b_inv.chunks)
def testSetitem(self):
    data = pd.DataFrame(np.random.rand(10, 5), columns=['c' + str(i) for i in range(5)],
                        index=['i' + str(i) for i in range(10)])
    data2 = np.random.rand(10)
    df = md.DataFrame(data, chunk_size=3)

    df['c3'] = df['c3'] + 1
    df['c10'] = 10
    df[4] = mt.tensor(data2, chunk_size=4)

    result = self.executor.execute_dataframe(df, concat=True)[0]
    expected = data.copy()
    expected['c3'] = expected['c3'] + 1
    expected['c10'] = 10
    expected[4] = data2
    pd.testing.assert_frame_equal(result, expected)
def testTileContextInLocalCluster(self):
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', modules=[__name__], web=True) as cluster:
        session = cluster.session

        raw = np.random.rand(10, 20)
        data = mt.tensor(raw)
        session.run(data)

        data2 = FakeOp().new_tensor([data], shape=data.shape)

        result = session.run(data2)
        np.testing.assert_array_equal(raw * raw.nbytes, result)
def test_erf():
    raw = np.random.rand(10, 8, 5)
    t = tensor(raw, chunk_size=3)

    r = erf(t)
    expect = scipy_erf(raw)

    assert r.shape == raw.shape
    assert r.dtype == expect.dtype

    t, r = tile(t, r)

    assert r.nsplits == t.nsplits
    for c in r.chunks:
        assert isinstance(c.op, TensorErf)
        assert c.index == c.inputs[0].index
        assert c.shape == c.inputs[0].shape
def testTensorGraphTiledSerialize(self):
    t = ones((10, 3), chunk_size=(5, 2)) + \
        tensor(np.random.random((10, 3)), chunk_size=(5, 2))
    graph = t.build_graph(tiled=True)

    pb = graph.to_pb()
    graph2 = DAG.from_pb(pb)
    self.assertEqual(len(graph), len(graph2))
    chunk = next(c for c in graph if c.inputs)
    chunk2 = next(c for c in graph2 if c.key == chunk.key)
    self.assertBaseEqual(chunk.op, chunk2.op)
    self.assertEqual(chunk.index, chunk2.index)
    self.assertEqual(chunk.shape, chunk2.shape)
    self.assertEqual(sorted(i.key for i in chunk.inputs),
                     sorted(i.key for i in chunk2.inputs))

    jsn = graph.to_json()
    graph2 = DAG.from_json(jsn)
    self.assertEqual(len(graph), len(graph2))
    chunk = next(c for c in graph if c.inputs)
    chunk2 = next(c for c in graph2 if c.key == chunk.key)
    self.assertBaseEqual(chunk.op, chunk2.op)
    self.assertEqual(chunk.index, chunk2.index)
    self.assertEqual(chunk.shape, chunk2.shape)
    self.assertEqual(sorted(i.key for i in chunk.inputs),
                     sorted(i.key for i in chunk2.inputs))

    t = ones((10, 3), chunk_size=((3, 5, 2), 2)) + 2
    graph = t.build_graph(tiled=True)

    pb = graph.to_pb()
    graph2 = DAG.from_pb(pb)
    chunk = next(c for c in graph)
    chunk2 = next(c for c in graph2 if c.key == chunk.key)
    self.assertBaseEqual(chunk.op, chunk2.op)
    self.assertEqual(sorted(i.key for i in chunk.composed),
                     sorted(i.key for i in chunk2.composed))

    jsn = graph.to_json()
    graph2 = DAG.from_json(jsn)
    chunk = next(c for c in graph)
    chunk2 = next(c for c in graph2 if c.key == chunk.key)
    self.assertBaseEqual(chunk.op, chunk2.op)
    self.assertEqual(sorted(i.key for i in chunk.composed),
                     sorted(i.key for i in chunk2.composed))
def testNamed(self):
    rs = np.random.RandomState(0)
    raw = rs.rand(10, 10)

    sess = Session.default_or_local()

    # test named tensor
    t = mt.tensor(raw, chunk_size=3)
    name = 't_name'
    r1 = t.execute(name=name, session=sess)
    np.testing.assert_array_equal(r1, raw)

    t2 = mt.named_tensor(name=name, session=sess)
    self.assertEqual(t2.order, TensorOrder.C_ORDER)
    r2 = (t2 + 1).execute(session=sess).fetch()
    np.testing.assert_array_equal(r2, raw + 1)

    # test named series
    name = 's_name'
    raw = pd.Series([1, 2, 3])
    s = md.Series(raw)
    r1 = s.execute(name=name, session=sess).fetch()
    pd.testing.assert_series_equal(r1, raw)

    s2 = md.named_series(name=name, session=sess)
    self.assertEqual(s2.dtype, s.dtype)
    pd.testing.assert_index_equal(s2.index_value.to_pandas(),
                                  s.index_value.to_pandas())
    r2 = s2.execute(session=sess).fetch()
    pd.testing.assert_series_equal(r2, raw)

    # test named dataframe
    name = 'd_name'
    raw = pd.DataFrame(np.random.rand(10, 3))
    d = md.DataFrame(raw, chunk_size=4)
    r1 = d.execute(name=name, session=sess).fetch()
    pd.testing.assert_frame_equal(r1, raw)

    d2 = md.named_dataframe(name=name, session=sess)
    pd.testing.assert_series_equal(d2.dtypes, d.dtypes)
    pd.testing.assert_index_equal(d2.index_value.to_pandas(),
                                  d.index_value.to_pandas())
    pd.testing.assert_index_equal(d2.columns_value.to_pandas(),
                                  d.columns_value.to_pandas())
    r2 = d2.execute(session=sess).fetch()
    pd.testing.assert_frame_equal(r2, raw)
def testInv(self):
    a = mt.random.randint(1, 10, (20, 20), chunk_size=4)
    a_inv = mt.linalg.inv(a).tiles()
    self.assertEqual(a_inv.shape, (20, 20))

    # test 1 chunk
    a = mt.random.randint(1, 10, (20, 20), chunk_size=20)
    a_inv = mt.linalg.inv(a).tiles()
    self.assertEqual(a_inv.shape, (20, 20))
    self.assertEqual(len(a_inv.chunks), 1)
    self.assertIsInstance(a_inv.chunks[0].op, TensorInv)

    a = mt.random.randint(1, 10, (20, 20), chunk_size=11)
    a_inv = mt.linalg.inv(a).tiles()
    self.assertEqual(a_inv.shape, (20, 20))
    self.assertEqual(a_inv.nsplits, ((11, 9), (11, 9)))

    b = a.T.dot(a)
    b_inv = mt.linalg.inv(b).tiles()
    self.assertEqual(b_inv.shape, (20, 20))

    # test sparse
    data = sps.csr_matrix(np.random.randint(1, 10, (20, 20)))
    a = mt.tensor(data, chunk_size=5)
    a_inv = mt.linalg.inv(a).tiles()
    self.assertEqual(a_inv.shape, (20, 20))
    self.assertTrue(a_inv.op.sparse)
    self.assertIsInstance(a_inv, SparseTensor)
    self.assertTrue(all(c.is_sparse() for c in a_inv.chunks))

    b = a.T.dot(a)
    b_inv = mt.linalg.inv(b).tiles()
    self.assertEqual(b_inv.shape, (20, 20))
    self.assertTrue(b_inv.op.sparse)
    self.assertIsInstance(b_inv, SparseTensor)
    self.assertTrue(all(c.is_sparse() for c in b_inv.chunks))

    b_inv = mt.linalg.inv(b, sparse=False).tiles()
    self.assertFalse(b_inv.op.sparse)
    self.assertFalse(all(c.is_sparse() for c in b_inv.chunks))
def test_is_multilabel(setup):
    raws = [
        [[1, 2]],
        [0, 1, 0, 1],
        [[1], [0, 2], []],
        np.array([[1, 0], [0, 0]]),
        np.array([[1], [0], [0]]),
        np.array([[1, 0, 0]]),
        np.array([[1., 0.], [0., 0.]]),
        sps.csr_matrix([[1, 0], [0, 1]]),
    ]

    for raw in raws:
        assert is_multilabel(raw).to_numpy() == sklearn_is_multilabel(raw)

    t = mt.tensor(raws[3], chunk_size=1)
    assert is_multilabel(t).to_numpy() == sklearn_is_multilabel(raws[3])
def testTensorGraphSerialize(self):
    t = ones((10, 3), chunk_size=(5, 2)) + \
        tensor(np.random.random((10, 3)), chunk_size=(5, 2))
    graph = t.build_graph(tiled=False)

    pb = graph.to_pb()
    graph2 = DAG.from_pb(pb)
    self.assertEqual(len(graph), len(graph2))
    t = next(c for c in graph if c.inputs)
    t2 = next(c for c in graph2 if c.key == t.key)
    self.assertTrue(t2.op.outputs[0], ReferenceType)  # make sure outputs are all weak reference
    self.assertBaseEqual(t.op, t2.op)
    self.assertEqual(t.shape, t2.shape)
    self.assertEqual(sorted(i.key for i in t.inputs),
                     sorted(i.key for i in t2.inputs))

    jsn = graph.to_json()
    graph2 = DAG.from_json(jsn)
    self.assertEqual(len(graph), len(graph2))
    t = next(c for c in graph if c.inputs)
    t2 = next(c for c in graph2 if c.key == t.key)
    self.assertTrue(t2.op.outputs[0], ReferenceType)  # make sure outputs are all weak reference
    self.assertBaseEqual(t.op, t2.op)
    self.assertEqual(t.shape, t2.shape)
    self.assertEqual(sorted(i.key for i in t.inputs),
                     sorted(i.key for i in t2.inputs))

    # test graph with tiled tensor
    t2 = ones((10, 10), chunk_size=(5, 4)).tiles()
    graph = DAG()
    graph.add_node(t2)

    pb = graph.to_pb()
    graph2 = DAG.from_pb(pb)
    self.assertEqual(len(graph), len(graph2))
    chunks = next(iter(graph2)).chunks
    self.assertEqual(len(chunks), 6)
    self.assertIsInstance(chunks[0], TensorChunk)
    self.assertEqual(chunks[0].index, t2.chunks[0].index)
    self.assertBaseEqual(chunks[0].op, t2.chunks[0].op)

    jsn = graph.to_json()
    graph2 = DAG.from_json(jsn)
    self.assertEqual(len(graph), len(graph2))
    chunks = next(iter(graph2)).chunks
    self.assertEqual(len(chunks), 6)
    self.assertIsInstance(chunks[0], TensorChunk)
    self.assertEqual(chunks[0].index, t2.chunks[0].index)
    self.assertBaseEqual(chunks[0].op, t2.chunks[0].op)
def testFromSpmatrix(self):
    t = tensor(sps.csr_matrix([[0, 0, 1], [1, 0, 0]], dtype='f8'), chunk_size=2)
    self.assertIsInstance(t, SparseTensor)
    self.assertIsInstance(t.op, CSRMatrixDataSource)
    self.assertTrue(t.issparse())
    self.assertFalse(t.op.gpu)

    t = t.tiles()
    self.assertEqual(t.chunks[0].index, (0, 0))
    self.assertIsInstance(t.op, CSRMatrixDataSource)
    self.assertFalse(t.op.gpu)
    m = sps.csr_matrix([[0, 0], [1, 0]])
    self.assertTrue(np.array_equal(t.chunks[0].op.indices, m.indices))
    self.assertTrue(np.array_equal(t.chunks[0].op.indptr, m.indptr))
    self.assertTrue(np.array_equal(t.chunks[0].op.data, m.data))
    self.assertTrue(np.array_equal(t.chunks[0].op.shape, m.shape))
def testTileContextInLocalCluster(self):
    class FakeOp(TensorAbs):
        _op_type_ = 9870102948

        _multiplier = Int64Field('multiplier')

        @classmethod
        def tile(cls, op):
            context = get_context()

            self.assertEqual(context.running_mode, RunningMode.local_cluster)

            inp_chunk = op.inputs[0].chunks[0]
            inp_size = context.get_chunk_metas([inp_chunk.key])[0].chunk_size
            chunk_op = op.copy().reset_key()
            chunk_op._multiplier = inp_size
            chunk = chunk_op.new_chunk([inp_chunk], shape=inp_chunk.shape)

            new_op = op.copy()
            return new_op.new_tensors(op.inputs, shape=op.outputs[0].shape,
                                      order=op.outputs[0].order,
                                      nsplits=op.inputs[0].nsplits,
                                      chunks=[chunk])

        @classmethod
        def execute(cls, ctx, op):
            ctx[op.outputs[0].key] = ctx[op.inputs[0].key] * op._multiplier

    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M', web=True) as cluster:
        session = cluster.session

        raw = np.random.rand(10, 20)
        data = mt.tensor(raw)
        session.run(data)

        data2 = FakeOp().new_tensor([data], shape=data.shape)

        result = session.run(data2)
        np.testing.assert_array_equal(raw * raw.nbytes, result)
def testGammaln(self):
    raw = np.random.rand(10, 8, 5)
    t = tensor(raw, chunk_size=3)

    r = gammaln(t)
    expect = scipy_gammaln(raw)

    self.assertEqual(r.shape, raw.shape)
    self.assertEqual(r.dtype, expect.dtype)

    r.tiles()

    self.assertEqual(r.nsplits, t.nsplits)
    for c in r.chunks:
        self.assertIsInstance(c.op, TensorGammaln)
        self.assertEqual(c.index, c.inputs[0].index)
        self.assertEqual(c.shape, c.inputs[0].shape)