def testRegister(self):
    """Check that runners installed through ``register`` are used by ``Executor``.

    A fake execute/size-estimate pair is registered for ``FakeOperand``.  A
    one-node graph built from that operand is executed normally and with
    ``mock=True``, and the outputs are compared with the fake result and the
    fake size.  The normal execution is then repeated with a chunk created
    from ``SubFakeOperand`` and must produce the same fake result.
    """
    from mars.graph import DAG

    fake_result = np.random.rand(10, 10)
    doubled_nbytes = fake_result.nbytes * 2
    fake_size = (doubled_nbytes, doubled_nbytes)

    def store_fake_result(ctx, op):
        # Runner: publish the canned array under the first output's key.
        ctx[op.outputs[0].key] = fake_result

    def store_fake_size(ctx, op):
        # Size estimator: publish the canned size pair under the same key.
        ctx[op.outputs[0].key] = fake_size

    def single_chunk_graph(operand_cls):
        # Build a DAG holding exactly one chunk created from ``operand_cls``.
        dag = DAG()
        node = operand_cls().new_chunk(None, shape=(10, 10))
        dag.add_node(node.data)
        return dag, node

    register(FakeOperand, store_fake_result, store_fake_size)

    graph, chunk = single_chunk_graph(FakeOperand)
    executor = Executor()
    executed = executor.execute_graph(graph, keys=[chunk.key])
    np.testing.assert_array_equal(executed[0], fake_result)
    estimated = executor.execute_graph(graph, keys=[chunk.key], mock=True)
    self.assertEqual(estimated[0], fake_size)

    # Same check with the second operand type, using a fresh executor.
    graph, chunk = single_chunk_graph(SubFakeOperand)
    executed = Executor().execute_graph(graph, keys=[chunk.key])
    np.testing.assert_array_equal(executed[0], fake_result)
def execute_size(t):
    """Run ``t`` through a mocked execution while recording TensorDot sizes.

    While the run is in progress the size estimator of ``TensorTensorDot`` is
    replaced by a recorder that delegates to ``TensorTensorDot.estimate_size``
    and then fills ``chunk_sizes``, ``chunk_nbytes``, ``chunk_input_sizes`` and
    ``chunk_input_nbytes`` (all four are cleared first).  The default
    registration is restored afterwards, whether or not the run succeeds.
    """
    def _tensordot_size_recorder(ctx, op):
        TensorTensorDot.estimate_size(ctx, op)

        out_chunk = op.outputs[0]
        out_key = out_chunk.key
        chunk_sizes[out_key] = ctx[out_key]
        chunk_nbytes[out_key] = out_chunk.nbytes

        # Both mappings are keyed by the producing op, so inputs that share an
        # op key contribute a single entry (the last one seen) to each sum.
        estimated_by_op = {inp.op.key: ctx[inp.key][0] for inp in op.inputs}
        chunk_input_sizes[out_key] = sum(estimated_by_op.values())
        nbytes_by_op = {inp.op.key: inp.nbytes for inp in op.inputs}
        chunk_input_nbytes[out_key] = sum(nbytes_by_op.values())

    mock_provider = ExecutorForTest.SyncProviderType.MOCK
    size_executor = ExecutorForTest(sync_provider_type=mock_provider)
    try:
        for record in (chunk_sizes, chunk_nbytes,
                       chunk_input_sizes, chunk_input_nbytes):
            record.clear()
        register(TensorTensorDot, size_estimator=_tensordot_size_recorder)
        size_executor.execute_tensor(t, mock=True)
    finally:
        register_default(TensorTensorDot)
def testFetch(self):
    """Fetching an executed groupby result must not run ``read_csv`` again.

    A CSV file is written to a temporary directory, read back, grouped by
    column ``'d'`` and aggregated, then executed once.  The runner of
    ``DataFrameReadCSV`` is afterwards swapped for one that always raises, so
    the two ``fetch`` calls can only succeed if the reader is not re-executed.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        filename = os.path.join(tempdir, 'test_fetch.csv')
        columns = {
            'a': [3, 4, 5, 3, 5, 4, 1, 2, 3],
            'b': [1, 3, 4, 5, 6, 5, 4, 4, 4],
            'c': list('aabaaddce'),
            'd': list('abaaaddce'),
        }
        pd_df = pd.DataFrame(columns)
        pd_df.to_csv(filename, index=False)

        grouped = md.read_csv(filename).groupby('d').agg({'b': 'min'})
        expected = pd_df.groupby('d').agg({'b': 'min'})

        # First (and only intended) real execution of the pipeline.
        grouped.execute()

        def _reject_read_csv(*_):  # pragma: no cover
            raise ValueError('cannot run read_csv again')

        try:
            register(DataFrameReadCSV, _reject_read_csv)

            pd.testing.assert_frame_equal(grouped.fetch(), expected)
            head = grouped.iloc[:3]
            pd.testing.assert_frame_equal(head.fetch(), expected.iloc[:3])
        finally:
            # Drop the raising runner that was installed above.
            del Executor._op_runners[DataFrameReadCSV]
def testFetch(self):
    """Exercise result reuse and ``fetch`` on a local session.

    The statements below are order-sensitive: a chunk result is overwritten
    directly in the executor's ``chunk_result`` mapping and the following runs
    are expected to observe that overwritten value.
    """
    session = new_session()

    def block(value):
        # Fresh 3x3 array filled with ``value``.
        return np.ones((3, 3)) * value

    def check_corner(result, value):
        # Compare the top-left 3x3 corner of ``result`` with ``block(value)``.
        np.testing.assert_array_equal(result[:3, :3], block(value))

    ones = mt.ones((10, 5), chunk_size=3)

    first_run = session.run(ones)
    second_run = session.run(ones)
    np.testing.assert_array_equal(first_run, second_run)

    # Overwrite the stored result of the first chunk with twos.
    executor = session._sess._executor
    first_chunk_key = get_tiled(ones).chunks[0].key
    executor.chunk_result[first_chunk_key] = block(2)
    check_corner(session.run(ones + 1), 3)

    # rerun to ensure the chunk results of ``ones`` still exist
    check_corner(session.run(ones + 1), 3)

    # An identically constructed tensor is expected to see the same
    # overwritten chunk.
    same_ones = mt.ones((10, 5), chunk_size=3)
    check_corner(session.run(same_ones), 2)
    check_corner(session.run(same_ones + 1), 3)

    # test fetch multiple tensors
    raw = np.random.rand(5, 10)
    all_ones = mt.ones((5, 10), chunk_size=5)
    from_raw = mt.tensor(raw, chunk_size=3)
    total = mt.sum(from_raw)

    session.run(all_ones, from_raw, total)

    def check_fetched(fetched):
        got_ones, got_raw, got_total = fetched
        np.testing.assert_array_equal(got_ones, np.ones((5, 10)))
        np.testing.assert_array_equal(got_raw, raw)
        np.testing.assert_almost_equal(got_total, raw.sum())

    # ``fetch`` is called both with positional tensors and with one list.
    check_fetched(session.fetch(all_ones, from_raw, total))
    check_fetched(session.fetch([all_ones, from_raw, total]))

    raw = np.random.rand(5, 10)
    summed = mt.tensor(raw, chunk_size=5).sum()

    self.assertAlmostEqual(summed.execute().fetch(), raw.sum())

    def _reject_data_source(*_):  # pragma: no cover
        raise ValueError('cannot run random again')

    try:
        # With a raising runner installed, ``fetch`` can only pass if the
        # data source is not executed again.
        register(ArrayDataSource, _reject_data_source)
        self.assertAlmostEqual(summed.fetch(), raw.sum())
    finally:
        del Executor._op_runners[ArrayDataSource]
def _inject_execute_data_source(limit, op_cls):
    """Temporarily register a row-count-checking runner for ``op_cls``.

    The registered runner calls ``op_cls.execute`` and then raises
    ``RuntimeError`` if the first output holds more than ``limit`` items.
    This generator yields exactly once with the runner installed and removes
    the ``op_cls`` entry from ``Executor._op_runners`` when it is resumed or
    closed.

    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` at the
    definition site - the decorator is not visible here; confirm.
    """
    def _checked_execute(ctx, op):
        op_cls.execute(ctx, op)

        produced = ctx[op.outputs[0].key]
        if len(produced) > limit:
            raise RuntimeError(
                'have data more than expected')  # pragma: no cover

    try:
        register(op_cls, _checked_execute)
        yield
    finally:
        del Executor._op_runners[op_cls]
def _raise_iloc(self):
    """Make every iloc operand fail while the caller's block runs.

    ``self.ctx`` is entered, then raising runners are registered for
    ``DataFrameIlocGetItem`` and ``SeriesIlocGetItem``.  The generator yields
    once; afterwards both runners are removed and ``self.ctx`` is exited.

    Cleanup is layered so that ``self.ctx.__exit__`` always runs, and the
    runners are removed with ``pop(..., None)`` so that a missing entry (for
    example because a ``register`` call failed) neither skips the remaining
    cleanup nor hides the original exception behind a ``KeyError``.

    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` at the
    definition site - the decorator is not visible here; confirm.
    """
    def _execute_iloc(*_):  # pragma: no cover
        raise ValueError('cannot run iloc')

    self.ctx.__enter__()
    try:
        try:
            register(DataFrameIlocGetItem, _execute_iloc)
            register(SeriesIlocGetItem, _execute_iloc)
            yield
        finally:
            Executor._op_runners.pop(DataFrameIlocGetItem, None)
            Executor._op_runners.pop(SeriesIlocGetItem, None)
    finally:
        # Exit with no exception info, exactly as before, so the context
        # never gets a chance to swallow an error raised by the caller.
        self.ctx.__exit__(None, None, None)
def _inject_execute_data_source_mixed(limit, usecols, op_cls):
    """Temporarily register a runner checking both columns and row count.

    The registered runner calls ``op_cls.execute`` and inspects the first
    output:

    * when ``usecols`` is a list, the output may not have more columns than
      ``usecols`` has entries;
    * otherwise the output must be a ``pd.Series``;
    * in both cases it may not hold more than ``limit`` items.

    Any violation raises ``RuntimeError``.  This generator yields once with
    the runner installed and removes the ``op_cls`` entry from
    ``Executor._op_runners`` afterwards.

    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` at the
    definition site - the decorator is not visible here; confirm.
    """
    def _checked_execute(ctx, op):  # pragma: no cover
        op_cls.execute(ctx, op)
        produced = ctx[op.outputs[0].key]

        if isinstance(usecols, list):
            if len(produced.columns) > len(usecols):
                raise RuntimeError('have data more than expected')
        elif not isinstance(produced, pd.Series):
            raise RuntimeError('Out data should be a Series')

        if len(produced) > limit:
            raise RuntimeError('have data more than expected')

    try:
        register(op_cls, _checked_execute)
        yield
    finally:
        del Executor._op_runners[op_cls]
def _inject_execute_data_source_usecols(usecols, op_cls):
    """Temporarily register a runner that checks the output's columns.

    The registered runner calls ``op_cls.execute`` and inspects the first
    output: when ``usecols`` is a list the output may not have more columns
    than ``usecols`` has entries, otherwise the output must be a
    ``pd.Series``.  Violations raise ``RuntimeError``; the column-count error
    also reports the op attributes listed in ``op._keys_`` but not in
    ``op._no_copy_attrs_``.

    This generator yields once with the runner installed and removes the
    ``op_cls`` entry from ``Executor._op_runners`` afterwards.

    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` at the
    definition site - the decorator is not visible here; confirm.
    """
    def _checked_execute(ctx, op):  # pragma: no cover
        op_cls.execute(ctx, op)
        result = ctx[op.outputs[0].key]

        if isinstance(usecols, list):
            if len(result.columns) > len(usecols):
                # Collect the op's attributes to make the failure debuggable.
                params = {
                    k: getattr(op, k, None)
                    for k in op._keys_
                    if k not in op._no_copy_attrs_
                }
                raise RuntimeError(
                    f'have data more than expected, got {result.columns}, '
                    f'result {result}, op params {params}')
        elif not isinstance(result, pd.Series):
            raise RuntimeError('Out data should be a Series, '
                               f'got {type(result)}')

    try:
        register(op_cls, _checked_execute)
        yield
    finally:
        del Executor._op_runners[op_cls]
def post_create(self):
    """Register the runner built from ``self.ctx`` for ``FakeOperand``."""
    fake_runner = fake_execution_maker(self.ctx)
    register(FakeOperand, fake_runner)
except ImportError: create_mars_cluster = None to_mars_dataframe = None persist_mars_dataframe = None run_mars_script = None run_mars_job = None list_mars_instances = None try: from . import dataframe except ImportError: dataframe = None try: from . import tensor except ImportError: tensor = None try: from mars.executor import register from mars.remote.core import RemoteFunction from .core import execute_with_odps_context from .run_script import RunScript register(RemoteFunction, execute_with_odps_context(RemoteFunction.execute)) register(RunScript, execute_with_odps_context(RunScript.execute)) except ImportError: pass INTERNAL_PATTERN = '\/[^\.]+\.[^\.-]+\.[^\.-]+\-[^\.-]+\.'
# xgboost is optional: when it (or the Mars xgboost tracker module) cannot be
# imported, ``xgboost`` is set to ``None`` and the tests below are skipped.
try:
    import xgboost
    from mars.learn.contrib.xgboost.start_tracker import StartTracker
except ImportError:
    xgboost = None

# Only when explicitly requested through the TEST_START_TRACKER environment
# variable: replace StartTracker's size estimator with a checking wrapper.
if xgboost and os.environ.get('TEST_START_TRACKER') == '1':
    def _patch_start_tracker_estimator(ctx, op: StartTracker):
        """Run the op's own size estimation and verify what it stored.

        Both entries of the estimated size must equal
        ``calc_data_size(op.outputs[0])``.
        """
        op.estimate_size(ctx, op)
        estimated_size = ctx[op.outputs[0].key]
        assert estimated_size[0] == estimated_size[1] == calc_data_size(
            op.outputs[0])

    # Keep the stock runner; only the size estimator is swapped.
    register(StartTracker, StartTracker.execute,
             _patch_start_tracker_estimator)


@unittest.skipIf(xgboost is None, 'xgboost not installed')
class Test(IntegrationTestBase):
    def setUp(self):
        """Create the random feature/target tensors, then run the base setUp."""
        n_rows = 1000
        n_columns = 10
        chunk_size = 20
        # Fixed seed so that the generated data is reproducible.
        rs = mt.random.RandomState(0)
        self.X = rs.rand(n_rows, n_columns, chunk_size=chunk_size)
        self.y = rs.rand(n_rows, chunk_size=chunk_size)
        super().setUp()

    @property
    def _extra_worker_options(self):
# Copyright 1999-2020 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from mars.dataframe.indexing.iloc import DataFrameIlocGetItem
from mars.executor import register


def _execute_iloc(*_):  # pragma: no cover
    """Runner that rejects every call, whatever arguments it receives."""
    failure = ValueError('cannot run iloc')
    raise failure


# Import-time side effect: from now on the runner registered for
# ``DataFrameIlocGetItem`` is the raising one defined above.
register(DataFrameIlocGetItem, _execute_iloc)