def testLocalClusterError(self, *_):
    with option_context({'scheduler.retry_num': 1}):
        with new_cluster(scheduler_n_process=2, worker_n_process=3,
                         shared_memory='20M', web=True) as cluster:
            # Note that it is a nested exception and we want to check the message
            # of the inner exception, thus assertRaises won't work.
            with cluster.session as session:
                t = mt.array(["1", "2", "3", "4"])
                try:
                    session.run(t + 1)
                except:  # noqa: E722
                    etype, exp, tb = sys.exc_info()
                    self.assertEqual(etype, ExecutionFailed)
                    self.assertIsInstance(exp, ExecutionFailed)
                    formatted_tb = '\n'.join(traceback.format_exception(etype, exp, tb))
                    self.assertIn('TypeError', formatted_tb)
                    self.assertIn('ufunc', formatted_tb)
                    self.assertIn('add', formatted_tb)
                    self.assertIn('signature matching types', formatted_tb)

            with new_session('http://' + cluster._web_endpoint) as session:
                t = mt.array(["1", "2", "3", "4"])
                try:
                    session.run(t + 1)
                except:  # noqa: E722
                    etype, exp, tb = sys.exc_info()
                    self.assertEqual(etype, ExecutionFailed)
                    self.assertIsInstance(exp, ExecutionFailed)
                    formatted_tb = '\n'.join(traceback.format_exception(etype, exp, tb))
                    self.assertIn('TypeError', formatted_tb)
                    self.assertIn('ufunc', formatted_tb)
                    self.assertIn('add', formatted_tb)
                    self.assertIn('signature matching types', formatted_tb)
def testCommonOperandFailover(self):
    delay_file = self.add_state_file('OP_DELAY_STATE_FILE')
    open(delay_file, 'w').close()

    terminate_file = self.add_state_file('OP_TERMINATE_STATE_FILE')

    self.start_processes(
        modules=['mars.scheduler.tests.integrated.op_delayer'], log_worker=True)

    np_a = np.random.random((100, 100))
    np_b = np.random.random((100, 100))

    a = mt.array(np_a, chunk_size=30) * 2 + 1
    b = mt.array(np_b, chunk_size=30) * 2 + 1
    c = a.dot(b) * 2 + 1
    future = self._submit_tileable(c)

    # wait for the signal file created by the op_delayer module
    while not os.path.exists(terminate_file):
        time.sleep(0.01)

    # kill one worker during execution, then remove the delay file to resume operands
    self.kill_process_tree(self.proc_workers[0])
    logger.warning('Worker %s KILLED!\n\n', self.proc_workers[0].pid)
    self.proc_workers = self.proc_workers[1:]
    os.unlink(delay_file)

    result = future.result(timeout=self.timeout)
    expected = (np_a * 2 + 1).dot(np_b * 2 + 1) * 2 + 1
    assert_allclose(result, expected)
def testFromTensor(self):
    tensor = mt.random.rand(10, 10, chunk_size=5)
    df = from_tensor(tensor)
    self.assertIsInstance(df.index_value._index_value, IndexValue.RangeIndex)
    self.assertEqual(df.op.dtypes[0], tensor.dtype,
                     'DataFrame converted from tensor has the wrong dtype')

    df.tiles()
    self.assertEqual(len(df.chunks), 4)
    self.assertIsInstance(df.chunks[0].index_value._index_value, IndexValue.RangeIndex)
    self.assertIsInstance(df.chunks[0].index_value, IndexValue)

    # test converted from 1-d tensor
    tensor2 = mt.array([1, 2, 3])
    # in fact, tensor3 is (3,1)
    tensor3 = mt.array([tensor2]).T

    df2 = from_tensor(tensor2)
    df3 = from_tensor(tensor3)
    df2.tiles()
    df3.tiles()
    np.testing.assert_equal(df2.chunks[0].index, (0, 0))
    np.testing.assert_equal(df3.chunks[0].index, (0, 0))

    # test converted from scalar
    scalar = mt.array(1)
    np.testing.assert_equal(scalar.ndim, 0)
    with self.assertRaises(TypeError):
        from_tensor(scalar)
def testFailoverDisabled(self):
    delay_file = self.add_state_file('OP_DELAY_STATE_FILE')
    open(delay_file, 'w').close()

    terminate_file = self.add_state_file('OP_TERMINATE_STATE_FILE')

    self.start_processes(
        modules=['mars.scheduler.tests.integrated.op_delayer'],
        scheduler_args=['--disable-failover'], log_worker=True)

    np_a = np.random.random((100, 100))
    np_b = np.random.random((100, 100))

    a = mt.array(np_a, chunk_size=30) * 2 + 1
    b = mt.array(np_b, chunk_size=30) * 2 + 1
    c = a.dot(b) * 2 + 1
    future = self._submit_tileable(c)

    # wait for the signal file created by the op_delayer module
    while not os.path.exists(terminate_file):
        time.sleep(0.01)

    # kill one worker during execution, then remove the delay file to resume operands
    self.kill_process_tree(self.proc_workers[0])
    logger.warning('Worker %s KILLED!\n\n', self.proc_workers[0].pid)
    self.proc_workers = self.proc_workers[1:]
    os.unlink(delay_file)

    # with failover disabled, the graph should fail with WorkerDead as the cause
    try:
        future.result(timeout=self.timeout)
    except ExecutionFailed as ex:
        self.assertIsInstance(ex.__cause__, WorkerDead)
    else:
        raise AssertionError('ExecutionFailed not raised')
def testWorkerFailOver(self):
    def kill_process_tree(proc):
        import psutil
        proc = psutil.Process(proc.pid)
        plasma_sock_dir = None
        for p in proc.children(recursive=True):
            if 'plasma' in p.name():
                socks = [conn.laddr for conn in p.connections('unix')
                         if 'plasma' in conn.laddr]
                if socks:
                    plasma_sock_dir = os.path.dirname(socks[0])
            p.kill()
        proc.kill()
        if plasma_sock_dir:
            shutil.rmtree(plasma_sock_dir, ignore_errors=True)

    delay_file = self.add_state_file('DELAY_STATE_FILE')
    open(delay_file, 'w').close()

    terminate_file = self.add_state_file('TERMINATE_STATE_FILE')

    self.start_processes(modules=['mars.scheduler.tests.op_delayer'], log_worker=True)

    session_id = uuid.uuid1()
    actor_client = new_client()
    session_ref = actor_client.actor_ref(
        self.session_manager_ref.create_session(session_id))

    np_a = np.random.random((100, 100))
    np_b = np.random.random((100, 100))

    a = mt.array(np_a, chunk_size=30) * 2 + 1
    b = mt.array(np_b, chunk_size=30) * 2 + 1
    c = a.dot(b) * 2 + 1

    graph = c.build_graph()
    targets = [c.key]
    graph_key = uuid.uuid1()
    session_ref.submit_tensor_graph(
        json.dumps(graph.to_json()), graph_key, target_tensors=targets)

    while not os.path.exists(terminate_file):
        actor_client.sleep(0.05)

    kill_process_tree(self.proc_workers[0])
    logger.warning('Worker %s KILLED!\n\n', self.proc_workers[0].pid)
    self.proc_workers = self.proc_workers[1:]
    os.unlink(delay_file)

    state = self.wait_for_termination(actor_client, session_ref, graph_key)
    self.assertEqual(state, GraphState.SUCCEEDED)

    result = session_ref.fetch_result(graph_key, c.key)
    expected = (np_a * 2 + 1).dot(np_b * 2 + 1) * 2 + 1
    assert_allclose(loads(result), expected)
def testFromTensor(self):
    tensor = mt.random.rand(10, 10, chunk_size=5)
    df = from_tensor(tensor)
    self.assertIsInstance(df.index_value._index_value, IndexValue.RangeIndex)
    self.assertEqual(df.op.dtypes[0], tensor.dtype,
                     'DataFrame converted from tensor has the wrong dtype')

    df.tiles()
    self.assertEqual(len(df.chunks), 4)
    self.assertIsInstance(df.chunks[0].index_value._index_value, IndexValue.RangeIndex)
    self.assertIsInstance(df.chunks[0].index_value, IndexValue)

    # test converted from 1-d tensor
    tensor2 = mt.array([1, 2, 3])
    # in fact, tensor3 is (3,1)
    tensor3 = mt.array([tensor2]).T

    df2 = from_tensor(tensor2)
    df3 = from_tensor(tensor3)
    df2.tiles()
    df3.tiles()
    np.testing.assert_equal(df2.chunks[0].index, (0, 0))
    np.testing.assert_equal(df3.chunks[0].index, (0, 0))

    # test converted from scalar
    scalar = mt.array(1)
    np.testing.assert_equal(scalar.ndim, 0)
    with self.assertRaises(TypeError):
        from_tensor(scalar)

    # from tensor with given index
    df = from_tensor(tensor, index=np.arange(0, 20, 2))
    df.tiles()
    pd.testing.assert_index_equal(df.chunks[0].index_value.to_pandas(),
                                  pd.Index(np.arange(0, 10, 2)))
    pd.testing.assert_index_equal(df.chunks[1].index_value.to_pandas(),
                                  pd.Index(np.arange(0, 10, 2)))
    pd.testing.assert_index_equal(df.chunks[2].index_value.to_pandas(),
                                  pd.Index(np.arange(10, 20, 2)))
    pd.testing.assert_index_equal(df.chunks[3].index_value.to_pandas(),
                                  pd.Index(np.arange(10, 20, 2)))

    # from tensor with given columns
    df = from_tensor(tensor, columns=list('abcdefghij'))
    df.tiles()
    pd.testing.assert_index_equal(df.chunks[0].columns.to_pandas(),
                                  pd.Index(['a', 'b', 'c', 'd', 'e']))
    pd.testing.assert_index_equal(df.chunks[1].columns.to_pandas(),
                                  pd.Index(['f', 'g', 'h', 'i', 'j']))
    pd.testing.assert_index_equal(df.chunks[2].columns.to_pandas(),
                                  pd.Index(['a', 'b', 'c', 'd', 'e']))
    pd.testing.assert_index_equal(df.chunks[3].columns.to_pandas(),
                                  pd.Index(['f', 'g', 'h', 'i', 'j']))
def testWorkerFailOver(self):
    def kill_process_tree(p):
        import psutil
        proc = psutil.Process(p.pid)
        for p in proc.children(recursive=True):
            p.kill()
        proc.kill()

    import tempfile
    delay_file = os.environ['DELAY_STATE_FILE'] = os.path.join(
        tempfile.gettempdir(), 'test-main-delay-%d-%d' % (os.getpid(), id(self)))
    open(delay_file, 'w').close()
    terminate_file = os.environ['TERMINATE_STATE_FILE'] = os.path.join(
        tempfile.gettempdir(), 'test-main-terminate-%d-%d' % (os.getpid(), id(self)))

    self.start_processes(modules=['mars.scheduler.tests.op_delayer'], log_worker=True)

    session_id = uuid.uuid1()
    actor_client = new_client()
    session_ref = actor_client.actor_ref(
        self.session_manager_ref.create_session(session_id))

    np_a = np.random.random((100, 100))
    np_b = np.random.random((100, 100))

    a = mt.array(np_a, chunk_size=30) * 2 + 1
    b = mt.array(np_b, chunk_size=30) * 2 + 1
    c = a.dot(b) * 2 + 1

    graph = c.build_graph()
    targets = [c.key]
    graph_key = uuid.uuid1()
    session_ref.submit_tensor_graph(
        json.dumps(graph.to_json()), graph_key, target_tensors=targets)

    while not os.path.exists(terminate_file):
        actor_client.sleep(0.05)
    os.unlink(terminate_file)

    # actor_client.sleep(1.2)
    kill_process_tree(self.proc_workers[0])
    logger.warning('Worker %s KILLED!\n\n', self.proc_workers[0].pid)
    self.proc_workers = self.proc_workers[1:]
    os.unlink(delay_file)

    state = self.wait_for_termination(actor_client, session_ref, graph_key)
    self.assertEqual(state, GraphState.SUCCEEDED)

    result = session_ref.fetch_result(graph_key, c.key)
    expected = (np_a * 2 + 1).dot(np_b * 2 + 1) * 2 + 1
    assert_allclose(loads(result), expected)
def testR_(self):
    r = mt.r_[mt.array([1, 2, 3]), 0, 0, mt.array([4, 5, 6])]

    result = self.executor.execute_tensor(r, concat=True)[0]
    expected = np.r_[np.array([1, 2, 3]), 0, 0, np.array([4, 5, 6])]
    np.testing.assert_array_equal(result, expected)

    r = mt.r_[-1:1:6j, [0] * 3, 5, 6]

    result = self.executor.execute_tensor(r, concat=True)[0]
    expected = np.r_[-1:1:6j, [0] * 3, 5, 6]
    np.testing.assert_array_equal(result, expected)

    r = mt.r_[-1:1:6j]

    result = self.executor.execute_tensor(r, concat=True)[0]
    expected = np.r_[-1:1:6j]
    np.testing.assert_array_equal(result, expected)

    raw = [[0, 1, 2], [3, 4, 5]]
    a = mt.array(raw, chunk_size=2)
    r = mt.r_['-1', a, a]

    result = self.executor.execute_tensor(r, concat=True)[0]
    expected = np.r_['-1', raw, raw]
    np.testing.assert_array_equal(result, expected)

    r = mt.r_['0,2', [1, 2, 3], [4, 5, 6]]

    result = self.executor.execute_tensor(r, concat=True)[0]
    expected = np.r_['0,2', [1, 2, 3], [4, 5, 6]]
    np.testing.assert_array_equal(result, expected)

    r = mt.r_['0,2,0', [1, 2, 3], [4, 5, 6]]

    result = self.executor.execute_tensor(r, concat=True)[0]
    expected = np.r_['0,2,0', [1, 2, 3], [4, 5, 6]]
    np.testing.assert_array_equal(result, expected)

    r = mt.r_['1,2,0', [1, 2, 3], [4, 5, 6]]

    result = self.executor.execute_tensor(r, concat=True)[0]
    expected = np.r_['1,2,0', [1, 2, 3], [4, 5, 6]]
    np.testing.assert_array_equal(result, expected)

    self.assertEqual(len(mt.r_), 0)

    with self.assertRaises(ValueError):
        _ = mt.r_[:3, 'wrong']
def test_infer_dim_3(self):
    n, p = 100, 5
    rng = np.random.RandomState(0)
    X = mt.tensor(rng.randn(n, p) * .1)
    X[:10] += mt.array([3, 4, 5, 1, 2])
    X[10:20] += mt.array([6, 0, 7, 2, -1])
    X[30:40] += 2 * mt.array([-1, 1, -1, 1, -1])
    pca = PCA(n_components=p, svd_solver='full')
    pca.fit(X)
    spect = pca.explained_variance_.fetch()
    self.assertGreater(_infer_dimension(spect, n), 2)
def testMultipleOutputTensorExecute(self, *_):
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster:
        session = cluster.session

        t = mt.random.rand(20, 5, chunk_size=5)
        r = mt.linalg.svd(t)

        res = session.run((t, ) + r, timeout=_exec_timeout)

        U, s, V = res[1:]
        np.testing.assert_allclose(res[0], U.dot(np.diag(s).dot(V)))

        raw = np.random.rand(20, 5)

        # to test fusion, the graph should be fused
        t = mt.array(raw)
        U, s, V = mt.linalg.svd(t)
        r = U.dot(mt.diag(s).dot(V))

        res = r.execute()
        np.testing.assert_allclose(raw, res)

        # test submitting only part of the svd outputs
        t = mt.array(raw)
        U, s, V = mt.linalg.svd(t)

        with new_session(cluster.endpoint) as session2:
            U_result, s_result = session2.run(U, s, timeout=_exec_timeout)
            U_expected, s_expected, _ = np.linalg.svd(raw, full_matrices=False)

            np.testing.assert_allclose(U_result, U_expected)
            np.testing.assert_allclose(s_result, s_expected)

        with new_session(cluster.endpoint) as session2:
            U_result, s_result = session2.run(U + 1, s + 1, timeout=_exec_timeout)
            U_expected, s_expected, _ = np.linalg.svd(raw, full_matrices=False)

            np.testing.assert_allclose(U_result, U_expected + 1)
            np.testing.assert_allclose(s_result, s_expected + 1)

        with new_session(cluster.endpoint) as session2:
            t = mt.array(raw)
            _, s, _ = mt.linalg.svd(t)
            del _

            s_result = session2.run(s, timeout=_exec_timeout)
            s_expected = np.linalg.svd(raw, full_matrices=False)[1]
            np.testing.assert_allclose(s_result, s_expected)
def test_r_(setup):
    r = mt.r_[mt.array([1, 2, 3]), 0, 0, mt.array([4, 5, 6])]

    result = r.execute().fetch()
    expected = np.r_[np.array([1, 2, 3]), 0, 0, np.array([4, 5, 6])]
    np.testing.assert_array_equal(result, expected)

    r = mt.r_[-1:1:6j, [0]*3, 5, 6]

    result = r.execute().fetch()
    expected = np.r_[-1:1:6j, [0]*3, 5, 6]
    np.testing.assert_array_equal(result, expected)

    r = mt.r_[-1:1:6j]

    result = r.execute().fetch()
    expected = np.r_[-1:1:6j]
    np.testing.assert_array_equal(result, expected)

    raw = [[0, 1, 2], [3, 4, 5]]
    a = mt.array(raw, chunk_size=2)
    r = mt.r_['-1', a, a]

    result = r.execute().fetch()
    expected = np.r_['-1', raw, raw]
    np.testing.assert_array_equal(result, expected)

    r = mt.r_['0,2', [1, 2, 3], [4, 5, 6]]

    result = r.execute().fetch()
    expected = np.r_['0,2', [1, 2, 3], [4, 5, 6]]
    np.testing.assert_array_equal(result, expected)

    r = mt.r_['0,2,0', [1, 2, 3], [4, 5, 6]]

    result = r.execute().fetch()
    expected = np.r_['0,2,0', [1, 2, 3], [4, 5, 6]]
    np.testing.assert_array_equal(result, expected)

    r = mt.r_['1,2,0', [1, 2, 3], [4, 5, 6]]

    result = r.execute().fetch()
    expected = np.r_['1,2,0', [1, 2, 3], [4, 5, 6]]
    np.testing.assert_array_equal(result, expected)

    assert len(mt.r_) == 0

    with pytest.raises(ValueError):
        _ = mt.r_[:3, 'wrong']
def test_infer_dim_2(self):
    # TODO: explain what this is testing
    # Or at least use explicit variable names...
    n, p = 1000, 5
    rng = np.random.RandomState(0)
    X = mt.tensor(rng.randn(n, p) * .1)
    X[:10] += mt.array([3, 4, 5, 1, 2])
    X[10:20] += mt.array([6, 0, 7, 2, -1])
    pca = PCA(n_components=p, svd_solver='full')
    pca.fit(X)
    spect = pca.explained_variance_.fetch()
    self.assertGreater(_infer_dimension(spect, n), 1)
def test_infer_dim_1(setup):
    # TODO: explain what this is testing
    # Or at least use explicit variable names...
    n, p = 1000, 5
    rng = np.random.RandomState(0)
    X = (mt.tensor(rng.randn(n, p)) * .1 +
         mt.tensor(rng.randn(n, 1)) * mt.array([3, 4, 5, 1, 2]) +
         mt.array([1, 0, 7, 4, 6]))
    pca = PCA(n_components=p, svd_solver='full')
    pca.fit(X)
    spect = pca.explained_variance_.to_numpy()
    ll = np.array([_assess_dimension(spect, k, n) for k in range(1, p)])
    assert ll[1] > ll.max() - .01 * n
def testApi(self):
    service_ep = 'http://127.0.0.1:' + self.web_port
    with new_session(service_ep) as sess:
        self.assertEqual(sess.count_workers(), 1)

        a = mt.ones((100, 100), chunk_size=30)
        b = mt.ones((100, 100), chunk_size=30)
        c = a.dot(b)
        value = sess.run(c)
        assert_array_equal(value, np.ones((100, 100)) * 100)

        value2 = sess.run(c)
        assert_array_equal(value, value2)

        # todo this behavior may change when eager mode is introduced
        with self.assertRaises(ExecutionFailed):
            sess.run(c + 1)

        va = np.random.randint(0, 10000, (100, 100))
        vb = np.random.randint(0, 10000, (100, 100))
        a = mt.array(va, chunk_size=30)
        b = mt.array(vb, chunk_size=30)
        c = a.dot(b)
        value = sess.run(c, timeout=120)
        assert_array_equal(value, va.dot(vb))

        graphs = sess.get_graph_states()

        # check web UI requests
        res = requests.get(service_ep)
        self.assertEqual(res.status_code, 200)

        res = requests.get('%s/task' % (service_ep, ))
        self.assertEqual(res.status_code, 200)

        res = requests.get('%s/scheduler' % (service_ep, ))
        self.assertEqual(res.status_code, 200)
        res = requests.get('%s/scheduler?endpoint=127.0.0.1:%s'
                           % (service_ep, self.scheduler_port))
        self.assertEqual(res.status_code, 200)

        res = requests.get('%s/worker' % (service_ep, ))
        self.assertEqual(res.status_code, 200)
        res = requests.get('%s/worker?endpoint=127.0.0.1:%s'
                           % (service_ep, self.worker_port))
        self.assertEqual(res.status_code, 200)

        res = requests.get('%s/task' % (service_ep, ))
        self.assertEqual(res.status_code, 200)
        task_id = next(iter(graphs.keys()))
        res = requests.get('%s/task?session_id=%s&task_id=%s'
                           % (service_ep, sess._session_id, task_id))
        self.assertEqual(res.status_code, 200)
def test_randomized_pca_check_projection(self):
    # Test that the projection by randomized PCA on dense data is correct
    rng = np.random.RandomState(0)
    n, p = 100, 3
    X = mt.tensor(rng.randn(n, p) * .1)
    X[:10] += mt.array([3, 4, 5])
    Xt = 0.1 * mt.tensor(rng.randn(1, p)) + mt.array([3, 4, 5])

    Yt = PCA(n_components=2, svd_solver='randomized',
             random_state=0).fit(X).transform(Xt)
    Yt /= np.sqrt((Yt**2).sum())

    assert_almost_equal(mt.abs(Yt[0][0]).to_numpy(), 1., 1)
def test_pca_check_projection(setup):
    # Test that the projection of data is correct
    rng = np.random.RandomState(0)
    n, p = 100, 3
    X = mt.tensor(rng.randn(n, p) * .1)
    X[:10] += mt.array([3, 4, 5])
    Xt = 0.1 * mt.tensor(rng.randn(1, p)) + mt.array([3, 4, 5])

    for solver in solver_list:
        Yt = PCA(n_components=2, svd_solver=solver).fit(X).transform(Xt)
        Yt /= mt.sqrt((Yt ** 2).sum())

        assert_almost_equal(mt.abs(Yt[0][0]).to_numpy(), 1., 1)
def testKMeansInit(self):
    # non centered, sparse centers to check the
    centers = np.array([
        [0.0, 5.0, 0.0, 0.0, 0.0],
        [1.0, 1.0, 4.0, 0.0, 0.0],
        [1.0, 0.0, 0.0, 5.0, 1.0],
    ])
    n_samples = 100
    n_clusters, n_features = centers.shape
    X, true_labels = make_blobs(n_samples=n_samples, centers=centers,
                                cluster_std=1., random_state=42)
    X_csr = sp.csr_matrix(X)

    for data in [X, X_csr]:
        for init in ['random', 'k-means++', 'k-means||', centers.copy()]:
            data = mt.tensor(data, chunk_size=50)
            km = KMeans(init=init, n_clusters=n_clusters, random_state=42,
                        n_init=1, algorithm='elkan')
            km.fit(data)
            self._check_fitted_model(km, n_clusters, n_features, true_labels)

    X = mt.array([[1, 2], [1, 4], [1, 0],
                  [10, 2], [10, 4], [10, 0]])
    kmeans = KMeans(n_clusters=2, random_state=0, n_init=1,
                    init='k-means||').fit(X)
    self.assertEqual(sorted(kmeans.cluster_centers_.fetch().tolist()),
                     sorted([[10., 2.], [1., 2.]]))
def testLearnInLocalCluster(self, *_):
    from mars.learn.cluster import KMeans
    from mars.learn.neighbors import NearestNeighbors
    from sklearn.cluster import KMeans as SK_KMEANS
    from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors

    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     shared_memory='20M') as cluster:
        rs = np.random.RandomState(0)
        raw_X = rs.rand(10, 5)
        raw_Y = rs.rand(8, 5)

        X = mt.tensor(raw_X, chunk_size=7)
        Y = mt.tensor(raw_Y, chunk_size=(5, 3))
        nn = NearestNeighbors(n_neighbors=3)
        nn.fit(X)
        ret = nn.kneighbors(Y, session=cluster.session)

        snn = SkNearestNeighbors(n_neighbors=3)
        snn.fit(raw_X)
        expected = snn.kneighbors(raw_Y)

        result = [r.fetch() for r in ret]
        np.testing.assert_almost_equal(result[0], expected[0])
        np.testing.assert_almost_equal(result[1], expected[1])

        raw = np.array([[1, 2], [1, 4], [1, 0],
                        [10, 2], [10, 4], [10, 0]])
        X = mt.array(raw)
        kmeans = KMeans(n_clusters=2, random_state=0, init='k-means++').fit(X)
        sk_km_elkan = SK_KMEANS(n_clusters=2, random_state=0,
                                init='k-means++').fit(raw)
        np.testing.assert_allclose(kmeans.cluster_centers_,
                                   sk_km_elkan.cluster_centers_)
def testSendTargets(self):
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address, distributor=WorkerDistributor(2)) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
        pool.create_actor(CpuCalcActor)

        import mars.tensor as mt
        arr = mt.ones((4,), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)
        result_key = result_tensor.chunks[0].key

        pool.create_actor(MockSenderActor, mock_data + np.ones((4,)), 'out', uid='w:mock_sender')
        with self.run_actor_test(pool) as test_actor:
            def _validate(_):
                data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                assert_array_equal(data, mock_data + np.ones((4,)))

            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())
            execution_ref.enqueue_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None,
                send_addresses={result_key: (pool_address,)}, _promise=True) \
                .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                .then(_validate) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

        self.get_result()
def testPrepareSpilled(self):
    from mars.worker.spill import write_spill_file

    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_worker_prep_spilled-')

    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
        pool.create_actor(SpillActor)
        pool.create_actor(CpuCalcActor)
        cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
        chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
        pool.actor_ref(ChunkHolderActor.default_uid())

        import mars.tensor as mt
        from mars.tensor.fetch import TensorFetch
        arr = mt.ones((4,), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)

        modified_chunk = arr_add.chunks[0]
        arr_add.chunks[0]._op = TensorFetch(
            dtype=modified_chunk.dtype,
            _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
            _key=modified_chunk.op.key)

        # test meta missing
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                        dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

        with self.assertRaises(DependencyMissing):
            self.get_result()

        chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                         shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
        write_spill_file(modified_chunk.key, mock_data)

        # test read from spilled file
        with self.run_actor_test(pool) as test_actor:
            def _validate(_):
                data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                assert_array_equal(data, mock_data + np.ones((4,)))

            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                        dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                .then(_validate) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

        self.get_result()
def testDataFrameTensorConvert(self):
    # test from_tensor(), from_dataframe(), to_tensor(), to_dataframe()
    sess = new_session()
    tensor = mt.ones((2, 2))
    df = tensor.to_dataframe()
    np.testing.assert_equal(sess.run(df), np.ones((2, 2)))
    tensor2 = mt.from_dataframe(df)
    np.testing.assert_equal(sess.run(tensor2), np.ones((2, 2)))

    tensor3 = tensor2.from_dataframe(df)
    np.testing.assert_equal(sess.run(tensor3), np.ones((2, 2)))

    tensor4 = df.to_tensor()
    np.testing.assert_equal(sess.run(tensor4), np.ones((2, 2)))

    df = md.dataframe_from_tensor(tensor3)
    np.testing.assert_equal(sess.run(df).values, np.ones((2, 2)))

    df = df.from_tensor(tensor3)
    np.testing.assert_equal(sess.run(df).values, np.ones((2, 2)))

    # test error raised for a tensor with more than 2 dimensions
    with self.assertRaises(TypeError):
        md.dataframe_from_tensor(mt.ones((1, 2, 3)))

    # test converting a 1-d tensor
    tensor = md.dataframe_from_tensor(mt.array([1, 2, 3]))
    np.testing.assert_equal(sess.run(tensor), np.array([1, 2, 3]).reshape(3, 1))
def test_pca_validation(self):
    for solver in self.solver_list:
        # Ensures that solver-specific extreme inputs for the n_components
        # parameter raise errors
        X = mt.array([[0, 1, 0], [1, 0, 0]])
        smallest_d = 2  # The smallest dimension
        lower_limit = {'randomized': 1, 'full': 0, 'auto': 0}

        # We conduct the same test on X.T so that it is invariant to axis.
        for data in [X, X.T]:
            for n_components in [-1, 3]:
                if solver == 'auto':
                    solver_reported = 'full'
                else:
                    solver_reported = solver

                assert_raises_regex(ValueError,
                                    "n_components={}L? must be between "
                                    r"{}L? and min\(n_samples, n_features\)="
                                    "{}L? with svd_solver=\'{}\'"
                                    .format(n_components,
                                            lower_limit[solver],
                                            smallest_d,
                                            solver_reported),
                                    PCA(n_components, svd_solver=solver).fit, data)

        n_components = 1.0
        type_ncom = type(n_components)
        assert_raise_message(ValueError,
                             "n_components={} must be of type int "
                             "when greater than or equal to 1, was of type={}"
                             .format(n_components, type_ncom),
                             PCA(n_components, svd_solver=solver).fit, data)
def testFromTensorExecution(self):
    tensor = mt.random.rand(10, 10, chunk_size=5)
    df = from_tensor(tensor)
    tensor_res = self.executor.execute_tensor(tensor, concat=True)[0]
    pdf_expected = pd.DataFrame(tensor_res)
    df_result = self.executor.execute_dataframe(df, concat=True)[0]
    pd.testing.assert_index_equal(df_result.index, pd.RangeIndex(0, 10))
    pd.testing.assert_index_equal(df_result.columns, pd.RangeIndex(0, 10))
    pd.testing.assert_frame_equal(df_result, pdf_expected)

    # test converted with specified index_value and columns
    tensor2 = mt.random.rand(2, 2, chunk_size=1)
    df2 = from_tensor(tensor2, index=pd.Index(['a', 'b']), columns=pd.Index([3, 4]))
    df_result = self.executor.execute_dataframe(df2, concat=True)[0]
    pd.testing.assert_index_equal(df_result.index, pd.Index(['a', 'b']))
    pd.testing.assert_index_equal(df_result.columns, pd.Index([3, 4]))

    # test converted from 1-d tensor
    tensor3 = mt.array([1, 2, 3])
    df3 = from_tensor(tensor3)
    result3 = self.executor.execute_dataframe(df3, concat=True)[0]
    pdf_expected = pd.DataFrame(np.array([1, 2, 3]))
    pd.testing.assert_frame_equal(pdf_expected, result3)
def testC_(self):
    r = mt.c_[mt.array([1, 2, 3]), mt.array([4, 5, 6])]

    result = self.executor.execute_tensor(r, concat=True)[0]
    expected = np.c_[np.array([1, 2, 3]), np.array([4, 5, 6])]
    np.testing.assert_array_equal(result, expected)

    r = mt.c_[mt.array([[1, 2, 3]]), 0, 0, mt.array([[4, 5, 6]])]

    result = self.executor.execute_tensor(r, concat=True)[0]
    expected = np.c_[np.array([[1, 2, 3]]), 0, 0, np.array([[4, 5, 6]])]
    np.testing.assert_array_equal(result, expected)

    r = mt.c_[:3, 1:4]
    result = self.executor.execute_tensor(r, concat=True)[0]
    expected = np.c_[:3, 1:4]
    np.testing.assert_array_equal(result, expected)
def test_pca_score2(setup):
    # Test that probabilistic PCA correctly separates different datasets
    n, p = 100, 3
    rng = np.random.RandomState(0)
    X = mt.tensor(rng.randn(n, p) * .1) + mt.array([3, 4, 5])

    for solver in solver_list:
        pca = PCA(n_components=2, svd_solver=solver)
        pca.fit(X)

        ll1 = pca.score(X)
        ll2 = pca.score(mt.tensor(rng.randn(n, p) * .2) + mt.array([3, 4, 5]))
        assert ll1.fetch() > ll2.fetch()

        # Test that it gives different scores if whiten=True
        pca = PCA(n_components=2, whiten=True, svd_solver=solver)
        pca.fit(X)
        ll2 = pca.score(X)
        assert ll1.fetch() > ll2.fetch()
def test_c_(setup):
    r = mt.c_[mt.array([1, 2, 3]), mt.array([4, 5, 6])]

    result = r.execute().fetch()
    expected = np.c_[np.array([1, 2, 3]), np.array([4, 5, 6])]
    np.testing.assert_array_equal(result, expected)

    r = mt.c_[mt.array([[1, 2, 3]]), 0, 0, mt.array([[4, 5, 6]])]

    result = r.execute().fetch()
    expected = np.c_[np.array([[1, 2, 3]]), 0, 0, np.array([[4, 5, 6]])]
    np.testing.assert_array_equal(result, expected)

    r = mt.c_[:3, 1:4]
    result = r.execute().fetch()
    expected = np.c_[:3, 1:4]
    np.testing.assert_array_equal(result, expected)
def testApi(self):
    service_ep = 'http://127.0.0.1:' + self.web_port
    client = MarsApiClient(service_ep)
    self.assertEqual(client.count_workers(), 1)
    with client.create_session() as sess:
        a = mt.ones((100, 100), chunks=30)
        b = mt.ones((100, 100), chunks=30)
        c = a.dot(b)
        value = sess.run(c)
        assert_array_equal(value[0], np.ones((100, 100)) * 100)

        va = np.random.randint(0, 10000, (100, 100))
        vb = np.random.randint(0, 10000, (100, 100))
        a = mt.array(va, chunks=30)
        b = mt.array(vb, chunks=30)
        c = a.dot(b)
        value = sess.run(c, timeout=120)
        assert_array_equal(value[0], va.dot(vb))
def test_pca_dim(self):
    # Check automated dimensionality setting
    rng = np.random.RandomState(0)
    n, p = 100, 5
    X = mt.tensor(rng.randn(n, p) * .1)
    X[:10] += mt.array([3, 4, 5, 1, 2])
    pca = PCA(n_components='mle', svd_solver='full').fit(X)
    self.assertEqual(pca.n_components, 'mle')
    self.assertEqual(pca.n_components_, 1)
def testMultipleAdd(self):
    import numpy as np
    import operator
    from mars.compat import reduce

    base_arr = np.random.random((100, 100))
    a = mt.array(base_arr)
    sumv = reduce(operator.add, [a[:10, :10] for _ in range(10)])
    self.run_expr_suite(sumv)
def testPrepareQuota(self, *_):
    pinned = [True]

    def _mock_pin(_graph_key, chunk_keys):
        # pin_chunks keeps failing while pinned[0] is True; a background
        # thread flips the flag after one second so execution can proceed
        from mars.errors import PinChunkFailed
        if pinned[0]:
            raise PinChunkFailed
        return chunk_keys

    ChunkHolderActor.pin_chunks.side_effect = _mock_pin

    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
        pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender')
        pool.create_actor(CpuCalcActor)
        cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
        chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)

        import mars.tensor as mt
        from mars.tensor.fetch import TensorFetch
        arr = mt.ones((4,), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)

        modified_chunk = arr_add.chunks[0]
        arr_add.chunks[0]._op = TensorFetch(
            dtype=modified_chunk.dtype,
            _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
            _key=modified_chunk.op.key)
        chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                         shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

            start_time = time.time()

            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

            execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                .then(lambda *_: test_actor.set_result(time.time())) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

            def _delay_fun():
                time.sleep(1)
                pinned[0] = False

            threading.Thread(target=_delay_fun).start()

        finish_time = self.get_result()
        self.assertGreaterEqual(finish_time, start_time + 1)