コード例 #1
0
ファイル: test_cluster.py プロジェクト: melodylail/mars
    def testLocalClusterError(self, *_):
        """Check that a failing run raises ``ExecutionFailed`` with the inner error.

        ``mt.array([...strings...]) + 1`` is run once through the cluster's own
        session and once through a session opened on the web endpoint.  In both
        cases the formatted traceback must mention the underlying ``TypeError``.
        """
        def assert_run_fails(session):
            # The failure is a nested exception and the message of the inner
            # exception has to be inspected, thus assertRaises won't work.
            t = mt.array(["1", "2", "3", "4"])
            try:
                session.run(t + 1)
            except Exception:
                etype, exp, tb = sys.exc_info()
                self.assertEqual(etype, ExecutionFailed)
                self.assertIsInstance(exp, ExecutionFailed)
                formatted_tb = '\n'.join(traceback.format_exception(etype, exp, tb))
                self.assertIn('TypeError', formatted_tb)
                self.assertIn('ufunc', formatted_tb)
                self.assertIn('add', formatted_tb)
                self.assertIn('signature matching types', formatted_tb)
            else:
                # Without this branch the test passed silently when the run
                # did not raise at all.
                self.fail('ExecutionFailed not raised')

        with option_context({'scheduler.retry_num': 1}):
            with new_cluster(scheduler_n_process=2, worker_n_process=3,
                             shared_memory='20M', web=True) as cluster:
                with cluster.session as session:
                    assert_run_fails(session)

                with new_session('http://' + cluster._web_endpoint) as session:
                    assert_run_fails(session)
コード例 #2
0
    def testCommonOperandFailover(self):
        """Kill one worker while a graph is running and expect the correct result."""
        # The delay state file exists from the start and is removed after the kill.
        delay_path = self.add_state_file('OP_DELAY_STATE_FILE')
        open(delay_path, 'w').close()

        terminate_path = self.add_state_file('OP_TERMINATE_STATE_FILE')

        self.start_processes(
            modules=['mars.scheduler.tests.integrated.op_delayer'],
            log_worker=True)

        raw_a = np.random.random((100, 100))
        raw_b = np.random.random((100, 100))

        lhs = mt.array(raw_a, chunk_size=30) * 2 + 1
        rhs = mt.array(raw_b, chunk_size=30) * 2 + 1
        product = lhs.dot(rhs) * 2 + 1

        future = self._submit_tileable(product)

        # Poll until the terminate state file shows up.
        while True:
            if os.path.exists(terminate_path):
                break
            time.sleep(0.01)

        self.kill_process_tree(self.proc_workers[0])
        logger.warning('Worker %s KILLED!\n\n', self.proc_workers[0].pid)
        self.proc_workers = self.proc_workers[1:]
        os.unlink(delay_path)

        outcome = future.result(timeout=self.timeout)
        reference = (raw_a * 2 + 1).dot(raw_b * 2 + 1) * 2 + 1
        assert_allclose(outcome, reference)
コード例 #3
0
    def testFromTensor(self):
        """Exercise ``from_tensor`` on 2-d, 1-d, column-vector and scalar tensors."""
        source = mt.random.rand(10, 10, chunk_size=5)
        frame = from_tensor(source)
        self.assertIsInstance(frame.index_value._index_value,
                              IndexValue.RangeIndex)
        self.assertEqual(
            frame.op.dtypes[0], source.dtype,
            'DataFrame converted from tensor have the wrong dtype')

        # after tiling, four chunks are expected
        frame.tiles()
        self.assertEqual(len(frame.chunks), 4)
        head_index_value = frame.chunks[0].index_value
        self.assertIsInstance(head_index_value._index_value,
                              IndexValue.RangeIndex)
        self.assertIsInstance(head_index_value, IndexValue)

        # test converted from 1-d tensor
        vector = mt.array([1, 2, 3])
        # in fact, the wrapped and transposed vector is (3,1)
        column = mt.array([vector]).T

        converted = [from_tensor(vector), from_tensor(column)]
        for item in converted:
            item.tiles()
        for item in converted:
            np.testing.assert_equal(item.chunks[0].index, (0, 0))

        # test converted from scalar
        zero_dim = mt.array(1)
        np.testing.assert_equal(zero_dim.ndim, 0)
        with self.assertRaises(TypeError):
            from_tensor(zero_dim)
コード例 #4
0
ファイル: test_worker_failover.py プロジェクト: yyaaa1/mars
    def testFailoverDisabled(self):
        """With ``--disable-failover``, killing a worker must fail the run with ``WorkerDead``."""
        delay_path = self.add_state_file('OP_DELAY_STATE_FILE')
        open(delay_path, 'w').close()

        terminate_path = self.add_state_file('OP_TERMINATE_STATE_FILE')

        self.start_processes(
            modules=['mars.scheduler.tests.integrated.op_delayer'],
            scheduler_args=['--disable-failover'],
            log_worker=True)

        raw_a = np.random.random((100, 100))
        raw_b = np.random.random((100, 100))

        lhs = mt.array(raw_a, chunk_size=30) * 2 + 1
        rhs = mt.array(raw_b, chunk_size=30) * 2 + 1
        product = lhs.dot(rhs) * 2 + 1

        future = self._submit_tileable(product)

        # Poll until the terminate state file shows up.
        while True:
            if os.path.exists(terminate_path):
                break
            time.sleep(0.01)

        self.kill_process_tree(self.proc_workers[0])
        logger.warning('Worker %s KILLED!\n\n', self.proc_workers[0].pid)
        self.proc_workers = self.proc_workers[1:]
        os.unlink(delay_path)

        # Capture the failure first, then inspect its cause outside the handler.
        failure = None
        try:
            future.result(timeout=self.timeout)
        except ExecutionFailed as ex:
            failure = ex
        if failure is None:
            raise AssertionError('ExecutionFailed not raised')
        self.assertIsInstance(failure.__cause__, WorkerDead)
コード例 #5
0
ファイル: test_main.py プロジェクト: kevintsok/mars
    def testWorkerFailOver(self):
        """Kill the first worker mid-run and expect the graph to succeed with correct data."""
        def kill_process_tree(proc):
            # Kill the process and all of its descendants; if a plasma child
            # exposes a unix socket, remove that socket's directory afterwards.
            import psutil
            root = psutil.Process(proc.pid)
            sock_dir = None
            for child in root.children(recursive=True):
                if 'plasma' in child.name():
                    plasma_socks = [c.laddr for c in child.connections('unix')
                                    if 'plasma' in c.laddr]
                    if plasma_socks:
                        sock_dir = os.path.dirname(plasma_socks[0])
                child.kill()
            root.kill()
            if sock_dir:
                shutil.rmtree(sock_dir, ignore_errors=True)

        delay_path = self.add_state_file('DELAY_STATE_FILE')
        open(delay_path, 'w').close()

        terminate_path = self.add_state_file('TERMINATE_STATE_FILE')

        self.start_processes(modules=['mars.scheduler.tests.op_delayer'],
                             log_worker=True)

        session_id = uuid.uuid1()
        actor_client = new_client()
        session_ref = actor_client.actor_ref(
            self.session_manager_ref.create_session(session_id))

        raw_a = np.random.random((100, 100))
        raw_b = np.random.random((100, 100))

        lhs = mt.array(raw_a, chunk_size=30) * 2 + 1
        rhs = mt.array(raw_b, chunk_size=30) * 2 + 1
        product = lhs.dot(rhs) * 2 + 1
        graph = product.build_graph()
        graph_key = uuid.uuid1()
        session_ref.submit_tensor_graph(json.dumps(graph.to_json()),
                                        graph_key,
                                        target_tensors=[product.key])

        # Poll until the terminate state file shows up.
        while True:
            if os.path.exists(terminate_path):
                break
            actor_client.sleep(0.05)

        victim = self.proc_workers[0]
        kill_process_tree(victim)
        logger.warning('Worker %s KILLED!\n\n', victim.pid)
        self.proc_workers = self.proc_workers[1:]
        os.unlink(delay_path)

        final_state = self.wait_for_termination(actor_client, session_ref, graph_key)
        self.assertEqual(final_state, GraphState.SUCCEEDED)

        fetched = session_ref.fetch_result(graph_key, product.key)
        reference = (raw_a * 2 + 1).dot(raw_b * 2 + 1) * 2 + 1
        assert_allclose(loads(fetched), reference)
コード例 #6
0
    def testFromTensor(self):
        """Exercise ``from_tensor``: defaults, low-rank inputs, custom index and columns."""
        source = mt.random.rand(10, 10, chunk_size=5)
        frame = from_tensor(source)
        self.assertIsInstance(frame.index_value._index_value,
                              IndexValue.RangeIndex)
        self.assertEqual(
            frame.op.dtypes[0], source.dtype,
            'DataFrame converted from tensor have the wrong dtype')

        # after tiling, four chunks are expected
        frame.tiles()
        self.assertEqual(len(frame.chunks), 4)
        head_index_value = frame.chunks[0].index_value
        self.assertIsInstance(head_index_value._index_value,
                              IndexValue.RangeIndex)
        self.assertIsInstance(head_index_value, IndexValue)

        # test converted from 1-d tensor
        vector = mt.array([1, 2, 3])
        # in fact, the wrapped and transposed vector is (3,1)
        column = mt.array([vector]).T

        converted = [from_tensor(vector), from_tensor(column)]
        for item in converted:
            item.tiles()
        for item in converted:
            np.testing.assert_equal(item.chunks[0].index, (0, 0))

        # test converted from scalar
        zero_dim = mt.array(1)
        np.testing.assert_equal(zero_dim.ndim, 0)
        with self.assertRaises(TypeError):
            from_tensor(zero_dim)

        # from tensor with given index: expected (start, stop) per chunk
        frame = from_tensor(source, index=np.arange(0, 20, 2))
        frame.tiles()
        index_bounds = [(0, 10), (0, 10), (10, 20), (10, 20)]
        for pos, (start, stop) in enumerate(index_bounds):
            pd.testing.assert_index_equal(
                frame.chunks[pos].index_value.to_pandas(),
                pd.Index(np.arange(start, stop, 2)))

        # from tensor with given columns: expected labels per chunk
        frame = from_tensor(source, columns=list('abcdefghij'))
        frame.tiles()
        column_labels = ['abcde', 'fghij', 'abcde', 'fghij']
        for pos, letters in enumerate(column_labels):
            pd.testing.assert_index_equal(
                frame.chunks[pos].columns.to_pandas(),
                pd.Index(list(letters)))
コード例 #7
0
    def testWorkerFailOver(self):
        """Kill the first worker mid-run and expect the graph to succeed with correct data."""
        def kill_process_tree(p):
            # Kill every descendant first, then the process itself.
            import psutil
            root = psutil.Process(p.pid)
            for child in root.children(recursive=True):
                child.kill()
            root.kill()

        import tempfile
        # Both state file paths are published through environment variables.
        unique = (os.getpid(), id(self))
        delay_path = os.path.join(
            tempfile.gettempdir(), 'test-main-delay-%d-%d' % unique)
        os.environ['DELAY_STATE_FILE'] = delay_path
        open(delay_path, 'w').close()

        terminate_path = os.path.join(
            tempfile.gettempdir(), 'test-main-terminate-%d-%d' % unique)
        os.environ['TERMINATE_STATE_FILE'] = terminate_path

        self.start_processes(modules=['mars.scheduler.tests.op_delayer'],
                             log_worker=True)

        session_id = uuid.uuid1()
        actor_client = new_client()
        session_ref = actor_client.actor_ref(
            self.session_manager_ref.create_session(session_id))

        raw_a = np.random.random((100, 100))
        raw_b = np.random.random((100, 100))

        lhs = mt.array(raw_a, chunk_size=30) * 2 + 1
        rhs = mt.array(raw_b, chunk_size=30) * 2 + 1
        product = lhs.dot(rhs) * 2 + 1
        graph = product.build_graph()
        graph_key = uuid.uuid1()
        session_ref.submit_tensor_graph(json.dumps(graph.to_json()),
                                        graph_key,
                                        target_tensors=[product.key])

        # Poll until the terminate state file shows up, then remove it.
        while True:
            if os.path.exists(terminate_path):
                break
            actor_client.sleep(0.05)
        os.unlink(terminate_path)

        victim = self.proc_workers[0]
        kill_process_tree(victim)
        logger.warning('Worker %s KILLED!\n\n', victim.pid)
        self.proc_workers = self.proc_workers[1:]
        os.unlink(delay_path)

        final_state = self.wait_for_termination(actor_client, session_ref, graph_key)
        self.assertEqual(final_state, GraphState.SUCCEEDED)

        fetched = session_ref.fetch_result(graph_key, product.key)
        reference = (raw_a * 2 + 1).dot(raw_b * 2 + 1) * 2 + 1
        assert_allclose(loads(fetched), reference)
コード例 #8
0
    def testR_(self):
        """Compare ``mt.r_`` against ``np.r_`` for a range of index expressions."""
        def check(built, reference):
            # Execute the Mars tensor and compare it with the NumPy result.
            actual = self.executor.execute_tensor(built, concat=True)[0]
            np.testing.assert_array_equal(actual, reference)

        check(mt.r_[mt.array([1, 2, 3]), 0, 0, mt.array([4, 5, 6])],
              np.r_[np.array([1, 2, 3]), 0, 0, np.array([4, 5, 6])])

        check(mt.r_[-1:1:6j, [0] * 3, 5, 6],
              np.r_[-1:1:6j, [0] * 3, 5, 6])

        check(mt.r_[-1:1:6j], np.r_[-1:1:6j])

        raw = [[0, 1, 2], [3, 4, 5]]
        chunked = mt.array(raw, chunk_size=2)
        check(mt.r_['-1', chunked, chunked], np.r_['-1', raw, raw])

        # string directives as the first item
        check(mt.r_['0,2', [1, 2, 3], [4, 5, 6]],
              np.r_['0,2', [1, 2, 3], [4, 5, 6]])

        check(mt.r_['0,2,0', [1, 2, 3], [4, 5, 6]],
              np.r_['0,2,0', [1, 2, 3], [4, 5, 6]])

        check(mt.r_['1,2,0', [1, 2, 3], [4, 5, 6]],
              np.r_['1,2,0', [1, 2, 3], [4, 5, 6]])

        self.assertEqual(len(mt.r_), 0)

        with self.assertRaises(ValueError):
            _ = mt.r_[:3, 'wrong']
コード例 #9
0
 def test_infer_dim_3(self):
     """Expect ``_infer_dimension`` to exceed 2 when three row ranges are shifted."""
     n_samples, n_features = 100, 5
     rng = np.random.RandomState(0)
     X = mt.tensor(rng.randn(n_samples, n_features) * .1)
     # shift three row ranges by different offsets
     shifts = [
         (slice(None, 10), mt.array([3, 4, 5, 1, 2])),
         (slice(10, 20), mt.array([6, 0, 7, 2, -1])),
         (slice(30, 40), 2 * mt.array([-1, 1, -1, 1, -1])),
     ]
     for rows, offset in shifts:
         X[rows] += offset
     model = PCA(n_components=n_features, svd_solver='full')
     model.fit(X)
     spectrum = model.explained_variance_.fetch()
     self.assertGreater(_infer_dimension(spectrum, n_samples), 2)
コード例 #10
0
ファイル: test_cluster.py プロジェクト: ueshin/mars
    def testMultipleOutputTensorExecute(self, *_):
        """Run SVD, an operand with several outputs, on a local cluster in several ways."""
        with new_cluster(scheduler_n_process=2,
                         worker_n_process=2,
                         shared_memory='20M') as cluster:
            session = cluster.session

            # fetch the input together with all three SVD outputs
            t = mt.random.rand(20, 5, chunk_size=5)
            svd_outputs = mt.linalg.svd(t)
            res = session.run((t, ) + svd_outputs, timeout=_exec_timeout)

            U, s, V = res[1:]
            np.testing.assert_allclose(res[0], U.dot(np.diag(s).dot(V)))

            raw = np.random.rand(20, 5)

            # to test the fuse, the graph should be fused
            t = mt.array(raw)
            U, s, V = mt.linalg.svd(t)
            rebuilt = U.dot(mt.diag(s).dot(V))
            np.testing.assert_allclose(raw, rebuilt.execute())

            # test submit part of svd outputs
            t = mt.array(raw)
            U, s, V = mt.linalg.svd(t)

            # NumPy reference values, shared by the sessions below
            U_expected, s_expected = np.linalg.svd(raw, full_matrices=False)[:2]

            with new_session(cluster.endpoint) as session2:
                U_result, s_result = session2.run(U, s, timeout=_exec_timeout)
                np.testing.assert_allclose(U_result, U_expected)
                np.testing.assert_allclose(s_result, s_expected)

            with new_session(cluster.endpoint) as session2:
                U_result, s_result = session2.run(U + 1,
                                                  s + 1,
                                                  timeout=_exec_timeout)
                np.testing.assert_allclose(U_result, U_expected + 1)
                np.testing.assert_allclose(s_result, s_expected + 1)

            with new_session(cluster.endpoint) as session2:
                t = mt.array(raw)
                # keep only the singular values; the placeholder name is dropped
                _, s, _ = mt.linalg.svd(t)
                del _

                s_result = session2.run(s, timeout=_exec_timeout)
                np.testing.assert_allclose(s_result, s_expected)
コード例 #11
0
def test_r_(setup):
    """Compare ``mt.r_`` against ``np.r_`` for a range of index expressions."""
    def check(built, reference):
        # Execute the Mars tensor and compare it with the NumPy result.
        np.testing.assert_array_equal(built.execute().fetch(), reference)

    check(mt.r_[mt.array([1, 2, 3]), 0, 0, mt.array([4, 5, 6])],
          np.r_[np.array([1, 2, 3]), 0, 0, np.array([4, 5, 6])])

    check(mt.r_[-1:1:6j, [0]*3, 5, 6],
          np.r_[-1:1:6j, [0]*3, 5, 6])

    check(mt.r_[-1:1:6j], np.r_[-1:1:6j])

    raw = [[0, 1, 2], [3, 4, 5]]
    chunked = mt.array(raw, chunk_size=2)
    check(mt.r_['-1', chunked, chunked], np.r_['-1', raw, raw])

    # string directives as the first item
    check(mt.r_['0,2', [1, 2, 3], [4, 5, 6]],
          np.r_['0,2', [1, 2, 3], [4, 5, 6]])

    check(mt.r_['0,2,0', [1, 2, 3], [4, 5, 6]],
          np.r_['0,2,0', [1, 2, 3], [4, 5, 6]])

    check(mt.r_['1,2,0', [1, 2, 3], [4, 5, 6]],
          np.r_['1,2,0', [1, 2, 3], [4, 5, 6]])

    assert len(mt.r_) == 0

    with pytest.raises(ValueError):
        _ = mt.r_[:3, 'wrong']
コード例 #12
0
 def test_infer_dim_2(self):
     """Expect ``_infer_dimension`` to exceed 1 when two row ranges are shifted."""
     # TODO: explain what this is testing
     n_samples, n_features = 1000, 5
     rng = np.random.RandomState(0)
     X = mt.tensor(rng.randn(n_samples, n_features) * .1)
     # shift two row ranges by different offsets
     shifts = [
         (slice(None, 10), mt.array([3, 4, 5, 1, 2])),
         (slice(10, 20), mt.array([6, 0, 7, 2, -1])),
     ]
     for rows, offset in shifts:
         X[rows] += offset
     model = PCA(n_components=n_features, svd_solver='full')
     model.fit(X)
     spectrum = model.explained_variance_.fetch()
     self.assertGreater(_infer_dimension(spectrum, n_samples), 1)
コード例 #13
0
ファイル: test_pca.py プロジェクト: haijohn/mars
def test_infer_dim_1(setup):
    """Check ``_assess_dimension`` scores for ranks 1..p-1 on a synthetic data set."""
    # TODO: explain what this is testing
    n_samples, n_features = 1000, 5
    rng = np.random.RandomState(0)
    # small noise plus one random column scaled per feature, then a constant offset
    noise = mt.tensor(rng.randn(n_samples, n_features)) * .1
    signal = mt.tensor(rng.randn(n_samples, 1)) * mt.array([3, 4, 5, 1, 2])
    X = noise + signal + mt.array([1, 0, 7, 4, 6])
    model = PCA(n_components=n_features, svd_solver='full')
    model.fit(X)
    spectrum = model.explained_variance_.to_numpy()
    scores = np.array([_assess_dimension(spectrum, rank, n_samples)
                       for rank in range(1, n_features)])
    # the second score must lie within 1% of n_samples of the best score
    assert scores[1] > scores.max() - .01 * n_samples
コード例 #14
0
ファイル: test_api.py プロジェクト: zwz1437/mars
    def testApi(self):
        """Run tensors through a web session, then probe the web UI pages."""
        service_ep = 'http://127.0.0.1:' + self.web_port

        def assert_page_ok(url):
            # Every probed page is expected to answer with HTTP 200.
            res = requests.get(url)
            self.assertEqual(res.status_code, 200)

        with new_session(service_ep) as sess:
            self.assertEqual(sess.count_workers(), 1)

            ones_a = mt.ones((100, 100), chunk_size=30)
            ones_b = mt.ones((100, 100), chunk_size=30)
            ones_product = ones_a.dot(ones_b)
            first_run = sess.run(ones_product)
            assert_array_equal(first_run, np.ones((100, 100)) * 100)

            # running the very same tensor again must give the same value
            second_run = sess.run(ones_product)
            assert_array_equal(first_run, second_run)

            # todo this behavior may change when eager mode is introduced
            with self.assertRaises(ExecutionFailed):
                sess.run(ones_product + 1)

            raw_a = np.random.randint(0, 10000, (100, 100))
            raw_b = np.random.randint(0, 10000, (100, 100))
            int_a = mt.array(raw_a, chunk_size=30)
            int_b = mt.array(raw_b, chunk_size=30)
            int_result = sess.run(int_a.dot(int_b), timeout=120)
            assert_array_equal(int_result, raw_a.dot(raw_b))

            graphs = sess.get_graph_states()

            # check web UI requests
            assert_page_ok(service_ep)
            assert_page_ok('%s/task' % (service_ep, ))

            assert_page_ok('%s/scheduler' % (service_ep, ))
            assert_page_ok('%s/scheduler?endpoint=127.0.0.1:%s' %
                           (service_ep, self.scheduler_port))

            assert_page_ok('%s/worker' % (service_ep, ))
            assert_page_ok('%s/worker?endpoint=127.0.0.1:%s' %
                           (service_ep, self.worker_port))

            assert_page_ok('%s/task' % (service_ep, ))
            task_id = next(iter(graphs.keys()))
            assert_page_ok('%s/task?session_id=%s&task_id=%s' %
                           (service_ep, sess._session_id, task_id))
コード例 #15
0
    def test_randomized_pca_check_projection(self):
        """Test that the projection by randomized PCA on dense data is correct."""
        rng = np.random.RandomState(0)
        n_samples, n_features = 100, 3
        X = mt.tensor(rng.randn(n_samples, n_features) * .1)
        X[:10] += mt.array([3, 4, 5])
        probe = 0.1 * mt.tensor(rng.randn(1, n_features)) + mt.array([3, 4, 5])

        model = PCA(n_components=2, svd_solver='randomized', random_state=0)
        projected = model.fit(X).transform(probe)
        # normalise the projection before looking at its first component
        norm = np.sqrt((projected**2).sum())
        projected /= norm

        assert_almost_equal(mt.abs(projected[0][0]).to_numpy(), 1., 1)
コード例 #16
0
ファイル: test_pca.py プロジェクト: haijohn/mars
def test_pca_check_projection(setup):
    """Test that the projection of data is correct for every solver in ``solver_list``."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 3
    X = mt.tensor(rng.randn(n_samples, n_features) * .1)
    X[:10] += mt.array([3, 4, 5])
    probe = 0.1 * mt.tensor(rng.randn(1, n_features)) + mt.array([3, 4, 5])

    for solver in solver_list:
        model = PCA(n_components=2, svd_solver=solver)
        projected = model.fit(X).transform(probe)
        # normalise the projection before looking at its first component
        norm = mt.sqrt((projected ** 2).sum())
        projected /= norm

        assert_almost_equal(mt.abs(projected[0][0]).to_numpy(), 1., 1)
コード例 #17
0
ファイル: test_k_means.py プロジェクト: tangyiyong/mars
    def testKMeansInit(self):
        """Fit KMeans with each ``init`` option on dense and sparse blob data.

        Every (input, init) combination is fitted with the ``elkan`` algorithm
        and handed to ``self._check_fitted_model``.  A small hand-written data
        set is then used to check the centers found with ``init='k-means||'``.
        """
        # non-centered, sparse centers used to generate the blobs
        centers = np.array([
            [0.0, 5.0, 0.0, 0.0, 0.0],
            [1.0, 1.0, 4.0, 0.0, 0.0],
            [1.0, 0.0, 0.0, 5.0, 1.0],
        ])
        n_samples = 100
        n_clusters, n_features = centers.shape
        X, true_labels = make_blobs(n_samples=n_samples,
                                    centers=centers,
                                    cluster_std=1.,
                                    random_state=42)
        X_csr = sp.csr_matrix(X)
        for data in [X, X_csr]:
            for init in ['random', 'k-means++', 'k-means||', centers.copy()]:
                # Build a fresh tensor from the raw input for every fit.  The
                # loop variable used to be rebound here, so every init after
                # the first wrapped an already-converted tensor.
                tensor = mt.tensor(data, chunk_size=50)
                km = KMeans(init=init,
                            n_clusters=n_clusters,
                            random_state=42,
                            n_init=1,
                            algorithm='elkan')
                km.fit(tensor)
                self._check_fitted_model(km, n_clusters, n_features,
                                         true_labels)

        X = mt.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
        kmeans = KMeans(n_clusters=2,
                        random_state=0,
                        n_init=1,
                        init='k-means||').fit(X)
        # sort both sides so the comparison ignores the order of the centers
        self.assertEqual(sorted(kmeans.cluster_centers_.fetch().tolist()),
                         sorted([[10., 2.], [1., 2.]]))
コード例 #18
0
ファイル: test_cluster.py プロジェクト: zymITsky/mars
    def testLearnInLocalCluster(self, *_):
        """Compare Mars NearestNeighbors and KMeans with scikit-learn on a local cluster."""
        from mars.learn.cluster import KMeans
        from mars.learn.neighbors import NearestNeighbors
        from sklearn.cluster import KMeans as SK_KMEANS
        from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors

        with new_cluster(scheduler_n_process=2, worker_n_process=3, shared_memory='20M') as cluster:
            rs = np.random.RandomState(0)
            raw_X = rs.rand(10, 5)
            raw_Y = rs.rand(8, 5)

            # nearest neighbours: Mars result run through the cluster session
            X = mt.tensor(raw_X, chunk_size=7)
            Y = mt.tensor(raw_Y, chunk_size=(5, 3))
            mars_nn = NearestNeighbors(n_neighbors=3)
            mars_nn.fit(X)
            mars_outputs = mars_nn.kneighbors(Y, session=cluster.session)

            sk_nn = SkNearestNeighbors(n_neighbors=3)
            sk_nn.fit(raw_X)
            sk_outputs = sk_nn.kneighbors(raw_Y)

            fetched = [out.fetch() for out in mars_outputs]
            for pos in (0, 1):
                np.testing.assert_almost_equal(fetched[pos], sk_outputs[pos])

            # k-means: the cluster centers must agree with scikit-learn
            raw = np.array([[1, 2], [1, 4], [1, 0],
                            [10, 2], [10, 4], [10, 0]])
            X = mt.array(raw)
            mars_km = KMeans(n_clusters=2, random_state=0, init='k-means++').fit(X)
            sk_km = SK_KMEANS(n_clusters=2, random_state=0, init='k-means++').fit(raw)
            np.testing.assert_allclose(mars_km.cluster_centers_, sk_km.cluster_centers_)
コード例 #19
0
    def testSendTargets(self):
        """Enqueue and run a one-chunk graph with ``send_addresses`` set, then check its result.

        The graph computes ``ones(4) + mock_data``; after execution the chunk
        stored under the result key must equal ``mock_data + 1``.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=WorkerDistributor(2)) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(CpuCalcActor)

            import mars.tensor as mt
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)
            result_key = result_tensor.chunks[0].key

            # The mock sender is created with the expected result array.
            # NOTE(review): presumably it stands in for the real sender actor
            # so nothing is actually transferred -- confirm against MockSenderActor.
            pool.create_actor(MockSenderActor, mock_data + np.ones((4,)), 'out', uid='w:mock_sender')
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    # Read the result chunk back from the test actor's chunk
                    # store and compare it with the expected values.
                    data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_name())
                # Promise chain: enqueue the graph, start its execution,
                # validate the stored chunk, then report success; any error
                # along the chain is handed to the test actor as a failure.
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None,
                                            send_addresses={result_key: (pool_address,)}, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            # NOTE(review): presumably blocks until set_result is called and
            # re-raises a recorded failure -- confirm against get_result.
            self.get_result()
コード例 #20
0
ファイル: test_execution.py プロジェクト: zuodh/mars
    def testPrepareSpilled(self):
        """Run a graph whose input chunk is a fetch operand, before and after its data exists.

        The first run, with no chunk meta registered, is expected to end in
        ``DependencyMissing``.  After the meta is set and a spill file is
        written, the second run must produce ``mock_data + 1``.
        """
        from mars.worker.spill import write_spill_file

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])

        # NOTE(review): the temporary directory and the option change are not
        # undone inside this method -- confirm cleanup happens elsewhere.
        options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_worker_prep_spilled-')

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(SpillActor)
            pool.create_actor(CpuCalcActor)
            cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            # The returned reference is discarded.
            pool.actor_ref(ChunkHolderActor.default_uid())

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            # After the graph has been built, swap the operand of arr_add's
            # first chunk for a TensorFetch that reuses the original op key
            # and outputs.
            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # test meta missing
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                # Report success or failure of the execution to the test actor.
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            # Register meta for the fetched chunk and write its data to a
            # spill file so the next run can find it.
            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                             shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
            write_spill_file(modified_chunk.key, mock_data)

            # test read from spilled file
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    # Read the result chunk back from the test actor's chunk
                    # store and compare it with the expected values.
                    data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                # Promise chain: execute, validate the stored chunk, report
                # success; any error is handed to the test actor as a failure.
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
コード例 #21
0
    def testDataFrameTensorConvert(self):
        """Round-trip between tensors and DataFrames through every conversion entry point."""
        # covers from_tensor(), from_dataframe(), to_tensor(), to_dataframe()
        sess = new_session()
        expected = np.ones((2, 2))

        ones_tensor = mt.ones((2, 2))
        frame = ones_tensor.to_dataframe()
        np.testing.assert_equal(sess.run(frame), expected)

        via_module = mt.from_dataframe(frame)
        np.testing.assert_equal(sess.run(via_module), expected)

        via_method = via_module.from_dataframe(frame)
        np.testing.assert_equal(sess.run(via_method), expected)

        via_to_tensor = frame.to_tensor()
        np.testing.assert_equal(sess.run(via_to_tensor), expected)

        frame = md.dataframe_from_tensor(via_method)
        np.testing.assert_equal(sess.run(frame).values, expected)

        frame = frame.from_tensor(via_method)
        np.testing.assert_equal(sess.run(frame).values, expected)

        # a 3-d tensor cannot be converted
        with self.assertRaises(TypeError):
            md.dataframe_from_tensor(mt.ones((1, 2, 3)))

        # a 1-d tensor is converted into a single column
        single_column = md.dataframe_from_tensor(mt.array([1, 2, 3]))
        np.testing.assert_equal(sess.run(single_column),
                                np.array([1, 2, 3]).reshape(3, 1))
コード例 #22
0
    def test_pca_validation(self):
        """Check that invalid ``n_components`` values raise ``ValueError`` for every solver."""
        for solver in self.solver_list:
            # Ensures that solver-specific extreme inputs for the n_components
            # parameter raise errors
            X = mt.array([[0, 1, 0], [1, 0, 0]])
            smallest_d = 2  # The smallest dimension
            lower_limit = {'randomized': 1, 'full': 0, 'auto': 0}
            # 'auto' is reported as 'full' in the error message
            solver_reported = 'full' if solver == 'auto' else solver

            # We conduct the same test on X.T so that it is invariant to axis.
            variants = [X, X.T]
            for candidate in variants:
                for n_components in [-1, 3]:
                    pattern = (
                        "n_components={}L? must be between "
                        r"{}L? and min\(n_samples, n_features\)="
                        "{}L? with svd_solver=\'{}\'".format(
                            n_components, lower_limit[solver], smallest_d,
                            solver_reported))
                    assert_raises_regex(
                        ValueError, pattern,
                        PCA(n_components, svd_solver=solver).fit, candidate)

            # a float >= 1 is rejected; checked on the last variant only
            data = variants[-1]
            n_components = 1.0
            type_ncom = type(n_components)
            message = (
                "n_components={} must be of type int "
                "when greater than or equal to 1, was of type={}".format(
                    n_components, type_ncom))
            assert_raise_message(
                ValueError, message,
                PCA(n_components, svd_solver=solver).fit, data)
コード例 #23
0
    def testFromTensorExecution(self):
        """Execute DataFrames built by ``from_tensor`` and compare them with pandas."""
        def run_frame(frame):
            # Execute a Mars DataFrame and return the concatenated result.
            return self.executor.execute_dataframe(frame, concat=True)[0]

        source = mt.random.rand(10, 10, chunk_size=5)
        frame = from_tensor(source)
        source_values = self.executor.execute_tensor(source, concat=True)[0]
        reference = pd.DataFrame(source_values)
        executed = run_frame(frame)
        pd.testing.assert_index_equal(executed.index, pd.RangeIndex(0, 10))
        pd.testing.assert_index_equal(executed.columns, pd.RangeIndex(0, 10))
        pd.testing.assert_frame_equal(executed, reference)

        # test converted with specified index_value and columns
        small = mt.random.rand(2, 2, chunk_size=1)
        labelled = from_tensor(small,
                               index=pd.Index(['a', 'b']),
                               columns=pd.Index([3, 4]))
        executed = run_frame(labelled)
        pd.testing.assert_index_equal(executed.index, pd.Index(['a', 'b']))
        pd.testing.assert_index_equal(executed.columns, pd.Index([3, 4]))

        # test converted from 1-d tensor
        vector = mt.array([1, 2, 3])
        executed_vector = run_frame(from_tensor(vector))
        reference = pd.DataFrame(np.array([1, 2, 3]))
        pd.testing.assert_frame_equal(reference, executed_vector)
コード例 #24
0
    def testC_(self):
        """Compare ``mt.c_`` against ``np.c_`` for arrays, scalars and slices."""
        def check(built, reference):
            # Execute the Mars tensor and compare it with the NumPy result.
            actual = self.executor.execute_tensor(built, concat=True)[0]
            np.testing.assert_array_equal(actual, reference)

        check(mt.c_[mt.array([1, 2, 3]), mt.array([4, 5, 6])],
              np.c_[np.array([1, 2, 3]), np.array([4, 5, 6])])

        check(mt.c_[mt.array([[1, 2, 3]]), 0, 0, mt.array([[4, 5, 6]])],
              np.c_[np.array([[1, 2, 3]]), 0, 0, np.array([[4, 5, 6]])])

        check(mt.c_[:3, 1:4], np.c_[:3, 1:4])
コード例 #25
0
ファイル: test_pca.py プロジェクト: haijohn/mars
def test_pca_score2(setup):
    """Compare PCA scores across datasets and across the ``whiten`` setting.

    For every solver in ``solver_list`` the score of the training data must
    exceed both the score of a second, differently scaled random dataset and
    the score obtained from a model fitted with ``whiten=True``.
    """
    n_samples, n_features = 100, 3
    random_state = np.random.RandomState(0)
    train = (mt.tensor(random_state.randn(n_samples, n_features) * .1)
             + mt.array([3, 4, 5]))
    for svd_solver in solver_list:
        plain_pca = PCA(n_components=2, svd_solver=svd_solver)
        plain_pca.fit(train)
        score_train = plain_pca.score(train)

        # second dataset: same offset, random part scaled by .2 instead of .1
        other = (mt.tensor(random_state.randn(n_samples, n_features) * .2)
                 + mt.array([3, 4, 5]))
        score_other = plain_pca.score(other)
        assert score_train.fetch() > score_other.fetch()

        # same training data, model fitted with whiten=True
        whitened_pca = PCA(n_components=2, whiten=True, svd_solver=svd_solver)
        whitened_pca.fit(train)
        score_whitened = whitened_pca.score(train)
        assert score_train.fetch() > score_whitened.fetch()
コード例 #26
0
def test_c_(setup):
    """Compare ``mt.c_`` with ``np.c_`` for arrays, mixed scalars and slices."""
    # 1-d arrays
    lhs, rhs = [1, 2, 3], [4, 5, 6]
    mars_out = mt.c_[mt.array(lhs), mt.array(rhs)].execute().fetch()
    numpy_out = np.c_[np.array(lhs), np.array(rhs)]
    np.testing.assert_array_equal(mars_out, numpy_out)

    # 2-d arrays separated by scalar entries
    lhs, rhs = [[1, 2, 3]], [[4, 5, 6]]
    mars_out = mt.c_[mt.array(lhs), 0, 0, mt.array(rhs)].execute().fetch()
    numpy_out = np.c_[np.array(lhs), 0, 0, np.array(rhs)]
    np.testing.assert_array_equal(mars_out, numpy_out)

    # slice notation only
    mars_out = mt.c_[:3, 1:4].execute().fetch()
    numpy_out = np.c_[:3, 1:4]
    np.testing.assert_array_equal(mars_out, numpy_out)
コード例 #27
0
    def testApi(self):
        """Run two matrix products through a ``MarsApiClient`` session and compare with numpy."""
        endpoint = 'http://127.0.0.1:' + self.web_port
        api_client = MarsApiClient(endpoint)
        self.assertEqual(api_client.count_workers(), 1)

        with api_client.create_session() as session:
            # product of two all-ones tensors
            ones_left = mt.ones((100, 100), chunks=30)
            ones_right = mt.ones((100, 100), chunks=30)
            outcome = session.run(ones_left.dot(ones_right))
            assert_array_equal(outcome[0], np.ones((100, 100)) * 100)

            # product of two random integer matrices, checked against numpy's dot
            raw_left = np.random.randint(0, 10000, (100, 100))
            raw_right = np.random.randint(0, 10000, (100, 100))
            tensor_left = mt.array(raw_left, chunks=30)
            tensor_right = mt.array(raw_right, chunks=30)
            outcome = session.run(tensor_left.dot(tensor_right), timeout=120)
            assert_array_equal(outcome[0], raw_left.dot(raw_right))
コード例 #28
0
 def test_pca_dim(self):
     """Fit PCA with ``n_components='mle'`` and check the selected dimensionality.

     The requested ``n_components`` must stay ``'mle'`` while the fitted
     ``n_components_`` is expected to be 1.
     """
     random_state = np.random.RandomState(0)
     n_samples, n_features = 100, 5
     data = mt.tensor(random_state.randn(n_samples, n_features) * .1)
     # shift the first ten rows by a fixed per-column offset
     data[:10] += mt.array([3, 4, 5, 1, 2])
     estimator = PCA(n_components='mle', svd_solver='full')
     fitted = estimator.fit(data)
     self.assertEqual(fitted.n_components, 'mle')
     self.assertEqual(fitted.n_components_, 1)
コード例 #29
0
    def testMultipleAdd(self):
        """Add ten ``[:10, :10]`` slices of one tensor and run the expression suite on the sum.

        ``reduce`` is taken from the standard library's ``functools`` rather
        than from the project's compatibility module, so the test only depends
        on the stdlib for this helper.
        """
        import operator
        from functools import reduce

        import numpy as np

        base_arr = np.random.random((100, 100))
        a = mt.array(base_arr)
        # left fold with ``+`` over ten freshly created slices of ``a``
        sumv = reduce(operator.add, [a[:10, :10] for _ in range(10)])
        self.run_expr_suite(sumv)
コード例 #30
0
ファイル: test_execution.py プロジェクト: zuodh/mars
    def testPrepareQuota(self, *_):
        """Check that graph execution does not finish while chunk pinning keeps failing.

        ``ChunkHolderActor.pin_chunks`` is given a side effect that raises
        ``PinChunkFailed`` as long as a flag is set.  A background thread clears
        the flag after sleeping one second, and the test asserts that the
        recorded finish time is at least one second after ``start_time``.
        """
        # one-element list so the nested functions below can flip the flag in place
        pinned = [True]

        def _mock_pin(_graph_key, chunk_keys):
            # raise while the flag is set, otherwise hand back the requested keys
            from mars.errors import PinChunkFailed
            if pinned[0]:
                raise PinChunkFailed
            return chunk_keys

        # NOTE(review): pin_chunks is presumably replaced by a mock through a patch
        # decorator that is not visible here (the test accepts ``*_``) -- confirm
        ChunkHolderActor.pin_chunks.side_effect = _mock_pin

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender')
            pool.create_actor(CpuCalcActor)
            cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            # NOTE(review): the graph is built before the op swap below; presumably it
            # shares the chunk objects so the swap is visible in it -- confirm
            graph = result_tensor.build_graph(compose=False, tiled=True)

            # replace the op of arr_add's first chunk with a TensorFetch that carries
            # the same dtype, the same outputs (as weak references) and the same key
            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)
            # register meta for that chunk, listing '0.0.0.0:1234' and this pool as workers
            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                             shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                # reference point for the final timing assertion
                start_time = time.time()

                # submit the serialized graph, passing the result chunk's key under 'chunks'
                execution_ref.execute_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

                # on completion store the finish timestamp as the test result;
                # on failure store the exception info with False as second argument
                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(time.time())) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                def _delay_fun():
                    # after one second, let _mock_pin stop raising
                    time.sleep(1)
                    pinned[0] = False

                threading.Thread(target=_delay_fun).start()

            # NOTE(review): get_result presumably returns the value handed to
            # set_result above -- confirm against the test base class
            finish_time = self.get_result()
            # execution must not have finished before the flag was cleared
            self.assertGreaterEqual(finish_time, start_time + 1)