Example #1
    def testDataSource(self):
        from mars.tensor.base.broadcast_to import TensorBroadcastTo

        data = np.random.random((10, 3))
        t = tensor(data, chunk_size=2)
        self.assertFalse(t.op.gpu)
        t = t.tiles()
        self.assertTrue((t.chunks[0].op.data == data[:2, :2]).all())
        self.assertTrue((t.chunks[1].op.data == data[:2, 2:3]).all())
        self.assertTrue((t.chunks[2].op.data == data[2:4, :2]).all())
        self.assertTrue((t.chunks[3].op.data == data[2:4, 2:3]).all())

        self.assertEqual(t.key, tensor(data, chunk_size=2).tiles().key)
        self.assertNotEqual(t.key, tensor(data, chunk_size=3).tiles().key)
        self.assertNotEqual(
            t.key,
            tensor(np.random.random((10, 3)), chunk_size=2).tiles().key)

        t = tensor(data, chunk_size=2, gpu=True)
        t = t.tiles()

        self.assertTrue(t.op.gpu)
        self.assertTrue(t.chunks[0].op.gpu)

        t = full((2, 2), 2, dtype='f4')
        self.assertFalse(t.op.gpu)
        self.assertEqual(t.shape, (2, 2))
        self.assertEqual(t.dtype, np.float32)

        t = full((2, 2), [1.0, 2.0], dtype='f4')
        self.assertEqual(t.shape, (2, 2))
        self.assertEqual(t.dtype, np.float32)
        self.assertIsInstance(t.op, TensorBroadcastTo)

        with self.assertRaises(ValueError):
            full((2, 2), [1.0, 2.0, 3.0], dtype='f4')
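
The snippets in this gallery omit their module-level imports. As a rough standalone sketch of the chunked-creation pattern above (assuming a recent Mars release where `execute()` returns the tensor itself and `fetch()` pulls the data from the default local session), it could be run like this:

import numpy as np
import mars.tensor as mt

data = np.random.random((10, 3))
t = mt.tensor(data, chunk_size=2)       # split the 10x3 array into 2x2-sized chunks
result = t.execute().fetch()            # runs on the default local session
np.testing.assert_array_equal(result, data)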
Example #2
    def testFaissNearestNeighborsExecution(self):
        rs = np.random.RandomState(0)
        raw_X = rs.rand(10, 5)
        raw_Y = rs.rand(8, 5)

        # test faiss execution
        X = mt.tensor(raw_X, chunk_size=7)
        Y = mt.tensor(raw_Y, chunk_size=(5, 3))

        nn = NearestNeighbors(n_neighbors=3, algorithm='faiss', metric='l2')
        nn.fit(X)

        ret = nn.kneighbors(Y)

        snn = SkNearestNeighbors(n_neighbors=3, algorithm='auto', metric='l2')
        snn.fit(raw_X)
        expected = snn.kneighbors(raw_Y)

        result = [r.fetch() for r in ret]
        np.testing.assert_almost_equal(result[0], expected[0], decimal=6)
        np.testing.assert_almost_equal(result[1], expected[1])

        # test return_distance=False
        ret = nn.kneighbors(Y, return_distance=False)

        result = ret.fetch()
        np.testing.assert_almost_equal(result, expected[1])

        # test y is x
        ret = nn.kneighbors()

        expected = snn.kneighbors()

        result = [r.fetch() for r in ret]
        np.testing.assert_almost_equal(result[0], expected[0], decimal=5)
        np.testing.assert_almost_equal(result[1], expected[1])
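
Outside the scikit-learn comparison, a rough sketch of the same nearest-neighbors flow might look as follows (it assumes mars.learn is installed; the 'faiss' backend additionally requires the faiss package, so the default 'auto' algorithm is used here):

import numpy as np
import mars.tensor as mt
from mars.learn.neighbors import NearestNeighbors

X = mt.tensor(np.random.rand(100, 5), chunk_size=20)

nn = NearestNeighbors(n_neighbors=3)        # algorithm='auto' picks an available backend
nn.fit(X)
distances, indices = nn.kneighbors(X[:10])  # executes eagerly and returns Mars tensors
print(indices.fetch())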
Example #3
    def testCopyAndView(self):
        data = np.random.rand(10, 20)
        a = tensor(data, chunk_size=6)
        b = a.view()
        b[:5] = 10

        npa = data.copy()
        npb = npa.view()
        npb[:5] = 10

        np.testing.assert_array_equal(b.execute(), npb)
        np.testing.assert_array_equal(a.execute(), npa)

        data = np.random.rand(10, 20)
        a = tensor(data, chunk_size=6)
        b = a.copy()
        b[:5] = 10

        npa = data.copy()
        npb = npa.copy()
        npb[:5] = 10

        np.testing.assert_array_equal(b.execute(), npb)
        np.testing.assert_array_equal(a.execute(), npa)
Example #4
    def testDataFrameCreate(self):
        sess = new_session()
        tensor = mt.ones((2, 2))
        df = md.DataFrame(tensor)
        df_result = sess.run(df)
        df2 = md.DataFrame(df)
        df2 = sess.run(df2)
        np.testing.assert_equal(df_result.values, np.ones((2, 2)))
        pd.testing.assert_frame_equal(df_result, df2)

        raw_a = np.random.rand(10)
        raw_b = np.random.randint(1000, size=10)
        df = md.DataFrame({
            'a': mt.tensor(raw_a),
            'b': mt.tensor(raw_b)
        }, columns=['b', 'a'])
        df_result = sess.run(df)
        pd.testing.assert_frame_equal(
            df_result,
            pd.DataFrame({'a': raw_a, 'b': raw_b}, columns=['b', 'a']))
Example #5
    def testGraphFail(self, *_):
        op = SerializeMustFailOperand(f=3)
        tensor = op.new_tensor(None, (3, 3))

        try:
            raise ValueError
        except:  # noqa: E722
            exc = sys.exc_info()[1]

        with new_cluster(scheduler_n_process=2, worker_n_process=2,
                         shared_memory='20M', modules=[__name__],
                         options={'scheduler.retry_num': 1}) as cluster:
            with self.assertRaises(ExecutionFailed):
                try:
                    cluster.session.run(tensor, timeout=_exec_timeout)
                except ExecutionFailed as ex:
                    self.assertIsInstance(ex.__cause__, TypeError)
                    raise

            data = mt.tensor(np.random.rand(10, 20))
            data2 = TileFailOperand(_exc_serial=pickle.dumps(exc)).new_tensor([data], shape=data.shape)
            with self.assertRaises(ExecutionFailed):
                try:
                    cluster.session.run(data2)
                except ExecutionFailed as ex:
                    self.assertIsInstance(ex.__cause__, ValueError)
                    raise

            data = mt.tensor(np.random.rand(20, 10))
            data2 = ExecFailOperand(_exc_serial=pickle.dumps(exc)).new_tensor([data], shape=data.shape)
            with self.assertRaises(ExecutionFailed):
                try:
                    cluster.session.run(data2)
                except ExecutionFailed as ex:
                    self.assertIsInstance(ex.__cause__, ValueError)
                    raise
Example #6
    def testViewDataOnSlice(self):
        data = np.random.rand(10, 20)
        a = tensor(data, chunk_size=6)
        b = a[:5, 5:10]
        b[:3, :3] = 3

        npa = data.copy()
        npb = npa[:5, 5:10]
        npb[:3, :3] = 3

        np.testing.assert_array_equal(b.execute(), npb)
        np.testing.assert_array_equal(a.execute(), npa)

        data = np.random.rand(10, 20)
        a = tensor(data, chunk_size=6)
        b = a[:7]
        b += 1

        npa = data.copy()
        npb = npa[:7]
        npb += 1

        np.testing.assert_array_equal(b.execute(), npb)
        np.testing.assert_array_equal(a.execute(), npa)
Example #7
    def testInsert(self):
        rs = np.random.RandomState(0)
        raw = pd.DataFrame(rs.rand(10, 4), columns=['A', 'B', 'C', 'D'])

        with self.assertRaises(ValueError):
            tensor = mt.tensor(rs.rand(10, 10), chunk_size=4)
            df = md.DataFrame(raw.copy(deep=True), chunk_size=3)
            df.insert(4, 'E', tensor)

        df = md.DataFrame(raw.copy(deep=True), chunk_size=3)
        df.insert(4, 'E', 0)
        raw_dup = raw.copy(deep=True)
        raw_dup.insert(4, 'E', 0)
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df, concat=True)[0], raw_dup)

        raw_tensor = rs.rand(10)
        tensor = mt.tensor(raw_tensor, chunk_size=4)
        df = md.DataFrame(raw.copy(deep=True), chunk_size=3)
        df.insert(4, 'E', tensor)
        raw_dup = raw.copy(deep=True)
        raw_dup.insert(4, 'E', raw_tensor)
        pd.testing.assert_frame_equal(
            self.executor.execute_dataframe(df, concat=True)[0], raw_dup)
Example #8
    def testNorm(self):
        data = np.random.rand(9, 6)

        a = mt.tensor(data, chunk_size=(2, 6))

        for ord in (None, 'nuc', np.inf, -np.inf, 0, 1, -1, 2, -2):
            for axis in (0, 1, (0, 1)):
                for keepdims in (True, False):
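                    # not every (ord, axis) combination is valid; the ones NumPy rejects are skipped below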
                    try:
                        res = mt.linalg.norm(a, ord=ord, axis=axis, keepdims=keepdims)
                        expect_shape = np.linalg.norm(data, ord=ord, axis=axis, keepdims=keepdims).shape
                        self.assertEqual(res.shape, expect_shape)
                        self.assertEqual(calc_shape(res), expect_shape)
                    except ValueError:
                        continue
Example #9
    def testSeriesFromTensor(self):
        data = np.random.rand(10)
        series = md.Series(mt.tensor(data), name='a')
        pd.testing.assert_series_equal(self.executor.execute_dataframe(series, concat=True)[0],
                                       pd.Series(data, name='a'))

        series = md.Series(mt.tensor(data, chunk_size=3))
        pd.testing.assert_series_equal(self.executor.execute_dataframe(series, concat=True)[0],
                                       pd.Series(data))

        series = md.Series(mt.ones((10,), chunk_size=4))
        pd.testing.assert_series_equal(self.executor.execute_dataframe(series, concat=True)[0],
                                       pd.Series(np.ones(10,)))

        index_data = np.random.rand(10)
        series = md.Series(mt.tensor(data, chunk_size=3), name='a',
                           index=mt.tensor(index_data, chunk_size=4))
        pd.testing.assert_series_equal(self.executor.execute_dataframe(series, concat=True)[0],
                                       pd.Series(data, name='a', index=index_data))

        series = md.Series(mt.tensor(data, chunk_size=3), name='a',
                           index=md.date_range('2020-1-1', periods=10))
        pd.testing.assert_series_equal(self.executor.execute_dataframe(series, concat=True)[0],
                                       pd.Series(data, name='a', index=pd.date_range('2020-1-1', periods=10)))
Example #10
    def testFetchSlices(self, *_):
        with new_cluster(scheduler_n_process=2,
                         worker_n_process=2,
                         shared_memory='20M',
                         web=True) as cluster:
            session = cluster.session
            a = mt.random.rand(10, 10, 10, chunk_size=3)

            r = session.run(a)
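            # fetching a sliced tensor only pulls the chunks needed for that slice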

            r_slice1 = session.fetch(a[:2])
            np.testing.assert_array_equal(r[:2], r_slice1)

            r_slice2 = session.fetch(a[2:8, 2:8])
            np.testing.assert_array_equal(r[2:8, 2:8], r_slice2)

            r_slice3 = session.fetch(a[:, 2:])
            np.testing.assert_array_equal(r[:, 2:], r_slice3)

            r_slice4 = session.fetch(a[:, 2:, -5:])
            np.testing.assert_array_equal(r[:, 2:, -5:], r_slice4)

            r_slice5 = session.fetch(a[0])
            np.testing.assert_array_equal(r[0], r_slice5)

            # test repr
            with np.printoptions(threshold=100):
                raw = np.random.randint(1000, size=(3, 4, 6))
                b = mt.tensor(raw, chunk_size=3)
                self.assertEqual(repr(b.execute(session=session)), repr(raw))

            web_session = new_session('http://' + cluster._web_endpoint)
            r = web_session.run(a)

            r_slice1 = web_session.fetch(a[:2])
            np.testing.assert_array_equal(r[:2], r_slice1)

            r_slice2 = web_session.fetch(a[2:8, 2:8])
            np.testing.assert_array_equal(r[2:8, 2:8], r_slice2)

            r_slice3 = web_session.fetch(a[:, 2:])
            np.testing.assert_array_equal(r[:, 2:], r_slice3)

            r_slice4 = web_session.fetch(a[:, 2:, -5:])
            np.testing.assert_array_equal(r[:, 2:, -5:], r_slice4)

            r_slice5 = web_session.fetch(a[4])
            np.testing.assert_array_equal(r[4], r_slice5)
Example #11
    def testDistributedContext(self):
        self.start_processes(etcd=False)

        session_id = uuid.uuid1()
        actor_client = new_client()
        rs = np.random.RandomState(0)

        context = DistributedContext(
            scheduler_address=self.scheduler_endpoints[0],
            session_id=session_id)

        session_ref = actor_client.actor_ref(
            self.session_manager_ref.create_session(session_id))
        raw1 = rs.rand(10, 10)
        a = mt.tensor(raw1, chunk_size=4)

        graph = a.build_graph()
        targets = [a.key]
        graph_key = uuid.uuid1()
        session_ref.submit_tileable_graph(json.dumps(graph.to_json()),
                                          graph_key,
                                          target_tileables=targets,
                                          names=['test'])

        state = self.wait_for_termination(actor_client, session_ref, graph_key)
        self.assertEqual(state, GraphState.SUCCEEDED)

        tileable_key = context.get_tileable_key_by_name('test')
        self.assertEqual(a.key, tileable_key)

        nsplits = context.get_tileable_metas([a.key],
                                             filter_fields=['nsplits'])[0][0]
        self.assertEqual(((4, 4, 2), (4, 4, 2)), nsplits)

        r = context.get_tileable_data(a.key)
        np.testing.assert_array_equal(raw1, r)

        indexes = [slice(3, 9), slice(0, 7)]
        r = context.get_tileable_data(a.key, indexes)
        np.testing.assert_array_equal(raw1[tuple(indexes)], r)

        indexes = [[1, 2, 4, 5], slice(None, None, None)]
        r = context.get_tileable_data(a.key, indexes)
        np.testing.assert_array_equal(raw1[tuple(indexes)], r)

        indexed = a[[0, 1, 2, 9], [0, 0, 4, 4]]
        r = context.get_tileable_data(a.key, indexed.op.indexes)
        np.testing.assert_array_equal(raw1[[0, 1, 2, 9], [0, 0, 4, 4]], r)
Example #12
def test_cmdline_run(supervisor_args, worker_args, use_web_addr):
    new_isolation()
    sv_proc = w_procs = None
    try:
        env = os.environ.copy()
        env['MARS_CPU_TOTAL'] = '2'

        sv_args = _reload_args(supervisor_args)
        sv_proc = subprocess.Popen(sv_args, env=env)

        oscar_port = _get_labelled_port('supervisor', create=False)
        if not oscar_port:
            oscar_ep = _wait_supervisor_ready(sv_proc)
        else:
            oscar_ep = f'127.0.0.1:{oscar_port}'

        if use_web_addr:
            host = oscar_ep.rsplit(':', 1)[0]
            api_ep = f'http://{host}:{_get_labelled_port("web", create=False)}'
        else:
            api_ep = oscar_ep

        w_procs = [subprocess.Popen(
            _reload_args(worker_args), env=env) for _ in range(2)]
        _wait_worker_ready(oscar_ep, w_procs)

        new_session(api_ep)
        data = np.random.rand(10, 10)
        res = mt.tensor(data, chunk_size=5).sum().execute().fetch()
        np.testing.assert_almost_equal(res, data.sum())
    finally:
        ep_file_name = OscarCommandRunner._build_endpoint_file_path(pid=sv_proc.pid)
        try:
            os.unlink(ep_file_name)
        except OSError:
            pass

        w_procs = w_procs or []
        for proc in w_procs + [sv_proc]:
            if not proc:
                continue
            proc.terminate()
            try:
                proc.wait(3)
            except subprocess.TimeoutExpired:
                kill_process_tree(proc.pid)

        stop_isolation()
Example #13
async def test_fault_inject_subtask_processor(fault_cluster,
                                              fault_and_exception):
    fault_type, fault_count, first_run_raises = fault_and_exception
    name = await create_fault_injection_manager(
        session_id=fault_cluster.session.session_id,
        address=fault_cluster.session.address,
        fault_count=fault_count,
        fault_type=fault_type)
    extra_config = {ExtraConfigKey.FAULT_INJECTION_MANAGER_NAME: name}
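    # this extra_config is handed to execute() so the run consults the fault injection manager created above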

    raw = np.random.RandomState(0).rand(10, 10)
    a = mt.tensor(raw, chunk_size=5)
    b = a + 1

    with first_run_raises:
        b.execute(extra_config=extra_config)
Example #14
    def testFlat(self):
        data = np.random.rand(10, 20)
        a = tensor(data, chunk_size=4)
        fl = a.flat
        fl[1:10] = 10
        b = fl[10:20]
        b[0:4] = 20

        npa = data.copy()
        npfl = npa.flat
        npfl[1:10] = 10
        npb = npfl[10:20]
        npb[0:4] = 20

        np.testing.assert_array_equal(b.execute(), npb)
        np.testing.assert_array_equal(a.execute(), npa)
Example #15
    def testDistributedRunPyTorchScript(self):
        service_ep = 'http://127.0.0.1:' + self.web_port
        scheduler_ep = '127.0.0.1:' + self.scheduler_port
        with new_session(service_ep) as sess:
            raw = np.random.rand(100, 200)
            data = mt.tensor(raw, chunk_size=40)
            data.execute(name='data', session=sess)

            with DistributedContext(scheduler_address=scheduler_ep,
                                    session_id=sess.session_id):
                dataset = MarsDataset('data')
                self.assertEqual(len(dataset), 100)

                sample = [2, 5, 7, 9, 10]
                r1 = dataset[sample][0]
                np.testing.assert_array_equal(raw[sample], r1)
Example #16
    def testTileContextInLocalCluster(self):
        from mars.serialize import dataserializer
        with new_cluster(scheduler_n_process=2, worker_n_process=2,
                         shared_memory='20M', modules=[__name__], web=True) as cluster:
            session = cluster.session

            raw = np.random.rand(10, 20)
            data_bytes = dataserializer.serialize(raw).total_bytes
            data = mt.tensor(raw)

            session.run(data)

            data2 = TileWithContextOperand().new_tensor([data], shape=data.shape)
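            # TileWithContextOperand (defined in this test module) uses the tile context
            # to scale its input by the data's serialized size, as asserted below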

            result = session.run(data2)
            np.testing.assert_array_equal(raw * data_bytes, result)
Example #17
def test_params():
    raw = np.random.rand(10, 10)
    a = mt.tensor(raw)
    a = a[a[0] < 0.5]
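    # boolean indexing yields chunks whose shapes are unknown (NaN) until the data is actually computed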
    a = tile(a)
    c = a.chunks[0]

    assert any(np.isnan(s) for s in c.params['shape'])
    c.params = c.get_params_from_data(raw[raw[0] < 0.5])
    assert not any(np.isnan(s) for s in c.params['shape'])

    params = c.params.copy()
    params.pop('index', None)
    a.params = params
    assert np.prod(a.shape) > 0
    a.refresh_params()
Example #18
    def testManhattanDistancesExecution(self):
        raw_x = np.random.rand(20, 5)
        raw_y = np.random.rand(21, 5)

        x1 = mt.tensor(raw_x, chunk_size=30)
        y1 = mt.tensor(raw_y, chunk_size=30)

        x2 = mt.tensor(raw_x, chunk_size=11)
        y2 = mt.tensor(raw_y, chunk_size=12)

        raw_sparse_x = sps.random(20, 5, density=0.4, format='csr', random_state=0)
        raw_sparse_y = sps.random(21, 5, density=0.3, format='csr', random_state=0)

        x3 = mt.tensor(raw_sparse_x, chunk_size=30)
        y3 = mt.tensor(raw_sparse_y, chunk_size=30)

        x4 = mt.tensor(raw_sparse_x, chunk_size=11)
        y4 = mt.tensor(raw_sparse_y, chunk_size=12)

        for x, y, is_sparse in [(x1, y1, False),
                                (x2, y2, False),
                                (x3, y3, True),
                                (x4, y4, True)]:
            if is_sparse:
                rx, ry = raw_sparse_x, raw_sparse_y
            else:
                rx, ry = raw_x, raw_y

            sv = [True, False] if not is_sparse else [True]

            for sum_over_features in sv:
                d = manhattan_distances(x, y, sum_over_features)

                result = self.executor.execute_tensor(d, concat=True)[0]
                expected = sk_manhattan_distances(rx, ry, sum_over_features)

                np.testing.assert_almost_equal(result, expected)

                d = manhattan_distances(x, sum_over_features=sum_over_features)

                result = self.executor.execute_tensor(d, concat=True)[0]
                expected = sk_manhattan_distances(rx, sum_over_features=sum_over_features)

                np.testing.assert_almost_equal(result, expected)
Example #19
def test_inv():
    a = mt.random.randint(1, 10, (20, 20), chunk_size=8)
    a_inv = tile(mt.linalg.inv(a))

    assert a_inv.shape == (20, 20)

    # test 1 chunk
    a = mt.random.randint(1, 10, (20, 20), chunk_size=20)
    a_inv = tile(mt.linalg.inv(a))

    assert a_inv.shape == (20, 20)
    assert len(a_inv.chunks) == 1
    assert isinstance(a_inv.chunks[0].op, TensorInv)

    a = mt.random.randint(1, 10, (20, 20), chunk_size=11)
    a_inv = tile(mt.linalg.inv(a))

    assert a_inv.shape == (20, 20)
    assert a_inv.nsplits == ((11, 9), (11, 9))

    b = a.T.dot(a)
    b_inv = tile(mt.linalg.inv(b))
    assert b_inv.shape == (20, 20)

    # test sparse
    data = sps.csr_matrix(np.random.randint(1, 10, (20, 20)))
    a = mt.tensor(data, chunk_size=10)
    a_inv = tile(mt.linalg.inv(a))

    assert a_inv.shape == (20, 20)

    assert a_inv.op.sparse is True
    assert isinstance(a_inv, SparseTensor)
    assert all(c.is_sparse() for c in a_inv.chunks) is True

    b = a.T.dot(a)
    b_inv = tile(mt.linalg.inv(b))
    assert b_inv.shape == (20, 20)

    assert b_inv.op.sparse is True
    assert isinstance(b_inv, SparseTensor)
    assert all(c.is_sparse() for c in b_inv.chunks) is True

    b_inv = tile(mt.linalg.inv(b, sparse=False))
    assert b_inv.op.sparse is False
    assert not all(c.is_sparse() for c in b_inv.chunks)
Example #20
    def testSetitem(self):
        data = pd.DataFrame(np.random.rand(10, 5),
                            columns=['c' + str(i) for i in range(5)],
                            index=['i' + str(i) for i in range(10)])
        data2 = np.random.rand(10)
        df = md.DataFrame(data, chunk_size=3)
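        # __setitem__ accepts an existing column expression, a scalar, and a Mars tensor as values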

        df['c3'] = df['c3'] + 1
        df['c10'] = 10
        df[4] = mt.tensor(data2, chunk_size=4)

        result = self.executor.execute_dataframe(df, concat=True)[0]
        expected = data.copy()
        expected['c3'] = expected['c3'] + 1
        expected['c10'] = 10
        expected[4] = data2
        pd.testing.assert_frame_equal(result, expected)
Example #21
    def testTileContextInLocalCluster(self):
        with new_cluster(scheduler_n_process=2,
                         worker_n_process=2,
                         shared_memory='20M',
                         modules=[__name__],
                         web=True) as cluster:
            session = cluster.session

            raw = np.random.rand(10, 20)
            data = mt.tensor(raw)

            session.run(data)

            data2 = FakeOp().new_tensor([data], shape=data.shape)

            result = session.run(data2)
            np.testing.assert_array_equal(raw * raw.nbytes, result)
Example #22
def test_erf():
    raw = np.random.rand(10, 8, 5)
    t = tensor(raw, chunk_size=3)

    r = erf(t)
    expect = scipy_erf(raw)

    assert r.shape == raw.shape
    assert r.dtype == expect.dtype

    t, r = tile(t, r)

    assert r.nsplits == t.nsplits
    for c in r.chunks:
        assert isinstance(c.op, TensorErf)
        assert c.index == c.inputs[0].index
        assert c.shape == c.inputs[0].shape
Example #23
    def testTensorGraphTiledSerialize(self):
        t = (ones((10, 3), chunk_size=(5, 2)) +
             tensor(np.random.random((10, 3)), chunk_size=(5, 2)))
        graph = t.build_graph(tiled=True)

        pb = graph.to_pb()
        graph2 = DAG.from_pb(pb)
        self.assertEqual(len(graph), len(graph2))
        chunk = next(c for c in graph if c.inputs)
        chunk2 = next(c for c in graph2 if c.key == chunk.key)
        self.assertBaseEqual(chunk.op, chunk2.op)
        self.assertEqual(chunk.index, chunk2.index)
        self.assertEqual(chunk.shape, chunk2.shape)
        self.assertEqual(sorted(i.key for i in chunk.inputs),
                         sorted(i.key for i in chunk2.inputs))

        jsn = graph.to_json()
        graph2 = DAG.from_json(jsn)
        self.assertEqual(len(graph), len(graph2))
        chunk = next(c for c in graph if c.inputs)
        chunk2 = next(c for c in graph2 if c.key == chunk.key)
        self.assertBaseEqual(chunk.op, chunk2.op)
        self.assertEqual(chunk.index, chunk2.index)
        self.assertEqual(chunk.shape, chunk2.shape)
        self.assertEqual(sorted(i.key for i in chunk.inputs),
                         sorted(i.key for i in chunk2.inputs))

        t = ones((10, 3), chunk_size=((3, 5, 2), 2)) + 2
        graph = t.build_graph(tiled=True)

        pb = graph.to_pb()
        graph2 = DAG.from_pb(pb)
        chunk = next(c for c in graph)
        chunk2 = next(c for c in graph2 if c.key == chunk.key)
        self.assertBaseEqual(chunk.op, chunk2.op)
        self.assertEqual(sorted(i.key for i in chunk.composed),
                         sorted(i.key for i in chunk2.composed))

        jsn = graph.to_json()
        graph2 = DAG.from_json(jsn)
        chunk = next(c for c in graph)
        chunk2 = next(c for c in graph2 if c.key == chunk.key)
        self.assertBaseEqual(chunk.op, chunk2.op)
        self.assertEqual(sorted(i.key for i in chunk.composed),
                         sorted(i.key for i in chunk2.composed))
Example #24
    def testNamed(self):
        rs = np.random.RandomState(0)
        raw = rs.rand(10, 10)

        sess = Session.default_or_local()

        # test named tensor
        t = mt.tensor(raw, chunk_size=3)
        name = 't_name'
        r1 = t.execute(name=name, session=sess)
        np.testing.assert_array_equal(r1, raw)

        t2 = mt.named_tensor(name=name, session=sess)
        self.assertEqual(t2.order, TensorOrder.C_ORDER)
        r2 = (t2 + 1).execute(session=sess).fetch()
        np.testing.assert_array_equal(r2, raw + 1)

        # test named series
        name = 's_name'
        raw = pd.Series([1, 2, 3])
        s = md.Series(raw)
        r1 = s.execute(name=name, session=sess).fetch()
        pd.testing.assert_series_equal(r1, raw)

        s2 = md.named_series(name=name, session=sess)
        self.assertEqual(s2.dtype, s.dtype)
        pd.testing.assert_index_equal(s2.index_value.to_pandas(),
                                      s.index_value.to_pandas())
        r2 = s2.execute(session=sess).fetch()
        pd.testing.assert_series_equal(r2, raw)

        # test dataframe
        name = 'd_name'
        raw = pd.DataFrame(np.random.rand(10, 3))
        d = md.DataFrame(raw, chunk_size=4)
        r1 = d.execute(name=name, session=sess).fetch()
        pd.testing.assert_frame_equal(r1, raw)

        d2 = md.named_dataframe(name=name, session=sess)
        pd.testing.assert_series_equal(d2.dtypes, d.dtypes)
        pd.testing.assert_index_equal(d2.index_value.to_pandas(),
                                      d.index_value.to_pandas())
        pd.testing.assert_index_equal(d2.columns_value.to_pandas(),
                                      d.columns_value.to_pandas())
        r2 = d2.execute(session=sess).fetch()
        pd.testing.assert_frame_equal(r2, raw)
Example #25
    def testInv(self):
        a = mt.random.randint(1, 10, (20, 20), chunk_size=4)
        a_inv = mt.linalg.inv(a).tiles()

        self.assertEqual(a_inv.shape, (20, 20))

        # test 1 chunk
        a = mt.random.randint(1, 10, (20, 20), chunk_size=20)
        a_inv = mt.linalg.inv(a).tiles()

        self.assertEqual(a_inv.shape, (20, 20))
        self.assertEqual(len(a_inv.chunks), 1)
        self.assertIsInstance(a_inv.chunks[0].op, TensorInv)

        a = mt.random.randint(1, 10, (20, 20), chunk_size=11)
        a_inv = mt.linalg.inv(a).tiles()

        self.assertEqual(a_inv.shape, (20, 20))
        self.assertEqual(a_inv.nsplits, ((11, 9), (11, 9)))

        b = a.T.dot(a)
        b_inv = mt.linalg.inv(b).tiles()
        self.assertEqual(b_inv.shape, (20, 20))

        # test sparse
        data = sps.csr_matrix(np.random.randint(1, 10, (20, 20)))
        a = mt.tensor(data, chunk_size=5)
        a_inv = mt.linalg.inv(a).tiles()

        self.assertEqual(a_inv.shape, (20, 20))

        self.assertTrue(a_inv.op.sparse)
        self.assertIsInstance(a_inv, SparseTensor)
        self.assertTrue(all(c.is_sparse() for c in a_inv.chunks))

        b = a.T.dot(a)
        b_inv = mt.linalg.inv(b).tiles()
        self.assertEqual(b_inv.shape, (20, 20))

        self.assertTrue(b_inv.op.sparse)
        self.assertIsInstance(b_inv, SparseTensor)
        self.assertTrue(all(c.is_sparse() for c in b_inv.chunks))

        b_inv = mt.linalg.inv(b, sparse=False).tiles()
        self.assertFalse(b_inv.op.sparse)
        self.assertTrue(not all(c.is_sparse() for c in b_inv.chunks))
Example #26
def test_is_multilabel(setup):
    raws = [
        [[1, 2]],
        [0, 1, 0, 1],
        [[1], [0, 2], []],
        np.array([[1, 0], [0, 0]]),
        np.array([[1], [0], [0]]),
        np.array([[1, 0, 0]]),
        np.array([[1., 0.], [0., 0.]]),
        sps.csr_matrix([[1, 0], [0, 1]]),
    ]

    for raw in raws:
        assert is_multilabel(raw).to_numpy() == sklearn_is_multilabel(raw)

    t = mt.tensor(raws[3], chunk_size=1)
    assert is_multilabel(t).to_numpy() == sklearn_is_multilabel(raws[3])
Example #27
    def testTensorGraphSerialize(self):
        t = ones((10, 3), chunk_size=(5, 2)) + tensor(np.random.random((10, 3)), chunk_size=(5, 2))
        graph = t.build_graph(tiled=False)

        pb = graph.to_pb()
        graph2 = DAG.from_pb(pb)
        self.assertEqual(len(graph), len(graph2))
        t = next(c for c in graph if c.inputs)
        t2 = next(c for c in graph2 if c.key == t.key)
        self.assertTrue(t2.op.outputs[0], ReferenceType)  # make sure outputs are all weak reference
        self.assertBaseEqual(t.op, t2.op)
        self.assertEqual(t.shape, t2.shape)
        self.assertEqual(sorted(i.key for i in t.inputs), sorted(i.key for i in t2.inputs))

        jsn = graph.to_json()
        graph2 = DAG.from_json(jsn)
        self.assertEqual(len(graph), len(graph2))
        t = next(c for c in graph if c.inputs)
        t2 = next(c for c in graph2 if c.key == t.key)
        self.assertTrue(t2.op.outputs[0], ReferenceType)  # make sure outputs are all weak reference
        self.assertBaseEqual(t.op, t2.op)
        self.assertEqual(t.shape, t2.shape)
        self.assertEqual(sorted(i.key for i in t.inputs), sorted(i.key for i in t2.inputs))

        # test graph with tiled tensor
        t2 = ones((10, 10), chunk_size=(5, 4)).tiles()
        graph = DAG()
        graph.add_node(t2)

        pb = graph.to_pb()
        graph2 = DAG.from_pb(pb)
        self.assertEqual(len(graph), len(graph2))
        chunks = next(iter(graph2)).chunks
        self.assertEqual(len(chunks), 6)
        self.assertIsInstance(chunks[0], TensorChunk)
        self.assertEqual(chunks[0].index, t2.chunks[0].index)
        self.assertBaseEqual(chunks[0].op, t2.chunks[0].op)

        jsn = graph.to_json()
        graph2 = DAG.from_json(jsn)
        self.assertEqual(len(graph), len(graph2))
        chunks = next(iter(graph2)).chunks
        self.assertEqual(len(chunks), 6)
        self.assertIsInstance(chunks[0], TensorChunk)
        self.assertEqual(chunks[0].index, t2.chunks[0].index)
        self.assertBaseEqual(chunks[0].op, t2.chunks[0].op)
Example #28
    def testFromSpmatrix(self):
        t = tensor(sps.csr_matrix([[0, 0, 1], [1, 0, 0]], dtype='f8'), chunk_size=2)

        self.assertIsInstance(t, SparseTensor)
        self.assertIsInstance(t.op, CSRMatrixDataSource)
        self.assertTrue(t.issparse())
        self.assertFalse(t.op.gpu)

        t = t.tiles()
        self.assertEqual(t.chunks[0].index, (0, 0))
        self.assertIsInstance(t.op, CSRMatrixDataSource)
        self.assertFalse(t.op.gpu)
        m = sps.csr_matrix([[0, 0], [1, 0]])
        self.assertTrue(np.array_equal(t.chunks[0].op.indices, m.indices))
        self.assertTrue(np.array_equal(t.chunks[0].op.indptr, m.indptr))
        self.assertTrue(np.array_equal(t.chunks[0].op.data, m.data))
        self.assertTrue(np.array_equal(t.chunks[0].op.shape, m.shape))
Example #29
    def testTileContextInLocalCluster(self):
        class FakeOp(TensorAbs):
            _op_type_ = 9870102948

            _multiplier = Int64Field('multiplier')

            @classmethod
            def tile(cls, op):
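                # tile() splits the coarse-grained operand into chunk-level ops;
                # the context exposes metadata (e.g. stored size) of chunks that
                # have already been executed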
                context = get_context()

                self.assertEqual(context.running_mode,
                                 RunningMode.local_cluster)

                inp_chunk = op.inputs[0].chunks[0]
                inp_size = context.get_chunk_metas([inp_chunk.key])[0].chunk_size
                chunk_op = op.copy().reset_key()
                chunk_op._multiplier = inp_size
                chunk = chunk_op.new_chunk([inp_chunk], shape=inp_chunk.shape)

                new_op = op.copy()
                return new_op.new_tensors(op.inputs,
                                          shape=op.outputs[0].shape,
                                          order=op.outputs[0].order,
                                          nsplits=op.inputs[0].nsplits,
                                          chunks=[chunk])

            @classmethod
            def execute(cls, ctx, op):
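                # execute() runs on a worker: read the input chunk's data from ctx
                # and store the result under the output chunk's key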
                ctx[op.outputs[0].key] = ctx[op.inputs[0].key] * op._multiplier

        with new_cluster(scheduler_n_process=2,
                         worker_n_process=2,
                         shared_memory='20M',
                         web=True) as cluster:
            session = cluster.session

            raw = np.random.rand(10, 20)
            data = mt.tensor(raw)

            session.run(data)

            data2 = FakeOp().new_tensor([data], shape=data.shape)

            result = session.run(data2)
            np.testing.assert_array_equal(raw * raw.nbytes, result)
Example #30
    def testGammaln(self):
        raw = np.random.rand(10, 8, 5)
        t = tensor(raw, chunk_size=3)

        r = gammaln(t)
        expect = scipy_gammaln(raw)

        self.assertEqual(r.shape, raw.shape)
        self.assertEqual(r.dtype, expect.dtype)

        r.tiles()

        self.assertEqual(r.nsplits, t.nsplits)
        for c in r.chunks:
            self.assertIsInstance(c.op, TensorGammaln)
            self.assertEqual(c.index, c.inputs[0].index)
            self.assertEqual(c.shape, c.inputs[0].shape)