def testDataFrameSeries(self, *_):
        """Check that MarsDataset serves a named series and a named dataframe.

        A 100-element series and a 100x10 dataframe are executed under the
        names ``'series'`` and ``'dataframe'`` in a fresh session.  They are
        then looked up by name inside a ``DistributedContext``, wrapped in a
        ``MarsDataset``, and the batched ``_get_data`` result is compared with
        item-by-item indexing for each of the two underlying sources.
        """
        service_ep = 'http://127.0.0.1:' + self.web_port
        scheduler_ep = '127.0.0.1:' + self.scheduler_port
        with new_session(service_ep) as sess:
            raw1 = pd.Series(np.random.rand(100, ))
            data1 = md.Series(raw1, chunk_size=40)
            data1.execute(name='series', session=sess)

            raw2 = pd.DataFrame(np.random.rand(100, 10))
            # Must be a Mars DataFrame (not a tensor): the object stored under
            # 'dataframe' is read back below through md.named_dataframe.
            data2 = md.DataFrame(raw2, chunk_size=60)
            data2.execute(name='dataframe', session=sess)

            with DistributedContext(scheduler_address=scheduler_ep,
                                    session_id=sess.session_id):
                t1 = md.named_series(name='series', session=sess)
                t2 = md.named_dataframe(name='dataframe', session=sess)
                dataset = MarsDataset(t1, t2)
                # Both inputs have 100 rows.
                self.assertEqual(len(dataset), 100)

                sampler = MarsDistributedSampler(dataset,
                                                 num_replicas=1,
                                                 rank=0)
                indices = sampler.generate_indices()

                # Position 0 is the series, position 1 the dataframe; for each,
                # batched access must agree with per-index access.
                for pos in (0, 1):
                    batched = np.array(dataset._get_data(indices)[pos])
                    one_by_one = np.array(
                        [dataset[ind][pos] for ind in sampler])
                    np.testing.assert_array_equal(batched, one_by_one)
Example #2
0
    def testNamed(self):
        """Round-trip a tensor, a series and a dataframe through named lookup.

        Each object is executed under a name in the default (or local)
        session, looked up again by that name, and both its metadata and its
        fetched contents are compared with the source data.
        """
        rng = np.random.RandomState(0)
        tensor_data = rng.rand(10, 10)

        session = Session.default_or_local()

        # Tensor: store under 't_name', then read back by name.
        tensor = mt.tensor(tensor_data, chunk_size=3)
        executed = tensor.execute(name='t_name', session=session)
        np.testing.assert_array_equal(executed, tensor_data)

        named_tensor = mt.named_tensor(name='t_name', session=session)
        self.assertEqual(named_tensor.order, TensorOrder.C_ORDER)
        plus_one = (named_tensor + 1).execute(session=session).fetch()
        np.testing.assert_array_equal(plus_one, tensor_data + 1)

        # Series: store under 's_name', then read back by name.
        series_data = pd.Series([1, 2, 3])
        series = md.Series(series_data)
        fetched = series.execute(name='s_name', session=session).fetch()
        pd.testing.assert_series_equal(fetched, series_data)

        named_series = md.named_series(name='s_name', session=session)
        self.assertEqual(named_series.dtype, series.dtype)
        pd.testing.assert_index_equal(
            named_series.index_value.to_pandas(),
            series.index_value.to_pandas(),
        )
        refetched = named_series.execute(session=session).fetch()
        pd.testing.assert_series_equal(refetched, series_data)

        # DataFrame: store under 'd_name', then read back by name.
        frame_data = pd.DataFrame(np.random.rand(10, 3))
        frame = md.DataFrame(frame_data, chunk_size=4)
        fetched = frame.execute(name='d_name', session=session).fetch()
        pd.testing.assert_frame_equal(fetched, frame_data)

        named_frame = md.named_dataframe(name='d_name', session=session)
        pd.testing.assert_series_equal(named_frame.dtypes, frame.dtypes)
        pd.testing.assert_index_equal(
            named_frame.index_value.to_pandas(),
            frame.index_value.to_pandas(),
        )
        pd.testing.assert_index_equal(
            named_frame.columns_value.to_pandas(),
            frame.columns_value.to_pandas(),
        )
        refetched = named_frame.execute(session=session).fetch()
        pd.testing.assert_frame_equal(refetched, frame_data)
Example #3
0
    def testNamed(self):
        """Verify that named tensor/series/dataframe lookups return the data.

        For each kind of object: execute it under a name in the default (or
        local) session, fetch it back through the matching ``named_*``
        constructor, and compare the result with the original input.
        """
        random_state = np.random.RandomState(0)
        expected_array = random_state.rand(10, 10)

        active_session = Session.default_or_local()

        # --- named tensor ---
        tensor_name = 't_name'
        source_tensor = mt.tensor(expected_array, chunk_size=3)
        first_result = source_tensor.execute(name=tensor_name,
                                             session=active_session)
        np.testing.assert_array_equal(first_result, expected_array)

        looked_up_tensor = mt.named_tensor(name=tensor_name,
                                           session=active_session)
        incremented = looked_up_tensor + 1
        second_result = incremented.execute(session=active_session).fetch()
        np.testing.assert_array_equal(second_result, expected_array + 1)

        # --- named series ---
        series_name = 's_name'
        expected_series = pd.Series([1, 2, 3])
        source_series = md.Series(expected_series)
        first_result = source_series.execute(name=series_name,
                                             session=active_session).fetch()
        pd.testing.assert_series_equal(first_result, expected_series)

        looked_up_series = md.named_series(name=series_name,
                                           session=active_session)
        second_result = looked_up_series.execute(
            session=active_session).fetch()
        pd.testing.assert_series_equal(second_result, expected_series)

        # --- named dataframe ---
        frame_name = 'd_name'
        expected_frame = pd.DataFrame(np.random.rand(10, 3))
        source_frame = md.DataFrame(expected_frame, chunk_size=4)
        first_result = source_frame.execute(name=frame_name,
                                            session=active_session).fetch()
        pd.testing.assert_frame_equal(first_result, expected_frame)

        looked_up_frame = md.named_dataframe(name=frame_name,
                                             session=active_session)
        second_result = looked_up_frame.execute(
            session=active_session).fetch()
        pd.testing.assert_frame_equal(second_result, expected_frame)