def testDataFrameSeries(self, *_):
    """Check that MarsDataset serves rows from a named series and DataFrame.

    A 100-element series and a 100x10 DataFrame are executed under the
    names ``'series'`` and ``'dataframe'`` in a session opened against the
    web endpoint. Inside a ``DistributedContext`` they are looked up again
    by name, wrapped in a ``MarsDataset``, and the batch returned by
    ``dataset._get_data(indices)`` is compared with item-by-item access
    driven by the sampler.

    Extra positional arguments (``*_``) are accepted and ignored.
    """
    service_ep = 'http://127.0.0.1:' + self.web_port
    scheduler_ep = '127.0.0.1:' + self.scheduler_port

    with new_session(service_ep) as sess:
        raw1 = pd.Series(np.random.rand(100, ))
        data1 = md.Series(raw1, chunk_size=40)
        data1.execute(name='series', session=sess)

        raw2 = pd.DataFrame(np.random.rand(100, 10))
        # Publish a Mars DataFrame (not a tensor) under 'dataframe': it is
        # read back below through md.named_dataframe, so the stored object
        # has to be a DataFrame for the lookup to match.
        data2 = md.DataFrame(raw2, chunk_size=60)
        data2.execute(name='dataframe', session=sess)

        with DistributedContext(scheduler_address=scheduler_ep,
                                session_id=sess.session_id):
            t1 = md.named_series(name='series', session=sess)
            t2 = md.named_dataframe(name='dataframe', session=sess)

            dataset = MarsDataset(t1, t2)
            self.assertEqual(len(dataset), 100)

            sampler = MarsDistributedSampler(dataset, num_replicas=1, rank=0)
            indices = sampler.generate_indices()

            # Position 0 is the series, position 1 is the DataFrame; batch
            # retrieval must agree with per-index retrieval for both.
            for position in (0, 1):
                r1 = np.array(dataset._get_data(indices)[position])
                r2 = np.array([dataset[ind][position] for ind in sampler])
                np.testing.assert_array_equal(r1, r2)
def testNamed(self):
    """Round-trip a tensor, a series and a DataFrame through named execution.

    Each object is executed under a name, looked up again by that name, and
    the re-fetched values are compared with the raw input. The looked-up
    objects' metadata (tensor order, series dtype/index, DataFrame
    dtypes/index/columns) is also checked.

    NOTE(review): another ``testNamed`` definition is visible in this chunk;
    if both belong to the same class, the later one replaces the earlier --
    confirm.
    """
    rs = np.random.RandomState(0)
    tensor_raw = rs.rand(10, 10)
    sess = Session.default_or_local()

    # --- named tensor ---
    tensor = mt.tensor(tensor_raw, chunk_size=3)
    tensor_name = 't_name'
    executed_tensor = tensor.execute(name=tensor_name, session=sess)
    np.testing.assert_array_equal(executed_tensor, tensor_raw)

    looked_up_tensor = mt.named_tensor(name=tensor_name, session=sess)
    self.assertEqual(looked_up_tensor.order, TensorOrder.C_ORDER)
    shifted = looked_up_tensor + 1
    shifted_values = shifted.execute(session=sess).fetch()
    np.testing.assert_array_equal(shifted_values, tensor_raw + 1)

    # --- named series ---
    series_name = 's_name'
    series_raw = pd.Series([1, 2, 3])
    series = md.Series(series_raw)
    series_values = series.execute(name=series_name, session=sess).fetch()
    pd.testing.assert_series_equal(series_values, series_raw)

    looked_up_series = md.named_series(name=series_name, session=sess)
    self.assertEqual(looked_up_series.dtype, series.dtype)
    pd.testing.assert_index_equal(
        looked_up_series.index_value.to_pandas(),
        series.index_value.to_pandas(),
    )
    refetched_series = looked_up_series.execute(session=sess).fetch()
    pd.testing.assert_series_equal(refetched_series, series_raw)

    # --- named dataframe ---
    frame_name = 'd_name'
    frame_raw = pd.DataFrame(np.random.rand(10, 3))
    frame = md.DataFrame(frame_raw, chunk_size=4)
    frame_values = frame.execute(name=frame_name, session=sess).fetch()
    pd.testing.assert_frame_equal(frame_values, frame_raw)

    looked_up_frame = md.named_dataframe(name=frame_name, session=sess)
    pd.testing.assert_series_equal(looked_up_frame.dtypes, frame.dtypes)
    pd.testing.assert_index_equal(
        looked_up_frame.index_value.to_pandas(),
        frame.index_value.to_pandas(),
    )
    pd.testing.assert_index_equal(
        looked_up_frame.columns_value.to_pandas(),
        frame.columns_value.to_pandas(),
    )
    refetched_frame = looked_up_frame.execute(session=sess).fetch()
    pd.testing.assert_frame_equal(refetched_frame, frame_raw)
def testNamed(self):
    """Round-trip a tensor, a series and a DataFrame through named execution.

    Each object is executed under a name, looked up again by that name, and
    the re-fetched values are compared with the raw input. Only values are
    checked here; no metadata of the looked-up objects is asserted.

    NOTE(review): another ``testNamed`` definition is visible in this chunk;
    if both belong to the same class, the later one replaces the earlier --
    confirm.
    """
    rs = np.random.RandomState(0)
    tensor_raw = rs.rand(10, 10)
    sess = Session.default_or_local()

    # --- named tensor ---
    tensor = mt.tensor(tensor_raw, chunk_size=3)
    tensor_name = 't_name'
    executed_tensor = tensor.execute(name=tensor_name, session=sess)
    np.testing.assert_array_equal(executed_tensor, tensor_raw)

    looked_up_tensor = mt.named_tensor(name=tensor_name, session=sess)
    shifted = looked_up_tensor + 1
    shifted_values = shifted.execute(session=sess).fetch()
    np.testing.assert_array_equal(shifted_values, tensor_raw + 1)

    # --- named series ---
    series_name = 's_name'
    series_raw = pd.Series([1, 2, 3])
    series = md.Series(series_raw)
    series_values = series.execute(name=series_name, session=sess).fetch()
    pd.testing.assert_series_equal(series_values, series_raw)

    looked_up_series = md.named_series(name=series_name, session=sess)
    refetched_series = looked_up_series.execute(session=sess).fetch()
    pd.testing.assert_series_equal(refetched_series, series_raw)

    # --- named dataframe ---
    frame_name = 'd_name'
    frame_raw = pd.DataFrame(np.random.rand(10, 3))
    frame = md.DataFrame(frame_raw, chunk_size=4)
    frame_values = frame.execute(name=frame_name, session=sess).fetch()
    pd.testing.assert_frame_equal(frame_values, frame_raw)

    looked_up_frame = md.named_dataframe(name=frame_name, session=sess)
    refetched_frame = looked_up_frame.execute(session=sess).fetch()
    pd.testing.assert_frame_equal(refetched_frame, frame_raw)