def testFromTileableIndex(self):
    """Check index metadata for DataFrame/Series indexes and for ``from_tileable``.

    Three scenarios are exercised in order:

    * ``from_tileable`` on the tensor created by ``rand(10, 4)`` must raise
      ``ValueError``;
    * ``.index`` of a DataFrame and of its column ``0`` must be an
      ``Int64Index`` carrying the pandas index name, before and after tiling;
    * ``from_tileable`` on the tensor created by ``rand(10, chunk_size=6)``
      with ``name='new_name'`` must be a ``Float64Index``, before and after
      tiling.
    """

    def check_meta(obj, expected_dtype, expected_name, value_type):
        # Shared dtype / name / index-value assertions, used both for an
        # index and for each of its chunks.
        self.assertEqual(obj.dtype, expected_dtype)
        self.assertEqual(obj.name, expected_name)
        self.assertIsInstance(obj.index_value.value, value_type)

    rejected = mt.random.rand(10, 4)
    with self.assertRaises(ValueError):
        from_tileable(rejected)

    values = np.random.rand(10, 4)
    descending_labels = np.arange(10, 0, -1).astype(np.int64)
    pd_df = pd.DataFrame(values, index=descending_labels)
    pd_df.index.name = 'ind'
    df = from_pandas_df(pd_df, chunk_size=6)

    sources = [df, df[0]]
    for source in sources:
        int_index = source.index
        self.assertIsInstance(int_index, Int64Index)
        check_meta(int_index, np.int64, pd_df.index.name,
                   IndexValue.Int64Index)

        tiled = int_index.tiles()
        self.assertEqual(len(tiled.chunks), 2)
        for chunk in tiled.chunks:
            check_meta(chunk, np.int64, pd_df.index.name,
                       IndexValue.Int64Index)

    tensor = mt.random.rand(10, chunk_size=6)
    float_index = from_tileable(tensor, name='new_name')
    self.assertIsInstance(float_index, Float64Index)
    check_meta(float_index, np.float64, 'new_name', IndexValue.Float64Index)

    tiled = float_index.tiles()
    self.assertEqual(len(tiled.chunks), 2)
    for chunk in tiled.chunks:
        check_meta(chunk, np.float64, 'new_name', IndexValue.Float64Index)
def test_from_tileable_index():
    """Check index metadata for DataFrame/Series indexes and for ``from_tileable``.

    First asserts that ``from_tileable`` raises ``ValueError`` for the tensor
    created by ``rand(10, 4)``.  Then checks type, dtype, name and index value
    of the ``.index`` of a DataFrame and of its column ``0``, and of an index
    built from the tensor created by ``rand(10, chunk_size=6)``; each index is
    tiled and asserted to have two chunks carrying the same metadata.
    """
    rejected = mt.random.rand(10, 4)
    with pytest.raises(ValueError):
        from_tileable(rejected)

    # --- index taken from a DataFrame and from one of its columns ---
    values = np.random.rand(10, 4)
    descending_labels = np.arange(10, 0, -1).astype(np.int64)
    pd_df = pd.DataFrame(values, index=descending_labels)
    pd_df.index.name = 'ind'
    df = from_pandas_df(pd_df, chunk_size=6)

    for source in (df, df[0]):
        int_index = source.index
        assert isinstance(int_index, Int64Index)
        assert int_index.dtype == np.int64
        assert int_index.name == pd_df.index.name
        assert isinstance(int_index.index_value.value, IndexValue.Int64Index)

        tiled_int = tile(int_index)
        assert len(tiled_int.chunks) == 2
        for chunk in tiled_int.chunks:
            assert chunk.dtype == np.int64
            assert chunk.name == pd_df.index.name
            assert isinstance(chunk.index_value.value, IndexValue.Int64Index)

    # --- index built from a tensor with an explicit name ---
    tensor = mt.random.rand(10, chunk_size=6)
    float_index = from_tileable(tensor, name='new_name')
    assert isinstance(float_index, Float64Index)
    assert float_index.dtype == np.float64
    assert float_index.name == 'new_name'
    assert isinstance(float_index.index_value.value, IndexValue.Float64Index)

    tiled_float = tile(float_index)
    assert len(tiled_float.chunks) == 2
    for chunk in tiled_float.chunks:
        assert chunk.dtype == np.float64
        assert chunk.name == 'new_name'
        assert isinstance(chunk.index_value.value, IndexValue.Float64Index)
def testIndexExecution(self):
    """Execute index objects and compare the results with their pandas counterparts.

    Covers ``df.index`` and ``df.columns``, the index of a boolean-filtered
    DataFrame, the index of two Series, and an index built from a tensor via
    ``from_tileable``.  Every result is fetched through ``self.executor`` with
    ``concat=True`` and compared using ``pd.testing.assert_index_equal``.
    """

    def fetch(tileable):
        # Run through the test executor and take the first returned result.
        return self.executor.execute_dataframe(tileable, concat=True)[0]

    rs = np.random.RandomState(0)
    column_names = ['a{}'.format(n) for n in range(10)]
    pdf = pd.DataFrame(rs.rand(20, 10), index=np.arange(20, 0, -1),
                       columns=column_names)
    df = from_pandas_df(pdf, chunk_size=13)

    # row index, then column index, of the unfiltered DataFrame
    pd.testing.assert_index_equal(fetch(df.index), pdf.index)
    pd.testing.assert_index_equal(fetch(df.columns), pdf.columns)

    # after filtering, df has unknown chunk shape on axis 0
    filtered = df[df.a1 < 0.5]
    filtered_result = fetch(filtered.index)
    pd.testing.assert_index_equal(filtered_result, pdf[pdf.a1 < 0.5].index)

    # series.index which has value (explicit RangeIndex)
    ranged = pd.Series(pdf['a1'], index=pd.RangeIndex(20))
    ranged_series = from_pandas_series(ranged, chunk_size=13)
    pd.testing.assert_index_equal(fetch(ranged_series.index), ranged.index)

    # series.index inherited from the DataFrame column
    column = pdf['a2']
    column_series = from_pandas_series(column, chunk_size=13)
    pd.testing.assert_index_equal(fetch(column_series.index), column.index)

    # index created from a tensor
    raw = rs.random(20)
    tensor = mt.tensor(raw, chunk_size=13)
    tensor_result = fetch(from_tileable(tensor))
    pd.testing.assert_index_equal(tensor_result, pd.Index(raw))