def testIndexReduction(self):
    # Compare min/max/all/any on a Mars Index with the pandas result, for a
    # default-chunked index and for one built with chunk_size=10.
    rng = np.random.RandomState(0)
    # data holds values drawn from [0, 5); data2 from [1, 6), so never zero.
    data = pd.Index(rng.randint(0, 5, (100, )))
    data2 = pd.Index(rng.randint(1, 6, (100, )))

    for method in ['min', 'max', 'all', 'any']:
        for pd_index in (data, data2):
            for index_kwargs in ({}, {'chunk_size': 10}):
                idx = md.Index(pd_index, **index_kwargs)
                reduced = getattr(idx, method)()
                result = self.executor.execute_dataframe(
                    reduced, concat=True)[0]
                self.assertEqual(result, getattr(pd_index, method)())
def testIndexInitializer(self):
    # Build md.Index from a tensor, from another Mars index and from pandas
    # objects, then compare the executed result with the pandas equivalent.

    def _concat_idx(results):
        # Join per-chunk results into one pandas Index via Series concat.
        return pd.Index(pd.concat([pd.Series(chunk) for chunk in results]))

    # from tensor
    shuffled = np.arange(100)
    np.random.shuffle(shuffled)
    mars_index = md.Index(mt.tensor(shuffled), chunk_size=7)
    executed = self.executor.execute_dataframe(mars_index, concat=True)[0]
    pd.testing.assert_index_equal(executed, pd.Index(shuffled))

    # from Mars index
    shuffled = np.arange(100)
    np.random.shuffle(shuffled)
    source_index = md.Index(shuffled, chunk_size=7)
    mars_index = md.Index(source_index, num_partitions=11)
    chunk_results = self.executor.execute_dataframe(mars_index)
    # 11 partitions are requested; the test expects 10 result chunks.
    self.assertEqual(len(chunk_results), 10)
    pd.testing.assert_index_equal(
        _concat_idx(chunk_results), pd.Index(shuffled))

    # from pandas initializer
    shuffled = np.arange(100)
    np.random.shuffle(shuffled)
    named_series = pd.Series(shuffled, name='series_name')
    mars_index = md.Index(named_series, chunk_size=7)
    executed = self.executor.execute_dataframe(mars_index, concat=True)[0]
    pd.testing.assert_index_equal(executed, pd.Index(named_series))

    named_index = pd.Index(shuffled, name='idx_name')
    mars_index = md.Index(named_index, num_partitions=10)
    chunk_results = self.executor.execute_dataframe(mars_index)
    self.assertEqual(len(chunk_results), 10)
    pd.testing.assert_index_equal(
        _concat_idx(chunk_results), pd.Index(named_index))
def test_index_initializer(setup):
    # Build md.Index from a tensor, from another Mars index and from pandas
    # objects; the fetched result must equal the pandas index.

    # from tensor
    shuffled = np.arange(100)
    np.random.shuffle(shuffled)
    mars_index = md.Index(mt.tensor(shuffled), chunk_size=7)
    fetched = mars_index.execute().fetch()
    pd.testing.assert_index_equal(fetched, pd.Index(shuffled))

    # from Mars index
    shuffled = np.arange(100)
    np.random.shuffle(shuffled)
    source_index = md.Index(shuffled, chunk_size=7)
    mars_index = md.Index(source_index, num_partitions=11)
    fetched = mars_index.execute().fetch()
    pd.testing.assert_index_equal(fetched, pd.Index(shuffled))

    # from pandas initializer
    shuffled = np.arange(100)
    np.random.shuffle(shuffled)
    named_series = pd.Series(shuffled, name='series_name')
    mars_index = md.Index(named_series, chunk_size=7)
    fetched = mars_index.execute().fetch()
    pd.testing.assert_index_equal(fetched, pd.Index(named_series))

    named_index = pd.Index(shuffled, name='idx_name')
    mars_index = md.Index(named_index, num_partitions=10)
    fetched = mars_index.execute().fetch()
    pd.testing.assert_index_equal(fetched, pd.Index(named_index))
def testInitializerExecution(self):
    # Construct DataFrame / Series / Index from pandas objects, from raw
    # arrays combined with a Mars date_range index, and from a nested Mars
    # index; each executed result must equal the pandas counterpart.

    def run(obj):
        # Execute a Mars object and return the concatenated result.
        return self.executor.execute_dataframe(obj, concat=True)[0]

    # DataFrame from a pandas frame with a two-level index
    arr = np.random.rand(20, 30)
    pdf = pd.DataFrame(arr, index=[np.arange(20), np.arange(20, 0, -1)])
    result = run(md.DataFrame(pdf, chunk_size=(15, 10)))
    pd.testing.assert_frame_equal(pdf, result)

    # DataFrame from ndarray plus Mars date_range index
    result = run(
        md.DataFrame(arr, index=md.date_range('2020-1-1', periods=20)))
    expected = pd.DataFrame(
        arr, index=pd.date_range('2020-1-1', periods=20))
    pd.testing.assert_frame_equal(result, expected)

    # Series from a pandas series with a two-level index
    s = np.random.rand(20)
    ps = pd.Series(
        s, index=[np.arange(20), np.arange(20, 0, -1)], name='a')
    result = run(md.Series(ps, chunk_size=7))
    pd.testing.assert_series_equal(ps, result)

    # Series from ndarray plus Mars date_range index
    result = run(
        md.Series(s, index=md.date_range('2020-1-1', periods=20)))
    expected = pd.Series(s, index=pd.date_range('2020-1-1', periods=20))
    pd.testing.assert_series_equal(result, expected)

    # Index wrapped twice around a pandas IntervalIndex
    pi = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])
    result = run(md.Index(md.Index(pi)))
    pd.testing.assert_index_equal(pi, result)
def test_index_gpu_initializer(setup_gpu):
    # Build md.Index from a cudf Index and from a cupy array; results are
    # converted with to_pandas() before comparison.

    # from raw cudf initializer
    cudf_index = cudf.Index(cupy.random.rand(100), name='a')
    fetched = md.Index(cudf_index, chunk_size=13).execute().fetch()
    pd.testing.assert_index_equal(
        fetched.to_pandas(), cudf_index.to_pandas())

    # from a raw cupy array, with the name passed to md.Index
    gpu_values = cupy.random.rand(100)
    fetched = md.Index(gpu_values, name='a', chunk_size=13).execute().fetch()
    expected = cudf.Index(gpu_values, name='a')
    pd.testing.assert_index_equal(
        fetched.to_pandas(), expected.to_pandas())
def test_check_na_execution(setup):
    # isna()/notna() on DataFrame, Series and Index must agree with pandas.
    # Inputs start as all-NaN and get a few random cells filled in.
    checks = ('isna', 'notna')

    df_raw = pd.DataFrame(
        np.nan, index=range(0, 20), columns=list('ABCDEFGHIJ'))
    for _ in range(20):
        df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = \
            random.randint(0, 99)
    df = md.DataFrame(df_raw, chunk_size=4)
    for check in checks:
        pd.testing.assert_frame_equal(
            getattr(df, check)().execute().fetch(),
            getattr(df_raw, check)())

    series_raw = pd.Series(np.nan, index=range(20))
    for _ in range(3):
        series_raw.iloc[random.randint(0, 19)] = random.randint(0, 99)
    series = md.Series(series_raw, chunk_size=4)
    for check in checks:
        pd.testing.assert_series_equal(
            getattr(series, check)().execute().fetch(),
            getattr(series_raw, check)())

    idx_data = np.array([np.nan] * 20)
    for _ in range(3):
        idx_data[random.randint(0, 19)] = random.randint(0, 99)
    idx_raw = pd.Index(idx_data)
    idx = md.Index(idx_raw, chunk_size=4)
    for check in checks:
        np.testing.assert_array_equal(
            getattr(idx, check)().execute().fetch(),
            getattr(idx_raw, check)())
def testSeriesInitializer(self):
    # Build md.Series from a tensor, a Mars index, a Mars series and a raw
    # pandas series, then compare with the pandas equivalent.

    def run_concat(obj):
        # Execute a Mars object and return the concatenated result.
        return self.executor.execute_dataframe(obj, concat=True)[0]

    # from tensor
    values = np.random.rand(100)
    tensor = mt.tensor(values, chunk_size=7)
    executed = run_concat(md.Series(tensor))
    pd.testing.assert_series_equal(executed, pd.Series(values))

    executed = run_concat(md.Series(tensor, chunk_size=13))
    pd.testing.assert_series_equal(executed, pd.Series(values))

    # from index
    shuffled = np.arange(100)
    np.random.shuffle(shuffled)
    named_index = pd.Index(shuffled, name='idx_name')
    mars_index = md.Index(named_index, chunk_size=7)
    executed = run_concat(md.Series(mars_index))
    pd.testing.assert_series_equal(executed, pd.Series(named_index))

    # from Mars series
    pd_series = pd.Series(np.random.rand(100), name='series_name')
    doubled = md.Series(pd_series, chunk_size=15) * 2
    repartitioned = md.Series(doubled, num_partitions=11)
    chunk_results = self.executor.execute_dataframe(repartitioned)
    # 11 partitions are requested; the test expects 10 result chunks.
    self.assertEqual(len(chunk_results), 10)
    pd.testing.assert_series_equal(pd.concat(chunk_results), pd_series * 2)

    # from raw pandas initializer
    pd_series = pd.Series(np.random.rand(100), name='series_name')
    partitioned = md.Series(pd_series, num_partitions=10)
    chunk_results = self.executor.execute_dataframe(partitioned)
    self.assertEqual(len(chunk_results), 10)
    pd.testing.assert_series_equal(pd.concat(chunk_results), pd_series)
def test_index_only(setup):
    # DataFrame / Series created from an index alone (no data) must match
    # the pandas objects built the same way.

    # plain list as index
    frame = md.DataFrame(index=[1, 2, 3])
    fetched = frame.execute().fetch()
    pd.testing.assert_frame_equal(fetched, pd.DataFrame(index=[1, 2, 3]))

    ser = md.Series(index=[1, 2, 3])
    fetched = ser.execute().fetch()
    pd.testing.assert_series_equal(fetched, pd.Series(index=[1, 2, 3]))

    # Mars index as index
    frame = md.DataFrame(index=md.Index([1, 2, 3]))
    fetched = frame.execute().fetch()
    pd.testing.assert_frame_equal(fetched, pd.DataFrame(index=[1, 2, 3]))

    ser = md.Series(index=md.Index([1, 2, 3]), dtype=object)
    fetched = ser.execute().fetch()
    pd.testing.assert_series_equal(
        fetched, pd.Series(index=[1, 2, 3], dtype=object))
def test_drop_na_execution(setup):
    # dropna() on DataFrame, Series and Index must agree with pandas.

    # dataframe cases
    df_raw = pd.DataFrame(
        np.nan, index=range(0, 20), columns=list('ABCDEFGHIJ'))
    for _ in range(30):
        df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = \
            random.randint(0, 99)
    # fill between one and five randomly chosen rows completely
    for _ in range(random.randint(1, 5)):
        row = random.randint(0, 19)
        for col in range(10):
            df_raw.iloc[row, col] = random.randint(0, 99)

    # only one chunk in columns, can run dropna directly
    dropped = md.DataFrame(df_raw, chunk_size=(4, 10)).dropna()
    pd.testing.assert_frame_equal(
        dropped.execute().fetch(), df_raw.dropna())

    # multiple chunks in columns, count() will be called first
    keyword_variants = (
        {},
        {'how': 'all'},
        {'subset': list('ABFI')},
        {'how': 'all', 'subset': list('BDHJ')},
    )
    for dropna_kwargs in keyword_variants:
        dropped = md.DataFrame(df_raw, chunk_size=4).dropna(**dropna_kwargs)
        pd.testing.assert_frame_equal(
            dropped.execute().fetch(), df_raw.dropna(**dropna_kwargs))

    # in-place variant on the Mars DataFrame
    mars_df = md.DataFrame(df_raw, chunk_size=4)
    mars_df.dropna(how='all', inplace=True)
    pd.testing.assert_frame_equal(
        mars_df.execute().fetch(), df_raw.dropna(how='all'))

    # series cases
    series_raw = pd.Series(np.nan, index=range(20))
    for _ in range(10):
        series_raw.iloc[random.randint(0, 19)] = random.randint(0, 99)

    dropped = md.Series(series_raw, chunk_size=4).dropna()
    pd.testing.assert_series_equal(
        dropped.execute().fetch(), series_raw.dropna())

    mars_series = md.Series(series_raw, chunk_size=4)
    mars_series.dropna(inplace=True)
    pd.testing.assert_series_equal(
        mars_series.execute().fetch(), series_raw.dropna())

    # index cases
    idx_data = np.array([np.nan] * 20)
    for _ in range(10):
        idx_data[random.randint(0, 19)] = random.randint(0, 99)
    idx_raw = pd.Index(idx_data)

    dropped = md.Index(idx_raw, chunk_size=4).dropna()
    pd.testing.assert_index_equal(
        dropped.execute().fetch(), idx_raw.dropna())
def testFromDataFrameExecution(self):
    # Conversion of Mars DataFrame / Series / Index into tensors.
    run_tensor = self.executor.execute_tensor

    frame = md.DataFrame({
        'angle': [0, 3, 4],
        'degree': [360, 180, 360]
    }, index=['circle', 'triangle', 'rectangle'])
    actual = run_tensor(from_dataframe(frame))
    wanted = run_tensor(mt.tensor([[0, 360], [3, 180], [4, 360]]))
    np.testing.assert_equal(actual, wanted)

    # test up-casting
    mixed_frame = md.DataFrame({'a': [0.1, 0.2, 0.3], 'b': [1, 2, 3]})
    actual_mixed = run_tensor(from_dataframe(mixed_frame))
    np.testing.assert_equal(actual_mixed[0].dtype, np.dtype('float64'))
    wanted_mixed = run_tensor(
        mt.tensor([[0.1, 1.0], [0.2, 2.0], [0.3, 3.0]]))
    np.testing.assert_equal(actual_mixed, wanted_mixed)

    # chunked frame: values and memory layout flags are both checked
    raw = [[0.1, 0.2, 0.4], [0.4, 0.7, 0.3]]
    chunked_frame = md.DataFrame(raw, columns=list('abc'), chunk_size=2)
    actual_chunked = run_tensor(
        from_dataframe(chunked_frame), concat=True)[0]
    np.testing.assert_array_equal(actual_chunked, np.asarray(raw))
    self.assertTrue(actual_chunked.flags['F_CONTIGUOUS'])
    self.assertFalse(actual_chunked.flags['C_CONTIGUOUS'])

    # test from series
    ser = md.Series([1, 2, 3])
    np.testing.assert_array_equal(
        ser.to_tensor().execute(), np.array([1, 2, 3]))

    ser = md.Series(range(10), chunk_size=3)
    np.testing.assert_array_equal(
        ser.to_tensor().execute(), np.arange(10))

    # test from index
    tuples = [(0, 1), (2, 3), (4, 5)]
    index = md.Index(pd.MultiIndex.from_tuples(tuples))
    extracted = index.to_tensor(extract_multi_index=True).execute()
    np.testing.assert_array_equal(
        extracted, np.arange(6).reshape((3, 2)))

    index = md.Index(pd.MultiIndex.from_tuples(tuples))
    not_extracted = index.to_tensor(extract_multi_index=False).execute()
    np.testing.assert_array_equal(
        not_extracted, pd.MultiIndex.from_tuples(tuples).to_series())
def test_index_fill_na_execution(setup):
    # Index.fillna(1) must agree with pandas for a default-chunked index
    # and for one built with chunk_size=3.
    idx_data = np.array([np.nan] * 20)
    for _ in range(10):
        idx_data[random.randint(0, 19)] = random.randint(0, 99)
    idx_raw = pd.Index(idx_data)

    # first the single chunk case, then chunked with numeric fill
    for index_kwargs in ({}, {'chunk_size': 3}):
        mars_index = md.Index(idx_raw, **index_kwargs)
        filled = mars_index.fillna(1)
        pd.testing.assert_index_equal(
            filled.execute().fetch(), idx_raw.fillna(1))
def testRepr(self):
    # repr() / _repr_html_() of executed Mars objects must match the output
    # of the equivalent numpy / pandas objects.

    # test tensor repr
    with np.printoptions(threshold=100):
        arr = np.random.randint(1000, size=(11, 4, 13))
        executed = mt.tensor(arr, chunk_size=3).execute()
        self.assertEqual(repr(executed), repr(arr))

    for size in (5, 58, 60, 62, 64):
        pdf = pd.DataFrame(np.random.randint(1000, size=(size, 10)))

        # test DataFrame repr
        df = md.DataFrame(pdf, chunk_size=size // 2)
        self.assertEqual(
            repr(df.execute()), repr(pdf),
            f'failed repr for DataFrame when size = {size}')

        # test DataFrame _repr_html_
        self.assertEqual(
            df.execute()._repr_html_(), pdf._repr_html_(),
            f'failed repr html for DataFrame when size = {size}')

        # test Series repr
        ps = pdf[0]
        s = md.Series(ps, chunk_size=size // 2)
        self.assertEqual(
            repr(s.execute()), repr(ps),
            f'failed repr for Series when size = {size}')

    # test Index repr
    date_index = md.Index(
        pd.date_range('2020-1-1', periods=10), chunk_size=5)
    self.assertIn('DatetimeIndex', repr(date_index.execute()))

    # test groupby repr
    frame = md.DataFrame(
        pd.DataFrame(np.random.rand(100, 3), columns=list('abc')))
    grouped = frame.groupby(['a', 'b']).execute()
    self.assertIn('DataFrameGroupBy', repr(grouped))

    # test Categorical repr
    cat = md.qcut(range(5), 3)
    self.assertIn('Categorical', repr(cat))
    self.assertIn('Categorical', str(cat))
    self.assertEqual(repr(cat.execute()), repr(pd.qcut(range(5), 3)))
def testIndexOnly(self):
    # DataFrame / Series created from an index alone (no data) must match
    # the pandas objects built the same way.

    def run(obj):
        # Execute a Mars object and return the concatenated result.
        return self.executor.execute_dataframe(obj, concat=True)[0]

    # plain list as index
    frame = md.DataFrame(index=[1, 2, 3])
    pd.testing.assert_frame_equal(run(frame), pd.DataFrame(index=[1, 2, 3]))

    ser = md.Series(index=[1, 2, 3])
    pd.testing.assert_series_equal(run(ser), pd.Series(index=[1, 2, 3]))

    # Mars index as index
    frame = md.DataFrame(index=md.Index([1, 2, 3]))
    pd.testing.assert_frame_equal(run(frame), pd.DataFrame(index=[1, 2, 3]))

    ser = md.Series(index=md.Index([1, 2, 3]), dtype=object)
    pd.testing.assert_series_equal(
        run(ser), pd.Series(index=[1, 2, 3], dtype=object))
def testIndexFillNAExecution(self):
    # Index.fillna(1) must agree with pandas for a default-chunked index
    # and for one built with chunk_size=3.
    idx_data = np.array([np.nan] * 20)
    for _ in range(10):
        idx_data[random.randint(0, 19)] = random.randint(0, 99)
    idx_raw = pd.Index(idx_data)

    # test single chunk
    single = md.Index(idx_raw)
    filled = single.fillna(1)
    result = self.executor.execute_dataframe(filled, concat=True)[0]
    expected = idx_raw.fillna(1)
    pd.testing.assert_index_equal(result, expected)

    # test chunked with numeric fill
    chunked = md.Index(idx_raw, chunk_size=3)
    filled = chunked.fillna(1)
    result = self.executor.execute_dataframe(filled, concat=True)[0]
    expected = idx_raw.fillna(1)
    pd.testing.assert_index_equal(result, expected)
def test_index_reduction(setup, check_ref_counts):
    # Compare min/max/all/any on a Mars Index with the pandas result, for a
    # default-chunked index and for one built with chunk_size=10.
    rng = np.random.RandomState(0)
    # data holds values drawn from [0, 5); data2 from [1, 6), so never zero.
    data = pd.Index(rng.randint(0, 5, (100, )))
    data2 = pd.Index(rng.randint(1, 6, (100, )))

    for method in ['min', 'max', 'all', 'any']:
        for pd_index in (data, data2):
            for index_kwargs in ({}, {'chunk_size': 10}):
                idx = md.Index(pd_index, **index_kwargs)
                result = getattr(idx, method)().execute().fetch()
                assert result == getattr(pd_index, method)()
def test_repr(setup):
    # repr() / _repr_html_() of executed Mars objects must match the output
    # of the equivalent numpy / pandas objects.

    # test tensor repr
    with np.printoptions(threshold=100):
        arr = np.random.randint(1000, size=(11, 4, 13))
        executed = mt.tensor(arr, chunk_size=3).execute()
        assert repr(executed) == repr(arr)

    for size in (5, 58, 60, 62, 64):
        pdf = pd.DataFrame(np.random.randint(1000, size=(size, 10)))

        # test DataFrame repr
        df = md.DataFrame(pdf, chunk_size=size // 2)
        assert repr(df.execute()) == repr(pdf)

        # test DataFrame _repr_html_
        assert df.execute()._repr_html_() == pdf._repr_html_()

        # test Series repr
        ps = pdf[0]
        s = md.Series(ps, chunk_size=size // 2)
        assert repr(s.execute()) == repr(ps)

    # test Index repr
    date_index = md.Index(
        pd.date_range('2020-1-1', periods=10), chunk_size=5)
    assert 'DatetimeIndex' in repr(date_index.execute())

    # test groupby repr
    frame = md.DataFrame(
        pd.DataFrame(np.random.rand(100, 3), columns=list('abc')))
    grouped = frame.groupby(['a', 'b']).execute()
    assert 'DataFrameGroupBy' in repr(grouped)

    # test Categorical repr
    cat = md.qcut(range(5), 3)
    assert 'Categorical' in repr(cat)
    assert 'Categorical' in str(cat)
    assert repr(cat.execute()) == repr(pd.qcut(range(5), 3))
def test_initializer_execution(setup):
    # Construct DataFrame / Series / Index from pandas objects, from raw
    # data combined with a Mars date_range index, and from a nested Mars
    # index; each fetched result must equal the pandas counterpart.

    # DataFrame from a pandas frame with a two-level index
    arr = np.random.rand(20, 30)
    pdf = pd.DataFrame(arr, index=[np.arange(20), np.arange(20, 0, -1)])
    fetched = md.DataFrame(pdf, chunk_size=(15, 10)).execute().fetch()
    pd.testing.assert_frame_equal(pdf, fetched)

    # DataFrame from ndarray plus Mars date_range index
    mars_df = md.DataFrame(
        arr, index=md.date_range('2020-1-1', periods=20))
    fetched = mars_df.execute().fetch()
    expected = pd.DataFrame(
        arr, index=pd.date_range('2020-1-1', periods=20))
    pd.testing.assert_frame_equal(fetched, expected)

    # DataFrame from a dict containing NaN plus a daily Mars date_range
    mars_df = md.DataFrame(
        {"prices": [100, 101, np.nan, 100, 89, 88]},
        index=md.date_range('1/1/2010', periods=6, freq='D'))
    fetched = mars_df.execute().fetch()
    expected = pd.DataFrame(
        {"prices": [100, 101, np.nan, 100, 89, 88]},
        index=pd.date_range('1/1/2010', periods=6, freq='D'))
    pd.testing.assert_frame_equal(fetched, expected)

    # Series from a pandas series with a two-level index
    s = np.random.rand(20)
    ps = pd.Series(
        s, index=[np.arange(20), np.arange(20, 0, -1)], name='a')
    fetched = md.Series(ps, chunk_size=7).execute().fetch()
    pd.testing.assert_series_equal(ps, fetched)

    # Series from ndarray plus Mars date_range index
    mars_series = md.Series(
        s, index=md.date_range('2020-1-1', periods=20))
    fetched = mars_series.execute().fetch()
    expected = pd.Series(s, index=pd.date_range('2020-1-1', periods=20))
    pd.testing.assert_series_equal(fetched, expected)

    # Index wrapped twice around a pandas IntervalIndex
    pi = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])
    fetched = md.Index(md.Index(pi)).execute().fetch()
    pd.testing.assert_index_equal(pi, fetched)
def testRepr(self):
    # repr() / _repr_html_() of executed Mars objects must match the output
    # of the equivalent numpy / pandas objects.

    # test tensor repr
    with np.printoptions(threshold=100):
        arr = np.random.randint(1000, size=(11, 4, 13))
        executed = mt.tensor(arr, chunk_size=3).execute()
        self.assertEqual(repr(executed), repr(arr))

    for size in (5, 58, 60, 62, 64):
        pdf = pd.DataFrame(np.random.randint(1000, size=(size, 10)))

        # test DataFrame repr
        df = md.DataFrame(pdf, chunk_size=size // 2)
        self.assertEqual(
            repr(df.execute()), repr(pdf),
            'failed repr for DataFrame when size = {}'.format(size))

        # test DataFrame _repr_html_
        self.assertEqual(
            df.execute()._repr_html_(), pdf._repr_html_(),
            'failed repr html for DataFrame when size = {}'.format(size))

        # test Series repr
        ps = pdf[0]
        s = md.Series(ps, chunk_size=size // 2)
        self.assertEqual(
            repr(s.execute()), repr(ps),
            'failed repr for Series when size = {}'.format(size))

    # test Index repr
    date_index = md.Index(
        pd.date_range('2020-1-1', periods=10), chunk_size=5)
    self.assertIn('DatetimeIndex', repr(date_index.execute()))
def testCheckNAExecution(self):
    # isna()/notna() on DataFrame, Series and Index must agree with pandas.
    # Inputs start as all-NaN and get a few random cells filled in.

    def run(obj):
        # Execute a Mars object and return the concatenated result.
        return self.executor.execute_dataframe(obj, concat=True)[0]

    df_raw = pd.DataFrame(
        np.nan, index=range(0, 20), columns=list('ABCDEFGHIJ'))
    for _ in range(20):
        df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = \
            random.randint(0, 99)
    df = md.DataFrame(df_raw, chunk_size=4)
    pd.testing.assert_frame_equal(run(df.isna()), df_raw.isna())
    pd.testing.assert_frame_equal(run(df.notna()), df_raw.notna())

    series_raw = pd.Series(np.nan, index=range(20))
    for _ in range(3):
        series_raw.iloc[random.randint(0, 19)] = random.randint(0, 99)
    series = md.Series(series_raw, chunk_size=4)
    pd.testing.assert_series_equal(run(series.isna()), series_raw.isna())
    pd.testing.assert_series_equal(run(series.notna()), series_raw.notna())

    idx_data = np.array([np.nan] * 20)
    for _ in range(3):
        idx_data[random.randint(0, 19)] = random.randint(0, 99)
    idx_raw = pd.Index(idx_data)
    idx = md.Index(idx_raw, chunk_size=4)
    np.testing.assert_array_equal(run(idx.isna()), idx_raw.isna())
    np.testing.assert_array_equal(run(idx.notna()), idx_raw.notna())
def test_series_initializer(setup):
    # Build md.Series from a tensor, a Mars index, a Mars series and a raw
    # pandas series; the fetched result must equal the pandas series.

    # from tensor
    values = np.random.rand(100)
    tensor = mt.tensor(values, chunk_size=7)
    fetched = md.Series(tensor).execute().fetch()
    pd.testing.assert_series_equal(fetched, pd.Series(values))

    fetched = md.Series(tensor, chunk_size=13).execute().fetch()
    pd.testing.assert_series_equal(fetched, pd.Series(values))

    # from index
    shuffled = np.arange(100)
    np.random.shuffle(shuffled)
    named_index = pd.Index(shuffled, name='idx_name')
    mars_index = md.Index(named_index, chunk_size=7)
    fetched = md.Series(mars_index).execute().fetch()
    pd.testing.assert_series_equal(fetched, pd.Series(named_index))

    # from Mars series
    pd_series = pd.Series(np.random.rand(100), name='series_name')
    doubled = md.Series(pd_series, chunk_size=15) * 2
    fetched = md.Series(doubled, num_partitions=11).execute().fetch()
    pd.testing.assert_series_equal(fetched, pd_series * 2)

    # from raw pandas initializer
    pd_series = pd.Series(np.random.rand(100), name='series_name')
    mars_series = md.Series(pd_series, num_partitions=10)
    fetched = mars_series.execute().fetch()
    pd.testing.assert_series_equal(fetched, pd_series)

    # test check instance
    mars_series = mars_series * 2
    assert isinstance(mars_series, md.Series)
def testRename(self):
    # rename() on DataFrame, Series and Index, plus Series.name assignment,
    # must agree with pandas.

    def run(obj):
        # Execute a Mars object and return the concatenated result.
        return self.executor.execute_dataframe(obj, concat=True)[0]

    rng = np.random.RandomState(0)
    raw = pd.DataFrame(rng.rand(10, 4), columns=['A', 'B', 'C', 'D'])
    df = md.DataFrame(raw, chunk_size=3)

    # errors='raise' is expected to emit a warning
    with self.assertWarns(Warning):
        df.rename(str, errors='raise')

    # copy=False is expected to be rejected
    with self.assertRaises(NotImplementedError):
        df.rename({"A": "a", "B": "b"}, axis=1, copy=False)

    # rename with a callable
    renamed = df.rename(str)
    result = run(renamed)
    expected = raw.rename(str)
    pd.testing.assert_frame_equal(result, expected)

    # rename columns with a mapping
    renamed = df.rename({"A": "a", "B": "b"}, axis=1)
    result = run(renamed)
    expected = raw.rename({"A": "a", "B": "b"}, axis=1)
    pd.testing.assert_frame_equal(result, expected)

    # same mapping, applied in place
    df.rename({"A": "a", "B": "b"}, axis=1, inplace=True)
    result = run(df)
    expected = raw.rename({"A": "a", "B": "b"}, axis=1)
    pd.testing.assert_frame_equal(result, expected)

    # rename one level of MultiIndex columns
    raw = pd.DataFrame(
        rng.rand(10, 4),
        columns=pd.MultiIndex.from_tuples(
            (('A', 'C'), ('A', 'D'), ('B', 'E'), ('B', 'F'))))
    df = md.DataFrame(raw, chunk_size=3)
    renamed = df.rename({"C": "a", "D": "b"}, level=1, axis=1)
    result = run(renamed)
    expected = raw.rename({"C": "a", "D": "b"}, level=1, axis=1)
    pd.testing.assert_frame_equal(result, expected)

    # series: rename with a scalar changes the name
    raw = pd.Series(rng.rand(10), name='series')
    series = md.Series(raw, chunk_size=3)
    renamed = series.rename('new_series')
    result = run(renamed)
    expected = raw.rename('new_series')
    pd.testing.assert_series_equal(result, expected)

    # series: rename with a callable
    renamed = series.rename(lambda x: 2**x)
    result = run(renamed)
    expected = raw.rename(lambda x: 2**x)
    pd.testing.assert_series_equal(result, expected)

    # assigning a dict as the name is expected to raise TypeError
    with self.assertRaises(TypeError):
        series.name = {1: 10, 2: 20}

    series.name = 'new_series'
    result = run(series)
    expected = raw.rename('new_series')
    pd.testing.assert_series_equal(result, expected)

    # index: rename the levels of a MultiIndex
    raw = pd.MultiIndex.from_frame(
        pd.DataFrame(rng.rand(10, 2), columns=['A', 'B']))
    idx = md.Index(raw)
    renamed = idx.rename(['C', 'D'])
    result = run(renamed)
    expected = raw.rename(['C', 'D'])
    pd.testing.assert_index_equal(result, expected)