def test_datatable_drop_indices(sample_df):
    """Dropping the index or time index column clears only that setting."""
    table = DataTable(sample_df, index='id', time_index='signup_date')
    assert table.index == 'id'
    assert table.time_index == 'signup_date'

    # Drop the index column (passed as a string); the time index must remain.
    without_index = table.drop('id')
    assert 'id' not in without_index.columns
    assert without_index.index is None
    assert without_index.time_index == 'signup_date'

    # Drop the time index column (passed as a list); the index must remain.
    without_time_index = table.drop(['signup_date'])
    assert 'signup_date' not in without_time_index.columns
    assert without_time_index.time_index is None
    assert without_time_index.index == 'id'
def test_underlying_index(sample_df):
    """Check the underlying dataframe index as the DataTable index changes.

    Covers setting the index at construction, through ``set_index``, through
    the ``index`` property, removing it, and creating it with ``make_index``.
    """
    if dd and isinstance(sample_df, dd.DataFrame):
        pytest.xfail(
            'Setting underlying index is not supported with Dask input')
    if ks and isinstance(sample_df, ks.DataFrame):
        pytest.xfail(
            'Setting underlying index is not supported with Koalas input')

    def assert_index_type(table, expected):
        # Exact type comparison on purpose: pd.RangeIndex is a subclass of
        # pd.Index, so isinstance could not tell the two apart.
        assert type(table._dataframe.index) is expected
        assert type(table.to_dataframe().index) is expected

    no_index_type = pd.RangeIndex
    with_index_type = pd.Index

    # Index given at construction time.
    table = DataTable(sample_df.copy(), index='id')
    assert table._dataframe.index.name is None
    assert (table._dataframe.index == [0, 1, 2, 3]).all()
    assert_index_type(table, with_index_type)

    # Index assigned afterwards through set_index.
    table = DataTable(sample_df.copy())
    table = table.set_index('full_name')
    full_names = table.to_dataframe()['full_name']
    assert (table._dataframe.index == full_names).all()
    assert table._dataframe.index.name is None
    assert_index_type(table, with_index_type)

    # Index reassigned through the property setter.
    table.index = 'id'
    assert (table._dataframe.index == [0, 1, 2, 3]).all()
    assert table._dataframe.index.name is None
    assert_index_type(table, with_index_type)

    # Removing the index also removes the dataframe's index.
    table.index = None
    assert_index_type(table, no_index_type)

    # Index column created by the DataTable itself.
    table = DataTable(sample_df.copy(), index='made_index', make_index=True)
    assert (table._dataframe.index == [0, 1, 2, 3]).all()
    assert table._dataframe.index.name is None
    assert_index_type(table, with_index_type)

    # Dropping the created index column removes it from both views.
    without_made_index = table.drop('made_index')
    assert 'made_index' not in without_made_index.columns
    assert 'made_index' not in without_made_index._dataframe.columns
    assert_index_type(without_made_index, no_index_type)
def test_underlying_index_on_update(sample_df):
    """Check the underlying index type after an update and derived operations.

    After ``update_dataframe`` the test inspects the index type produced by
    ``iloc``, ``select``, column selection, ``drop``, ``reset_semantic_tags``,
    ``set_types`` and ``pop``.
    """
    if dd and isinstance(sample_df, dd.DataFrame):
        pytest.xfail(
            'Setting underlying index is not supported with Dask input')
    if ks and isinstance(sample_df, ks.DataFrame):
        pytest.xfail(
            'Setting underlying index is not supported with Koalas input')

    def assert_index_type(table, expected):
        # Exact type comparison on purpose: the pandas index classes checked
        # here are related by inheritance, so isinstance would be too lenient.
        assert type(table._dataframe.index) is expected
        assert type(table.to_dataframe().index) is expected

    # NOTE(review): pd.Int64Index is deprecated in newer pandas releases and
    # removed in pandas 2.0 -- confirm the supported pandas range.
    table = DataTable(sample_df.copy(), index='id')
    table.update_dataframe(sample_df.tail(2))
    assert (table._dataframe.index == [2, 3]).all()
    assert table._dataframe.index.name is None
    assert_index_type(table, pd.Int64Index)

    # Operations that keep the index column.
    result = table.iloc[[0, 1]]
    assert_index_type(result, pd.Index)

    result = table.select(table.index)
    assert_index_type(result, pd.Int64Index)

    result = table[['age']]
    assert_index_type(result, pd.Int64Index)

    # Operations that remove the index column or its index tags.
    result = table.drop(table.index)
    assert_index_type(result, pd.RangeIndex)

    result = table.reset_semantic_tags(retain_index_tags=False)
    assert_index_type(result, pd.RangeIndex)

    result = table.set_types(retain_index_tags=False,
                             semantic_tags={'id': 'numeric'})
    assert_index_type(result, pd.RangeIndex)

    # pop is checked on the table itself rather than on a returned table.
    table.pop(table.index)
    assert_index_type(table, pd.RangeIndex)
def test_datatable_drop_errors(sample_df):
    """``drop`` raises ValueError naming the requested columns it cannot find."""
    table = DataTable(sample_df)

    # A single missing name, alone or mixed with an existing column, produces
    # the same message listing only the missing name.
    one_missing = re.escape("['not_present'] not found in DataTable")
    for columns in ('not_present', ['age', 'not_present']):
        with pytest.raises(ValueError, match=one_missing):
            table.drop(columns)

    # Several missing names, including a non-string one, are all listed.
    two_missing = re.escape("['not_present1', 4] not found in DataTable")
    with pytest.raises(ValueError, match=two_missing):
        table.drop(['not_present1', 4])