def test_datatable_update_dataframe_with_make_index(sample_df):
    """Check ``update_dataframe`` on a table created with ``make_index=True``.

    After swapping in a two-row replacement frame the table must still
    report ``'new_index'`` as its index and expose the same ``types``
    frame as before the update, and every column's series must agree
    with the matching column of the underlying dataframe.
    """
    replacement = sample_df.copy().tail(2).reset_index(drop=True)
    # The replacement is wrapped back into a Dask frame when the fixture
    # itself is a Dask frame (``dd`` may be falsy, so it is checked first).
    if dd and isinstance(sample_df, dd.DataFrame):
        replacement = dd.from_pandas(replacement, npartitions=1)

    table_options = {
        'index': 'new_index',
        'make_index': True,
        'logical_types': {'full_name': 'FullName'},
        'semantic_tags': {'phone_number': 'custom_tag'},
    }
    table = DataTable(sample_df, **table_options)
    types_before = table.types

    table.update_dataframe(replacement)
    assert len(table._dataframe) == 2
    assert table.index == 'new_index'
    pd.testing.assert_frame_equal(types_before, table.types)

    # Every DataColumn series has to mirror its dataframe column,
    # both in content and in dtype.
    for name in table.columns:
        column_series = table.columns[name]._series
        frame_column = table._dataframe[name]
        assert to_pandas(column_series).equals(to_pandas(frame_column))
        assert column_series.dtype == frame_column.dtype

    # Updating from a slice of the table's own dataframe must not raise.
    table.update_dataframe(table._dataframe.head(1))
    assert len(table._dataframe) == 1
def test_underlying_index_on_update(sample_df):
    """Check the underlying pandas index after ``update_dataframe``.

    With ``'id'`` as the table index, updating with the last two rows of
    the fixture must leave an unnamed ``Int64Index`` holding ``[2, 3]``
    on both the internal dataframe and the one from ``to_dataframe()``.
    """
    # NOTE(review): a later function in this module reuses this exact name,
    # so that definition replaces this one at import time -- confirm whether
    # this shorter variant is still needed.
    unsupported_backends = (
        (dd, 'Setting underlying index is not supported with Dask input'),
        (ks, 'Setting underlying index is not supported with Koalas input'),
    )
    for backend, reason in unsupported_backends:
        if backend and isinstance(sample_df, backend.DataFrame):
            pytest.xfail(reason)

    table = DataTable(sample_df.copy(), index='id')
    table.update_dataframe(sample_df.tail(2))

    underlying_index = table._dataframe.index
    assert (underlying_index == [2, 3]).all()
    assert underlying_index.name is None
    # Exact type comparison on purpose: subclasses must not satisfy it.
    assert type(underlying_index) is pd.Int64Index
    assert type(table.to_dataframe().index) is pd.Int64Index
def test_underlying_index_on_update(sample_df):
    """Check the underlying pandas index after updates and derived operations.

    With ``'id'`` as the table index, updating with the last two rows of
    the fixture must leave an unnamed ``Int64Index`` holding ``[2, 3]``.
    The test then verifies the exact index class produced by ``iloc``,
    ``select``, column selection, ``drop``, ``reset_semantic_tags``,
    ``set_types`` and ``pop``.
    """
    unsupported_backends = (
        (dd, 'Setting underlying index is not supported with Dask input'),
        (ks, 'Setting underlying index is not supported with Koalas input'),
    )
    for backend, reason in unsupported_backends:
        if backend and isinstance(sample_df, backend.DataFrame):
            pytest.xfail(reason)

    def check_index_type(table, expected_type):
        # Exact type comparison on purpose: subclasses must not satisfy it.
        # Both the internal frame and the exported frame are checked.
        assert type(table._dataframe.index) is expected_type
        assert type(table.to_dataframe().index) is expected_type

    dt = DataTable(sample_df.copy(), index='id')
    dt.update_dataframe(sample_df.tail(2))

    assert (dt._dataframe.index == [2, 3]).all()
    assert dt._dataframe.index.name is None
    check_index_type(dt, pd.Int64Index)

    # Operations that keep the index column are checked first.
    check_index_type(dt.iloc[[0, 1]], pd.Index)
    check_index_type(dt.select(dt.index), pd.Int64Index)
    check_index_type(dt[['age']], pd.Int64Index)

    # Operations that drop the index column or its tags must yield a RangeIndex.
    check_index_type(dt.drop(dt.index), pd.RangeIndex)
    check_index_type(dt.reset_semantic_tags(retain_index_tags=False),
                     pd.RangeIndex)
    check_index_type(
        dt.set_types(retain_index_tags=False, semantic_tags={'id': 'numeric'}),
        pd.RangeIndex,
    )

    # pop works in place, so the original table is inspected afterwards.
    dt.pop(dt.index)
    check_index_type(dt, pd.RangeIndex)