def test_reset_semantic_tags_with_time_index(sample_df):
    """Check how resetting tags on the time index column treats 'time_index'.

    With ``retain_index_tags=True`` only the 'time_index' tag is expected to
    remain; with the default call the column is expected to have no tags.
    """
    column = 'signup_date'
    table = DataTable(
        sample_df,
        time_index=column,
        semantic_tags={column: 'tag1'},
        use_standard_tags=False,
    )
    # Both the user-supplied tag and the time index tag are present initially.
    assert table[column].semantic_tags == {'time_index', 'tag1'}

    # Retaining index tags drops only the user-supplied tag.
    table = table.reset_semantic_tags(column, retain_index_tags=True)
    assert table[column].semantic_tags == {'time_index'}

    # A plain reset leaves the column with no semantic tags.
    table = table.reset_semantic_tags(column)
    assert table[column].semantic_tags == set()
def test_reset_semantic_tags_with_index(sample_df):
    """Check how resetting tags on the index column treats the 'index' tag.

    With ``retain_index_tags=True`` only the 'index' tag is expected to
    remain; with the default call the column is expected to have no tags.
    """
    column = 'id'
    table = DataTable(
        sample_df,
        index=column,
        semantic_tags={column: 'tag1'},
        use_standard_tags=False,
    )
    # Both the user-supplied tag and the index tag are present initially.
    assert table[column].semantic_tags == {'index', 'tag1'}

    # Retaining index tags drops only the user-supplied tag.
    table = table.reset_semantic_tags(column, retain_index_tags=True)
    assert table[column].semantic_tags == {'index'}

    # A plain reset leaves the column with no semantic tags.
    table = table.reset_semantic_tags(column)
    assert table[column].semantic_tags == set()
def test_reset_selected_column_semantic_tags(sample_df):
    """Reset tags for one column, passing the selection as str, list and set.

    For each form of the selection, the untouched 'full_name' column is
    expected to keep its tag while 'age' is expected to end up with only
    the 'numeric' tag.
    """
    tags_by_column = {'full_name': 'tag1', 'age': 'age'}

    # The same column, expressed in each accepted container form.
    for selection in ('age', ['age'], {'age'}):
        table = DataTable(
            sample_df,
            semantic_tags=tags_by_column,
            use_standard_tags=True,
        )
        table = table.reset_semantic_tags(selection)

        columns = table.columns
        assert columns['full_name'].semantic_tags == {'tag1'}
        assert columns['age'].semantic_tags == {'numeric'}
def test_reset_all_semantic_tags(sample_df):
    """Reset tags on every column and confirm the source table is untouched.

    Calling ``reset_semantic_tags()`` with no arguments is expected to return
    a different object whose columns carry only the tags asserted below,
    while the original table keeps the tags it was built with.
    """
    original = DataTable(
        sample_df,
        semantic_tags={'full_name': 'tag1', 'age': 'age'},
        use_standard_tags=True,
    )
    reset = original.reset_semantic_tags()

    # The table the reset was called on must still have its initial tags.
    assert original.columns['full_name'].semantic_tags == {'tag1'}
    assert original.columns['age'].semantic_tags == {'numeric', 'age'}

    # The result is a separate object with the user-supplied tags removed.
    assert reset is not original
    assert reset.columns['full_name'].semantic_tags == set()
    assert reset.columns['age'].semantic_tags == {'numeric'}
def test_underlying_index_on_update(sample_df):
    """Track the underlying dataframe index type across DataTable operations.

    After ``update_dataframe`` the test checks the index values, name and
    type, then checks the exact index type produced by ``iloc``, ``select``,
    column selection, ``drop``, ``reset_semantic_tags``, ``set_types`` and
    ``pop``. Dask and Koalas inputs are marked as expected failures.
    """
    if dd and isinstance(sample_df, dd.DataFrame):
        pytest.xfail('Setting underlying index is not supported with Dask input')
    if ks and isinstance(sample_df, ks.DataFrame):
        pytest.xfail('Setting underlying index is not supported with Koalas input')

    def check_index_type(table, expected):
        # Exact type comparison is intentional: the expected classes are
        # compared by identity of type, not by isinstance.
        assert type(table._dataframe.index) == expected
        assert type(table.to_dataframe().index) == expected

    table = DataTable(sample_df.copy(), index='id')

    table.update_dataframe(sample_df.tail(2))
    underlying_index = table._dataframe.index
    assert (underlying_index == [2, 3]).all()
    assert table._dataframe.index.name is None
    check_index_type(table, pd.Int64Index)

    # Operations that return a new table while the index column is kept.
    check_index_type(table.iloc[[0, 1]], pd.Index)
    check_index_type(table.select(table.index), pd.Int64Index)
    check_index_type(table[['age']], pd.Int64Index)

    # Operations that remove the index column or its index tag.
    check_index_type(table.drop(table.index), pd.RangeIndex)
    check_index_type(
        table.reset_semantic_tags(retain_index_tags=False),
        pd.RangeIndex,
    )
    check_index_type(
        table.set_types(retain_index_tags=False, semantic_tags={'id': 'numeric'}),
        pd.RangeIndex,
    )

    # pop mutates the table in place, so the table itself is re-checked.
    table.pop(table.index)
    check_index_type(table, pd.RangeIndex)
def test_reset_semantic_tags_invalid_column(sample_df):
    """Resetting tags for an unknown column must raise ``LookupError``."""
    table = DataTable(sample_df)
    # The message is passed to pytest.raises as a match pattern.
    expected_message = "Input contains columns that are not present in dataframe: 'invalid_column'"

    with pytest.raises(LookupError, match=expected_message):
        table.reset_semantic_tags('invalid_column')