def test_reset_semantic_tags_with_time_index(sample_df):
    """Check ``reset_semantic_tags`` on the column used as the time index.

    The table is built without standard tags, so the column starts with only
    the ``'time_index'`` tag plus the user-supplied ``'tag1'``.
    """
    column = 'signup_date'
    dt = DataTable(sample_df,
                   time_index=column,
                   semantic_tags={column: 'tag1'},
                   use_standard_tags=False)
    assert dt[column].semantic_tags == {'time_index', 'tag1'}

    # With retain_index_tags=True only the user-supplied tag is expected to go.
    dt = dt.reset_semantic_tags(column, retain_index_tags=True)
    assert dt[column].semantic_tags == {'time_index'}

    # Without retain_index_tags the 'time_index' tag is expected to go too.
    dt = dt.reset_semantic_tags(column)
    assert dt[column].semantic_tags == set()
def test_reset_semantic_tags_with_index(sample_df):
    """Check ``reset_semantic_tags`` on the column used as the index.

    The table is built without standard tags, so the column starts with only
    the ``'index'`` tag plus the user-supplied ``'tag1'``.
    """
    column = 'id'
    dt = DataTable(sample_df,
                   index=column,
                   semantic_tags={column: 'tag1'},
                   use_standard_tags=False)
    assert dt[column].semantic_tags == {'index', 'tag1'}

    # With retain_index_tags=True only the user-supplied tag is expected to go.
    dt = dt.reset_semantic_tags(column, retain_index_tags=True)
    assert dt[column].semantic_tags == {'index'}

    # Without retain_index_tags the 'index' tag is expected to go too.
    dt = dt.reset_semantic_tags(column)
    assert dt[column].semantic_tags == set()
def test_reset_selected_column_semantic_tags(sample_df):
    """Check that resetting one column leaves the tags of other columns alone.

    The column selection is passed as a string, a list and a set; each form
    is expected to give the same result.
    """
    initial_tags = {'full_name': 'tag1', 'age': 'age'}

    for selection in ('age', ['age'], {'age'}):
        # Start from a fresh table on every pass.
        table = DataTable(sample_df,
                          semantic_tags=initial_tags,
                          use_standard_tags=True)
        table = table.reset_semantic_tags(selection)

        # 'full_name' was not selected and keeps its custom tag.
        assert table.columns['full_name'].semantic_tags == {'tag1'}
        # 'age' loses the custom 'age' tag; only 'numeric' is expected to stay.
        assert table.columns['age'].semantic_tags == {'numeric'}
def test_reset_all_semantic_tags(sample_df):
    """Check that ``reset_semantic_tags()`` with no arguments resets every column.

    The call is expected to return a new table and leave the original one
    untouched.
    """
    original = DataTable(sample_df,
                         semantic_tags={'full_name': 'tag1', 'age': 'age'},
                         use_standard_tags=True)

    reset = original.reset_semantic_tags()

    # The original table must still carry the tags it was created with.
    original_columns = original.columns
    assert original_columns['full_name'].semantic_tags == {'tag1'}
    assert original_columns['age'].semantic_tags == {'numeric', 'age'}

    # The returned table is a distinct object with the custom tags removed.
    assert reset is not original
    reset_columns = reset.columns
    assert reset_columns['full_name'].semantic_tags == set()
    assert reset_columns['age'].semantic_tags == {'numeric'}
# Example #5
# 0
def test_underlying_index_on_update(sample_df):
    """Check the underlying dataframe index type after table operations.

    Each operation is checked on both ``_dataframe`` and ``to_dataframe()``.
    The comparisons use exact ``type(...) ==`` checks rather than
    ``isinstance``.

    NOTE(review): ``pd.Int64Index`` was deprecated in pandas 1.4 and removed
    in pandas 2.0, so this test presumably targets an older pandas - confirm
    against the project's pinned version.
    """
    if dd and isinstance(sample_df, dd.DataFrame):
        pytest.xfail(
            'Setting underlying index is not supported with Dask input')
    if ks and isinstance(sample_df, ks.DataFrame):
        pytest.xfail(
            'Setting underlying index is not supported with Koalas input')

    def check_index_type(table, expected_type):
        # Both the private dataframe and the exported one must agree.
        assert type(table._dataframe.index) == expected_type
        assert type(table.to_dataframe().index) == expected_type

    dt = DataTable(sample_df.copy(), index='id')

    # Replace the data with the last two rows of the fixture.
    dt.update_dataframe(sample_df.tail(2))
    underlying_index = dt._dataframe.index
    assert (underlying_index == [2, 3]).all()
    assert dt._dataframe.index.name is None
    check_index_type(dt, pd.Int64Index)

    # Operations that keep the index column.
    check_index_type(dt.iloc[[0, 1]], pd.Index)
    check_index_type(dt.select(dt.index), pd.Int64Index)
    check_index_type(dt[['age']], pd.Int64Index)

    # Operations that remove the index column or its index tag are expected
    # to produce a RangeIndex.
    check_index_type(dt.drop(dt.index), pd.RangeIndex)
    check_index_type(dt.reset_semantic_tags(retain_index_tags=False),
                     pd.RangeIndex)
    check_index_type(
        dt.set_types(retain_index_tags=False,
                     semantic_tags={'id': 'numeric'}),
        pd.RangeIndex)

    # pop modifies ``dt`` itself, so it has to come last.
    dt.pop(dt.index)
    check_index_type(dt, pd.RangeIndex)
def test_reset_semantic_tags_invalid_column(sample_df):
    """Check that resetting tags for an unknown column raises ``LookupError``."""
    # pytest.raises(match=...) searches the exception text for this pattern.
    expected_message = "Input contains columns that are not present in dataframe: 'invalid_column'"
    table = DataTable(sample_df)
    with pytest.raises(LookupError, match=expected_message):
        table.reset_semantic_tags('invalid_column')