Example #1
0
def test_setitem_new_column(sample_df):
    """Check that assigning a DataColumn under a new key adds it to the table.

    Three assignments are made in turn: an unnamed integer series without
    standard tags, a named string series with standard tags, and an integer
    series with an explicit ``Double`` logical type plus a custom semantic
    tag. After each one the column's logical type, semantic tags, name and
    the dtype seen in the exported dataframe are verified.
    """
    table = DataTable(sample_df)
    # The backend never changes during the test, so decide once whether the
    # fixture is a Koalas frame (the guard tolerates a falsy ``ks``).
    on_koalas = bool(ks) and isinstance(sample_df, ks.DataFrame)

    def to_backend(series):
        # Wrap the pandas series in a Koalas series only for Koalas fixtures.
        return ks.Series(series) if on_koalas else series

    # No standard tags, no logical type, unnamed series
    expected_dtype = 'int64' if on_koalas else 'Int64'
    column = DataColumn(to_backend(pd.Series([1, 2, 3])),
                        use_standard_tags=False)
    assert column.name is None

    table['test_col2'] = column
    frame = table.to_dataframe()
    assert 'test_col2' in table.columns
    assert table['test_col2'].logical_type == Integer
    assert table['test_col2'].semantic_tags == set()
    assert table['test_col2'].name == 'test_col2'
    assert table['test_col2']._series.name == 'test_col2'
    assert 'test_col2' in frame.columns
    assert frame['test_col2'].dtype == expected_dtype

    # Standard tags and no logical type
    expected_dtype = 'object' if on_koalas else 'category'
    named_series = pd.Series(['new', 'column', 'inserted'], name='test_col')
    column = DataColumn(to_backend(named_series), use_standard_tags=True)
    table['test_col'] = column
    frame = table.to_dataframe()
    assert 'test_col' in table.columns
    assert table['test_col'].logical_type == Categorical
    assert table['test_col'].semantic_tags == {'category'}
    assert table['test_col'].name == 'test_col'
    assert table['test_col']._series.name == 'test_col'
    assert 'test_col' in frame.columns
    assert frame['test_col'].dtype == expected_dtype

    # Add with logical type and semantic tag
    column = DataColumn(to_backend(pd.Series([1, 2, 3])),
                        logical_type=Double,
                        use_standard_tags=False,
                        semantic_tags={'test_tag'})
    table['test_col3'] = column
    frame = table.to_dataframe()
    assert 'test_col3' in table.columns
    assert table['test_col3'].logical_type == Double
    assert table['test_col3'].semantic_tags == {'test_tag'}
    assert table['test_col3'].name == 'test_col3'
    assert table['test_col3']._series.name == 'test_col3'
    assert 'test_col3' in frame.columns
    assert frame['test_col3'].dtype == 'float'
Example #2
0
def test_setitem_overwrite_column(sample_df):
    """Check that assigning a DataColumn to an existing key replaces it.

    The ``age`` column is first overwritten with data of the same kind, and
    the logical type and semantic tags are expected to match the original
    column. The ``full_name`` column is then overwritten with boolean data
    whose DataColumn disables standard tags and carries a custom tag, and
    the new type information is expected to win.
    """
    table = DataTable(sample_df,
                      index='id',
                      time_index='signup_date',
                      use_standard_tags=True)
    # The backend never changes during the test, so decide once whether the
    # fixture is a Koalas frame (the guard tolerates a falsy ``ks``).
    on_koalas = bool(ks) and isinstance(sample_df, ks.DataFrame)

    def to_backend(series):
        # Wrap the pandas series in a Koalas series only for Koalas fixtures.
        return ks.Series(series) if on_koalas else series

    # Overwrite with a column whose types do not change
    previous = table['age']
    expected_dtype = 'int64' if on_koalas else 'Int64'
    replacement = DataColumn(to_backend(pd.Series([1, 2, 3])),
                             use_standard_tags=True)
    table['age'] = replacement
    frame = table.to_dataframe()

    assert 'age' in table.columns
    assert table['age'].logical_type == previous.logical_type
    assert table['age'].semantic_tags == previous.semantic_tags
    assert 'age' in frame.columns
    assert frame['age'].dtype == expected_dtype
    assert previous.to_series() is not table['age'].to_series()

    # Change dtype, logical types, and tags with conflicting use_standard_tags
    previous = table['full_name']
    flags = to_backend(pd.Series([True, False, False]))
    expected_dtype = 'bool' if on_koalas else 'boolean'
    replacement = DataColumn(flags.astype(expected_dtype),
                             use_standard_tags=False,
                             semantic_tags='test_tag')
    table['full_name'] = replacement
    frame = table.to_dataframe()

    assert 'full_name' in table.columns
    assert table['full_name'].logical_type == Boolean
    assert table['full_name'].semantic_tags == {'test_tag'}
    assert 'full_name' in frame.columns
    assert frame['full_name'].dtype == expected_dtype
    assert previous.to_series() is not table['full_name'].to_series()
Example #3
0
def test_setitem_different_name(sample_df):
    """Check that a name mismatch on assignment warns and renames the column.

    Each assignment stores a DataColumn whose name differs from the key it
    is assigned to. A ``ColumnNameMismatchWarning`` is expected every time,
    and afterwards the DataColumn, its underlying series and the exported
    dataframe column must all carry the key as their name, with the stale
    name absent from the table's columns.

    Covered cases: overwriting an existing column (``id``), adding a new
    column from a named series, and adding a new column whose mismatching
    name is passed through the ``name`` argument of ``DataColumn``.
    """
    # Local import keeps this block self-contained. ``pytest.warns(match=...)``
    # treats its argument as a regular expression, so the expected messages
    # are escaped to make the '.' in them match only a literal period.
    import re

    dt = DataTable(sample_df)

    new_series = pd.Series([1, 2, 3, 4], name='wrong')
    if ks and isinstance(sample_df, ks.DataFrame):
        new_series = ks.Series(new_series)

    # Overwrite an existing column with a differently named series
    warning = 'Name mismatch between wrong and id. DataColumn and underlying series name are now id'
    with pytest.warns(ColumnNameMismatchWarning, match=re.escape(warning)):
        dt['id'] = DataColumn(new_series,
                              use_standard_tags=False)

    assert dt['id'].name == 'id'
    assert dt['id'].to_series().name == 'id'
    assert dt.to_dataframe()['id'].name == 'id'
    assert 'wrong' not in dt.columns

    new_series2 = pd.Series([1, 2, 3, 4], name='wrong2')
    if ks and isinstance(sample_df, ks.DataFrame):
        new_series2 = ks.Series(new_series2)

    # Add a brand-new column from a differently named series
    warning = 'Name mismatch between wrong2 and new_col. DataColumn and underlying series name are now new_col'
    with pytest.warns(ColumnNameMismatchWarning, match=re.escape(warning)):
        dt['new_col'] = DataColumn(new_series2,
                                   use_standard_tags=False)

    assert dt['new_col'].name == 'new_col'
    assert dt['new_col'].to_series().name == 'new_col'
    assert dt.to_dataframe()['new_col'].name == 'new_col'
    assert 'wrong2' not in dt.columns

    # Add a new column whose mismatching name is given explicitly via ``name``
    warning = 'Name mismatch between wrong and col_with_name. DataColumn and underlying series name are now col_with_name'
    with pytest.warns(ColumnNameMismatchWarning, match=re.escape(warning)):
        dt['col_with_name'] = DataColumn(new_series,
                                         use_standard_tags=False, name='wrong')
    assert dt['col_with_name'].name == 'col_with_name'
    assert dt['col_with_name'].to_series().name == 'col_with_name'
    assert dt.to_dataframe()['col_with_name'].name == 'col_with_name'
    assert 'wrong' not in dt.columns