def test_setitem_new_column(sample_df):
    """Check that assigning a DataColumn under an unused key adds it to the DataTable.

    Three insertions are exercised in turn:

    1. an unnamed integer series with standard tags disabled,
    2. a named string series with standard tags enabled and no logical type,
    3. an integer series with an explicit ``Double`` logical type and a
       custom semantic tag.

    After each insertion the column's logical type, semantic tags, name,
    underlying series name, and its dtype in ``dt.to_dataframe()`` are checked.
    """
    dt = DataTable(sample_df)
    # ``ks`` may be falsy (presumably when Koalas is not installed), so it is
    # tested before being dereferenced.
    on_koalas = bool(ks and isinstance(sample_df, ks.DataFrame))

    def backend_series(series):
        """Return ``series`` as a Koalas series when the fixture is a Koalas frame."""
        return ks.Series(series) if on_koalas else series

    def check_inserted(col_name, logical_type, tags, dtype):
        """Assert the column stored under ``col_name`` carries the expected metadata."""
        exported = dt.to_dataframe()
        assert col_name in dt.columns
        assert dt[col_name].logical_type == logical_type
        assert dt[col_name].semantic_tags == tags
        assert dt[col_name].name == col_name
        assert dt[col_name]._series.name == col_name
        assert col_name in exported.columns
        assert exported[col_name].dtype == dtype

    # Case 1: unnamed series, no standard tags. The column only receives its
    # name once it has been assigned into the table.
    unnamed_col = DataColumn(backend_series(pd.Series([1, 2, 3])), use_standard_tags=False)
    assert unnamed_col.name is None
    dt['test_col2'] = unnamed_col
    check_inserted('test_col2', Integer, set(), 'int64' if on_koalas else 'Int64')

    # Case 2: standard tags enabled, logical type left unspecified.
    named_col = DataColumn(
        backend_series(pd.Series(['new', 'column', 'inserted'], name='test_col')),
        use_standard_tags=True,
    )
    dt['test_col'] = named_col
    check_inserted('test_col', Categorical, {'category'}, 'object' if on_koalas else 'category')

    # Case 3: explicit logical type together with a custom semantic tag.
    typed_col = DataColumn(
        backend_series(pd.Series([1, 2, 3])),
        logical_type=Double,
        use_standard_tags=False,
        semantic_tags={'test_tag'},
    )
    dt['test_col3'] = typed_col
    check_inserted('test_col3', Double, {'test_tag'}, 'float')
def test_setitem_overwrite_column(sample_df):
    """Check that assigning a DataColumn to an existing key replaces that column.

    Two overwrites are exercised on a table built with ``index='id'``,
    ``time_index='signup_date'`` and standard tags enabled:

    * ``age`` is replaced by another integer series; logical type and
      semantic tags are expected to equal those of the column it replaces.
    * ``full_name`` is replaced by a boolean series whose DataColumn disables
      standard tags and carries a custom tag; the new logical type, tags and
      dtype are expected to come from the replacement.

    In both cases the series held by the table afterwards must not be the
    same object as the series of the column that was replaced.
    """
    dt = DataTable(sample_df, index='id', time_index='signup_date', use_standard_tags=True)
    # ``ks`` may be falsy (presumably when Koalas is not installed), so it is
    # tested before being dereferenced.
    on_koalas = bool(ks and isinstance(sample_df, ks.DataFrame))

    # --- Overwrite 'age' with data of the same kind -----------------------
    previous_age = dt['age']
    int_values = pd.Series([1, 2, 3])
    if on_koalas:
        int_values = ks.Series(int_values)
    expected_int_dtype = 'int64' if on_koalas else 'Int64'

    dt['age'] = DataColumn(int_values, use_standard_tags=True)
    frame_after_age = dt.to_dataframe()

    assert 'age' in dt.columns
    assert dt['age'].logical_type == previous_age.logical_type
    assert dt['age'].semantic_tags == previous_age.semantic_tags
    assert 'age' in frame_after_age.columns
    assert frame_after_age['age'].dtype == expected_int_dtype
    assert previous_age.to_series() is not dt['age'].to_series()

    # --- Overwrite 'full_name' with a different dtype, type and tags ------
    # The replacement column turns standard tags off although the table was
    # created with them on.
    previous_name = dt['full_name']
    bool_values = pd.Series([True, False, False])
    if on_koalas:
        bool_values = ks.Series(bool_values)
    expected_bool_dtype = 'bool' if on_koalas else 'boolean'

    dt['full_name'] = DataColumn(
        bool_values.astype(expected_bool_dtype),
        use_standard_tags=False,
        semantic_tags='test_tag',
    )
    frame_after_name = dt.to_dataframe()

    assert 'full_name' in dt.columns
    assert dt['full_name'].logical_type == Boolean
    assert dt['full_name'].semantic_tags == {'test_tag'}
    assert 'full_name' in frame_after_name.columns
    assert frame_after_name['full_name'].dtype == expected_bool_dtype
    assert previous_name.to_series() is not dt['full_name'].to_series()
def test_setitem_different_name(sample_df):
    """Check that a name mismatch on assignment warns and the key name wins.

    A DataColumn whose name differs from the key it is assigned to must raise
    ``ColumnNameMismatchWarning``; afterwards the column, its series and the
    matching column of ``dt.to_dataframe()`` must all carry the key's name,
    and the old name must not appear in ``dt.columns``.  Covered cases:

    * overwriting the existing ``id`` column with a series named ``wrong``,
    * adding ``new_col`` from a series named ``wrong2``,
    * adding ``col_with_name`` from a DataColumn given ``name='wrong'``
      explicitly.
    """
    dt = DataTable(sample_df)
    # ``ks`` may be falsy (presumably when Koalas is not installed), so it is
    # tested before being dereferenced.
    on_koalas = bool(ks and isinstance(sample_df, ks.DataFrame))

    def assign_and_verify(key, series, message, **column_kwargs):
        """Assign a mismatched column under ``key`` and assert it was renamed."""
        # The DataColumn is built inside the context manager so that any
        # warning raised during construction is captured as well.
        with pytest.warns(ColumnNameMismatchWarning, match=message):
            dt[key] = DataColumn(series, use_standard_tags=False, **column_kwargs)
        assert dt[key].name == key
        assert dt[key].to_series().name == key
        assert dt.to_dataframe()[key].name == key

    # Existing key, series carries a different name.
    first_series = pd.Series([1, 2, 3, 4], name='wrong')
    if on_koalas:
        first_series = ks.Series(first_series)
    assign_and_verify(
        'id',
        first_series,
        'Name mismatch between wrong and id. DataColumn and underlying series name are now id',
    )
    assert 'wrong' not in dt.columns

    # New key, series carries a different name.
    second_series = pd.Series([1, 2, 3, 4], name='wrong2')
    if on_koalas:
        second_series = ks.Series(second_series)
    assign_and_verify(
        'new_col',
        second_series,
        'Name mismatch between wrong2 and new_col. DataColumn and underlying series name are now new_col',
    )
    assert 'wrong2' not in dt.columns

    # New key, mismatching name passed explicitly to the DataColumn; the
    # first series object is deliberately reused here.
    assign_and_verify(
        'col_with_name',
        first_series,
        'Name mismatch between wrong and col_with_name. DataColumn and underlying series name are now col_with_name',
        name='wrong',
    )
    assert 'wrong' not in dt.columns