def test_datatable_update_dataframe_with_make_index(sample_df):
    """Update a DataTable created with ``make_index=True`` and check its state.

    After ``update_dataframe`` the table must hold the new rows, keep
    ``'new_index'`` as its index, report the same ``types`` frame as before
    the update, and keep every DataColumn series in step with the matching
    column of the underlying dataframe.
    """
    replacement = sample_df.copy().tail(2).reset_index(drop=True)
    if dd and isinstance(sample_df, dd.DataFrame):
        # Give the table the same kind of dataframe it was built from.
        replacement = dd.from_pandas(replacement, npartitions=1)

    table_options = {
        'index': 'new_index',
        'make_index': True,
        'logical_types': {'full_name': 'FullName'},
        'semantic_tags': {'phone_number': 'custom_tag'},
    }
    dt = DataTable(sample_df, **table_options)
    types_before_update = dt.types

    dt.update_dataframe(replacement)
    assert len(dt._dataframe) == 2
    assert dt.index == 'new_index'
    pd.testing.assert_frame_equal(types_before_update, dt.types)

    # Each DataColumn series has to mirror the corresponding dataframe column,
    # both in values and in dtype.
    for name in dt.columns:
        column_series = dt.columns[name]._series
        frame_column = dt._dataframe[name]
        assert to_pandas(column_series).equals(to_pandas(frame_column))
        assert column_series.dtype == frame_column.dtype

    # Updating from a slice of the table's own dataframe must also succeed.
    first_row = dt._dataframe.head(1)
    dt.update_dataframe(first_row)
    assert len(dt._dataframe) == 1
def test_underlying_index_on_update(sample_df):
    """Check the underlying pandas index after updating an indexed DataTable.

    The test is marked as an expected failure for Dask and Koalas inputs.
    """
    # NOTE(review): a longer test with this same name appears later in this
    # file; if both stay in one module the later definition replaces this one.
    unsupported_inputs = (
        (dd, 'Setting underlying index is not supported with Dask input'),
        (ks, 'Setting underlying index is not supported with Koalas input'),
    )
    for module, reason in unsupported_inputs:
        if module and isinstance(sample_df, module.DataFrame):
            pytest.xfail(reason)

    dt = DataTable(sample_df.copy(), index='id')

    last_two_rows = sample_df.tail(2)
    dt.update_dataframe(last_two_rows)

    underlying_index = dt._dataframe.index
    assert (underlying_index == [2, 3]).all()
    assert underlying_index.name is None
    # Exact class comparison on purpose: subclasses must not satisfy the check.
    assert type(underlying_index) is pd.Int64Index
    assert type(dt.to_dataframe().index) is pd.Int64Index
# Example #3
# 0
def test_underlying_index_on_update(sample_df):
    """Check the underlying pandas index type across DataTable operations.

    After ``update_dataframe`` on a table indexed by ``'id'``, the test walks
    through ``iloc``, ``select``, column selection, ``drop``,
    ``reset_semantic_tags``, ``set_types`` and ``pop`` and asserts the exact
    class of the index on both ``_dataframe`` and ``to_dataframe()``.

    The test is marked as an expected failure for Dask and Koalas inputs.
    """
    unsupported_inputs = (
        (dd, 'Setting underlying index is not supported with Dask input'),
        (ks, 'Setting underlying index is not supported with Koalas input'),
    )
    for module, reason in unsupported_inputs:
        if module and isinstance(sample_df, module.DataFrame):
            pytest.xfail(reason)

    def assert_index_type(table, expected_type):
        # Exact class comparison on purpose: subclasses must not satisfy it.
        assert type(table._dataframe.index) is expected_type
        assert type(table.to_dataframe().index) is expected_type

    dt = DataTable(sample_df.copy(), index='id')

    last_two_rows = sample_df.tail(2)
    dt.update_dataframe(last_two_rows)

    underlying_index = dt._dataframe.index
    assert (underlying_index == [2, 3]).all()
    assert underlying_index.name is None
    assert_index_type(dt, pd.Int64Index)

    # Positional row selection.
    assert_index_type(dt.iloc[[0, 1]], pd.Index)

    # Selections that keep the index column.
    assert_index_type(dt.select(dt.index), pd.Int64Index)
    assert_index_type(dt[['age']], pd.Int64Index)

    # Operations that remove the index column or its index tags.
    assert_index_type(dt.drop(dt.index), pd.RangeIndex)
    assert_index_type(
        dt.reset_semantic_tags(retain_index_tags=False), pd.RangeIndex)
    retyped = dt.set_types(retain_index_tags=False,
                           semantic_tags={'id': 'numeric'})
    assert_index_type(retyped, pd.RangeIndex)

    # Popping the index column changes the table itself.
    dt.pop(dt.index)
    assert_index_type(dt, pd.RangeIndex)