Exemple #1
0
def test_ordinal_requires_instance_on_update(sample_series):
    """Check that updating to Ordinal without an instance raises TypeError.

    Both the bare ``Ordinal`` class and the string ``"Ordinal"`` are passed to
    ``set_logical_type``; each must raise a ``TypeError`` whose message
    matches the expected text.
    """
    column = DataColumn(sample_series, logical_type="NaturalLanguage")
    expected_message = 'Must use an Ordinal instance with order values defined'

    # Same order as before: the class first, then its string name.
    for invalid_ltype in (Ordinal, "Ordinal"):
        with pytest.raises(TypeError, match=expected_message):
            column.set_logical_type(invalid_ltype)
Exemple #2
0
def test_set_logical_type_retains_time_index_tag(sample_datetime_series):
    """Check how the 'time_index' tag is handled when the logical type changes.

    The column starts with a custom tag and is then marked as the time index.
    Calling ``set_logical_type`` without ``retain_index_tags`` is expected to
    keep only 'time_index'; passing ``retain_index_tags=False`` is expected to
    leave no semantic tags at all.
    """
    time_col = DataColumn(
        sample_datetime_series,
        logical_type=Datetime,
        semantic_tags='original_tag',
        use_standard_tags=False,
    )
    time_col._set_as_time_index()
    assert time_col.semantic_tags == {'time_index', 'original_tag'}

    # Without an explicit retain_index_tags argument the index tag survives,
    # while the custom tag is dropped.
    with_index_tag = time_col.set_logical_type(Categorical)
    assert with_index_tag.semantic_tags == {'time_index'}

    # Explicitly opting out removes the index tag as well.
    without_index_tag = time_col.set_logical_type(Categorical,
                                                  retain_index_tags=False)
    assert without_index_tag.semantic_tags == set()
Exemple #3
0
def test_set_logical_type_without_standard_tags(sample_series):
    """Check ``set_logical_type`` on a column created with standard tags off.

    The call must return a new ``DataColumn`` (not the original object) that
    carries the requested logical type and an empty set of semantic tags.
    """
    original = DataColumn(
        sample_series,
        logical_type=NaturalLanguage,
        semantic_tags='original_tag',
        use_standard_tags=False,
    )

    converted = original.set_logical_type(Categorical)

    # A distinct DataColumn is returned rather than the original object.
    assert isinstance(converted, DataColumn)
    assert converted is not original

    # The new type is applied and the custom tag is not carried over.
    assert converted.logical_type == Categorical
    assert converted.semantic_tags == set()
Exemple #4
0
def test_ordinal_with_order(sample_series):
    """Check that an Ordinal instance with an order is accepted and preserved.

    The instance is used both when constructing a ``DataColumn`` and when
    updating an existing column through ``set_logical_type``; in each case the
    resulting logical type must be an ``Ordinal`` with the original order.
    Marked as an expected failure for Dask and Koalas series.
    """
    # ks / dd may be falsy when the optional backend is unavailable, so the
    # module itself is checked before its Series type is touched.
    if any(lib and isinstance(sample_series, lib.Series) for lib in (ks, dd)):
        pytest.xfail('Fails with Dask and Koalas - ordinal data validation not compatible')

    expected_order = ['a', 'b', 'c']
    # Hand Ordinal its own list so the comparison target stays independent.
    ordinal_with_order = Ordinal(order=list(expected_order))

    # Supplying the instance at construction time.
    created = DataColumn(sample_series, logical_type=ordinal_with_order)
    assert isinstance(created.logical_type, Ordinal)
    assert created.logical_type.order == expected_order

    # Supplying the instance when updating an existing column.
    base = DataColumn(sample_series, logical_type="NaturalLanguage")
    updated = base.set_logical_type(ordinal_with_order)
    assert isinstance(updated.logical_type, Ordinal)
    assert updated.logical_type.order == expected_order
Exemple #5
0
def test_dtype_update_on_ltype_change():
    """Check that the underlying series dtype follows the logical type.

    A column created as 'Integer' must hold an 'Int64' series; after switching
    to 'Double' the returned column's series must be 'float64'.
    """
    values = pd.Series([1, 2, 3])

    integer_col = DataColumn(values, logical_type='Integer')
    assert integer_col._series.dtype == 'Int64'

    double_col = integer_col.set_logical_type('Double')
    assert double_col._series.dtype == 'float64'