Beispiel #1
0
def test_from_string():
    """Check ``type_conversion`` builders fed string and mixed-type columns.

    The first half converts decimal and hexadecimal strings to ``int`` and
    ``float``.  The second half converts columns that mix real values with
    blank or unparseable entries and checks that those entries end up null.
    """
    from itertools import count

    df = conversion_data()
    df.loc[:, "hex_int"] = df["int"].apply(hex)
    df.loc[:, "hex_float"] = df["float"].apply(float.hex)
    data_id, column_type = "1", "type_conversion"
    # The previous ``"Col{}".format(++i)`` never incremented: ``++i`` is just
    # two unary pluses, so every column was called "Col0".  A real counter
    # gives each built column its own name.
    col_ids = count(1)

    def _builder(cfg):
        """Return a builder for ``cfg`` using the next unused column name."""
        name = "Col{}".format(next(col_ids))
        return ColumnBuilder(data_id, column_type, name, cfg)

    with ExitStack() as stack:
        stack.enter_context(
            mock.patch("dtale.global_state.DATA", {data_id: df}))

        # (config, check applied to the built column)
        string_cases = [
            ({"col": "str_num", "to": "int", "from": "str"},
             lambda col: col.values[0] == 1),
            ({"col": "str_num", "to": "float", "from": "str"},
             lambda col: col.values[0] == 1.5),
            ({"col": "hex_int", "to": "int", "from": "str"},
             lambda col: col.values[0] == 1),
            ({"col": "hex_float", "to": "float", "from": "str"},
             lambda col: col.values[0] == 1.5),
        ]
        for cfg, check in string_cases:
            verify_builder(_builder(cfg), check)

    df = pd.DataFrame(
        dict(
            a=[1, 2, 3, "", 5, 6, 7, 8, 9, 10],
            b=[True, True, False, "", "False", True, False, True, False, True],
            c=["1", "00", "1.05", " ", " ", "", "02", "..", "none", "nan"],
        ))
    with ExitStack() as stack:
        stack.enter_context(
            mock.patch("dtale.global_state.DATA", {data_id: df}))

        cfg = {"col": "a", "to": "float", "from": "mixed-integer"}
        builder = _builder(cfg)
        verify_builder(builder, lambda col: col.sum() == 51)
        # Index 3 holds the empty string in the source column.
        assert np.isnan(builder.build_column().values[3])

        cfg = {"col": "b", "to": "bool", "from": "mixed-integer"}
        builder = _builder(cfg)
        verify_builder(builder, lambda col: col.sum() == 5)
        assert np.isnan(builder.build_column().values[3])

        cfg = {"col": "c", "to": "float", "from": "str"}
        builder = _builder(cfg)
        verify_builder(
            builder, lambda col: col.sum() == 4.05 and col.isnull().sum() == 6)
def test_zscore_normalize():
    """Check the ``zscore_normalize`` builder on a varying and a constant column.

    Column ``i`` holds 0..99 and its normalised values should sum to
    (numerically) zero.  Column ``a`` is the constant 1, and building it is
    expected to fail with a message containing ``ZERO_STD_ERROR``.
    """
    from itertools import count

    df = pd.DataFrame([dict(a=1, i=i) for i in range(100)])

    data_id, column_type = "1", "zscore_normalize"
    # ``++i`` is a no-op in Python (two unary pluses), so use a real counter
    # to give each built column a distinct name.
    col_ids = count(1)
    build_data_inst({data_id: df})

    builder = ColumnBuilder(data_id, column_type,
                            "Col{}".format(next(col_ids)), {"col": "i"})
    # Compare against zero with a tolerance: the exact floating-point residue
    # of the sum depends on summation order and library versions.
    verify_builder(builder, lambda col: abs(col.sum()) < 1e-9)

    # NOTE(review): narrowed from BaseException so KeyboardInterrupt and
    # SystemExit are not swallowed; assumes the builder's error derives from
    # Exception -- confirm.
    with pytest.raises(Exception) as error:
        builder = ColumnBuilder(data_id, column_type,
                                "Col{}".format(next(col_ids)), {"col": "a"})
        builder.build_column()
    # This check must sit outside the ``with`` block: statements placed after
    # the raising call inside the block are never executed.
    assert ZERO_STD_ERROR in str(error.value)
def test_type_conversion(unittest):
    """Exercise ``type_conversion`` builders across source/target type pairs.

    A one-row frame with string, int, float, date, bool and categorical
    columns is patched into ``dtale.global_state.DATA``; each case builds a
    converted column and checks its first value (or its dtype).

    :param unittest: pytest fixture requested by the signature; it is not
                     used in the body.
    """
    from itertools import count

    df = pd.DataFrame([{
        'str_num': '1.5',
        'str_date': '20200101',
        'str_date2': '1/1/2020',
        'str_bool': 'True',
        'int': 1,
        'int_date': 20200101,
        'int_s': 1490195805,
        'float': 1.5,
        'date': pd.Timestamp('20200101'),
        'bool': True,
        'cat_int': 1,
        'cat_bool': 'True',
        'cat_str': 'a'
    }])
    for c in ['cat_int', 'cat_bool', 'cat_str']:
        # Plain column assignment replaces the column and so adopts the
        # categorical dtype.  ``df.loc[:, c] = ...`` tries to write into the
        # existing column in place on recent pandas, which can leave the
        # original dtype untouched.
        df[c] = df[c].astype('category')
    data_id, column_type = '1', 'type_conversion'
    # ``++i`` is a no-op in Python (two unary pluses), so the original named
    # every column "Col0"; a real counter gives each one a distinct name.
    col_ids = count(1)

    def _convert(cfg):
        """Build and return the converted column described by ``cfg``."""
        name = 'Col{}'.format(next(col_ids))
        return ColumnBuilder(data_id, column_type, name, cfg).build_column()

    with ExitStack() as stack:
        stack.enter_context(
            mock.patch('dtale.global_state.DATA', {data_id: df}))

        # --- string sources ---
        s = _convert({'col': 'str_num', 'to': 'int', 'from': 'str'})
        assert s.values[0] == 1

        s = _convert({'col': 'str_num', 'to': 'float', 'from': 'str'})
        assert s.values[0] == 1.5

        s = _convert({'col': 'str_date', 'to': 'date', 'from': 'object'})
        assert pd.Timestamp(s.values[0]).strftime('%Y%m%d') == '20200101'

        s = _convert({'col': 'str_date2', 'to': 'date', 'from': 'object'})
        assert pd.Timestamp(s.values[0]).strftime('%Y%m%d') == '20200101'

        s = _convert({'col': 'str_bool', 'to': 'bool', 'from': 'object'})
        assert s.values[0]

        # --- integer sources ---
        s = _convert({'col': 'int', 'to': 'float', 'from': 'int'})
        assert s.values[0] == 1.0

        s = _convert({'col': 'int', 'to': 'str', 'from': 'int'})
        assert s.values[0] == '1'

        s = _convert({'col': 'int', 'to': 'category', 'from': 'int'})
        assert s.dtype.name == 'category'

        s = _convert({'col': 'int', 'to': 'bool', 'from': 'int'})
        assert isinstance(s.values[0],
                          np.bool_) and np.bool_(True) == s.values[0]

        s = _convert({
            'col': 'int_date',
            'to': 'date',
            'from': 'int',
            'unit': 'YYYYMMDD'
        })
        assert pd.Timestamp(s.values[0]).strftime('%Y%m%d') == '20200101'

        s = _convert(
            {'col': 'int_s', 'to': 'date', 'from': 'int', 'unit': 's'})
        assert pd.Timestamp(s.values[0]).strftime('%Y%m%d') == '20170322'

        # --- float sources ---
        s = _convert({'col': 'float', 'to': 'int', 'from': 'float'})
        assert s.values[0] == 1

        s = _convert({'col': 'float', 'to': 'str', 'from': 'float'})
        assert s.values[0] == '1.5'

        # --- datetime sources ---
        s = _convert({
            'col': 'date',
            'to': 'str',
            'from': 'datetime64',
            'fmt': '%m/%d/%Y'
        })
        assert s.values[0] == '01/01/2020'

        s = _convert({
            'col': 'date',
            'to': 'int',
            'from': 'datetime64',
            'unit': 'YYYYMMDD'
        })
        assert s.values[0] == 20200101

        s = _convert(
            {'col': 'date', 'to': 'int', 'from': 'datetime64', 'unit': 'ms'})
        # NOTE(review): 1577854800 is 2020-01-01 05:00:00 UTC expressed in
        # seconds, so this expectation looks tied to a UTC-5 local timezone
        # (and to seconds despite the 'ms' unit) -- confirm against the
        # builder implementation.
        assert s.values[0] == 1577854800

        # --- bool sources ---
        s = _convert({'col': 'bool', 'to': 'int', 'from': 'bool'})
        assert s.values[0] == 1

        s = _convert({'col': 'bool', 'to': 'str', 'from': 'bool'})
        assert s.values[0] == 'True'

        # --- categorical sources ---
        s = _convert({'col': 'cat_int', 'to': 'int', 'from': 'category'})
        assert s.values[0] == 1

        s = _convert({'col': 'cat_bool', 'to': 'bool', 'from': 'category'})
        assert isinstance(s.values[0],
                          np.bool_) and np.bool_(True) == s.values[0]

        s = _convert({'col': 'cat_str', 'to': 'str', 'from': 'category'})
        assert s.values[0] == 'a'