Example #1
0
def test_aggregate():
    """Reduce a list of dicts into one dict of aggregated values."""
    rows = [
        {"a": 5, "b": "foo"},
        {"a": 10, "b": "bar"},
        {"a": 10, "b": "bar"},
    ]

    spec = {
        "a": c.reduce(c.ReduceFuncs.Array, c.item("a")),
        # Sum of "a" plus the number of rows.
        "ab_sum": (
            c.reduce(c.ReduceFuncs.Sum, c.item("a"))
            + c.reduce(c.ReduceFuncs.Count)
        ),
        "b": c.reduce(c.ReduceFuncs.ArrayDistinct, c.item("b")),
        # "b" read from the result of the MaxRow reducer over "a".
        "b_max_a": c.reduce(c.ReduceFuncs.MaxRow, c.item("a")).item(
            "b", default=None
        ),
    }
    conv = c.aggregate(spec).gen_converter(debug=True)

    expected = {
        "a": [5, 10, 10],
        "ab_sum": 28,
        "b": ["foo", "bar"],
        "b_max_a": "bar",
    }
    assert conv(rows) == expected
def test_custom_reduce_initialization():
    """``c.reduce`` with a custom function validates its init arguments."""
    # No ``initial`` is passed at all.
    with pytest.raises(TypeError):
        c.reduce(lambda acc, item: acc + item, c.this)

    # ``initial`` is a conversion, so it cannot double as ``default``,
    # and no explicit ``default`` is given.
    with pytest.raises(ValueError):
        c.reduce(lambda acc, item: acc + item, c.this, initial=c.this)
Example #3
0
def test_reducer_reuse(dict_series):
    """Two reducers built from the same arguments work in one aggregate."""
    add = lambda left, right: left + right
    first = c.reduce(add, c.item("value"), initial=0)
    second = c.reduce(add, c.item("value"), initial=0)

    conversion = c.group_by(c.item("name")).aggregate(
        (c.item("name"), first + 10, second + 20)
    )

    assert conversion.execute(dict_series) == [
        ("Nick", 13, 23),
        ("John", 73, 83),
    ]
Example #4
0
def test_base_reducer():
    """Cover custom reducers, rejected nested reducers and ``DictArray``."""
    # Four different ways of spelling "sum the items".
    summing_reducers = (
        c.reduce(lambda x, y: x + y, c.this(), initial=0),
        c.reduce(c.naive(lambda x, y: x + y), c.this(), initial=int),
        c.reduce(
            InlineExpr("{0} + {1}"),
            c.this(),
            prepare_first=InlineExpr("{}"),
            default=0,
        ),
        c.reduce(
            InlineExpr("{0} + {1}"),
            c.this(),
            prepare_first=int,
            default=0,
        ),
    )
    sums_over_five = (
        c.aggregate(summing_reducers)
        .filter(c.this() > 5)
        .gen_converter(debug=False)
    )
    assert sums_over_five([1, 2, 3]) == [6, 6, 6, 6]

    # A reducer fed by another reducer is expected to raise ValueError.
    with pytest.raises(ValueError):
        c.aggregate(
            c.ReduceFuncs.Sum(c.reduce(c.ReduceFuncs.Count))
        ).gen_converter()
    with pytest.raises(ValueError):
        c.aggregate(
            c.ReduceFuncs.Sum(c.ReduceFuncs.Count() + 1)
        ).gen_converter()
    with pytest.raises(ValueError):
        c.aggregate(
            (c.ReduceFuncs.Count() + 2).pipe(c.ReduceFuncs.Sum(c.this()) + 1)
        ).gen_converter()

    conv = c.aggregate(
        c.ReduceFuncs.DictArray(c.item(0), c.item(1))
    ).gen_converter(debug=False)
    pairs = [("a", 1), ("a", 2), ("b", 3)]
    grouped = {"a": [1, 2], "b": [3]}
    assert conv(pairs) == grouped
    # Empty input yields None rather than an empty dict.
    assert conv([]) is None

    keyed_conv = c.aggregate(
        {"key": c.ReduceFuncs.DictArray(c.item(0), c.item(1))}
    ).gen_converter(debug=False)
    assert keyed_conv([]) == {"key": None}
    assert keyed_conv(pairs) == {"key": grouped}
Example #5
0
def test_simple_label():
    """Labels defined via ``add_label`` / ``pipe`` can be read downstream."""
    # Item 1 of the input is labelled "a" and paired with every element.
    pair_with_label = (
        c.tuple(c.item(1).add_label("a"), c.this())
        .pipe(c.item(1).pipe(c.list_comp((c.this(), c.label("a")))))
        .gen_converter(debug=False)
    )
    assert pair_with_label([1, 2, 3, 4]) == [(1, 2), (2, 2), (3, 2), (4, 2)]

    # Labels come from add_label, label_input and label_output, and are
    # combined with an input argument inside a reducer.
    labelled_sum = (
        c.tuple(c.item(1).add_label("a"), c.this())
        .pipe(
            c.item(1),
            label_input={"aa": c.item(0), "bb": c.item(0)},
            label_output="collection1",
        )
        .pipe(
            c.label("collection1").pipe(
                c.aggregate(
                    c.reduce(
                        c.ReduceFuncs.Sum,
                        c.this()
                        + c.label("a")
                        + c.label("aa")
                        + c.input_arg("x")
                        + c.label("collection1").item(0),
                    )
                )
            ),
            label_output="b",
        )
        .pipe(c.this() + c.label("b"))
        .gen_converter(debug=False)
    )
    assert labelled_sum([1, 2, 3, 4], x=10) == 140

    # A label is used as the ``default`` of an item lookup.
    label_as_default = (
        c.tuple(c.item("default").add_label("default"), c.this())
        .pipe(c.item(1).pipe(c.item("abc", default=c.label("default"))))
        .gen_converter(debug=True)
    )
    assert label_as_default({"default": 1}) == 1

    # Both of the following constructions are expected to be rejected.
    with pytest.raises(c.ConversionException):
        c.this().pipe(c.this(), label_input=1)
    with pytest.raises(c.ConversionException):
        CachingConversion(c.this()).add_label("a", c.this()).add_label(
            "a", c.this()
        )
Example #6
0
def test_aggregate():
    """Aggregate rows into an array, a sum and a distinct array."""
    rows = [
        dict(a=5, b="foo"),
        dict(a=10, b="bar"),
        dict(a=10, b="bar"),
    ]
    reducers = {
        "a": c.reduce(c.ReduceFuncs.Array, c.item("a")),
        "a_sum": c.reduce(c.ReduceFuncs.Sum, c.item("a")),
        "b": c.reduce(c.ReduceFuncs.ArrayDistinct, c.item("b")),
    }
    converter = c.aggregate(reducers).gen_converter(debug=True)

    expected = {"a": [5, 10, 10], "a_sum": 25, "b": ["foo", "bar"]}
    assert converter(rows) == expected
Example #7
0
def test_manually_defined_reducers():
    """Sum a field chosen via an input argument, grouped by name."""
    columns = ("name", "category", "debit", "balance")
    data = [
        dict(zip(columns, values))
        for values in (
            ("John", "Games", 10, 90),
            ("John", "Games", 200, -110),
            ("John", "Food", 30, -140),
            ("John", "Games", 300, 0),
            ("Nick", "Food", 7, 50),
            ("Nick", "Games", 18, 32),
            ("Bill", "Games", 18, 120),
        )
    ]

    # The summed field is picked at call time through ``group_key``.
    summed_field = c.reduce(
        lambda acc, value: acc + value,
        c.item(c.input_arg("group_key")),
        initial=0,
    )
    grouper = (
        c.group_by(c.item("name"))
        .aggregate(summed_field)
        .filter(c.this() > 20)
        .gen_converter(signature="data_, group_key='debit'")
    )

    assert grouper(data) == [540, 25]
    assert grouper(data, group_key="balance") == [82, 120]
Example #8
0
def test_legacy_dict_reduce_approach(dict_series):
    """``DictSum`` via ``c.reduce`` takes the key/value pair as one tuple."""
    legacy_reducer = c.reduce(
        c.ReduceFuncs.DictSum,
        (c.item("name"), c.item("value")),
    )
    totals = c.aggregate(legacy_reducer).execute(dict_series)
    assert totals == {"Nick": 3, "John": 63}

    # Three positional arguments are rejected.
    with pytest.raises(ValueError):
        c.ReduceFuncs.DictSum(c.this(), c.this(), c.this())
    # So is a set passed as the single argument.
    with pytest.raises(ValueError):
        c.ReduceFuncs.DictSum({c.this(), c.this()})
Example #9
0
def test_grouping():
    """Exercise ``group_by(...).aggregate(...)`` with many reducer kinds."""
    columns = ("name", "category", "debit", "balance")
    data = [
        dict(zip(columns, values))
        for values in (
            ("John", "Games", 10, 90),
            ("John", "Games", 200, -110),
            ("John", "Food", 30, -140),
            ("John", "Games", 300, 0),
            ("Nick", "Food", 7, 50),
            ("Nick", "Games", 18, 32),
            ("Bill", "Games", 18, 120),
        )
    ]

    # One output column per entry, computed for every "name" group.
    per_name_columns = (
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        c.reduce(
            lambda acc, value: acc + value,
            c.item("debit"),
            initial=c.input_arg("arg1"),
            unconditional_init=True,
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
            unconditional_init=True,
        ),
        c.reduce(
            max,
            c.item("debit"),
            prepare_first=lambda first: first,
            default=c.input_arg("arg1"),
            where=c.call_func(lambda bal: bal < 0, c.item("balance")),
        ),
        c.call_func(
            lambda max_debit, factor: max_debit * factor,
            c.reduce(
                max,
                c.item("debit"),
                prepare_first=lambda first: first,
                default=0,
                where=c.call_func(lambda bal: bal < 0, c.item("balance")),
            ),
            1000,
        ),
        c.call_func(
            lambda max_debit, factor: max_debit * factor,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
                where=c.inline_expr("{0} > {1}").pass_args(
                    c.item("balance"),
                    c.input_arg("arg2"),
                ),
            ),
            -1,
        ),
        c.reduce(c.ReduceFuncs.MaxRow, c.item("debit")).item("balance"),
        c.reduce(c.ReduceFuncs.MinRow, c.item("debit")).item("balance"),
    )
    result = (
        c.group_by(c.item("name"))
        .aggregate(per_name_columns)
        .sort(key=lambda row: row[0].lower(), reverse=True)
        .execute(data, arg1=100, arg2=0, debug=False)
    )
    assert result == [
        ("Nick", "nick", "nick", 125, 125, 100, 0, -18, 32, 50),
        ("John", "john", "john", 640, 640, 200, 200000, -10, 0, 90),
        ("Bill", "bill", "bill", 118, 118, 100, 0, -18, 120, 120),
    ]

    with pytest.raises(c.ConversionException):
        # Grouping is by "name", while the aggregate reads "category"
        # outside of any reducer.
        (
            c.group_by(c.item("name"))
            .aggregate(
                (
                    c.item("category"),
                    c.reduce(c.ReduceFuncs.Sum, c.item("debit")),
                )
            )
            .execute(data, debug=False)
        )

    # The first key is itself a conversion (tuple of all names in a group).
    names_key = c.call_func(
        tuple,
        c.ReduceFuncs.Array(c.item("name"), default=None),
    )
    aggregation = {
        names_key: c.item("category").call_method("lower"),
        "count": c.ReduceFuncs.Count(),
        "max": c.ReduceFuncs.Max(c.item("debit")),
        "min": c.ReduceFuncs.Min(c.item("debit")),
        "count_distinct": c.ReduceFuncs.CountDistinct(c.item("name")),
        "array_agg_distinct": c.ReduceFuncs.ArrayDistinct(c.item("name")),
        "dict": c.ReduceFuncs.Dict(c.item("debit"), c.item("name")),
    }
    by_category = c.group_by(c.item("category"))
    result = by_category.aggregate(aggregation).execute(data, debug=False)
    result2 = (
        c.group_by(c.item("category"))
        .aggregate(c.dict(*aggregation.items()))
        .execute(data, debug=False)
    )
    expected_by_category = [
        {
            "array_agg_distinct": ["John", "Nick", "Bill"],
            "count": 5,
            "count_distinct": 3,
            "dict": {10: "John", 18: "Bill", 200: "John", 300: "John"},
            "max": 300,
            "min": 10,
            ("John", "John", "John", "Nick", "Bill"): "games",
        },
        {
            "array_agg_distinct": ["John", "Nick"],
            "count": 2,
            "count_distinct": 2,
            "dict": {7: "Nick", 30: "John"},
            "max": 30,
            "min": 7,
            ("John", "Nick"): "food",
        },
    ]
    assert result == result2 == expected_by_category

    # The aggregate result is piped on and used twice.
    result3 = (
        c.aggregate(c.ReduceFuncs.Sum(c.item("debit")))
        .pipe(c.inline_expr("{0} + {1}").pass_args(c.this(), c.this()))
        .execute(data, debug=False)
    )
    assert result3 == 583 * 2

    by = c.item("name"), c.item("category")
    debit_sum = (c.ReduceFuncs.Sum(c.item("debit")),)
    result4 = (
        c.group_by(*by).aggregate(by + debit_sum).execute(data, debug=False)
    )
    assert result4 == [
        ("John", "Games", 510),
        ("John", "Food", 30),
        ("Nick", "Food", 7),
        ("Nick", "Games", 18),
        ("Bill", "Games", 18),
    ]

    # group_by() without fields behaves like a plain aggregate here.
    result5 = (
        c.group_by()
        .aggregate(c.ReduceFuncs.Sum(c.item("debit")))
        .execute(data, debug=False)
    )
    assert result5 == 583

    with pytest.raises(c.ConversionException):
        # there's a single group by field (the tuple itself), while the
        # aggregate uses the separate items of that tuple
        (
            c.group_by(by)
            .aggregate(by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")),))
            .execute(data, debug=False)
        )
Example #10
0
]
# One "debit" entry each for "Bill" and "Nick".
reducer_data4 = [
    {"name": person, "debit": amount}
    for person, amount in (("Bill", 25), ("Nick", 3))
]

reducers_in_out = [
    dict(
        groupby=c.item("name"),
        reduce=c.reduce(lambda a, b: a + b, c.item("debit"), initial=0),
        data=reducer_data1,
        output=[('Bill', 150), ('Nick', 1)],
        raises=None,
    ),
    dict(
        groupby=c.item("name"),
        reduce=c.reduce(c.inline_expr("{} + {}"), c.item("debit"), initial=0),
        data=reducer_data1,
        output=[('Bill', 150), ('Nick', 1)],
        raises=None,
    ),
    dict(
        groupby=c.item("name"),
        reduce=c.ReduceFuncs.Sum(c.item("debit")),
        data=reducer_data1,
Example #11
0
def test_grouping():
    """Exercise ``group_by(...).aggregate(...)`` using ``c.reduce`` reducers."""
    columns = ("name", "category", "debit", "balance")
    data = [
        dict(zip(columns, values))
        for values in (
            ("John", "Games", 10, 90),
            ("John", "Games", 200, -110),
            ("John", "Food", 30, -140),
            ("John", "Games", 300, 0),
            ("Nick", "Food", 7, 50),
            ("Nick", "Games", 18, 32),
            ("Bill", "Games", 18, 120),
        )
    ]

    # One output column per entry, computed for every "name" group.
    per_name_columns = (
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        c.reduce(
            lambda acc, value: acc + value,
            c.item("debit"),
            initial=c.input_arg("arg1"),
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
        ),
        c.reduce(max, c.item("debit"), default=c.input_arg("arg1")).filter(
            c.call_func(lambda bal: bal < 0, c.item("balance"))
        ),
        c.call_func(
            lambda max_debit, factor: max_debit * factor,
            c.reduce(max, c.item("debit"), default=0).filter(
                c.call_func(lambda bal: bal < 0, c.item("balance"))
            ),
            1000,
        ),
        c.call_func(
            lambda max_debit, factor: max_debit * factor,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
            ).filter(c.inline_expr("{0} > 0").pass_args(c.item("balance"))),
            -1,
        ),
        c.reduce(c.ReduceFuncs.MaxRow, c.item("debit")).item("balance"),
        c.reduce(c.ReduceFuncs.MinRow, c.item("debit")).item("balance"),
    )
    result = (
        c.group_by(c.item("name"))
        .aggregate(per_name_columns)
        .sort(key=lambda row: row[0].lower(), reverse=True)
        .execute(data, arg1=100, debug=False)
    )
    assert result == [
        ("Nick", "nick", "nick", 125, 125, 100, 0, -18, 32, 50),
        ("John", "john", "john", 640, 640, 200, 200000, -10, 0, 90),
        ("Bill", "bill", "bill", 118, 118, 100, 0, -18, 120, 120),
    ]

    # The first key is itself a conversion (tuple of all names in a group).
    names_key = c.call_func(
        tuple,
        c.reduce(c.ReduceFuncs.Array, c.item("name"), default=None),
    )
    aggregation = {
        names_key: c.item("category").call_method("lower"),
        "count": c.reduce(c.ReduceFuncs.Count),
        "count_distinct": c.reduce(
            c.ReduceFuncs.CountDistinct, c.item("name")
        ),
        "array_agg_distinct": c.reduce(
            c.ReduceFuncs.ArrayDistinct,
            c.item("name"),
        ),
        "dict": c.reduce(
            c.ReduceFuncs.Dict, (c.item("debit"), c.item("name"))
        ),
    }
    result = (
        c.group_by(c.item("category"))
        .aggregate(aggregation)
        .execute(data, debug=False)
    )
    result2 = (
        c.group_by(c.item("category"))
        .aggregate(c.dict(*aggregation.items()))
        .execute(data, debug=False)
    )
    expected_by_category = [
        {
            "array_agg_distinct": ["John", "Nick", "Bill"],
            "count": 5,
            "count_distinct": 3,
            "dict": {10: "John", 18: "Bill", 200: "John", 300: "John"},
            ("John", "John", "John", "Nick", "Bill"): "games",
        },
        {
            "array_agg_distinct": ["John", "Nick"],
            "count": 2,
            "count_distinct": 2,
            "dict": {7: "Nick", 30: "John"},
            ("John", "Nick"): "food",
        },
    ]
    assert result == result2 == expected_by_category

    # The aggregate result is piped on and used twice.
    result3 = (
        c.aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit")))
        .pipe(c.inline_expr("{0} + {1}").pass_args(c.this(), c.this()))
        .execute(data, debug=False)
    )
    assert result3 == 583 * 2

    by = c.item("name"), c.item("category")
    result4 = (
        c.group_by(*by)
        .aggregate(by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")),))
        .execute(data, debug=False)
    )
    assert result4 == [
        ("John", "Games", 510),
        ("John", "Food", 30),
        ("Nick", "Food", 7),
        ("Nick", "Games", 18),
        ("Bill", "Games", 18),
    ]

    # group_by() without fields behaves like a plain aggregate here.
    result5 = (
        c.group_by()
        .aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit")))
        .execute(data, debug=False)
    )
    assert result5 == 583
Example #12
0
def test_base_reducer():
    """Cover the private reducer building blocks and ``DictArray``."""
    from convtools.aggregations import _ReducerExpression, _ReducerStatements

    reducers = (
        c.reduce(
            _ReducerExpression(lambda x, y: x + y, expr=c.this(), initial=0)
        ),
        c.reduce(
            _ReducerExpression(
                c.naive(lambda x, y: x + y),
                expr=c.this(),
                initial=int,
            )
        ),
        c.reduce(_ReducerExpression("{0} + {1}", expr=c.this(), default=0)),
        c.reduce(
            _ReducerExpression(
                "{0} + {1}",
                expr=c.this(),
                initial_from_first=int,
                default=0,
            )
        ),
        c.reduce(
            _ReducerStatements(
                reduce="%(result)s += ({1} or 0)",
                initial_from_first="%(result)s = ({0} or 0)",
                default=0,
            ),
            c.this(),
        ),
        c.reduce(
            _ReducerStatements(
                reduce="%(result)s += ({1} or 0)",
                default=c.naive(int),
            ),
            c.this(),
        ),
        # This one overwrites its result instead of accumulating.
        c.reduce(
            _ReducerStatements(
                reduce="%(result)s = ({1} or 0)",
                initial=0,
            ),
            c.this(),
        ),
    )
    over_five = (
        c.aggregate(reducers)
        .filter(c.this() > 5, cast=tuple)
        .gen_converter(debug=True)
    )
    # Seven reducers are defined, but only six values pass the filter.
    assert over_five([1, 2, 3]) == (6, 6, 6, 6, 6, 6)

    # A reducer fed by another reducer is expected to fail.
    with pytest.raises(AssertionError):
        c.aggregate(
            (
                c.reduce(
                    c.ReduceFuncs.Sum,
                    c.reduce(c.ReduceFuncs.Count),
                ),
            )
        ).gen_converter()

    conv = c.aggregate(
        c.reduce(c.ReduceFuncs.DictArray, (c.item(0), c.item(1)))
    ).gen_converter(debug=True)
    pairs = [("a", 1), ("a", 2), ("b", 3)]
    grouped = {"a": [1, 2], "b": [3]}
    assert conv(pairs) == grouped
    # Empty input yields None rather than an empty dict.
    assert conv([]) is None

    keyed_conv = c.aggregate(
        {"key": c.reduce(c.ReduceFuncs.DictArray, (c.item(0), c.item(1)))}
    ).gen_converter(debug=True)
    assert keyed_conv([]) == {"key": None}
    assert keyed_conv(pairs) == {"key": grouped}
Example #13
0
]
# One "debit" entry each for "Bill" and "Nick".
reducer_data4 = [
    {"name": person, "debit": amount}
    for person, amount in (("Bill", 25), ("Nick", 3))
]

reducers_in_out = [
    dict(
        groupby=c.item("name"),
        reduce=c.reduce(c.ReduceFuncs.Sum, c.item("debit")),
        data=reducer_data1,
        output=[('Bill', 150), ('Nick', 1)],
        raises=None,
    ),
    dict(
        groupby=c.item("name"),
        reduce=c.reduce(c.ReduceFuncs.Sum, c.item("debit")),
        data=reducer_data1 + reducer_data2,
        output=[('Bill', 150), ('Nick', 3)],
        raises=None,
    ),
    dict(
        groupby=c.item("name"),
        reduce=c.reduce(c.ReduceFuncs.SumOrNone, c.item("debit")),
        data=reducer_data1 + reducer_data2,
Example #14
0
def test_custom_reduce():
    """A custom reduce function is rejected when ``initial`` is not given."""
    # Neither ``initial`` nor ``default`` is passed.
    with pytest.raises(ValueError):
        c.reduce(lambda acc, item: acc + item, c.this())

    # Passing only ``default`` is rejected as well.
    with pytest.raises(ValueError):
        c.reduce(lambda acc, item: acc + item, c.this(), default=0)
Example #15
0
def test_manually_defined_reducers():
    """Sum a field chosen via an input argument; filter with several casts."""
    columns = ("name", "category", "debit", "balance")
    data = [
        dict(zip(columns, values))
        for values in (
            ("John", "Games", 10, 90),
            ("John", "Games", 200, -110),
            ("John", "Food", 30, -140),
            ("John", "Games", 300, 0),
            ("Nick", "Food", 7, 50),
            ("Nick", "Games", 18, 32),
            ("Bill", "Games", 18, 120),
        )
    ]
    signature = "data_, group_key='debit'"

    # The summed field is picked at call time through ``group_key``.
    summed_by_name = c.group_by(c.item("name")).aggregate(
        c.reduce(
            lambda acc, value: acc + value,
            c.item(c.input_arg("group_key")),
            initial=int,
            default=int,
        )
    )

    # No explicit cast.
    grouper = summed_by_name.filter(c.this > 20).gen_converter(
        signature=signature, debug=False
    )
    assert grouper(data) == [540, 25]
    assert list(grouper(data, group_key="balance")) == [82, 120]

    # cast=list
    grouper = summed_by_name.filter((c.this > 20), cast=list).gen_converter(
        signature=signature, debug=False
    )
    assert grouper(data) == [540, 25]

    # cast=set
    grouper = summed_by_name.filter((c.this > 20), cast=set).gen_converter(
        signature=signature, debug=False
    )
    assert grouper(data, group_key="balance") == {82, 120}