def test_aggregate():
    """Reduce a list of dicts into a single dict of aggregated values.

    Exercises the Array, Sum, Count, ArrayDistinct and MaxRow reducers,
    including arithmetic between two reducers ("ab_sum") and an item lookup
    on the row returned by MaxRow ("b_max_a").
    """
    rows = [
        {"a": 5, "b": "foo"},
        {"a": 10, "b": "bar"},
        {"a": 10, "b": "bar"},
    ]

    spec = {
        "a": c.reduce(c.ReduceFuncs.Array, c.item("a")),
        # sum of "a" (25) plus the row count (3)
        "ab_sum": (
            c.reduce(c.ReduceFuncs.Sum, c.item("a"))
            + c.reduce(c.ReduceFuncs.Count)
        ),
        "b": c.reduce(c.ReduceFuncs.ArrayDistinct, c.item("b")),
        "b_max_a": c.reduce(c.ReduceFuncs.MaxRow, c.item("a")).item(
            "b", default=None
        ),
    }
    converter = c.aggregate(spec).gen_converter(debug=True)

    expected = {
        "a": [5, 10, 10],
        "ab_sum": 28,
        "b": ["foo", "bar"],
        "b_max_a": "bar",
    }
    assert converter(rows) == expected
def test_custom_reduce_initialization():
    """Check that custom reducers reject missing or unusable initial values."""
    # no ``initial`` is supplied at all
    with pytest.raises(TypeError):
        c.reduce(lambda a, b: a + b, c.this)

    # ``default`` is not supplied and ``initial`` is a conversion, so the
    # initial value cannot double as the default
    with pytest.raises(ValueError):
        c.reduce(lambda a, b: a + b, c.this, initial=c.this)
def test_reducer_reuse(dict_series):
    """Use two equivalent custom reducers side by side in one aggregate.

    Both reducers are built from the very same function object and the same
    arguments; each is then offset by a different constant.
    """
    add = lambda a, b: a + b  # noqa: E731 - kept as a lambda on purpose
    first = c.reduce(add, c.item("value"), initial=0)
    second = c.reduce(add, c.item("value"), initial=0)

    grouped = c.group_by(c.item("name")).aggregate(
        (c.item("name"), first + 10, second + 20)
    )
    rows = grouped.execute(dict_series)

    assert rows == [("Nick", 13, 23), ("John", 73, 83)]
def test_base_reducer():
    """Cover custom reducers, nested-reducer errors and DictArray defaults."""
    # Four ways of spelling "sum of the items"; each must yield 6.
    sum_variants = (
        c.reduce(lambda a, b: a + b, c.this(), initial=0),
        c.reduce(c.naive(lambda a, b: a + b), c.this(), initial=int),
        c.reduce(
            InlineExpr("{0} + {1}"),
            c.this(),
            prepare_first=InlineExpr("{}"),
            default=0,
        ),
        c.reduce(
            InlineExpr("{0} + {1}"),
            c.this(),
            prepare_first=int,
            default=0,
        ),
    )
    summing = (
        c.aggregate(sum_variants)
        .filter(c.this() > 5)
        .gen_converter(debug=False)
    )
    assert summing([1, 2, 3]) == [6, 6, 6, 6]

    # A reducer fed with another reducer (directly, through arithmetic, or
    # through a pipe) is expected to raise ValueError.
    with pytest.raises(ValueError):
        c.aggregate(
            c.ReduceFuncs.Sum(c.reduce(c.ReduceFuncs.Count))
        ).gen_converter()
    with pytest.raises(ValueError):
        c.aggregate(
            c.ReduceFuncs.Sum(c.ReduceFuncs.Count() + 1)
        ).gen_converter()
    with pytest.raises(ValueError):
        c.aggregate(
            (c.ReduceFuncs.Count() + 2).pipe(
                c.ReduceFuncs.Sum(c.this()) + 1
            )
        ).gen_converter()

    # DictArray on its own: empty input gives None.
    bare = c.aggregate(
        c.ReduceFuncs.DictArray(c.item(0), c.item(1))
    ).gen_converter(debug=False)
    pairs = [
        ("a", 1),
        ("a", 2),
        ("b", 3),
    ]
    grouped_pairs = {"a": [1, 2], "b": [3]}
    assert bare(pairs) == grouped_pairs
    assert bare([]) is None

    # DictArray nested in a dict: empty input gives None under the key.
    keyed = c.aggregate(
        {"key": c.ReduceFuncs.DictArray(c.item(0), c.item(1))}
    ).gen_converter(debug=False)
    assert keyed([]) == {"key": None}
    assert keyed(pairs) == {"key": grouped_pairs}
def test_simple_label():
    """Exercise labels: add_label, label_input/label_output and c.label.

    Intermediate conversions are named step by step, in the same order the
    chained expressions would construct them.
    """
    # Label created with add_label and read inside a list comprehension.
    labeled_pair = c.tuple(c.item(1).add_label("a"), c.this())
    pair_with_label = c.item(1).pipe(
        c.list_comp((c.this(), c.label("a")))
    )
    conv1 = labeled_pair.pipe(pair_with_label).gen_converter(debug=False)
    assert conv1([1, 2, 3, 4]) == [(1, 2), (2, 2), (3, 2), (4, 2)]

    # Labels defined through pipe(label_input=..., label_output=...).
    collection_step = c.tuple(c.item(1).add_label("a"), c.this()).pipe(
        c.item(1),
        label_input={"aa": c.item(0), "bb": c.item(0)},
        label_output="collection1",
    )
    sum_step = collection_step.pipe(
        c.label("collection1").pipe(
            c.aggregate(
                c.reduce(
                    c.ReduceFuncs.Sum,
                    c.this()
                    + c.label("a")
                    + c.label("aa")
                    + c.input_arg("x")
                    + c.label("collection1").item(0),
                )
            )
        ),
        label_output="b",
    )
    conv2 = sum_step.pipe(c.this() + c.label("b")).gen_converter(debug=False)
    assert conv2([1, 2, 3, 4], x=10) == 140

    # A label used as the default of an item lookup.
    default_source = c.tuple(
        c.item("default").add_label("default"), c.this()
    )
    lookup = c.item(1).pipe(c.item("abc", default=c.label("default")))
    conv3 = default_source.pipe(lookup).gen_converter(debug=True)
    assert conv3({"default": 1}) == 1

    # label_input=1 must be rejected.
    with pytest.raises(c.ConversionException):
        c.this().pipe(c.this(), label_input=1)

    # Adding the label "a" twice to a CachingConversion must be rejected.
    with pytest.raises(c.ConversionException):
        CachingConversion(c.this()).add_label("a", c.this()).add_label(
            "a", c.this()
        )
def test_aggregate():
    """Reduce a list of dicts with the Array, Sum and ArrayDistinct reducers."""
    rows = [
        {"a": 5, "b": "foo"},
        {"a": 10, "b": "bar"},
        {"a": 10, "b": "bar"},
    ]

    spec = {
        "a": c.reduce(c.ReduceFuncs.Array, c.item("a")),
        "a_sum": c.reduce(c.ReduceFuncs.Sum, c.item("a")),
        "b": c.reduce(c.ReduceFuncs.ArrayDistinct, c.item("b")),
    }
    converter = c.aggregate(spec).gen_converter(debug=True)

    expected = {
        "a": [5, 10, 10],
        "a_sum": 25,
        "b": ["foo", "bar"],
    }
    assert converter(rows) == expected
def test_manually_defined_reducers():
    """Sum a caller-selected field per name with a hand-written reducer.

    The summed field is chosen at call time through the ``group_key``
    input argument, which defaults to ``'debit'`` in the generated
    converter's signature. Only totals greater than 20 are kept.
    """
    rows = [
        {"name": "John", "category": "Games", "debit": 10, "balance": 90},
        {"name": "John", "category": "Games", "debit": 200, "balance": -110},
        {"name": "John", "category": "Food", "debit": 30, "balance": -140},
        {"name": "John", "category": "Games", "debit": 300, "balance": 0},
        {"name": "Nick", "category": "Food", "debit": 7, "balance": 50},
        {"name": "Nick", "category": "Games", "debit": 18, "balance": 32},
        {"name": "Bill", "category": "Games", "debit": 18, "balance": 120},
    ]

    per_name_total = c.group_by(c.item("name")).aggregate(
        c.reduce(
            lambda a, b: a + b,
            c.item(c.input_arg("group_key")),
            initial=0,
        )
    )
    grouper = per_name_total.filter(c.this() > 20).gen_converter(
        signature="data_, group_key='debit'"
    )

    assert grouper(rows) == [540, 25]
    assert grouper(rows, group_key="balance") == [82, 120]
def test_legacy_dict_reduce_approach(dict_series):
    """Check DictSum via the older ``c.reduce(func, (key, value))`` spelling.

    Also verifies that DictSum raises ValueError when called with three
    positional conversions or with a single set of conversions.
    """
    name_to_total = c.aggregate(
        c.reduce(
            c.ReduceFuncs.DictSum,
            (c.item("name"), c.item("value")),
        )
    )
    totals = name_to_total.execute(dict_series)
    assert totals == {"Nick": 3, "John": 63}

    with pytest.raises(ValueError):
        c.ReduceFuncs.DictSum(c.this(), c.this(), c.this())

    with pytest.raises(ValueError):
        c.ReduceFuncs.DictSum({c.this(), c.this()})
def test_grouping():
    """End-to-end checks of ``group_by(...).aggregate(...)``.

    Covers, in order: a wide per-name aggregate mixing group-by fields,
    custom reducers (``initial``, ``default``, ``where``,
    ``unconditional_init``, ``prepare_first``) and built-in reducers; an
    aggregate that references a non-grouped field; dict-shaped aggregates
    (literal dict vs ``c.dict``); piping an un-grouped aggregate; grouping
    by several fields; grouping by nothing; and grouping by a tuple object
    while aggregating its separate items.
    """
    rows = [
        {"name": "John", "category": "Games", "debit": 10, "balance": 90},
        {"name": "John", "category": "Games", "debit": 200, "balance": -110},
        {"name": "John", "category": "Food", "debit": 30, "balance": -140},
        {"name": "John", "category": "Games", "debit": 300, "balance": 0},
        {"name": "Nick", "category": "Food", "debit": 7, "balance": 50},
        {"name": "Nick", "category": "Games", "debit": 18, "balance": 32},
        {"name": "Bill", "category": "Games", "debit": 18, "balance": 120},
    ]

    # --- wide per-name aggregate -------------------------------------
    by_name = c.group_by(c.item("name"))
    per_name_columns = (
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        c.reduce(
            lambda a, b: a + b,
            c.item("debit"),
            initial=c.input_arg("arg1"),
            unconditional_init=True,
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
            unconditional_init=True,
        ),
        c.reduce(
            max,
            c.item("debit"),
            prepare_first=lambda a: a,
            default=c.input_arg("arg1"),
            where=c.call_func(lambda x: x < 0, c.item("balance")),
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                max,
                c.item("debit"),
                prepare_first=lambda a: a,
                default=0,
                where=c.call_func(lambda x: x < 0, c.item("balance")),
            ),
            1000,
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
                where=c.inline_expr("{0} > {1}").pass_args(
                    c.item("balance"),
                    c.input_arg("arg2"),
                ),
            ),
            -1,
        ),
        c.reduce(c.ReduceFuncs.MaxRow, c.item("debit")).item("balance"),
        c.reduce(c.ReduceFuncs.MinRow, c.item("debit")).item("balance"),
    )
    per_name = (
        by_name.aggregate(per_name_columns)
        .sort(key=lambda t: t[0].lower(), reverse=True)
        .execute(rows, arg1=100, arg2=0, debug=False)
    )
    # fmt: off
    assert per_name == [
        ('Nick', 'nick', 'nick', 125, 125, 100, 0, -18, 32, 50),
        ('John', 'john', 'john', 640, 640, 200, 200000, -10, 0, 90),
        ('Bill', 'bill', 'bill', 118, 118, 100, 0, -18, 120, 120),
    ]
    # fmt: on

    # --- non-grouped field used outside a reducer --------------------
    with pytest.raises(c.ConversionException):
        # grouping is by "name" only, yet "category" is referenced
        # directly in the aggregate
        per_name = (
            c.group_by(c.item("name"))
            .aggregate(
                (
                    c.item("category"),
                    c.reduce(c.ReduceFuncs.Sum, c.item("debit")),
                )
            )
            .execute(rows, debug=False)
        )

    # --- dict-shaped aggregates --------------------------------------
    aggregation = {
        c.call_func(
            tuple,
            c.ReduceFuncs.Array(c.item("name"), default=None),
        ): c.item("category").call_method("lower"),
        "count": c.ReduceFuncs.Count(),
        "max": c.ReduceFuncs.Max(c.item("debit")),
        "min": c.ReduceFuncs.Min(c.item("debit")),
        "count_distinct": c.ReduceFuncs.CountDistinct(c.item("name")),
        "array_agg_distinct": c.ReduceFuncs.ArrayDistinct(c.item("name")),
        "dict": c.ReduceFuncs.Dict(c.item("debit"), c.item("name")),
    }
    from_literal = (
        c.group_by(c.item("category"))
        .aggregate(aggregation)
        .execute(rows, debug=False)
    )
    from_c_dict = (
        c.group_by(c.item("category"))
        .aggregate(c.dict(*aggregation.items()))
        .execute(rows, debug=False)
    )
    # fmt: off
    expected_by_category = [
        {
            'array_agg_distinct': ['John', 'Nick', 'Bill'],
            'count': 5,
            'count_distinct': 3,
            'dict': {10: 'John', 18: 'Bill', 200: 'John', 300: 'John'},
            'max': 300,
            'min': 10,
            ('John', 'John', 'John', 'Nick', 'Bill'): 'games',
        },
        {
            'array_agg_distinct': ['John', 'Nick'],
            'count': 2,
            'count_distinct': 2,
            'dict': {7: 'Nick', 30: 'John'},
            'max': 30,
            'min': 7,
            ('John', 'Nick'): 'food',
        },
    ]
    # fmt: on
    assert from_literal == from_c_dict == expected_by_category

    # --- un-grouped aggregate piped further --------------------------
    doubled_total = (
        c.aggregate(c.ReduceFuncs.Sum(c.item("debit")))
        .pipe(c.inline_expr("{0} + {1}").pass_args(c.this(), c.this()))
        .execute(rows, debug=False)
    )
    assert doubled_total == 583 * 2

    # --- grouping by several fields ----------------------------------
    by = c.item("name"), c.item("category")
    per_name_category = (
        c.group_by(*by)
        .aggregate(by + (c.ReduceFuncs.Sum(c.item("debit")),))
        .execute(rows, debug=False)
    )
    # fmt: off
    assert per_name_category == [
        ('John', 'Games', 510),
        ('John', 'Food', 30),
        ('Nick', 'Food', 7),
        ('Nick', 'Games', 18),
        ('Bill', 'Games', 18),
    ]
    # fmt: on

    # --- grouping by nothing -----------------------------------------
    grand_total = (
        c.group_by()
        .aggregate(c.ReduceFuncs.Sum(c.item("debit")))
        .execute(rows, debug=False)
    )
    assert grand_total == 583

    # --- tuple passed as ONE group-by field --------------------------
    with pytest.raises(c.ConversionException):
        # there's a single group by field, while we use separate items
        # of this tuple in aggregate
        (
            c.group_by(by)
            .aggregate(by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")),))
            .execute(rows, debug=False)
        )
] reducer_data4 = [ { "name": "Bill", "debit": 25 }, { "name": "Nick", "debit": 3 }, ] reducers_in_out = [ dict( groupby=c.item("name"), reduce=c.reduce(lambda a, b: a + b, c.item("debit"), initial=0), data=reducer_data1, output=[('Bill', 150), ('Nick', 1)], raises=None, ), dict( groupby=c.item("name"), reduce=c.reduce(c.inline_expr("{} + {}"), c.item("debit"), initial=0), data=reducer_data1, output=[('Bill', 150), ('Nick', 1)], raises=None, ), dict( groupby=c.item("name"), reduce=c.ReduceFuncs.Sum(c.item("debit")), data=reducer_data1,
def test_grouping():
    """End-to-end checks of ``group_by(...).aggregate(...)``.

    Covers, in order: a wide per-name aggregate mixing group-by fields,
    custom reducers (with ``initial``/``default`` and ``.filter``
    conditions) and built-in reducers; dict-shaped aggregates (literal
    dict vs ``c.dict``); piping an un-grouped aggregate; grouping by
    several fields; and grouping by nothing.
    """
    rows = [
        {"name": "John", "category": "Games", "debit": 10, "balance": 90},
        {"name": "John", "category": "Games", "debit": 200, "balance": -110},
        {"name": "John", "category": "Food", "debit": 30, "balance": -140},
        {"name": "John", "category": "Games", "debit": 300, "balance": 0},
        {"name": "Nick", "category": "Food", "debit": 7, "balance": 50},
        {"name": "Nick", "category": "Games", "debit": 18, "balance": 32},
        {"name": "Bill", "category": "Games", "debit": 18, "balance": 120},
    ]

    # --- wide per-name aggregate -------------------------------------
    by_name = c.group_by(c.item("name"))
    per_name_columns = (
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        c.reduce(
            lambda a, b: a + b,
            c.item("debit"),
            initial=c.input_arg("arg1"),
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
        ),
        c.reduce(max, c.item("debit"), default=c.input_arg("arg1")).filter(
            c.call_func(lambda x: x < 0, c.item("balance"))
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(max, c.item("debit"), default=0).filter(
                c.call_func(lambda x: x < 0, c.item("balance"))
            ),
            1000,
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
            ).filter(c.inline_expr("{0} > 0").pass_args(c.item("balance"))),
            -1,
        ),
        c.reduce(
            c.ReduceFuncs.MaxRow,
            c.item("debit"),
        ).item("balance"),
        c.reduce(
            c.ReduceFuncs.MinRow,
            c.item("debit"),
        ).item("balance"),
    )
    per_name = (
        by_name.aggregate(per_name_columns)
        .sort(key=lambda t: t[0].lower(), reverse=True)
        .execute(rows, arg1=100, debug=False)
    )
    # fmt: off
    assert per_name == [
        ('Nick', 'nick', 'nick', 125, 125, 100, 0, -18, 32, 50),
        ('John', 'john', 'john', 640, 640, 200, 200000, -10, 0, 90),
        ('Bill', 'bill', 'bill', 118, 118, 100, 0, -18, 120, 120),
    ]
    # fmt: on

    # --- dict-shaped aggregates --------------------------------------
    aggregation = {
        c.call_func(
            tuple,
            c.reduce(c.ReduceFuncs.Array, c.item("name"), default=None),
        ): c.item("category").call_method("lower"),
        "count": c.reduce(c.ReduceFuncs.Count),
        "count_distinct": c.reduce(
            c.ReduceFuncs.CountDistinct, c.item("name")
        ),
        "array_agg_distinct": c.reduce(
            c.ReduceFuncs.ArrayDistinct,
            c.item("name"),
        ),
        "dict": c.reduce(
            c.ReduceFuncs.Dict, (c.item("debit"), c.item("name"))
        ),
    }
    from_literal = (
        c.group_by(c.item("category"))
        .aggregate(aggregation)
        .execute(rows, debug=False)
    )
    from_c_dict = (
        c.group_by(c.item("category"))
        .aggregate(c.dict(*aggregation.items()))
        .execute(rows, debug=False)
    )
    # fmt: off
    expected_by_category = [
        {
            'array_agg_distinct': ['John', 'Nick', 'Bill'],
            'count': 5,
            'count_distinct': 3,
            'dict': {10: 'John', 18: 'Bill', 200: 'John', 300: 'John'},
            ('John', 'John', 'John', 'Nick', 'Bill'): 'games',
        },
        {
            'array_agg_distinct': ['John', 'Nick'],
            'count': 2,
            'count_distinct': 2,
            'dict': {7: 'Nick', 30: 'John'},
            ('John', 'Nick'): 'food',
        },
    ]
    # fmt: on
    assert from_literal == from_c_dict == expected_by_category

    # --- un-grouped aggregate piped further --------------------------
    doubled_total = (
        c.aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit")))
        .pipe(c.inline_expr("{0} + {1}").pass_args(c.this(), c.this()))
        .execute(rows, debug=False)
    )
    assert doubled_total == 583 * 2

    # --- grouping by several fields ----------------------------------
    by = c.item("name"), c.item("category")
    per_name_category = (
        c.group_by(*by)
        .aggregate(by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")),))
        .execute(rows, debug=False)
    )
    # fmt: off
    assert per_name_category == [
        ('John', 'Games', 510),
        ('John', 'Food', 30),
        ('Nick', 'Food', 7),
        ('Nick', 'Games', 18),
        ('Bill', 'Games', 18),
    ]
    # fmt: on

    # --- grouping by nothing -----------------------------------------
    grand_total = (
        c.group_by()
        .aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit")))
        .execute(rows, debug=False)
    )
    assert grand_total == 583
def test_base_reducer():
    """Cover low-level reducer definitions, nested reducers and DictArray.

    Seven reducers are declared in the first check but only six values are
    expected: the result is filtered with ``> 5``, which presumably drops
    the last reducer's value (its statement assigns instead of
    accumulating).
    """
    from convtools.aggregations import _ReducerExpression, _ReducerStatements

    reducers = (
        c.reduce(
            _ReducerExpression(lambda a, b: a + b, expr=c.this(), initial=0)
        ),
        c.reduce(
            _ReducerExpression(
                c.naive(lambda a, b: a + b), expr=c.this(), initial=int
            )
        ),
        c.reduce(_ReducerExpression("{0} + {1}", expr=c.this(), default=0)),
        c.reduce(
            _ReducerExpression(
                "{0} + {1}",
                expr=c.this(),
                initial_from_first=int,
                default=0,
            )
        ),
        c.reduce(
            _ReducerStatements(
                reduce="%(result)s += ({1} or 0)",
                initial_from_first="%(result)s = ({0} or 0)",
                default=0,
            ),
            c.this(),
        ),
        c.reduce(
            _ReducerStatements(
                reduce="%(result)s += ({1} or 0)",
                default=c.naive(int),
            ),
            c.this(),
        ),
        c.reduce(
            _ReducerStatements(
                reduce="%(result)s = ({1} or 0)",
                initial=0,
            ),
            c.this(),
        ),
    )
    summing = (
        c.aggregate(reducers)
        .filter(c.this() > 5, cast=tuple)
        .gen_converter(debug=True)
    )
    assert summing([1, 2, 3]) == (6, 6, 6, 6, 6, 6)

    # A reducer nested inside another reducer must fail an assertion.
    with pytest.raises(AssertionError):
        c.aggregate(
            (
                c.reduce(
                    c.ReduceFuncs.Sum,
                    c.reduce(c.ReduceFuncs.Count),
                ),
            )
        ).gen_converter()

    # DictArray on its own: empty input gives None.
    bare = c.aggregate(
        c.reduce(c.ReduceFuncs.DictArray, (c.item(0), c.item(1)))
    ).gen_converter(debug=True)
    pairs = [
        ("a", 1),
        ("a", 2),
        ("b", 3),
    ]
    grouped_pairs = {"a": [1, 2], "b": [3]}
    assert bare(pairs) == grouped_pairs
    assert bare([]) is None

    # DictArray nested in a dict: empty input gives None under the key.
    keyed = c.aggregate(
        {"key": c.reduce(c.ReduceFuncs.DictArray, (c.item(0), c.item(1)))}
    ).gen_converter(debug=True)
    assert keyed([]) == {"key": None}
    assert keyed(pairs) == {"key": grouped_pairs}
] reducer_data4 = [ { "name": "Bill", "debit": 25 }, { "name": "Nick", "debit": 3 }, ] reducers_in_out = [ dict( groupby=c.item("name"), reduce=c.reduce(c.ReduceFuncs.Sum, c.item("debit")), data=reducer_data1, output=[('Bill', 150), ('Nick', 1)], raises=None, ), dict( groupby=c.item("name"), reduce=c.reduce(c.ReduceFuncs.Sum, c.item("debit")), data=reducer_data1 + reducer_data2, output=[('Bill', 150), ('Nick', 3)], raises=None, ), dict( groupby=c.item("name"), reduce=c.reduce(c.ReduceFuncs.SumOrNone, c.item("debit")), data=reducer_data1 + reducer_data2,
def test_custom_reduce():
    """Check that a custom reducer without ``initial`` raises ValueError.

    This holds both when no extra arguments are given and when only
    ``default`` is given.
    """
    # neither ``initial`` nor ``default``
    with pytest.raises(ValueError):
        c.reduce(lambda a, b: a + b, c.this())

    # ``default`` alone
    with pytest.raises(ValueError):
        c.reduce(lambda a, b: a + b, c.this(), default=0)
def test_manually_defined_reducers():
    """Sum a caller-selected field per name, then filter with several casts.

    The summed field is chosen at call time through the ``group_key``
    input argument (defaulting to ``'debit'`` in the generated signature).
    The same grouped conversion is filtered three times: without ``cast``,
    with ``cast=list`` and with ``cast=set``.
    """
    rows = [
        {"name": "John", "category": "Games", "debit": 10, "balance": 90},
        {"name": "John", "category": "Games", "debit": 200, "balance": -110},
        {"name": "John", "category": "Food", "debit": 30, "balance": -140},
        {"name": "John", "category": "Games", "debit": 300, "balance": 0},
        {"name": "Nick", "category": "Food", "debit": 7, "balance": 50},
        {"name": "Nick", "category": "Games", "debit": 18, "balance": 32},
        {"name": "Bill", "category": "Games", "debit": 18, "balance": 120},
    ]

    per_name_total = c.group_by(c.item("name")).aggregate(
        c.reduce(
            lambda a, b: a + b,
            c.item(c.input_arg("group_key")),
            initial=int,
            default=int,
        )
    )

    # no explicit cast
    plain = per_name_total.filter(c.this > 20).gen_converter(
        signature="data_, group_key='debit'", debug=False
    )
    assert plain(rows) == [540, 25]
    assert list(plain(rows, group_key="balance")) == [82, 120]

    # cast=list
    as_list = per_name_total.filter((c.this > 20), cast=list).gen_converter(
        signature="data_, group_key='debit'", debug=False
    )
    assert as_list(rows) == [540, 25]

    # cast=set
    as_set = per_name_total.filter((c.this > 20), cast=set).gen_converter(
        signature="data_, group_key='debit'", debug=False
    )
    assert as_set(rows, group_key="balance") == {82, 120}