def test_weighted_average(series):
    """Average reducer with weights must agree with the reference helper."""
    # Weighted average over the whole input vs. the reference implementation.
    weighted = c.aggregate(
        c.ReduceFuncs.Average(c.item(0), c.item(1))
    ).execute(series)
    assert eq(weighted, weighted_average(series))

    # Grouped averages; the conditional reducer never fires because
    # item(0) only runs 0..9, so its default of -1 is emitted per group.
    grouped = (
        c.group_by(c.item(0) // 5)
        .aggregate(
            [
                c.item(0) // 5,
                c.ReduceFuncs.Average(c.item(1)),
                c.ReduceFuncs.Average(
                    c.item(1), where=c.item(0) > 10, default=-1
                ),
            ]
        )
        .execute(zip(range(10), range(10)), debug=False)
    )
    assert grouped == [[0, 2, -1], [1, 7, -1]]

    # Same shape, now with explicit weights cycling through 1 and 2.
    grouped = (
        c.group_by(c.item(0) // 5)
        .aggregate(
            [
                c.item(0) // 5,
                c.ReduceFuncs.Average(c.item(1), c.item(2)),
                c.ReduceFuncs.Average(
                    c.item(1),
                    c.item(2),
                    where=c.item(0) > 10,
                    default=-1,
                ),
            ]
        )
        .execute(zip(range(10), range(10), cycle([1, 2])), debug=False)
    )
    assert grouped == [[0, 2, -1], [1, 7, -1]]
# --- Example #2 (scrape-artifact separator, commented out) ---
def test_group_by_with_double_ended_pipes():
    """Pipes feeding into and out of reducers inside aggregations."""
    # Sum of the "value" fields (1 + 2), then doubled by the output pipe.
    input_data = [{"value": 1}, {"value": 2}]
    conv = c.aggregate(
        c.item("value")
        .pipe(c.ReduceFuncs.Sum(c.this()))
        .pipe(c.this() * 2)
    ).gen_converter()
    assert conv(input_data) == 6

    # The same reducer object is used twice: once fed through an input-arg
    # pipe and once directly over the rows themselves.
    input_data = [{"k": "A", "v": 1}, {"k": "A", "v": 2}]
    reducer = c.ReduceFuncs.Sum(c.item("v"))
    conv = (
        c.group_by(c.item("k"))
        .aggregate(
            {
                "v1": c.input_arg("test").pipe(reducer),
                "v2": reducer,
            }
        )
        .gen_converter()
    )
    assert conv(input_data, test={"v": 7}) == [{"v1": 14, "v2": 3}]
def test_multi_statement_reducers(dict_series):
    """Custom MultiStatementReducer subclasses inside a group_by aggregate.

    SumReducer1..5 are project-defined reducers; SumReducer5 additionally
    accepts an ``initial`` value (5), which shifts its sums by 5.
    """
    output = (c.group_by(c.item("name")).aggregate((
        c.item("name"),
        SumReducer1(c.item("value")),
        SumReducer2(c.item("value")),
        SumReducer3(c.item("value")),
        SumReducer4(c.item("value")),
        SumReducer5(c.item("value"), initial=5),
    )).execute(dict_series, debug=False))
    assert output == [("Nick", 3, 3, 3, 3, 8), ("John", 63, 63, 63, 63, 68)]

    # A reducer whose `reduce` template references the running result but
    # defines no way to produce the first value must fail at converter
    # generation time.
    with pytest.raises(AttributeError):

        class SumReducer(MultiStatementReducer):
            reduce = ("%(result)s = %(result)s + ({0} or 4)", )
            default = 0
            unconditional_init = True

        # prepare_first is not specified
        c.aggregate(SumReducer(c.item("value"))).gen_converter()

    with pytest.raises(ValueError):

        class SumReducer(MultiStatementReducer):
            reduce = ("%(result)s = %(result)s + ({0} or 4)", )
            unconditional_init = True

        # default is not provided
        SumReducer(c.item("value"))
# --- Example #4 (scrape-artifact separator, commented out) ---
def test_multi_statement_reducers(dict_series):
    """Invalid MultiStatementReducer definitions must raise ValueError.

    NOTE(review): this redefines ``test_multi_statement_reducers`` — at
    import time it shadows the earlier definition in this file.
    """
    output = (c.group_by(c.item("name")).aggregate((
        c.item("name"),
        SumReducer1(c.item("value")),
        SumReducer2(c.item("value")),
        SumReducer3(c.item("value")),
        SumReducer4(c.item("value")),
        SumReducer5(c.item("value"), initial=5),
    )).execute(dict_series, debug=False))
    assert output == [("Nick", 3, 3, 3, 3, 8), ("John", 63, 63, 63, 63, 68)]

    # The reduce template consumes two args ({0}, {1}) while only one input
    # expression is passed — instantiating the reducer must fail.
    with pytest.raises(ValueError):

        class SumReducer(MultiStatementReducer):
            reduce = ("%(result)s = {0} + ({1} or 4)", )
            default = 0
            unconditional_init = True

        SumReducer(c.item("value"))
    with pytest.raises(ValueError):

        class SumReducer(MultiStatementReducer):
            reduce = ("%(result)s = {0} + ({1} or 4)", )
            unconditional_init = True

        SumReducer(c.item("value"))
# --- Example #5 (scrape-artifact separator, commented out) ---
def test_piped_group_by():
    """One group_by result piped as the input of a second group_by."""
    input_data = [
        {"a": 5, "b": "foo", "amount": 1},
        {"a": 10, "b": "bar", "amount": 2},
        {"a": 10, "b": "bar", "amount": 3},
    ]
    # Stage 1: group by (a, b) pairs, summing amounts per pair.
    first_stage = c.group_by(c.item("a"), c.item("b")).aggregate(
        {
            "a": c.item("a"),
            "b": c.item("b"),
            "amount": c.ReduceFuncs.Sum(c.item("amount")),
        }
    )
    # Stage 2: regroup the stage-1 rows by "b" alone.
    second_stage = c.group_by(c.item("b")).aggregate(
        {
            "b": c.item("b"),
            "set_a": c.ReduceFuncs.ArrayDistinct(c.item("a")),
            "min_amount": c.ReduceFuncs.Min(c.item("amount")),
        }
    )
    assert first_stage.pipe(second_stage).execute(input_data) == [
        {"b": "foo", "set_a": [5], "min_amount": 1},
        {"b": "bar", "set_a": [10], "min_amount": 5},
    ]
def test_nested_group_by():
    """Nested aggregations: a c.aggregate pipe inside a reducer input."""
    data = [
        [0, [1, 2, 3]],
        [0, [4, 5, 6]],
        [1, [2, 3, 4]],
    ]
    # Inner aggregate sums each row's sub-list; the outer Sum adds those
    # per-row sums within each group of item(0).
    assert c.group_by(c.item(0)).aggregate(
        (
            c.item(0),
            c.ReduceFuncs.Sum(
                c.item(1).pipe(c.aggregate(c.ReduceFuncs.Sum(c.this())))
            ),
        )
    ).execute(data, debug=False) == [
        (0, 21),
        (1, 9),
    ]
    agg_conv = c.aggregate(c.ReduceFuncs.Sum(c.this()))
    # Same result, but wrapped in layers of pass-through c.if_ pipes and a
    # tuple-of-aggregates pipe (then item(1) picks the second copy) to
    # exercise pipe handling around reducers.
    assert c.group_by(c.item(0)).aggregate(
        (
            c.item(0),
            c.if_(c.item(1), c.item(1), c.item(1),).pipe(
                c.if_(c.this(), c.this(), c.this(),).pipe(
                    c.ReduceFuncs.Sum(
                        c.if_(
                            c.this(),
                            c.this(),
                            c.this(),
                        )
                        .pipe((agg_conv, agg_conv))
                        .pipe(c.item(1))
                    ).pipe(
                        c.if_(
                            c.this(),
                            c.this(),
                            c.this(),
                        )
                    ),
                )
            ),
        )
    ).execute(data, debug=True) == [
        (0, 21),
        (1, 9),
    ]
# --- Example #7 (scrape-artifact separator, commented out) ---
def test_weighted_average_with_group_by(series):
    """Per-group weighted Average matches the per-key reference value."""
    actual = (
        c.group_by(c.item(0))
        .aggregate(c.ReduceFuncs.Average(c.item(0), c.item(1)))
        .execute(series)
    )
    expected = [
        weighted_average([x for x in series if x[0] == key])
        for key in ordered_set(x[0] for x in series)
    ]
    assert eq(actual, expected)
# --- Example #8 (scrape-artifact separator, commented out) ---
def test_median_with_group_by(series):
    """Per-group Median reducer matches statistics.median."""
    actual = (
        c.group_by(c.item(0))
        .aggregate(c.ReduceFuncs.Median(c.item(1)))
        .execute(series)
    )
    expected = [
        statistics.median(x[1] for x in series if x[0] == key)
        for key in ordered_set(x[0] for x in series)
    ]
    assert eq(actual, expected)
# --- Example #9 (scrape-artifact separator, commented out) ---
def test_top_k_with_group_by(series, k):
    """Per-group TopK reducer compared against collections.Counter."""
    # NOTE(review): the outer `x[1]` indexes the (element, count) pairs
    # yielded by Counter.most_common, i.e. it takes the COUNT, not the
    # element — presumably intentional for this fixture's data, but worth
    # confirming against the TopK reducer's semantics.
    assert eq(
        c.group_by(c.item(0)).aggregate(c.ReduceFuncs.TopK(
            k, c.item(1))).execute(series),
        [[
            x[1]
            for x in Counter(x[1]
                             for x in series if x[0] == key).most_common(k)
        ] for key in ordered_set(x[0] for x in series)],
    )
# --- Example #10 (scrape-artifact separator, commented out) ---
def test_mode_with_groupby():
    """Per-group Mode reducer matches statistics.mode."""
    series = [(0, 1), (0, 1), (0, 2), (1, 1), (1, 2), (1, 2)]
    actual = (
        c.group_by(c.item(0))
        .aggregate(c.ReduceFuncs.Mode(c.item(1)))
        .execute(series)
    )
    expected = [
        statistics.mode([x[1] for x in series if x[0] == key])
        for key in ordered_set(x[0] for x in series)
    ]
    assert eq(actual, expected)
def test_group_by_key_edge_case():
    """Labels and reducer pipes that are rejected, plus valid pipe shapes."""
    # Labeling the input of / around a reducer is not allowed.
    with pytest.raises(ValueError):
        c.this.add_label("row").pipe(c.ReduceFuncs.Count())
    with pytest.raises(ValueError):
        (c.this.add_label("row") + 1).pipe(c.ReduceFuncs.Count() + 1)
    with pytest.raises(ValueError):
        c.this.pipe(c.ReduceFuncs.Count(), label_input="row")
    data = [
        (0, 1),
        (1, 2),
    ]
    # Per group of one row each: Sum/Count = the value itself, + 10.
    assert c.group_by(c.item(0)).aggregate(
        c.if_(c.item(1), c.item(1), c.item(1)).pipe(
            (c.ReduceFuncs.Sum(c.this) /
             c.ReduceFuncs.Count(c.this)).pipe(c.this + 10))).gen_converter(
                 debug=False)(data) == [11, 12]
    # label_output on a reducer pipe is allowed (unlike label_input above).
    assert c.group_by(c.item(0)).aggregate(
        c.item(1).pipe(
            c.ReduceFuncs.Sum(c.this),
            label_output="count")).gen_converter(debug=False)(data) == [1, 2]
# --- Example #12 (scrape-artifact separator, commented out) ---
def test_reducers():
    """Table-driven check of each reducer config's output or exception."""
    for config in reducers_in_out:
        converter = (
            c.group_by(config["groupby"])
            .aggregate((config["groupby"], config["reduce"]))
            .gen_converter(debug=config.get("debug", True))
        )
        if config["raises"]:
            with pytest.raises(config["raises"]):
                converter(config["data"])
        else:
            assert converter(config["data"]) == config["output"]
def test_conditional_init_merges():
    """Reducers with compatible init conditions and duplicates are merged."""
    converter = (c.group_by(c.item(0)).aggregate([
        c.ReduceFuncs.First(c.item(1)),
        c.ReduceFuncs.First(c.item(2)),
        c.ReduceFuncs.Min(c.item(1)),
        c.ReduceFuncs.Max(c.item(1)),
        c.ReduceFuncs.Min(c.item(2)),
        c.ReduceFuncs.Max(c.item(2)),
        c.ReduceFuncs.DictMin(c.item(0), c.item(1)),
        c.ReduceFuncs.DictMin(c.item(0), c.item(2)),
        c.ReduceFuncs.DictMax(c.item(0), c.item(1)),
        c.ReduceFuncs.DictMax(c.item(0), c.item(2)),
        c.ReduceFuncs.MaxRow(c.item(2)),
        c.ReduceFuncs.MaxRow(c.item(2), where=c.item(0) == 1).item(-1),
    ]).gen_converter(debug=True))
    # None values in items 1/2 must be skipped by the min/max reducers.
    # fmt: off
    assert converter([
        [1, 2, None],
        [1, 1, 4],
        [1, None, 3],
    ]) == [[2, None, 1, 2, 3, 4, {
        1: 1
    }, {
        1: 3
    }, {
        1: 2
    }, {
        1: 4
    }, [1, 1, 4], 4]]
    # fmt: on

    converter = (c.group_by(c.item(0)).aggregate([
        c.ReduceFuncs.Min(c.item(1)),
        c.ReduceFuncs.Min(c.item(1)) + 1,
    ]).gen_converter(debug=True))
    # Inspect the generated group_by code: the two identical Min reducers
    # should share state, so only one "<" comparison appears in the code —
    # presumably how reducer merging is asserted here (TODO confirm).
    assert (sum(
        code_line.count("<") for code_line in next(
            conf["code_str"]
            for name, conf in converter._name_to_converter.items()
            if name.startswith("group_by"))) == 1)
# --- Example #14 (scrape-artifact separator, commented out) ---
def test_manually_defined_reducers():
    """c.reduce with a plain lambda; reduced field chosen via input_arg."""
    data = [
        {
            "name": "John",
            "category": "Games",
            "debit": 10,
            "balance": 90
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 200,
            "balance": -110
        },
        {
            "name": "John",
            "category": "Food",
            "debit": 30,
            "balance": -140
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 300,
            "balance": 0
        },
        {
            "name": "Nick",
            "category": "Food",
            "debit": 7,
            "balance": 50
        },
        {
            "name": "Nick",
            "category": "Games",
            "debit": 18,
            "balance": 32
        },
        {
            "name": "Bill",
            "category": "Games",
            "debit": 18,
            "balance": 120
        },
    ]
    # The field to sum is supplied at call time through the "group_key"
    # kwarg of the custom signature (defaults to "debit"); groups whose
    # sum is <= 20 are filtered out.
    grouper = (c.group_by(c.item("name")).aggregate(
        c.reduce(lambda a, b: a + b,
                 c.item(c.input_arg("group_key")),
                 initial=0)).filter(c.this() > 20).gen_converter(
                     signature="data_, group_key='debit'"))
    assert grouper(data) == [540, 25]
    assert grouper(data, group_key="balance") == [82, 120]
def test_group_by_percentile():
    """Percentile reducer at several percentiles, grouped by key.

    For key index i ("a" -> 0, "b" -> 1, "c" -> 2) the values run from
    i + 90 down to 0, so min is always 0 and max is i + 90.
    """
    input_data = [{
        "key": key,
        "value": value
    } for index, key in enumerate("abc")
                  for value in range(index + 90, -1, -1)]
    c_round = c.call_func(round, c.this, 2)
    # NOTE: the original dict literal contained two "min" entries; the
    # first (Percentile(0) without a `where` clause) was silently shadowed
    # by the second, so the dead entry has been removed — behavior is
    # unchanged. `where=c.and_(default=True)` accepts every row.
    result = (c.group_by(c.item("key")).aggregate({
        "key":
        c.item("key"),
        "min":
        c.ReduceFuncs.Percentile(0,
                                 c.item("value"),
                                 where=c.and_(default=True)).pipe(c_round),
        "percentile_5":
        c.ReduceFuncs.Percentile(5, c.item("value")).pipe(c_round),
        "median":
        c.ReduceFuncs.Percentile(50, c.item("value")).pipe(c_round),
        "percentile_95":
        c.ReduceFuncs.Percentile(95, c.item("value")).pipe(c_round),
        "max":
        c.ReduceFuncs.Percentile(100, c.item("value")).pipe(c_round),
    }).execute(input_data))

    assert result == [
        {
            "key": "a",
            "max": 90,
            "median": 45.0,
            "min": 0.0,
            "percentile_5": 4.5,
            "percentile_95": 85.5,
        },
        {
            "key": "b",
            "max": 91,
            "median": 45.5,
            "min": 0.0,
            "percentile_5": 4.55,
            "percentile_95": 86.45,
        },
        {
            "key": "c",
            "max": 92,
            "median": 46.0,
            "min": 0.0,
            "percentile_5": 4.6,
            "percentile_95": 87.4,
        },
    ]
# --- Example #16 (scrape-artifact separator, commented out) ---
def test_reducer_reuse(dict_series):
    """Two equivalent c.reduce objects can coexist in one aggregate."""
    add = lambda a, b: a + b
    # Both reducers share the same function and input expression.
    first = c.reduce(add, c.item("value"), initial=0)
    second = c.reduce(add, c.item("value"), initial=0)
    output = (
        c.group_by(c.item("name"))
        .aggregate((c.item("name"), first + 10, second + 20))
        .execute(dict_series)
    )
    assert output == [
        ("Nick", 13, 23),
        ("John", 73, 83),
    ]
def test_group_by_key_edge_case():
    """Label/reducer-pipe edge cases (older c.this() call-style API).

    NOTE(review): this redefines ``test_group_by_key_edge_case`` — at
    import time it shadows the earlier definition in this file.
    """
    # Labeling the input of / around a reducer is not allowed.
    with pytest.raises(ValueError):
        c.this().add_label("row").pipe(c.ReduceFuncs.Count())
    with pytest.raises(ValueError):
        (c.this().add_label("row") + 1).pipe(c.ReduceFuncs.Count() + 1)
    with pytest.raises(ValueError):
        c.this().pipe(c.ReduceFuncs.Count(), label_input="row")
    data = [
        (0, 1),
        (1, 2),
    ]
    # TODO: try to test nested pipe (double overwrites)
    # TODO: reducer + label then pipe to somewhere
    # Per group of one row each: Sum/Count = the value itself, + 10.
    assert c.group_by(c.item(0)).aggregate(
        c.if_(c.item(1), c.item(1), c.item(1)).pipe(
            (c.ReduceFuncs.Sum(c.this()) / c.ReduceFuncs.Count(c.this())).pipe(
                c.this() + 10
            )
        )
    ).gen_converter(debug=False)(data) == [11, 12]
    assert c.group_by(c.item(0)).aggregate(
        c.item(1).pipe(c.ReduceFuncs.Sum(c.this()), label_output="count")
    ).gen_converter(debug=False)(data) == [1, 2]
# --- Example #18 (scrape-artifact separator, commented out) ---
def test_iter_method():
    """iter()/filter() chains, standalone and inside a group_by aggregate."""
    # Triple each element, then drop falsy results (the 0 becomes 0).
    tripled = (
        c.this.iter(c.this * 3)
        .filter(c.this)
        .as_type(list)
        .execute([1, 2, 3, 0, 1], debug=False)
    )
    assert tripled == [3, 6, 9, 3]

    # iter() applied to the aggregated [key, max] pair of each group.
    result = (
        c.group_by(c.item(0))
        .aggregate(
            c(
                [
                    c.item(0),
                    c.item(1).pipe(c.ReduceFuncs.Max(c.this)),
                ]
            )
            .iter(c.this * 100)
            .as_type(tuple)
        )
        .execute([(0, 1), (0, 2), (1, 7)], debug=False)
    )
    assert result == [(0, 200), (100, 700)]
def test_iter_mut_method():
    """iter_mut applies in-place mutations to every element of a stream."""
    assert c.iter(c.item(0)).as_type(list).execute([[1], [2]]) == [1, 2]
    # Custom mutation: append 7 to each inner list in place.
    assert c.iter_mut(c.Mut.custom(c.this.call_method("append", 7))).as_type(
        list
    ).execute([[1], [2]]) == [[1, 7], [2, 7]]
    # Chained iter_mut calls each add keys derived from "a".
    result = (
        c.this.iter({"a": c.this})
        .iter_mut(
            c.Mut.set_item("b", c.item("a") + 1),
            c.Mut.set_item("c", c.item("a") + 2),
        )
        .iter_mut(
            c.Mut.set_item("d", c.item("a") + 3),
        )
        .as_type(list)
        .execute([1, 2, 3], debug=False)
    )
    assert result == [
        {"a": 1, "b": 2, "c": 3, "d": 4},
        {"a": 2, "b": 3, "c": 4, "d": 5},
        {"a": 3, "b": 4, "c": 5, "d": 6},
    ]

    # iter_mut over the aggregated pair of dicts per group, adding an "x"
    # key computed from the dict's values plus an input arg.
    result = (
        c.group_by(c.item(0))
        .aggregate(
            c(
                [
                    {c.item(0): c.item(1).pipe(c.ReduceFuncs.Max(c.this))},
                    {c.item(1).pipe(c.ReduceFuncs.Max(c.this)): c.item(0)},
                ]
            )
            .iter_mut(
                c.Mut.set_item(
                    "x",
                    c.call_func(sum, c.this.call_method("values"))
                    + c.input_arg("base"),
                )
            )
            .as_type(tuple)
        )
        .execute([(0, 1), (0, 2), (1, 7)], base=100, debug=False)
    )
    assert result == [
        ({0: 2, "x": 102}, {2: 0, "x": 100}),
        ({1: 7, "x": 107}, {7: 1, "x": 101}),
    ]
# --- Example #20 (scrape-artifact separator, commented out) ---
def test_zip_in_aggregate():
    """c.zip over two Array reducers yields aligned dicts per group."""
    input_data = [
        ("kitchen", "size", 10),
        ("kitchen", "temperature", 40),
        ("living_room", "size", 12),
        ("living_room", "color", "white"),
    ]
    # Group by the property name; zip the collected rooms with the
    # collected values into {"room": ..., "value": ...} dicts.
    converter = (
        c.group_by(c.item(1))
        .aggregate(
            {
                "prop": c.item(1),
                "values": c.zip(
                    room=c.ReduceFuncs.Array(c.item(0)),
                    value=c.ReduceFuncs.Array(c.item(2)),
                ).as_type(list),
            }
        )
        .gen_converter()
    )
    assert converter(input_data) == [
        {
            "prop": "size",
            "values": [
                {"room": "kitchen", "value": 10},
                {"room": "living_room", "value": 12},
            ],
        },
        {
            "prop": "temperature",
            "values": [{"room": "kitchen", "value": 40}],
        },
        {
            "prop": "color",
            "values": [{"room": "living_room", "value": "white"}],
        },
    ]
# --- Example #21 (scrape-artifact separator, commented out) ---
def test_grouping():
    """Broad group_by coverage: c.reduce variants, dict aggregates,
    multi-key grouping, blank group_by and error cases."""
    data = [
        {
            "name": "John",
            "category": "Games",
            "debit": 10,
            "balance": 90
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 200,
            "balance": -110
        },
        {
            "name": "John",
            "category": "Food",
            "debit": 30,
            "balance": -140
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 300,
            "balance": 0
        },
        {
            "name": "Nick",
            "category": "Food",
            "debit": 7,
            "balance": 50
        },
        {
            "name": "Nick",
            "category": "Games",
            "debit": 18,
            "balance": 32
        },
        {
            "name": "Bill",
            "category": "Games",
            "debit": 18,
            "balance": 120
        },
    ]
    # Per name: lambda/inline_expr reduces with callable and input_arg
    # initials, conditional max with defaults, and MaxRow/MinRow lookups.
    result = (c.group_by(c.item("name")).aggregate((
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        c.reduce(
            lambda a, b: a + b,
            c.item("debit"),
            initial=c.input_arg("arg1"),
            unconditional_init=True,
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
            unconditional_init=True,
        ),
        c.reduce(
            max,
            c.item("debit"),
            prepare_first=lambda a: a,
            default=c.input_arg("arg1"),
            where=c.call_func(lambda x: x < 0, c.item("balance")),
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                max,
                c.item("debit"),
                prepare_first=lambda a: a,
                default=0,
                where=c.call_func(lambda x: x < 0, c.item("balance")),
            ),
            1000,
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
                where=c.inline_expr("{0} > {1}").pass_args(
                    c.item("balance"),
                    c.input_arg("arg2"),
                ),
            ),
            -1,
        ),
        c.reduce(c.ReduceFuncs.MaxRow, c.item("debit")).item("balance"),
        c.reduce(c.ReduceFuncs.MinRow, c.item("debit")).item("balance"),
    )).sort(key=lambda t: t[0].lower(), reverse=True).execute(data,
                                                              arg1=100,
                                                              arg2=0,
                                                              debug=False))

    # fmt: off
    assert result == [
        ('Nick', 'nick', 'nick', 125, 125, 100, 0, -18, 32, 50),
        ('John', 'john', 'john', 640, 640, 200, 200000, -10, 0, 90),
        ('Bill', 'bill', 'bill', 118, 118, 100, 0, -18, 120, 120),
    ]
    # fmt: on

    with pytest.raises(c.ConversionException):
        # there's a single group by field, while we use separate items
        # of this tuple in aggregate
        result = (c.group_by(c.item("name")).aggregate((
            c.item("category"),
            c.reduce(c.ReduceFuncs.Sum, c.item("debit")),
        )).execute(data, debug=False))

    # Dict aggregate keyed by a reducer expression (the tuple of names)
    # alongside plain string keys.
    aggregation = {
        c.call_func(
            tuple,
            c.ReduceFuncs.Array(c.item("name"), default=None),
        ):
        c.item("category").call_method("lower"),
        "count":
        c.ReduceFuncs.Count(),
        "max":
        c.ReduceFuncs.Max(c.item("debit")),
        "min":
        c.ReduceFuncs.Min(c.item("debit")),
        "count_distinct":
        c.ReduceFuncs.CountDistinct(c.item("name")),
        "array_agg_distinct":
        c.ReduceFuncs.ArrayDistinct(c.item("name")),
        "dict":
        c.ReduceFuncs.Dict(c.item("debit"), c.item("name")),
    }
    # Same aggregation expressed as a dict literal and via c.dict().
    result = (c.group_by(c.item("category")).aggregate(aggregation).execute(
        data, debug=False))
    result2 = (c.group_by(c.item("category")).aggregate(
        c.dict(*aggregation.items())).execute(data, debug=False))
    # fmt: off
    assert result == result2 == [
        {
            'array_agg_distinct': ['John', 'Nick', 'Bill'],
            'count': 5,
            'count_distinct': 3,
            'dict': {
                10: 'John',
                18: 'Bill',
                200: 'John',
                300: 'John'
            },
            'max': 300,
            'min': 10,
            ('John', 'John', 'John', 'Nick', 'Bill'): 'games'
        }, {
            'array_agg_distinct': ['John', 'Nick'],
            'count': 2,
            'count_distinct': 2,
            'dict': {
                7: 'Nick',
                30: 'John'
            },
            'max': 30,
            'min': 7,
            ('John', 'Nick'): 'food'
        }
    ]
    # fmt: on
    # Aggregate result piped into an inline expression (x + x).
    result3 = (c.aggregate(c.ReduceFuncs.Sum(c.item("debit"))).pipe(
        c.inline_expr("{0} + {1}").pass_args(c.this(),
                                             c.this())).execute(data,
                                                                debug=False))
    assert result3 == 583 * 2

    # Multi-key grouping: keys unpacked into group_by, echoed in output.
    by = c.item("name"), c.item("category")
    result4 = (c.group_by(
        *by).aggregate(by + (c.ReduceFuncs.Sum(c.item("debit")), )).execute(
            data, debug=False))
    # fmt: off
    assert result4 == [('John', 'Games', 510), ('John', 'Food', 30),
                       ('Nick', 'Food', 7), ('Nick', 'Games', 18),
                       ('Bill', 'Games', 18)]
    # fmt: on
    # Blank group_by aggregates the whole input as one group.
    result5 = (c.group_by().aggregate(c.ReduceFuncs.Sum(
        c.item("debit"))).execute(data, debug=False))
    assert result5 == 583

    with pytest.raises(c.ConversionException):
        # there's a single group by field, while we use separate items
        # of this tuple in aggregate
        (c.group_by(by).aggregate(
            by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")), )).execute(
                data, debug=False))
# --- Example #22 (scrape-artifact separator, commented out) ---
def test_blank_aggregate(series):
    """Aggregating only the group key yields the ordered distinct keys."""
    actual = c.group_by(c.item(0)).aggregate(c.item(0)).execute(series)
    # A dict comprehension keeps first-seen order of the distinct keys.
    expected = list({x[0]: 1 for x in series})
    assert eq(actual, expected)
# --- Example #23 (scrape-artifact separator, commented out) ---
def test_group_by_with_pipes():
    """Pipes chained after reducers inside a multi-key group_by."""
    # fmt: off
    input_data = [
        {
            "name": "John",
            "started_at": date(2020, 1, 1),
            "stopped_at": None,
            "product": "A"
        },
        {
            "name": "John",
            "started_at": date(2020, 1, 1),
            "stopped_at": date(2020, 1, 2),
            "product": "B"
        },
        {
            "name": "John",
            "started_at": date(2020, 1, 1),
            "stopped_at": None,
            "product": "C"
        },
        {
            "name": "Nick",
            "started_at": date(2020, 1, 1),
            "stopped_at": None,
            "product": "D"
        },
        {
            "name": "Nick",
            "started_at": date(2020, 2, 1),
            "stopped_at": None,
            "product": "D"
        },
        {
            "name": "Nick",
            "started_at": date(2020, 2, 1),
            "stopped_at": None,
            "product": "E"
        },
    ]
    # fmt: on
    # Collect distinct products of still-active rows (stopped_at is None;
    # stopped rows contribute None, filtered out by the pipe), then sort
    # and join them into a comma-separated string.
    output = (c.group_by(
        c.item("name"),
        c.item("started_at"),
    ).aggregate({
        "name":
        c.item("name"),
        "started_at":
        c.item("started_at"),
        "products":
        c.ReduceFuncs.ArrayDistinct(
            c.if_(
                c.item("stopped_at").is_(None),
                c.item("product"),
                None,
            ), ).pipe(c.filter(c.this())).pipe(
                c.call_func(sorted, c.this()).pipe(
                    c(", ").call_method("join", c.this()))).pipe(c.this()),
    }).execute(input_data))
    # fmt: off
    assert output == [{
        'name': 'John',
        'products': 'A, C',
        'started_at': date(2020, 1, 1)
    }, {
        'name': 'Nick',
        'products': 'D',
        'started_at': date(2020, 1, 1)
    }, {
        'name': 'Nick',
        'products': 'D, E',
        'started_at': date(2020, 2, 1)
    }]
    # fmt: on

    # Subscription-style access on c.this() plus slicing a piped reducer.
    reducer = c.ReduceFuncs.Array(c.this(), default=list)
    output = (c.group_by(
        c.this()["name"],
        c.this()["started_at"],
    ).aggregate({
        "name": c.this()["name"],
        "started_at": c.this()["started_at"],
        "products": c.this()["product"].pipe(reducer)[:3],
    }).execute(input_data))
    assert output == [
        {
            "name": "John",
            "products": ["A", "B", "C"],
            "started_at": date(2020, 1, 1),
        },
        {
            "name": "Nick",
            "products": ["D"],
            "started_at": date(2020, 1, 1),
        },
        {
            "name": "Nick",
            "products": ["D", "E"],
            "started_at": date(2020, 2, 1),
        },
    ]
def test_manually_defined_reducers():
    """c.reduce with callable initial/default, plus filter cast variants.

    NOTE(review): this redefines ``test_manually_defined_reducers`` — at
    import time it shadows the earlier definition in this file.
    """
    data = [
        {
            "name": "John",
            "category": "Games",
            "debit": 10,
            "balance": 90
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 200,
            "balance": -110
        },
        {
            "name": "John",
            "category": "Food",
            "debit": 30,
            "balance": -140
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 300,
            "balance": 0
        },
        {
            "name": "Nick",
            "category": "Food",
            "debit": 7,
            "balance": 50
        },
        {
            "name": "Nick",
            "category": "Games",
            "debit": 18,
            "balance": 32
        },
        {
            "name": "Bill",
            "category": "Games",
            "debit": 18,
            "balance": 120
        },
    ]
    # initial/default given as the callable `int` (i.e. produces 0).
    grouper_base = c.group_by(c.item("name")).aggregate(
        c.reduce(
            lambda a, b: a + b,
            c.item(c.input_arg("group_key")),
            initial=int,
            default=int,
        ))
    grouper = grouper_base.filter(c.this > 20).gen_converter(
        signature="data_, group_key='debit'", debug=False)
    assert grouper(data) == [540, 25]
    assert list(grouper(data, group_key="balance")) == [82, 120]

    # filter(..., cast=list) materializes the filtered result as a list.
    grouper = grouper_base.filter(
        (c.this > 20),
        cast=list).gen_converter(signature="data_, group_key='debit'",
                                 debug=False)
    assert grouper(data) == [540, 25]

    # filter(..., cast=set) materializes it as a set instead.
    grouper = grouper_base.filter(
        (c.this > 20),
        cast=set).gen_converter(signature="data_, group_key='debit'",
                                debug=False)
    assert grouper(data, group_key="balance") == {82, 120}
# --- Example #25 (scrape-artifact separator, commented out) ---
def test_nested_group_by():
    """Nested aggregations and short-circuiting of the First reducer.

    NOTE(review): this redefines ``test_nested_group_by`` — at import
    time it shadows the earlier definition in this file.
    """
    data = [
        [0, [1, 2, 3]],
        [0, [4, 5, 6]],
        [1, [2, 3, 4]],
    ]
    # Inner aggregate sums each row's sub-list; outer Sum adds those
    # per-row sums within each group of item(0).
    assert c.group_by(c.item(0)).aggregate((
        c.item(0),
        c.ReduceFuncs.Sum(
            c.item(1).pipe(c.aggregate(c.ReduceFuncs.Sum(c.this())))),
    )).execute(data, debug=False) == [
        (0, 21),
        (1, 9),
    ]
    agg_conv = c.aggregate(c.ReduceFuncs.Sum(c.this()))
    # Same result wrapped in layers of pass-through c.if_ pipes and a
    # tuple-of-aggregates pipe (item(1) then picks the second copy) to
    # exercise pipe handling around reducers.
    assert c.group_by(c.item(0)).aggregate((
        c.item(0),
        c.if_(
            c.item(1),
            c.item(1),
            c.item(1),
        ).pipe(
            c.if_(
                c.this(),
                c.this(),
                c.this(),
            ).pipe(
                c.ReduceFuncs.Sum(
                    c.if_(
                        c.this(),
                        c.this(),
                        c.this(),
                    ).pipe((agg_conv, agg_conv)).pipe(c.item(1))).pipe(
                        c.if_(
                            c.this(),
                            c.this(),
                            c.this(),
                        )), )),
    )).execute(data, debug=False) == [
        (0, 21),
        (1, 9),
    ]

    summer = c.aggregate(c.ReduceFuncs.Sum(c.this()))

    # Second-level aggregate over the collected rows of each id_ group:
    # picks the first row containing each key, then sums "value2" lists.
    merger = c.aggregate({
        "value1":
        c.ReduceFuncs.First(c.item("value1"), where=c("value1").in_(c.this())),
        "value2":
        c.ReduceFuncs.First(c.item("value2"),
                            where=c("value2").in_(c.this())).pipe(
                                c.if_(c.this(),
                                      c.this().pipe(summer))),
    })
    converter = (c.group_by(c.item("id_")).aggregate({
        "id_":
        c.item("id_"),
        "data":
        c.ReduceFuncs.Array(c.this()).pipe(merger),
    }).gen_converter(debug=False))
    assert converter([
        {
            "id_": 1,
            "value1": 2
        },
        {
            "id_": 2,
            "value1": 3
        },
        {
            "id_": 2,
            "value2": [1, 2, 3]
        },
    ]) == [
        {
            "id_": 1,
            "data": {
                "value1": 2,
                "value2": None
            }
        },
        {
            "id_": 2,
            "data": {
                "value1": 3,
                "value2": 6
            }
        },
    ]

    def g():
        yield 1
        raise Exception

    # First must stop consuming after the first item, so the generator's
    # later exception is never reached.
    assert (c.aggregate(c.ReduceFuncs.First(c.this())).execute(
        g(), debug=False)) == 1
# --- Example #26 (scrape-artifact separator, commented out) ---
def test_doc__index_intro():
    """Doc example: the three core convtools features — group_by, aggregate
    and join — each checked against a small literal dataset."""

    # ======== #
    # GROUP BY #
    # ======== #
    rows = [
        {"a": 5, "b": "foo"},
        {"a": 10, "b": "foo"},
        {"a": 10, "b": "bar"},
        {"a": 10, "b": "bar"},
        {"a": 20, "b": "bar"},
    ]

    # One output dict per distinct "b"; First/Max reduce "a" within the group.
    group_conv = (
        c.group_by(c.item("b"))
        .aggregate(
            {
                "b": c.item("b"),
                "a_first": c.ReduceFuncs.First(c.item("a")),
                "a_max": c.ReduceFuncs.Max(c.item("a")),
            }
        )
        .gen_converter(debug=True)
    )

    assert group_conv(rows) == [
        {"b": "foo", "a_first": 5, "a_max": 10},
        {"b": "bar", "a_first": 10, "a_max": 20},
    ]

    # ========= #
    # AGGREGATE #
    # ========= #
    agg_conv = c.aggregate(
        {
            # list of "a" values taken only from rows where "b" == "bar"
            "a": c.ReduceFuncs.Array(c.item("a"), where=c.item("b") == "bar"),
            # "b" of the row holding the max "a" value
            "b": c.ReduceFuncs.MaxRow(c.item("a")).item("b", default=None),
        }
    ).gen_converter(debug=True)

    assert agg_conv(rows) == {"a": [10, 10, 20], "b": "bar"}

    # ==== #
    # JOIN #
    # ==== #
    people = [
        {"id": 1, "name": "Nick"},
        {"id": 2, "name": "Joash"},
        {"id": 3, "name": "Bob"},
    ]
    details = [
        {"ID": "3", "age": 17, "country": "GB"},
        {"ID": "2", "age": 21, "country": "US"},
        {"ID": "1", "age": 18, "country": "CA"},
    ]

    # Left join on id == int(ID), matching only adult right-side rows; the
    # joined pairs are then flattened into plain dicts.
    join_conv = (
        c.join(
            c.item(0),
            c.item(1),
            c.and_(
                c.LEFT.item("id") == c.RIGHT.item("ID").as_type(int),
                c.RIGHT.item("age") >= 18,
            ),
            how="left",
        )
        .pipe(
            c.list_comp(
                {
                    "id": c.item(0, "id"),
                    "name": c.item(0, "name"),
                    "age": c.item(1, "age", default=None),
                    "country": c.item(1, "country", default=None),
                }
            )
        )
        .gen_converter(debug=True)
    )

    assert join_conv((people, details)) == [
        {"id": 1, "name": "Nick", "age": 18, "country": "CA"},
        {"id": 2, "name": "Joash", "age": 21, "country": "US"},
        {"id": 3, "name": "Bob", "age": None, "country": None},
    ]
# Exemple #27
# 0
def test_grouping():
    """Exercise group_by/aggregate: built-in and custom reducers, reducer
    filters and defaults, runtime input args, dict aggregation specs with
    expression keys, composite group keys, and key-less grouping.
    """
    data = [
        {
            "name": "John",
            "category": "Games",
            "debit": 10,
            "balance": 90
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 200,
            "balance": -110
        },
        {
            "name": "John",
            "category": "Food",
            "debit": 30,
            "balance": -140
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 300,
            "balance": 0
        },
        {
            "name": "Nick",
            "category": "Food",
            "debit": 7,
            "balance": 50
        },
        {
            "name": "Nick",
            "category": "Games",
            "debit": 18,
            "balance": 32
        },
        {
            "name": "Bill",
            "category": "Games",
            "debit": 18,
            "balance": 120
        },
    ]
    # Group rows by name and build a tuple per group, mixing:
    #  * plain expressions / method calls on the group key,
    #  * custom c.reduce() reducers (lambda, inline_expr, builtin max) with
    #    `initial`, `default` and `.filter(...)` options, some fed by the
    #    runtime input arg "arg1",
    #  * built-in ReduceFuncs (Max, MaxRow, MinRow),
    # then sort the resulting tuples by lowercased name, descending.
    result = (c.group_by(c.item("name")).aggregate((
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        c.reduce(
            lambda a, b: a + b,
            c.item("debit"),
            initial=c.input_arg("arg1"),
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
        ),
        c.reduce(max, c.item("debit"), default=c.input_arg("arg1")).filter(
            c.call_func(lambda x: x < 0, c.item("balance"))),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(max, c.item("debit"), default=0).filter(
                c.call_func(lambda x: x < 0, c.item("balance"))),
            1000,
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
            ).filter(c.inline_expr("{0} > 0").pass_args(c.item("balance"))),
            -1,
        ),
        c.reduce(
            c.ReduceFuncs.MaxRow,
            c.item("debit"),
        ).item("balance"),
        c.reduce(
            c.ReduceFuncs.MinRow,
            c.item("debit"),
        ).item("balance"),
    )).sort(key=lambda t: t[0].lower(), reverse=True).execute(data,
                                                              arg1=100,
                                                              debug=False))
    # fmt: off
    assert result == [
        ('Nick', 'nick', 'nick', 125, 125, 100, 0, -18, 32, 50),
        ('John', 'john', 'john', 640, 640, 200, 200000, -10, 0, 90),
        ('Bill', 'bill', 'bill', 118, 118, 100, 0, -18, 120, 120)
    ]
    # fmt: on

    # Aggregation spec as a dict; note the first key is itself a conversion
    # (a tuple of all names in the group), so result keys may be dynamic.
    aggregation = {
        c.call_func(
            tuple,
            c.reduce(c.ReduceFuncs.Array, c.item("name"), default=None),
        ):
        c.item("category").call_method("lower"),
        "count":
        c.reduce(c.ReduceFuncs.Count),
        "count_distinct":
        c.reduce(c.ReduceFuncs.CountDistinct, c.item("name")),
        "array_agg_distinct":
        c.reduce(
            c.ReduceFuncs.ArrayDistinct,
            c.item("name"),
        ),
        "dict":
        c.reduce(c.ReduceFuncs.Dict, (c.item("debit"), c.item("name"))),
    }
    result = (c.group_by(c.item("category")).aggregate(aggregation).execute(
        data, debug=False))
    # The same spec passed as c.dict(*items) must produce an identical result.
    result2 = (c.group_by(c.item("category")).aggregate(
        c.dict(*aggregation.items())).execute(data, debug=False))
    # fmt: off
    assert result == result2 == [
        {
            'array_agg_distinct': ['John', 'Nick', 'Bill'],
            'count': 5,
            'count_distinct': 3,
            'dict': {
                10: 'John',
                18: 'Bill',
                200: 'John',
                300: 'John'
            },
            ('John', 'John', 'John', 'Nick', 'Bill'): 'games'
        }, {
            'array_agg_distinct': ['John', 'Nick'],
            'count': 2,
            'count_distinct': 2,
            'dict': {
                7: 'Nick',
                30: 'John'
            },
            ('John', 'Nick'): 'food'
        }
    ]
    # fmt: on
    # Aggregate over the whole input, then pipe the total into an inline
    # expression which adds it to itself (i.e. doubles it).
    result3 = (c.aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit"))).pipe(
        c.inline_expr("{0} + {1}").pass_args(c.this(),
                                             c.this())).execute(data,
                                                                debug=False))
    assert result3 == 583 * 2

    # Composite group key: group by (name, category) and sum debits per pair.
    by = c.item("name"), c.item("category")
    result4 = (c.group_by(*by).aggregate(by + (
        c.reduce(c.ReduceFuncs.Sum, c.item("debit")), )).execute(data,
                                                                 debug=False))
    # fmt: off
    assert result4 == [('John', 'Games', 510), ('John', 'Food', 30),
                       ('Nick', 'Food', 7), ('Nick', 'Games', 18),
                       ('Bill', 'Games', 18)]
    # fmt: on
    # Key-less group_by() aggregates over all rows at once.
    result5 = (c.group_by().aggregate(
        c.reduce(c.ReduceFuncs.Sum, c.item("debit"))).execute(data,
                                                              debug=False))
    assert result5 == 583
def test_doc__quickstart_aggregation():
    """Doc example: group sales rows by company and aggregate with a shared
    MaxRow reducer, None-sensitive sums, and dict-valued reducers."""
    input_data = [
        {
            "company_name": "Facebrochure",
            "company_hq": "CA",
            "app_name": "Tardygram",
            "date": "2019-01-01",
            "country": "US",
            "sales": Decimal("45678.98"),
        },
        {
            "company_name": "Facebrochure",
            "company_hq": "CA",
            "app_name": "Tardygram",
            "date": "2019-01-02",
            "country": "US",
            "sales": Decimal("86869.12"),
        },
        {
            "company_name": "Facebrochure",
            "company_hq": "CA",
            "app_name": "Tardygram",
            "date": "2019-01-03",
            "country": "CA",
            "sales": Decimal("45000.35"),
        },
        {
            "company_name": "BrainCorp",
            "company_hq": "NY",
            "app_name": "Learn FT",
            "date": "2019-01-01",
            "country": "US",
            "sales": Decimal("86869.12"),
        },
    ]

    # we are going to reuse this reducer
    top_sales_day = c.ReduceFuncs.MaxRow(c.item("sales"))

    # so the result is going to be a list of dicts
    converter = (
        c.group_by(c.item("company_name"))
        .aggregate(
            {
                "company_name": c.item("company_name").call_method("upper"),
                # this would work as well
                # c.item("company_name"): ...,
                "none_sensitive_sum": c.ReduceFuncs.SumOrNone(c.item("sales")),
                # as you can see, next two reduce objects do the same except taking
                # different fields after finding a row with max value.
                # but please check the generated code below, you'll see that it is
                # calculated just once AND then reused to take necessary fields
                "top_sales_app": top_sales_day.item("app_name"),
                "top_sales_day": (
                    top_sales_day.item("date")
                    .pipe(
                        datetime.strptime,
                        "%Y-%m-%d",
                    )
                    .call_method("date")
                ),
                "company_hq": c.ReduceFuncs.First(c.item("company_hq")),
                "app_name_to_countries": c.ReduceFuncs.DictArrayDistinct(
                    c.item("app_name"), c.item("country")
                ),
                "app_name_to_sales": c.ReduceFuncs.DictSum(
                    c.item("app_name"), c.item("sales")
                ),
            }
        )
        .gen_converter(debug=True)
    )

    # groups come out in first-seen order: Facebrochure, then BrainCorp
    assert converter(input_data) == [
        {
            "app_name_to_countries": {"Tardygram": ["US", "CA"]},
            "app_name_to_sales": {"Tardygram": Decimal("177548.45")},
            "company_hq": "CA",
            "company_name": "FACEBROCHURE",
            "none_sensitive_sum": Decimal("177548.45"),
            "top_sales_app": "Tardygram",
            "top_sales_day": date(2019, 1, 2),
        },
        {
            "app_name_to_countries": {"Learn FT": ["US"]},
            "app_name_to_sales": {"Learn FT": Decimal("86869.12")},
            "company_hq": "NY",
            "company_name": "BRAINCORP",
            "none_sensitive_sum": Decimal("86869.12"),
            "top_sales_app": "Learn FT",
            "top_sales_day": date(2019, 1, 1),
        },
    ]