Exemple #1
0
def test_complex_labeling():
    """Check that labels set at several pipe stages are readable downstream.

    The converter labels the raw input, keeps the multiples of three, turns
    them into strings and joins them with a separator that depends on how
    many items survived the filter.
    """
    labeled_input = c.this().add_label("input")
    multiples_of_three = labeled_input.pipe(
        c.filter(c.this() % 3 == 0),
        label_input={"input_type": c.call_func(type, c.this())},
    )
    as_strings = multiples_of_three.pipe(
        c.list_comp(c.this().as_type(str)),
        label_output={
            "list_length": c.call_func(len, c.this()),
            "separator": c.if_(c.label("list_length") > 10, ",", ";"),
        },
    )
    summary = as_strings.pipe(
        {
            "result": c.label("separator").call_method("join", c.this()),
            "input_type": c.label("input_type"),
            "input_data": c.label("input"),
        }
    )
    conv1 = summary.gen_converter(debug=False)

    # range(30) leaves 10 items (";" is chosen), range(40) leaves 14 (",").
    cases = (
        (30, "0;3;6;9;12;15;18;21;24;27"),
        (40, "0,3,6,9,12,15,18,21,24,27,30,33,36,39"),
    )
    for stop, joined in cases:
        assert conv1(range(stop)) == {
            "result": joined,
            "input_type": range,
            "input_data": range(0, stop),
        }
Exemple #2
0
def test_caching_conversion():
    """Exercise ``if_`` fed by a function that tolerates only one call."""

    class CustomException(Exception):
        pass

    def f(number):
        # Succeeds once per arming of ``first_time``; raises afterwards.
        if f.first_time:
            f.first_time = False
            return number
        raise CustomException

    f.first_time = True

    single_call = c.call_func(f, c.this())
    conv = single_call.pipe(
        c.if_(c.this(), c.this() + 1, c.this() + 2)
    ).gen_converter()

    # Falsy input takes the "+ 2" branch; ``f`` must have run only once.
    assert conv(0) == 2
    # ``f`` has not been re-armed, so the next run raises.
    with pytest.raises(CustomException):
        assert conv(0) == 2

    f.first_time = True
    # Truthy input takes the "+ 1" branch.
    assert conv(1) == 2

    # NOTE(review): ``f.first_time`` is already False at this point, so the
    # very first call of ``f`` raises whatever ``no_input_caching`` does --
    # confirm whether re-arming ``f`` before this block was intended.
    with pytest.raises(CustomException):
        uncached_branch = c.call_func(f, c.this()).pipe(
            c.if_(
                c.this(),
                c.this() + 1,
                c.this() + 2,
                no_input_caching=True,
            )
        )
        uncached_branch.execute(0)
Exemple #3
0
def test_pipes():
    """Chain several ``pipe`` calls and check the final results."""
    squares = c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this()))
    total = squares.pipe(c.call_func(sum, c.this()))
    shifted = total.pipe(
        c.call_func(
            lambda x, a: x + a,
            c.this(),
            c.naive({"abc": 10}).item(c.input_arg("key_name")),
        )
    )
    duplicated = shifted.pipe([c.this(), c.this()])
    # 1 + 4 + 9 = 14, plus the 10 looked up via ``key_name``.
    assert duplicated.execute(
        [1, 2, 3], key_name="abc", debug=False
    ) == [24, 24]

    # ``pipe`` also accepts a plain callable followed by extra arguments.
    parsed = c.item(0).pipe(datetime.strptime, "%Y-%m-%d")
    via_lambda = parsed.pipe(c.call_func(lambda dt: dt.date(), c.this()))
    assert via_lambda.execute(["2019-01-01"], debug=False) == date(2019, 1, 1)

    parsed_again = c.item(0).pipe(datetime.strptime, "%Y-%m-%d")
    via_method = parsed_again.pipe(c.this().call_method("date"))
    assert via_method.execute(["2019-01-01"], debug=False) == date(2019, 1, 1)

    with pytest.raises(c.ConversionException):
        c.naive(True).pipe(c.item("key1", _predefined_input={"key1": 777}))
Exemple #4
0
def test_pipes():
    """Chain several ``pipe`` calls (attribute-style ``c.this``)."""
    squares = c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this))
    total = squares.pipe(c.call_func(sum, c.this))
    shifted = total.pipe(
        c.call_func(
            lambda x, a: x + a,
            c.this,
            c.naive({"abc": 10}).item(c.input_arg("key_name")),
        )
    )
    duplicated = shifted.pipe([c.this, c.this])
    # 1 + 4 + 9 = 14, plus the 10 looked up via ``key_name``.
    assert duplicated.execute(
        [1, 2, 3], key_name="abc", debug=False
    ) == [24, 24]

    # ``pipe`` also accepts a plain callable followed by extra arguments.
    parsed = c.item(0).pipe(datetime.strptime, "%Y-%m-%d")
    via_lambda = parsed.pipe(c.call_func(lambda dt: dt.date(), c.this))
    assert via_lambda.execute(["2019-01-01"], debug=False) == date(2019, 1, 1)

    parsed_again = c.item(0).pipe(datetime.strptime, "%Y-%m-%d")
    via_method = parsed_again.pipe(c.this.call_method("date"))
    assert via_method.execute(["2019-01-01"], debug=False) == date(2019, 1, 1)

    # Nested pipes inside a dict comprehension.
    name_key = c.item("name")
    transactions = c.item("transactions")
    transaction_row = {
        "id": c.item(0).as_type(str),
        "amount": c.item(1).pipe(c.if_(c.this, c.this.as_type(Decimal), None)),
    }
    conv = c.dict_comp(
        name_key, transactions.pipe(c.list_comp(transaction_row))
    ).gen_converter(debug=False)

    payload = [{"name": "test", "transactions": [(0, 0), (1, 10)]}]
    assert conv(payload) == {
        "test": [
            {"id": "0", "amount": None},
            {"id": "1", "amount": Decimal("10")},
        ]
    }

    truthy_sorted = (
        c.this.pipe(lambda it: it).filter(c.this).sort().as_type(list)
    )
    assert truthy_sorted.execute((2, 1, 0)) == [1, 2]
Exemple #5
0
    # Fixture class whose attributes are generated converters bound in
    # different ways (plain method, classmethod, staticmethod, custom
    # signature). The class attribute ``x`` is 10; instances override it
    # with 20 in ``__init__``.
    class A:
        x = 10

        def __init__(self):
            self.x = 20

        # Generated with ``method=True``: adds the input to ``self.x``.
        conv1 = (c.this() +
                 c.input_arg("self").attr("x")).gen_converter(method=True)
        # Generated with ``method=True`` but reads an input arg named "cls".
        # NOTE(review): presumably the negative case for method converters --
        # confirm against the assertions that follow this class.
        conv2 = (c.this() +
                 c.input_arg("cls").attr("x")).gen_converter(method=True)

        # Generated with ``class_method=True`` and wrapped in ``classmethod``:
        # adds the input to ``cls.x``.
        conv3 = classmethod(
            (c.this() +
             c.input_arg("cls").attr("x")).gen_converter(class_method=True))
        # Same wrapping, but reads an input arg named "self".
        # NOTE(review): presumably the negative case for classmethod
        # converters -- confirm against the assertions that follow.
        conv4 = classmethod(
            (c.this() +
             c.input_arg("self").attr("x")).gen_converter(class_method=True))

        # Explicit signature: ``n`` and ``data_`` carry default values.
        conv5 = (c.this() + c.input_arg("self").attr("x") +
                 c.input_arg("n")).gen_converter(
                     signature="self, n=1000, data_=15")

        # Static converter with ``*args``/``**kwargs``: (input + sum(args))
        # multiplied by ``kwargs.get("multiplicator", 1)``.
        conv6 = staticmethod(
            ((c.this() + c.call_func(sum, c.input_arg("args"))) *
             c.input_arg("kwargs").call_method("get", "multiplicator", 1)
             ).gen_converter(signature="data_, *args, **kwargs"))
def test_take_while():
    """``take_while`` keeps items until the condition first fails."""
    below_three = c.take_while(c.this < 3).as_type(list)
    assert below_three.execute(range(5)) == [0, 1, 2]

    from_range = c.call_func(range, c.this).take_while(c.this < 3)
    assert from_range.as_type(list).execute(5) == [0, 1, 2]

    def f():
        # Raises once exhausted, so reading past the stop point would fail.
        for number in range(5):
            yield number
        raise Exception

    bounded = c.take_while(c.this < c.input_arg("stop_before"))
    bounded = bounded.filter(c.this >= c.input_arg("min_value"))
    bounded = bounded.filter(c.this < 3, cast=list)
    assert bounded.execute(f(), min_value=2, stop_before=4) == [2]

    # Condition fails on the very first item.
    nothing_taken = c.take_while(c.this < 0).as_type(list)
    assert nothing_taken.execute(range(10)) == []
Exemple #7
0
def test_naive_conversion_call():
    """Call attributes of naive values, plain functions and the input."""
    lower_of_literal = c.naive("TEST").attr("lower").call().gen_converter()
    assert lower_of_literal(100) == "test"

    lower_of_input = c.call_func(str.lower, c.this()).gen_converter()
    assert lower_of_input("TEST") == "test"

    strip_spaces = (
        c.naive("TE ST").attr("replace").call(" ", "").gen_converter()
    )
    assert strip_spaces(100) == "TEST"

    f = MagicMock(return_value=1)

    # The mock is embedded as a naive value and called with fixed arguments.
    call_embedded = c.naive(f).call(1, 2, test1=True, test2="test3")
    call_embedded.gen_converter()(100)
    f.assert_called_with(1, 2, test1=True, test2="test3")

    # Here the mock is the converter's input and ``c.call`` invokes it.
    call_input = c.call(10, test="abc").gen_converter()
    call_input(f)
    f.assert_called_with(10, test="abc")
Exemple #8
0
def test_comprehension_filter_cast_assumptions():
    """Check the result type of ``.filter`` applied to each comprehension."""
    lazy_default = c.generator_comp(c.this).filter(c.this)
    assert isinstance(lazy_default.execute(range(10)), GeneratorType)

    lazy_no_cast = c.generator_comp(c.this).filter(c.this, cast=None)
    assert isinstance(lazy_no_cast.execute(range(10)), GeneratorType)

    assert c.list_comp(c.this).filter(c.this).execute(range(3)) == [1, 2]

    def f(x):
        # Truthiness check that refuses to run more than
        # ``max_number_of_calls`` times.
        calls_so_far = f.number_of_calls + 1
        f.number_of_calls = calls_so_far
        if calls_so_far > f.max_number_of_calls:
            raise ValueError
        return bool(x)

    f.number_of_calls = 0
    f.max_number_of_calls = 2

    # Three input items but only two calls are allowed.
    counted = c.set_comp(c.this).filter(c.call_func(f, c.this))
    assert counted.execute([0, 0, 1]) == {1}

    set_to_list = c.set_comp(c.this).filter(c.this, cast=list)
    assert set_to_list.execute([0, 0, 1]) == [1]

    assert c.set_comp(c.this).filter(c.this).execute(range(3)) == {1, 2}

    assert c.tuple_comp(c.this).filter(c.this).execute(range(3)) == (1, 2)

    tuple_to_list = c.tuple_comp(c.this).filter(c.this, list)
    assert tuple_to_list.execute(range(3)) == [1, 2]

    expected_mapping = {1: 1, 2: 2}
    dict_default = c.dict_comp(c.this, c.this).filter(c.item(0))
    assert dict_default.execute(range(3)) == expected_mapping

    dict_explicit = c.dict_comp(c.this, c.this).filter(c.item(0), dict)
    assert dict_explicit.execute(range(3)) == expected_mapping
Exemple #9
0
def test_pipe_conversion():
    """Exercise ``PipeConversion`` directly and through ``.pipe``."""
    from convtools import conversion as c
    from convtools.base import PipeConversion

    second_of_literal = PipeConversion(c.naive([1, 2, 3]), c.item(1))
    assert second_of_literal.execute(None) == 2

    nested_lookup = PipeConversion(c.item("key1"), c.item("key2"))
    assert nested_lookup.execute({"key1": {"key2": 3}}, debug=False) == 3

    incremented = c.this.pipe(c.list_comp(c.this + 1)).filter(c.this > 3)
    assert incremented.execute([1, 2, 3, 4, 5, 6], debug=False) == [
        4,
        5,
        6,
        7,
    ]

    # Only checks that a piped reducer compiles; the converter is not called.
    collected = c.ReduceFuncs.Array(c.item("key"), default=list)
    c.aggregate(
        collected.pipe(
            c.if_(
                c.call_func(any, c.generator_comp(c.this.is_(None))),
                c.call_func(list),
                c.this,
            )
        )
    ).gen_converter(debug=False)
Exemple #10
0
def test_chunks_by_condition(data_for_chunking):
    """Split the input into chunks by a condition and post-process them."""
    chunk_not_full = c.call_func(len, c.CHUNK) < 5
    z_per_chunk = (
        c.chunk_by_condition(chunk_not_full)
        .iter(c.list_comp(c.item("z")))
        .as_type(list)
    )
    assert z_per_chunk.execute(data_for_chunking) == [
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18],
    ]

    size_and_value = c.and_(
        c.call_func(len, c.CHUNK) < 5, c.item("z") < 18
    )
    median_per_chunk = (
        c.chunk_by_condition(size_and_value)
        .aggregate(c.ReduceFuncs.Median(c.item("z")))
        .as_type(list)
    )
    assert median_per_chunk.execute(data_for_chunking) == [12, 16, 18]

    # A constant False condition puts every item into its own chunk.
    one_per_chunk = c.chunk_by_condition(False).as_type(list)
    assert one_per_chunk.execute(range(3)) == [[0], [1], [2]]
def test_group_by_percentile():
    """Check ``ReduceFuncs.Percentile`` inside a ``group_by`` aggregate.

    For each key of "abc" (at position ``index``) the input holds the values
    ``index + 90`` down to ``0``, so the groups differ only in their maximum.
    Every percentile is rounded to two decimals before comparison.
    """
    input_data = [
        {"key": key, "value": value}
        for index, key in enumerate("abc")
        for value in range(index + 90, -1, -1)
    ]
    c_round = c.call_func(round, c.this, 2)

    def percentile(rank, **kwargs):
        # Percentile of "value" at ``rank``, rounded via ``c_round``.
        return c.ReduceFuncs.Percentile(
            rank, c.item("value"), **kwargs
        ).pipe(c_round)

    result = (
        c.group_by(c.item("key"))
        .aggregate(
            {
                "key": c.item("key"),
                "min": percentile(0),
                # This entry used to reuse the "min" key; a dict literal
                # keeps only the last value for a repeated key, so the plain
                # reducer above was never exercised. A distinct key lets
                # both variants run.
                "min_where": percentile(0, where=c.and_(default=True)),
                "percentile_5": percentile(5),
                "median": percentile(50),
                "percentile_95": percentile(95),
                "max": percentile(100),
            }
        )
        .execute(input_data)
    )

    assert result == [
        {
            "key": "a",
            "max": 90,
            "median": 45.0,
            "min": 0.0,
            "min_where": 0.0,
            "percentile_5": 4.5,
            "percentile_95": 85.5,
        },
        {
            "key": "b",
            "max": 91,
            "median": 45.5,
            "min": 0.0,
            "min_where": 0.0,
            "percentile_5": 4.55,
            "percentile_95": 86.45,
        },
        {
            "key": "c",
            "max": 92,
            "median": 46.0,
            "min": 0.0,
            "min_where": 0.0,
            "percentile_5": 4.6,
            "percentile_95": 87.4,
        },
    ]
Exemple #12
0
def test_generator_exception_handling():
    """An exception raised inside a generator converter reaches the caller."""

    class CustomException(Exception):
        pass

    def f_second_call_raises():
        # The first call only bumps the counter; any later call raises.
        if not f_second_call_raises.counter:
            f_second_call_raises.counter += 1
            return
        raise CustomException

    f_second_call_raises.counter = 0

    per_item_call = c.generator_comp(c.call_func(f_second_call_raises))
    conv = per_item_call.gen_converter()
    with pytest.raises(CustomException):
        # Two input items mean two calls; the second one raises.
        list(conv([1, 2]))
Exemple #13
0
def test_base_zip():
    """``c.zip`` with positional args gives tuples, with keywords dicts."""
    meta = {1: "a", 2: "b", 3: "c"}
    input_data = {"items": [1, 2, 3], "meta": meta}

    positional = c.zip(c.item("items"), c.repeat(c.item("meta")))
    converter = positional.as_type(list).gen_converter(debug=False)
    assert converter(input_data) == [(number, meta) for number in (1, 2, 3)]

    keyword = c.zip(item=c.item("items"), meta=c.repeat(c.item("meta")))
    converter = keyword.as_type(list).gen_converter(debug=False)
    assert converter(input_data) == [
        {"item": number, "meta": meta} for number in (1, 2, 3)
    ]

    input_data = [
        ([1, 2, 3], {1: "a", 2: "b", 3: "c"}),
        ([4, 5, 6], {4: "a", 5: "b", 6: "c"}),
    ]
    # Pair every key with its own mapping, flatten, look each key up in its
    # mapping and join the resulting letters.
    key_with_mapping = c.iter(c.zip(c.item(0), c.repeat(c.item(1))))
    letters = key_with_mapping.flatten().iter(c.item(1, c.item(0)))
    converter = letters.pipe(c.call_func(",".join, c.this)).gen_converter(
        debug=False
    )
    assert converter(input_data) == "a,b,c,a,b,c"

    # Mixing positional and keyword arguments is rejected.
    with pytest.raises(ValueError):
        c.zip(1, 2, a=1)
def test_drop_while():
    """``drop_while`` skips leading items while the condition holds."""
    skip_below_three = c.drop_while(c.this < 3).as_type(list)
    assert skip_below_three.execute(range(5)) == [3, 4]

    from_range = c.call_func(range, c.this).drop_while(
        c.this < c.input_arg("min_value")
    )
    assert from_range.as_type(list).execute(5, min_value=3) == [3, 4]

    # The condition holds for every item, so nothing is left.
    skip_everything = c.drop_while(c.this >= 0).as_type(list)
    assert skip_everything.execute(range(10)) == []
def test_mutation_item():
    """Check item mutations applied via ``tap`` and where they are rejected.

    The first stage builds a list of dicts; the second stage is given that
    list under the ``_input`` label and mutates every dict in place. The
    trailing checks verify that a mutation cannot be used as an ordinary
    conversion argument.
    """
    now = datetime.now()

    build_rows = c.list_comp(
        {
            "name": c.item("fullName"),
            "age": c.item("age").as_type(int),
            "to_del": 1,
        }
    )
    mutate_rows = c.list_comp(
        c.call_func(lambda d: d, c.this).tap(
            c.Mut.set_item("name_before", c.label("_input").item(0, "name")),
            c.Mut.set_item("name", c.item("name").call_method("lower")),
            # Read again after "name" was lowered; the expected output shows
            # the labeled input reflecting that change.
            c.Mut.set_item("name_after", c.label("_input").item(0, "name")),
            c.Mut.set_item("_updated", c.input_arg("now")),
            # The key itself is a conversion: the row's age value.
            c.Mut.set_item(c.item("age"), c.item("age") >= 18),
            c.Mut.del_item("to_del"),
            c.Mut.custom(c.this.call_method("update", {"to_add": 2})),
            c.this.call_method("update", {"to_add2": 4}),
        )
    )
    result = build_rows.pipe(mutate_rows, label_input="_input").execute(
        [{"fullName": "John", "age": "28"}], debug=False, now=now
    )
    assert result == [
        {
            "name": "john",
            "name_after": "john",
            "name_before": "John",
            "age": 28,
            "_updated": now,
            28: True,
            "to_add": 2,
            "to_add2": 4,
        }
    ]

    with pytest.raises(Exception):
        c.item(c.Mut.set_item("abc", "cde"))

    # Build the host conversion outside the ``raises`` block so that only the
    # ``ensure_conversion`` call can satisfy the expectation; a failure while
    # creating ``c.item(1)`` must not make this check pass.
    conversion = c.item(1)
    with pytest.raises(Exception):
        conversion.ensure_conversion(
            c.Mut.set_item("abc", "cde"), explicitly_allowed_cls=GetItem
        )
def test_iter_mut_method():
    """``iter_mut`` applies mutations to every element of an iterable."""
    first_items = c.iter(c.item(0)).as_type(list)
    assert first_items.execute([[1], [2]]) == [1, 2]

    append_seven = c.iter_mut(c.Mut.custom(c.this.call_method("append", 7)))
    assert append_seven.as_type(list).execute([[1], [2]]) == [[1, 7], [2, 7]]

    # Two consecutive ``iter_mut`` calls on freshly built dicts.
    wrapped = c.this.iter({"a": c.this})
    with_b_and_c = wrapped.iter_mut(
        c.Mut.set_item("b", c.item("a") + 1),
        c.Mut.set_item("c", c.item("a") + 2),
    )
    with_d = with_b_and_c.iter_mut(c.Mut.set_item("d", c.item("a") + 3))
    result = with_d.as_type(list).execute([1, 2, 3], debug=False)
    assert result == [
        {"a": number, "b": number + 1, "c": number + 2, "d": number + 3}
        for number in (1, 2, 3)
    ]

    # ``iter_mut`` applied to the output of an aggregate.
    grouped = c.group_by(c.item(0))
    pair_of_dicts = c(
        [
            {c.item(0): c.item(1).pipe(c.ReduceFuncs.Max(c.this))},
            {c.item(1).pipe(c.ReduceFuncs.Max(c.this)): c.item(0)},
        ]
    )
    add_x = c.Mut.set_item(
        "x",
        c.call_func(sum, c.this.call_method("values")) + c.input_arg("base"),
    )
    result = grouped.aggregate(
        pair_of_dicts.iter_mut(add_x).as_type(tuple)
    ).execute([(0, 1), (0, 2), (1, 7)], base=100, debug=False)
    assert result == [
        ({0: 2, "x": 102}, {2: 0, "x": 100}),
        ({1: 7, "x": 107}, {7: 1, "x": 101}),
    ]
Exemple #17
0
def test_namespaces():
    """Resolve ``LazyEscapedString`` names through nested ``Namespace``s."""
    # A lazy name with no namespace around it cannot be resolved.
    with pytest.raises(ValueError):
        LazyEscapedString("abc").execute([1])

    # Mapping the name to None is rejected as well.
    with pytest.raises(ValueError):
        unresolved = Namespace(
            LazyEscapedString("abc"), name_to_code={"abc": None}
        )
        unresolved.execute([1])

    passthrough = Namespace(
        LazyEscapedString("abc"), name_to_code={"abc": True}
    )
    assert passthrough.execute(1) == 1

    doubled_arg = Namespace(
        c.input_arg("abc") + LazyEscapedString("abc"),
        name_to_code={"abc": "abc"},
    )
    assert doubled_arg.execute(0.1, abc=2) == 4

    assert Namespace(c.item(1), {}).execute([0, 10]) == 10

    # Three nesting levels; the trailing comments give each term's value
    # for the input 1 and the defaults in the signature below.
    terms = (
        Namespace(LazyEscapedString("abc"), name_to_code={"abc": True})  # 1
        + LazyEscapedString("abc")  # 10
        + LazyEscapedString("foo")  # 1000
        + c.item() * 0.1  # 0.1
    )
    middle = Namespace(terms, name_to_code={"foo": "arg_foo2"})
    outer = Namespace(
        middle, name_to_code={"abc": "arg_abc", "foo": "arg_foo"}
    )
    converter = outer.gen_converter(
        debug=False,
        signature="data_, arg_abc=10, arg_foo=100, arg_foo2=1000",
    )
    assert converter(1) == 1011.1

    repeated = Namespace(
        c.call_func(list, (1,)).pipe(
            c.if_(
                c.this,
                c.this * LazyEscapedString("number"),
                c.this,
            )
        ),
        {"number": "3"},
    )
    assert repeated.execute(None) == [1, 1, 1]
Exemple #18
0
def test_pipe_single_call_functions():
    """A piped function result used twice must be computed only once."""

    class CustomException(Exception):
        pass

    def one_off_func():
        # Returns 1 on the first call and raises on any later call.
        if not one_off_func.first:
            raise CustomException
        one_off_func.first = False
        return 1

    one_off_func.first = True

    both_sums = c.call_func(one_off_func).pipe((c.this + 1, c.this + 2))
    converter = c.list_comp(both_sums).gen_converter(debug=False)
    assert converter([1]) == [(2, 3)]
Exemple #19
0
def test_join_with_complex_pipe():
    """Join an aggregated array with a derived copy of itself."""

    def f(values):
        return values + [1, 3]

    collected = c.aggregate(c.ReduceFuncs.Array(c.item("a")))
    joined = collected.pipe(
        c.join(c.this(), c.call_func(f, c.this()), c.LEFT == c.RIGHT)
    )
    pipeline = joined.iter(c.item(1)).as_type(list)

    rows = [{"a": number} for number in (1, 2, 3)]
    # 1 and 3 occur twice on the right-hand side, hence twice in the result.
    assert pipeline.execute(rows) == [1, 1, 2, 3, 3]
Exemple #20
0
def test_callfunc():
    """``call_func`` forwards positional and keyword arguments as given."""

    def func(i, abc=None):
        assert (i, abc) == (1, 2)

    converter = c.call_func(func, 1, abc=2).gen_converter()
    converter(100)

    length_of_input = c.this.len()
    assert length_of_input.execute([1, 2]) == 2
Exemple #21
0
def test_callfunc():
    """``call_func`` forwards positional and keyword arguments as given."""

    def func(i, abc=None):
        assert (i, abc) == (1, 2)

    converter = c.call_func(func, 1, abc=2).gen_converter()
    converter(100)
Exemple #22
0
def test_grouping():
    """Cover ``group_by``/``aggregate`` with custom and built-in reducers."""
    columns = ("name", "category", "debit", "balance")
    rows = (
        ("John", "Games", 10, 90),
        ("John", "Games", 200, -110),
        ("John", "Food", 30, -140),
        ("John", "Games", 300, 0),
        ("Nick", "Food", 7, 50),
        ("Nick", "Games", 18, 32),
        ("Bill", "Games", 18, 120),
    )
    data = [dict(zip(columns, row)) for row in rows]

    # --- one output tuple per name, mixing several reducer flavours ---
    by_name = c.group_by(c.item("name"))
    per_name_columns = (
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        # reducer given as a lambda, initial value from an input argument
        c.reduce(
            lambda a, b: a + b,
            c.item("debit"),
            initial=c.input_arg("arg1"),
        ),
        # reducer given as an inline expression, initial value as a callable
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
        ),
        # filtered reducer with a default coming from an input argument
        c.reduce(max, c.item("debit"), default=c.input_arg("arg1")).filter(
            c.call_func(lambda x: x < 0, c.item("balance"))
        ),
        # reducers nested inside function calls
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(max, c.item("debit"), default=0).filter(
                c.call_func(lambda x: x < 0, c.item("balance"))
            ),
            1000,
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
            ).filter(c.inline_expr("{0} > 0").pass_args(c.item("balance"))),
            -1,
        ),
        c.reduce(c.ReduceFuncs.MaxRow, c.item("debit")).item("balance"),
        c.reduce(c.ReduceFuncs.MinRow, c.item("debit")).item("balance"),
    )
    result = (
        by_name.aggregate(per_name_columns)
        .sort(key=lambda t: t[0].lower(), reverse=True)
        .execute(data, arg1=100, debug=False)
    )
    assert result == [
        ("Nick", "nick", "nick", 125, 125, 100, 0, -18, 32, 50),
        ("John", "john", "john", 640, 640, 200, 200000, -10, 0, 90),
        ("Bill", "bill", "bill", 118, 118, 100, 0, -18, 120, 120),
    ]

    # --- dict output; the same spec is also passed through ``c.dict`` ---
    aggregation = {
        c.call_func(
            tuple,
            c.reduce(c.ReduceFuncs.Array, c.item("name"), default=None),
        ): c.item("category").call_method("lower"),
        "count": c.reduce(c.ReduceFuncs.Count),
        "count_distinct": c.reduce(
            c.ReduceFuncs.CountDistinct, c.item("name")
        ),
        "array_agg_distinct": c.reduce(
            c.ReduceFuncs.ArrayDistinct,
            c.item("name"),
        ),
        "dict": c.reduce(
            c.ReduceFuncs.Dict, (c.item("debit"), c.item("name"))
        ),
    }
    result = (
        c.group_by(c.item("category"))
        .aggregate(aggregation)
        .execute(data, debug=False)
    )
    result2 = (
        c.group_by(c.item("category"))
        .aggregate(c.dict(*aggregation.items()))
        .execute(data, debug=False)
    )
    expected_by_category = [
        {
            "array_agg_distinct": ["John", "Nick", "Bill"],
            "count": 5,
            "count_distinct": 3,
            "dict": {10: "John", 18: "Bill", 200: "John", 300: "John"},
            ("John", "John", "John", "Nick", "Bill"): "games",
        },
        {
            "array_agg_distinct": ["John", "Nick"],
            "count": 2,
            "count_distinct": 2,
            "dict": {7: "Nick", 30: "John"},
            ("John", "Nick"): "food",
        },
    ]
    assert result == result2 == expected_by_category

    # --- aggregate over the whole input, then pipe the single result ---
    total_debit = c.aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit")))
    result3 = total_debit.pipe(
        c.inline_expr("{0} + {1}").pass_args(c.this(), c.this())
    ).execute(data, debug=False)
    assert result3 == 583 * 2

    # --- group by two fields at once ---
    by = c.item("name"), c.item("category")
    result4 = (
        c.group_by(*by)
        .aggregate(by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")),))
        .execute(data, debug=False)
    )
    assert result4 == [
        ("John", "Games", 510),
        ("John", "Food", 30),
        ("Nick", "Food", 7),
        ("Nick", "Games", 18),
        ("Bill", "Games", 18),
    ]

    # --- group by nothing ---
    result5 = (
        c.group_by()
        .aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit")))
        .execute(data, debug=False)
    )
    assert result5 == 583
Exemple #23
0
def test_table_base_init():
    """Cover the header options accepted by ``Table.from_rows``."""
    # Smoke run: add a column, then overwrite it; the output is not checked.
    summed = Table.from_rows([(1, -2), (2, -3)], ["a", "b"]).update(
        c=c.col("a") + c.col("b")
    )
    list(summed.update(c=c.call_func(abs, c.col("c"))).into_iter_rows(dict))

    # Header given as a list of names.
    table = Table.from_rows([(1, 2, 3), (2, 3, 4)], ["a", "b", "c"])
    result = list(table.into_iter_rows(include_header=True))
    assert result == [("a", "b", "c"), (1, 2, 3), (2, 3, 4)]

    # Header given as a name -> index mapping.
    table = Table.from_rows(
        [(1, 2, 3), (2, 3, 4)], {"a": 2, "b": 1, "c": 0}
    )
    result = list(table.into_iter_rows(dict))
    assert result == [
        {"a": 3, "b": 2, "c": 1},
        {"a": 4, "b": 3, "c": 2},
    ]

    # Header taken from the first row, which holds a duplicate name.
    input_data = [("a", "a", "b"), (1, 2, 3)]
    with pytest.raises(ValueError):
        Table.from_rows(input_data, True)
    with pytest.raises(ValueError):
        Table.from_rows(input_data, True, duplicate_columns="raise")

    table = Table.from_rows(input_data, True, duplicate_columns="keep")
    result = list(table.into_iter_rows(include_header=True))
    assert result == input_data

    table = Table.from_rows(input_data, True, duplicate_columns="drop")
    result = list(table.into_iter_rows(include_header=True))
    assert result == [("a", "b"), (1, 3)]

    table = Table.from_rows(input_data, True, duplicate_columns="mangle")
    result = list(table.into_iter_rows(include_header=True))
    assert result == [("a", "a_1", "b"), (1, 2, 3)]

    # No header: column names are generated.
    table = Table.from_rows(input_data, None)
    result = list(table.into_iter_rows(include_header=True))
    assert result == [
        ("COLUMN_0", "COLUMN_1", "COLUMN_2"),
        ("a", "a", "b"),
        (1, 2, 3),
    ]

    table = Table.from_rows(input_data, {"a": 0, "b": 1, "c": 2})
    result = list(table.into_iter_rows(include_header=True))
    assert result == [("a", "b", "c"), ("a", "a", "b"), (1, 2, 3)]

    table = Table.from_rows(
        input_data, {"a": 1, "b": 0, "c": 2}, skip_rows=1
    )
    result = list(table.into_iter_rows(dict))
    assert result == [{"a": 2, "b": 1, "c": 3}]

    # Rows given as dicts.
    table = Table.from_rows([{"a": 1, "b": 2, "c": 3}])
    result = list(table.into_iter_rows(dict))
    assert result == [{"a": 1, "b": 2, "c": 3}]

    table = Table.from_rows([{"a": 1, "b": 2, "c": 3}], header=False)
    result = list(table.into_iter_rows(dict))
    assert result == [{"COLUMN_0": 1, "COLUMN_1": 2, "COLUMN_2": 3}]

    # A non-tuple first row used as the header.
    table = Table.from_rows([1, (1,), (2,)], header=True)
    table = table.update(**{"abc": c.col("1").item(0)}).take("abc")
    assert list(table.into_iter_rows(dict)) == [{"abc": 1}, {"abc": 2}]

    # Smoke check only: scalar rows without a header.
    Table.from_rows(range(3), header=False).update(a=c.col("COLUMN_0"))

    # Rows given as plain strings.
    table = Table.from_rows(["name", "cde"], header=True)
    assert list(table.into_iter_rows(dict)) == [{"name": "cde"}]

    table = Table.from_rows(["name", "cde"], header=False)
    assert list(table.into_iter_rows(dict)) == [
        {"COLUMN_0": "name"},
        {"COLUMN_0": "cde"},
    ]
Exemple #24
0
def test_group_by_with_pipes():
    """Pipe reducer results (and pipe into reducers) inside ``aggregate``."""
    columns = ("name", "started_at", "stopped_at", "product")
    rows = (
        ("John", date(2020, 1, 1), None, "A"),
        ("John", date(2020, 1, 1), date(2020, 1, 2), "B"),
        ("John", date(2020, 1, 1), None, "C"),
        ("Nick", date(2020, 1, 1), None, "D"),
        ("Nick", date(2020, 2, 1), None, "D"),
        ("Nick", date(2020, 2, 1), None, "E"),
    )
    input_data = [dict(zip(columns, row)) for row in rows]

    # Distinct products that were not stopped, sorted and joined per group.
    by_name_and_start = c.group_by(c.item("name"), c.item("started_at"))
    output = by_name_and_start.aggregate(
        {
            "name": c.item("name"),
            "started_at": c.item("started_at"),
            "products": c.ReduceFuncs.ArrayDistinct(
                c.if_(
                    c.item("stopped_at").is_(None),
                    c.item("product"),
                    None,
                ),
            )
            .pipe(c.filter(c.this()))
            .pipe(
                c.call_func(sorted, c.this()).pipe(
                    c(", ").call_method("join", c.this())
                )
            )
            .pipe(c.this()),
        }
    ).execute(input_data)
    assert output == [
        {
            "name": "John",
            "products": "A, C",
            "started_at": date(2020, 1, 1),
        },
        {
            "name": "Nick",
            "products": "D",
            "started_at": date(2020, 1, 1),
        },
        {
            "name": "Nick",
            "products": "D, E",
            "started_at": date(2020, 2, 1),
        },
    ]

    # Same grouping, this time piping a field INTO a reducer and slicing
    # the reducer's result.
    reducer = c.ReduceFuncs.Array(c.this(), default=list)
    by_name_and_start = c.group_by(
        c.this()["name"],
        c.this()["started_at"],
    )
    output = by_name_and_start.aggregate(
        {
            "name": c.this()["name"],
            "started_at": c.this()["started_at"],
            "products": c.this()["product"].pipe(reducer)[:3],
        }
    ).execute(input_data)
    assert output == [
        {
            "name": "John",
            "products": ["A", "B", "C"],
            "started_at": date(2020, 1, 1),
        },
        {
            "name": "Nick",
            "products": ["D"],
            "started_at": date(2020, 1, 1),
        },
        {
            "name": "Nick",
            "products": ["D", "E"],
            "started_at": date(2020, 2, 1),
        },
    ]
def test_doc__index_word_count():
    """Count words across several "files" and keep the ``top_n`` most frequent.

    Every file name stands in for the single line the file would contain, so
    the test needs no real files on disk.
    """
    input_data = [
        "war-and-peace-1.txt",
        "war-and-peace-2.txt",
        "war-and-peace-3.txt",
        "war-and-peace-4.txt",
    ]

    # A real pipeline would open each file and yield its lines at this step.
    # Here each "file" yields exactly one line: its own name.
    lines_per_file = c.generator_comp(
        c.call_func(lambda name: [name], c.this())
    )

    # Removes one level of nesting from an iterable of iterables; the same
    # conversion is used twice in the pipeline below.
    flatten_once = c.call_func(chain.from_iterable, c.this())

    # For a single string: call ``finditer`` of the compiled pattern on it,
    # then take ``.group(0).lower()`` of every match.
    word_pattern = c.naive(re.compile(r"\w+"))
    lowercase_match = c.this().call_method("group", 0).call_method("lower")
    words_of_line = word_pattern.call_method("finditer", c.this()).pipe(
        c.generator_comp(lowercase_match)
    )

    # Apply ``words_of_line`` to every string of the incoming iterable,
    # producing a generator of generators of words.
    words_per_line = c.generator_comp(c.this().pipe(words_of_line))

    # Collapse the stream of words into one dict: word -> occurrence count.
    count_words = c.aggregate(
        c.ReduceFuncs.DictCount(c.this(), c.this(), default=dict)
    )

    # dict -> items sorted by count (descending) -> first ``top_n`` -> dict
    items_by_count_desc = c.this().call_method("items").sort(
        key=lambda pair: pair[1],
        reverse=True,
    )
    keep_top_n = items_by_count_desc.pipe(
        c.this()[: c.input_arg("top_n")]
    ).as_type(dict)

    # ``c.if_`` gets only a condition and the "true" branch; the "false"
    # branch is left at its default (the input data).
    truncate_if_requested = c.if_(
        c.input_arg("top_n").is_not(None),
        c.this().pipe(keep_top_n),
    )

    # Chain the stages left to right, starting from the line extraction.
    stages = (
        flatten_once,
        words_per_line,
        flatten_once,
        count_words,
        truncate_if_requested,
    )
    conversion = lines_per_file
    for stage in stages:
        conversion = conversion.pipe(stage)

    # The explicit signature is what gives ``top_n`` its default value.
    pipeline = conversion.gen_converter(
        debug=True,
        signature="data_, top_n=None",
    )

    top_words = pipeline(input_data, top_n=3)
    assert top_words == {"war": 4, "and": 4, "peace": 4}
Exemple #26
0
def test_pipes():
    """Exercise ``.pipe`` chaining, the pipe-length limit and non-mutation.

    Covers piping into conversions, into a plain callable with extra
    arguments, the ``max_pipe_length`` option and the fact that piping into
    an existing conversion leaves that conversion's own chain unchanged.
    """
    # square each item -> sum the squares (14) -> add the value stored under
    # the ``key_name`` input argument (10) -> emit the total twice in a list
    assert c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this())).pipe(
        c.call_func(sum, c.this())).pipe(
            c.call_func(
                lambda x, a: x + a,
                c.this(),
                c.naive({
                    "abc": 10
                }).item(c.input_arg("key_name")),
            )).pipe([c.this(), c.this()]).execute([1, 2, 3],
                                                  key_name="abc",
                                                  debug=False) == [
                                                      24,
                                                      24,
                                                  ]
    # ``pipe`` is given a plain callable plus an extra positional argument;
    # the result is then piped into a ``call_func`` that extracts the date
    assert c.item(0).pipe(datetime.strptime, "%Y-%m-%d").pipe(
        c.call_func(lambda dt: dt.date(),
                    c.this())).execute(["2019-01-01"],
                                       debug=False) == date(2019, 1, 1)

    # same as above, but the date is extracted via ``call_method``
    assert c.item(0).pipe(datetime.strptime, "%Y-%m-%d").pipe(
        c.this().call_method("date")).execute(["2019-01-01"],
                                              debug=False) == date(2019, 1, 1)

    with c.OptionsCtx() as options:
        max_pipe_length = options.max_pipe_length = 10
        # wrapping ``conv`` into one more pipe than the configured limit is
        # expected to raise somewhere inside the loop
        with pytest.raises(c.ConversionException):
            conv = c.this()
            for i in range(max_pipe_length + 1):
                conv = c.this().pipe(conv)

        # ``conv`` is whatever the loop above built before it raised;
        # presumably it is longer than the stricter limit of 5, so cloning it
        # under that limit is expected to raise as well -- TODO confirm
        with c.OptionsCtx() as options2, pytest.raises(c.ConversionException):
            options2.max_pipe_length = 5
            conv.clone()

    # pipes nested inside dict / list comprehensions; a falsy amount (0)
    # maps to None, otherwise the amount is cast to Decimal
    conv = c.dict_comp(
        c.item("name"),
        c.item("transactions").pipe(
            c.list_comp({
                "id":
                c.item(0).as_type(str),
                "amount":
                c.item(1).pipe(c.if_(c.this(),
                                     c.this().as_type(Decimal), None)),
            })),
    ).gen_converter(debug=True)
    assert conv([{
        "name": "test",
        "transactions": [(0, 0), (1, 10)]
    }]) == {
        "test": [
            {
                "id": "0",
                "amount": None
            },
            {
                "id": "1",
                "amount": Decimal("10")
            },
        ]
    }

    with c.OptionsCtx() as options:
        max_pipe_length = options.max_pipe_length = 10
        conv1 = c.item(0).pipe(c.item(1).pipe(c.item(2)))

        def measure_pipe_length(conv):
            # follow the ``_predefined_input`` links of ``conv`` and count
            # them, visiting at most ``max_pipe_length`` links
            length = 0
            for i in range(max_pipe_length):
                if conv._predefined_input is not None:
                    length += 1
                    conv = conv._predefined_input
                else:
                    break
            return length

        # piping into ``conv1`` many times (more than the limit) must leave
        # the measured chain length of ``conv1`` itself unchanged
        pipe_length_before = measure_pipe_length(conv1)
        for i in range(max_pipe_length + 20):
            c.generator_comp(c.this().pipe(conv1))
        pipe_length_after = measure_pipe_length(conv1)
        assert pipe_length_after == pipe_length_before
Exemple #27
0
def test_gen_converter():
    """Check ``gen_converter`` with ``method``, ``class_method`` and ``signature``.

    Converters are generated inside a class body and then called as instance
    methods, class methods and static methods; custom signatures with
    defaults, ``*args`` / ``**kwargs`` and starred ``data_`` are covered too.
    """
    class A:
        # class-level value, read through ``cls``
        x = 10

        def __init__(self):
            # instance-level value, read through ``self``
            self.x = 20

        # method=True: ``self`` is available as an input argument, ``cls``
        # is not (calling conv2 is expected to raise NameError below)
        conv1 = (c.this() +
                 c.input_arg("self").attr("x")).gen_converter(method=True)
        conv2 = (c.this() +
                 c.input_arg("cls").attr("x")).gen_converter(method=True)

        # class_method=True: ``cls`` is available, ``self`` is not
        # (calling conv4 is expected to raise NameError below)
        conv3 = classmethod(
            (c.this() +
             c.input_arg("cls").attr("x")).gen_converter(class_method=True))
        conv4 = classmethod(
            (c.this() +
             c.input_arg("self").attr("x")).gen_converter(class_method=True))

        # explicit signature: ``data_`` comes last and has a default
        conv5 = (c.this() + c.input_arg("self").attr("x") +
                 c.input_arg("n")).gen_converter(
                     signature="self, n=1000, data_=15")

        # explicit signature with variadic positional and keyword arguments
        conv6 = staticmethod(
            ((c.this() + c.call_func(sum, c.input_arg("args"))) *
             c.input_arg("kwargs").call_method("get", "multiplicator", 1)
             ).gen_converter(signature="data_, *args, **kwargs"))

    # 100 + instance attribute (20); 100 + class attribute (10)
    assert A().conv1(100) == 120
    assert A.conv3(100) == 110

    with pytest.raises(NameError):
        A().conv2(100)
    with pytest.raises(NameError):
        A.conv4(100)

    # data_ + self.x + n, using the signature defaults where not overridden
    assert A().conv5() == 1035
    assert A().conv5(data_=7) == 1027
    assert A().conv5(n=100) == 135

    # (data_ + sum(args)) * kwargs.get("multiplicator", 1)
    assert A.conv6(20) == 20
    assert A.conv6(20, 1, 2, 3) == 26
    assert A.conv6(20, 1, 2, 3, multiplicator=10) == 260

    # ``*data_``: all positional arguments become the input data
    assert (c.call_func(sum,
                        c.this()).gen_converter(signature="*data_")(1, 2,
                                                                    3) == 6)
    # NOTE(review): the lambda writes "A" into this module's globals before
    # summing; presumably this checks that the generated converter is not
    # affected by such a side effect -- confirm the intent
    assert (c.call_func(lambda i: globals().__setitem__("A", 1) or sum(i),
                        c.this()).gen_converter(signature="*data_")(1, 2,
                                                                    3) == 6)
    # ``**data_``: all keyword arguments become the input data (a dict)
    assert c({
        c.naive("-").call_method("join",
                                 c.this().call_method("keys")):
        c.call_func(sum,
                    c.this().call_method("values"))
    }).gen_converter(signature="**data_")(a=1, b=2, c=3) == {
        "a-b-c": 6
    }
    # input argument ``x`` is not part of the signature "*data_"
    with pytest.raises(c.ConversionException):
        c.call_func(sum,
                    c.input_arg("x")).gen_converter(signature="*data_")(1, 2,
                                                                        3)
    # ``method`` and ``class_method`` together are rejected
    with pytest.raises(c.ConversionException):
        c.this().gen_converter(method=True, class_method=True)
Exemple #28
0
def test_doc__index_deserialization():
    """Turn raw employee dicts into ``Employee`` objects keyed by id.

    Each raw record is first normalised into a dict of keyword arguments
    (names capitalised, dates parsed, salary converted to ``Decimal``,
    department mapped to an id) and then passed to ``Employee``.
    """
    # Minimal stand-in that just remembers the keyword arguments it received
    class Employee:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    raw_john = {
        "id": 1,
        "first_name": "john",
        "last_name": "black",
        "dob": None,
        "salary": "1,000.00",
        "department": "D1 ",
        "date": "2000-01-01",
    }
    raw_bob = {
        "id": 2,
        "first_name": "bob",
        "last_name": "wick",
        "dob": "1900-01-01",
        "salary": "1,001.00",
        "department": "D3 ",
        "date": "2000-01-01",
    }
    payload = {"objects": [raw_john, raw_bob]}

    # item lookups followed by a string method call
    capitalized_first = c.item("first_name").call_method("capitalize")
    capitalized_last = c.item("last_name").call_method("capitalize")
    stripped_department = c.item("department").call_method("strip")
    dob = c.item("dob")

    # "{} {}".format(<first name>, <last name>)
    display_name = c("{} {}").call_method(
        "format",
        capitalized_first,
        capitalized_last,
    )

    # ``strptime`` with the format fixed in advance, reduced to a ``date``
    to_date = c.call_func(
        datetime.strptime,
        c.this(),
        "%Y-%m-%d",
    ).call_method("date")

    # hardcoded lookup table: department code -> department id
    department_ids = c.naive({
        "D1": 10,
        "D2": 11,
        "D3": 12,
    })

    # conversions producing the keyword arguments of one ``Employee``
    employee_kwargs = {
        "id": c.item("id"),
        "first_name": capitalized_first,
        "last_name": capitalized_last,
        "full_name": display_name,
        # parse the date of birth only when it is truthy
        "date_of_birth": c.if_(dob, dob.pipe(to_date), None),
        "salary": c.call_func(
            Decimal,
            c.item("salary").call_method("replace", ",", ""),
        ),
        "department_id": department_ids.item(stripped_department),
        "date": c.item("date").pipe(to_date),
    }

    # a python expression template, formatted with the passed parameters
    build_employee = c.inline_expr("{employee_cls}(**{kwargs})").pass_args(
        employee_cls=Employee,
        kwargs=c.this(),
    )

    converter = (
        c.item("objects")
        .pipe(c.generator_comp(employee_kwargs))
        .pipe(c.dict_comp(c.item("id"), build_employee))
        .gen_converter(debug=True)
    )

    employees = converter(payload)
    expected_kwargs = {
        1: {
            "date": date(2000, 1, 1),
            "date_of_birth": None,
            "department_id": 10,
            "first_name": "John",
            "full_name": "John Black",
            "id": 1,
            "last_name": "Black",
            "salary": Decimal("1000.00"),
        },
        2: {
            "date": date(2000, 1, 1),
            "date_of_birth": date(1900, 1, 1),
            "department_id": 12,
            "first_name": "Bob",
            "full_name": "Bob Wick",
            "id": 2,
            "last_name": "Wick",
            "salary": Decimal("1001.00"),
        },
    }
    for employee_id, kwargs in expected_kwargs.items():
        assert employees[employee_id].kwargs == kwargs
Exemple #29
0
def test_grouping():
    """Exercise ``group_by(...).aggregate(...)`` with many reducer flavours.

    Covers tuple and dict aggregate shapes, custom ``c.reduce`` calls with
    ``initial`` / ``default`` / ``where``, built-in ``c.ReduceFuncs``,
    aggregation without grouping and two invalid combinations that must
    raise ``c.ConversionException``.
    """
    data = [
        {
            "name": "John",
            "category": "Games",
            "debit": 10,
            "balance": 90
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 200,
            "balance": -110
        },
        {
            "name": "John",
            "category": "Food",
            "debit": 30,
            "balance": -140
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 300,
            "balance": 0
        },
        {
            "name": "Nick",
            "category": "Food",
            "debit": 7,
            "balance": 50
        },
        {
            "name": "Nick",
            "category": "Games",
            "debit": 18,
            "balance": 32
        },
        {
            "name": "Bill",
            "category": "Games",
            "debit": 18,
            "balance": 120
        },
    ]
    # one output tuple per name; the positions are referenced as [0]..[9]
    # in the comments below and in the expected rows
    result = (c.group_by(c.item("name")).aggregate((
        # [0] the group-by field itself
        c.item("name"),
        # [1], [2] the group-by field transformed by a method / a function
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        # [3] sum of debits seeded with ``arg1`` (100 in this call)
        c.reduce(
            lambda a, b: a + b,
            c.item("debit"),
            initial=c.input_arg("arg1"),
            unconditional_init=True,
        ),
        # [4] same sum, expressed as an inline expression and seeded by a
        # callable returning 100
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
            unconditional_init=True,
        ),
        # [5] max debit among rows with a negative balance; ``arg1`` is the
        # result for groups without such rows
        c.reduce(
            max,
            c.item("debit"),
            prepare_first=lambda a: a,
            default=c.input_arg("arg1"),
            where=c.call_func(lambda x: x < 0, c.item("balance")),
        ),
        # [6] a reducer used as a function argument: the same max (with
        # default 0) multiplied by 1000
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                max,
                c.item("debit"),
                prepare_first=lambda a: a,
                default=0,
                where=c.call_func(lambda x: x < 0, c.item("balance")),
            ),
            1000,
        ),
        # [7] max debit among rows with balance > ``arg2`` (0 in this
        # call), negated
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
                where=c.inline_expr("{0} > {1}").pass_args(
                    c.item("balance"),
                    c.input_arg("arg2"),
                ),
            ),
            -1,
        ),
        # [8], [9] balance of the row with the largest / smallest debit
        c.reduce(c.ReduceFuncs.MaxRow, c.item("debit")).item("balance"),
        c.reduce(c.ReduceFuncs.MinRow, c.item("debit")).item("balance"),
    )).sort(key=lambda t: t[0].lower(), reverse=True).execute(data,
                                                              arg1=100,
                                                              arg2=0,
                                                              debug=False))

    # fmt: off
    assert result == [
        ('Nick', 'nick', 'nick', 125, 125, 100, 0, -18, 32, 50),
        ('John', 'john', 'john', 640, 640, 200, 200000, -10, 0, 90),
        ('Bill', 'bill', 'bill', 118, 118, 100, 0, -18, 120, 120),
    ]
    # fmt: on

    with pytest.raises(c.ConversionException):
        # grouping is done by "name" only, while the aggregate reads
        # "category" directly; presumably this is rejected because
        # "category" is neither a group-by field nor wrapped in a reducer
        result = (c.group_by(c.item("name")).aggregate((
            c.item("category"),
            c.reduce(c.ReduceFuncs.Sum, c.item("debit")),
        )).execute(data, debug=False))

    # dict-shaped aggregate; note that the first entry uses a reducer as the
    # KEY and the (transformed) group-by field as the VALUE
    aggregation = {
        c.call_func(
            tuple,
            c.ReduceFuncs.Array(c.item("name"), default=None),
        ):
        c.item("category").call_method("lower"),
        "count":
        c.ReduceFuncs.Count(),
        "max":
        c.ReduceFuncs.Max(c.item("debit")),
        "min":
        c.ReduceFuncs.Min(c.item("debit")),
        "count_distinct":
        c.ReduceFuncs.CountDistinct(c.item("name")),
        "array_agg_distinct":
        c.ReduceFuncs.ArrayDistinct(c.item("name")),
        "dict":
        c.ReduceFuncs.Dict(c.item("debit"), c.item("name")),
    }
    # the same aggregate passed as a dict literal and as ``c.dict`` built
    # from key/value pairs; both must produce identical results
    result = (c.group_by(c.item("category")).aggregate(aggregation).execute(
        data, debug=False))
    result2 = (c.group_by(c.item("category")).aggregate(
        c.dict(*aggregation.items())).execute(data, debug=False))
    # fmt: off
    assert result == result2 == [
        {
            'array_agg_distinct': ['John', 'Nick', 'Bill'],
            'count': 5,
            'count_distinct': 3,
            'dict': {
                10: 'John',
                18: 'Bill',
                200: 'John',
                300: 'John'
            },
            'max': 300,
            'min': 10,
            ('John', 'John', 'John', 'Nick', 'Bill'): 'games'
        }, {
            'array_agg_distinct': ['John', 'Nick'],
            'count': 2,
            'count_distinct': 2,
            'dict': {
                7: 'Nick',
                30: 'John'
            },
            'max': 30,
            'min': 7,
            ('John', 'Nick'): 'food'
        }
    ]
    # fmt: on
    # aggregate without grouping, then pipe the single total (583) into an
    # expression that adds it to itself
    result3 = (c.aggregate(c.ReduceFuncs.Sum(c.item("debit"))).pipe(
        c.inline_expr("{0} + {1}").pass_args(c.this(),
                                             c.this())).execute(data,
                                                                debug=False))
    assert result3 == 583 * 2

    # group by two fields and reuse the same conversions in the aggregate
    by = c.item("name"), c.item("category")
    result4 = (c.group_by(
        *by).aggregate(by + (c.ReduceFuncs.Sum(c.item("debit")), )).execute(
            data, debug=False))
    # fmt: off
    assert result4 == [('John', 'Games', 510), ('John', 'Food', 30),
                       ('Nick', 'Food', 7), ('Nick', 'Games', 18),
                       ('Bill', 'Games', 18)]
    # fmt: on
    # ``group_by()`` without fields yields one aggregated value, not a list
    result5 = (c.group_by().aggregate(c.ReduceFuncs.Sum(
        c.item("debit"))).execute(data, debug=False))
    assert result5 == 583

    with pytest.raises(c.ConversionException):
        # ``by`` (a tuple) is passed as ONE group-by field, while the
        # aggregate uses the separate items of this tuple
        (c.group_by(by).aggregate(
            by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")), )).execute(
                data, debug=False))
def test_doc__index_deserialization():
    """Turn raw employee dicts into ``Employee`` objects keyed by id.

    Small reusable conversions (strip, capitalize, decimal and date parsing)
    are defined once and piped into wherever a field needs them.
    """
    # Minimal stand-in that just remembers the keyword arguments it received
    class Employee:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    raw_john = {
        "id": 1,
        "first_name": "john",
        "last_name": "black",
        "dob": None,
        "salary": "1,000.00",
        "department": "D1 ",
        "date": "2000-01-01",
    }
    raw_bob = {
        "id": 2,
        "first_name": "bob",
        "last_name": "wick",
        "dob": "1900-01-01",
        "salary": "1,001.00",
        "department": "D3 ",
        "date": "2000-01-01",
    }
    input_data = {"objects": [raw_john, raw_bob]}

    # building blocks, each one operating on "the current value"
    strip = c.this.call_method("strip")
    capitalize = c.this.call_method("capitalize")
    to_decimal = c.this.call_method("replace", ",", "").as_type(Decimal)
    to_date = c.call_func(
        datetime.strptime,
        c.this,
        "%Y-%m-%d",
    ).call_method("date")
    # ``to_date`` applied only when the value is truthy, None otherwise
    to_optional_date = c.if_(c.this, to_date, None)

    given_name = c.item("first_name").pipe(capitalize)
    family_name = c.item("last_name").pipe(capitalize)
    # "{} {}".format(<first name>, <last name>)
    display_name = c("{} {}").call_method("format", given_name, family_name)

    # hardcoded lookup table: department code -> department id
    department_ids = c.naive({
        "D1": 10,
        "D2": 11,
        "D3": 12,
    })
    department_code = c.item("department").pipe(strip)

    # conversions producing the keyword arguments of one ``Employee``
    employee_kwargs = {
        "id": c.item("id"),
        "first_name": given_name,
        "last_name": family_name,
        "full_name": display_name,
        "date_of_birth": c.item("dob").pipe(to_optional_date),
        "salary": c.item("salary").pipe(to_decimal),
        "department_id": department_ids.item(department_code),
        "date": c.item("date").pipe(to_date),
    }

    # Employee(**<current dict>)
    to_employee = c.apply_func(
        Employee,
        args=(),
        kwargs=c.this,
    )

    # debug=True prints the generated code
    conv = (
        c.item("objects")
        .pipe(c.generator_comp(employee_kwargs))
        .pipe(c.dict_comp(c.item("id"), to_employee))
        .gen_converter(debug=True)
    )

    result = conv(input_data)
    expected_kwargs = {
        1: {
            "date": date(2000, 1, 1),
            "date_of_birth": None,
            "department_id": 10,
            "first_name": "John",
            "full_name": "John Black",
            "id": 1,
            "last_name": "Black",
            "salary": Decimal("1000.00"),
        },
        2: {
            "date": date(2000, 1, 1),
            "date_of_birth": date(1900, 1, 1),
            "department_id": 12,
            "first_name": "Bob",
            "full_name": "Bob Wick",
            "id": 2,
            "last_name": "Wick",
            "salary": Decimal("1001.00"),
        },
    }
    for employee_id, kwargs in expected_kwargs.items():
        assert result[employee_id].kwargs == kwargs