Example #1
0
def test_caching_conversion():
    """Exercise ``c.if_`` on top of a function that only succeeds once.

    ``f`` returns its argument while ``f.first_time`` is set and raises
    ``CustomException`` on every later call, so each check below pins how
    often the generated converter ends up calling it.
    """

    class CustomException(Exception):
        pass

    def f(number):
        # One-shot guard: the flag is consumed by the first successful call.
        if f.first_time:
            f.first_time = False
            return number
        raise CustomException

    f.first_time = True

    source = c.call_func(f, c.this())
    branch = c.if_(c.this(), c.this() + 1, c.this() + 2)
    conv = source.pipe(branch).gen_converter()

    # f(0) succeeds once; 0 is falsy, so the result is 0 + 2.
    assert conv(0) == 2
    # The flag is cleared by now, so a second run has to raise.
    with pytest.raises(CustomException):
        assert conv(0) == 2

    # Re-arm the guard; 1 is truthy, so the result is 1 + 1.
    f.first_time = True
    assert conv(1) == 2

    # The previous call consumed the flag again, so this run raises as well.
    with pytest.raises(CustomException):
        uncached_source = c.call_func(f, c.this())
        uncached_branch = c.if_(
            c.this(), c.this() + 1, c.this() + 2, no_input_caching=True
        )
        uncached_source.pipe(uncached_branch).execute(0)
Example #2
0
def test_complex_labeling():
    """Chain three pipes that define labels and read them back later on."""
    # Stage 1: remember the raw input under the "input" label.
    labeled_input = c.this().add_label("input")

    # Stage 2: keep multiples of three; label the type of what was piped in.
    multiples_of_three = labeled_input.pipe(
        c.filter(c.this() % 3 == 0),
        label_input={"input_type": c.call_func(type, c.this())},
    )

    # Stage 3: stringify; the separator label depends on the length label.
    stringified = multiples_of_three.pipe(
        c.list_comp(c.this().as_type(str)),
        label_output={
            "list_length": c.call_func(len, c.this()),
            "separator": c.if_(c.label("list_length") > 10, ",", ";"),
        },
    )

    # Stage 4: assemble the final dict from the labels collected so far.
    summary = stringified.pipe(
        {
            "result": c.label("separator").call_method("join", c.this()),
            "input_type": c.label("input_type"),
            "input_data": c.label("input"),
        }
    )
    conv1 = summary.gen_converter(debug=False)

    # Ten items are not "> 10", so ";" is the separator.
    short_result = conv1(range(30))
    assert short_result == {
        "result": "0;3;6;9;12;15;18;21;24;27",
        "input_type": range,
        "input_data": range(0, 30),
    }

    # Fourteen items switch the separator to ",".
    long_result = conv1(range(40))
    assert long_result == {
        "result": "0,3,6,9,12,15,18,21,24,27,30,33,36,39",
        "input_type": range,
        "input_data": range(0, 40),
    }
def test_nested_group_by():
    """Group rows by their first item and sum the nested lists per group.

    Both assertions expect the same totals. The second one wraps every step
    in ``c.if_`` calls whose three arguments are identical and runs with
    ``debug=True``.
    """
    data = [
        [0, [1, 2, 3]],
        [0, [4, 5, 6]],
        [1, [2, 3, 4]],
    ]
    # key 0: 1 + 2 + 3 + 4 + 5 + 6 == 21; key 1: 2 + 3 + 4 == 9
    assert c.group_by(c.item(0)).aggregate(
        (
            c.item(0),
            c.ReduceFuncs.Sum(
                c.item(1).pipe(c.aggregate(c.ReduceFuncs.Sum(c.this())))
            ),
        )
    ).execute(data, debug=False) == [
        (0, 21),
        (1, 9),
    ]
    # Inner aggregate reused twice below, as both members of a tuple.
    agg_conv = c.aggregate(c.ReduceFuncs.Sum(c.this()))
    assert c.group_by(c.item(0)).aggregate(
        (
            c.item(0),
            c.if_(c.item(1), c.item(1), c.item(1),).pipe(
                c.if_(c.this(), c.this(), c.this(),).pipe(
                    c.ReduceFuncs.Sum(
                        c.if_(
                            c.this(),
                            c.this(),
                            c.this(),
                        )
                        # build a pair of identical sums, then take the second
                        .pipe((agg_conv, agg_conv))
                        .pipe(c.item(1))
                    ).pipe(
                        c.if_(
                            c.this(),
                            c.this(),
                            c.this(),
                        )
                    ),
                )
            ),
        )
    ).execute(data, debug=True) == [
        (0, 21),
        (1, 9),
    ]
Example #4
0
def test_pipes():
    """Run several ``pipe`` chains end to end and compare their results."""
    # Square every item, sum the squares, add the value looked up by the
    # ``key_name`` argument, then duplicate the result into a list.
    squares = c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this))
    sum_of_squares = squares.pipe(c.call_func(sum, c.this))
    shifted = sum_of_squares.pipe(
        c.call_func(
            lambda x, a: x + a,
            c.this,
            c.naive({"abc": 10}).item(c.input_arg("key_name")),
        )
    )
    duplicated = shifted.pipe([c.this, c.this])
    # 1 + 4 + 9 == 14, plus the 10 stored under "abc"
    outcome = duplicated.execute([1, 2, 3], key_name="abc", debug=False)
    assert outcome == [24, 24]

    # Piping into a plain callable with extra arguments, date taken by lambda.
    via_lambda = (
        c.item(0)
        .pipe(datetime.strptime, "%Y-%m-%d")
        .pipe(c.call_func(lambda dt: dt.date(), c.this))
    )
    parsed = via_lambda.execute(["2019-01-01"], debug=False)
    assert parsed == date(2019, 1, 1)

    # Same parsing, date taken through ``call_method``.
    via_method = (
        c.item(0)
        .pipe(datetime.strptime, "%Y-%m-%d")
        .pipe(c.this.call_method("date"))
    )
    parsed = via_method.execute(["2019-01-01"], debug=False)
    assert parsed == date(2019, 1, 1)

    # A pipe nested inside a dict comprehension value.
    by_name = c.dict_comp(
        c.item("name"),
        c.item("transactions").pipe(
            c.list_comp(
                {
                    "id": c.item(0).as_type(str),
                    "amount": c.item(1).pipe(
                        c.if_(c.this, c.this.as_type(Decimal), None)
                    ),
                }
            )
        ),
    )
    conv = by_name.gen_converter(debug=False)
    accounts = [{"name": "test", "transactions": [(0, 0), (1, 10)]}]
    assert conv(accounts) == {
        "test": [
            {"id": "0", "amount": None},
            {"id": "1", "amount": Decimal("10")},
        ]
    }

    # Filter/sort/cast chained after a pipe: falsy 0 is dropped, rest sorted.
    truthy_sorted = (
        c.this.pipe(lambda it: it).filter(c.this).sort().as_type(list)
    )
    assert truthy_sorted.execute((2, 1, 0)) == [1, 2]
Example #5
0
def test_labels():
    """Define a label in one pipe step and read it in a later one."""
    # y["abc"] is labeled "abc" before being replaced by x["cde"] + 10.
    labeled_abc = c.input_arg("y").item("abc").add_label("abc")
    cde_plus_ten = c.input_arg("x").pipe(
        c.inline_expr("{cde} + 10").pass_args(cde=c.this().item("cde"))
    )
    after_first_pipe = labeled_abc.pipe(cde_plus_ten)

    # The last step adds the labeled value back to the piped one.
    add_labeled = c.inline_expr("{this} + {abc}").pass_args(
        this=c.this(), abc=c.label("abc")
    )
    truthy_branch = after_first_pipe.pipe(add_labeled)

    conv1 = c.if_(1, truthy_branch, 2).gen_converter(debug=False)
    # (2 + 10) + 3 == 15
    assert conv1(data_=1, x={"cde": 2}, y={"abc": 3}) == 15

    # Labels inside comprehensions only have to run without raising.
    lazy_items = c.generator_comp(c.this().add_label("a")).execute([1, 2])
    list(lazy_items)
    c.list_comp(c.this().add_label("a")).execute([1, 2])
Example #6
0
def test_namespaces():
    """Check how ``Namespace`` supplies code for ``LazyEscapedString`` names.

    The cases cover a lazy name used without a namespace, a name mapped to
    ``None``, a name mapped to ``True``, a name mapped to an argument name,
    nested namespaces and a lazy name used behind a pipe.
    """
    # A lazy name executed outside of any Namespace is expected to raise.
    with pytest.raises(ValueError):
        LazyEscapedString("abc").execute([1])

    # Mapping the name to None is expected to raise as well.
    with pytest.raises(ValueError):
        Namespace(LazyEscapedString("abc"), name_to_code={
            "abc": None
        }).execute([1])

    # With "abc" mapped to True the conversion returns the input itself.
    assert (Namespace(LazyEscapedString("abc"), name_to_code={
        "abc": True
    }).execute(1) == 1)
    # "abc" mapped to the code "abc": input_arg (2) + lazy name (2) == 4.
    assert (Namespace(
        c.input_arg("abc") + LazyEscapedString("abc"),
        name_to_code={
            "abc": "abc"
        },
    ).execute(0.1, abc=2) == 4)
    # An empty mapping leaves an ordinary conversion untouched.
    assert Namespace(c.item(1), {}).execute([0, 10]) == 10
    # Nested namespaces; the trailing comments give each term's value for
    # input 1 and the signature defaults, adding up to 1011.1.
    assert (Namespace(
        Namespace(
            Namespace(LazyEscapedString("abc"), name_to_code={"abc": True
                                                              })  # 1
            + LazyEscapedString("abc")  # 10
            + LazyEscapedString("foo")  # 1000
            + c.item() * 0.1,  # 0.1,
            name_to_code={"foo": "arg_foo2"},
        ),
        name_to_code={
            "abc": "arg_abc",
            "foo": "arg_foo"
        },
    )).gen_converter(
        debug=False,
        signature="data_, arg_abc=10, arg_foo=100, arg_foo2=1000")(1) == 1011.1

    # A lazy name used inside a piped c.if_: [1] * 3 == [1, 1, 1].
    assert (Namespace(
        c.call_func(list, (1, )).pipe(
            c.if_(
                c.this,
                c.this * LazyEscapedString("number"),
                c.this,
            )),
        {
            "number": "3"
        },
    ).execute(None) == [1, 1, 1])
Example #7
0
def test_if():
    """Cover ``c.if_`` standalone, inside comprehensions and behind a pipe."""
    doubled_or_shifted = c.if_(True, c.this() * 2, c.this() - 1000)
    conv1 = doubled_or_shifted.gen_converter(debug=False)
    assert conv1(0) == -1000
    assert conv1(10) == 20

    # The same even/odd branching, with and without input caching.
    cached_branch = c.if_(c.this() % 2 == 0, c.this() * 10, c.this() * 100)
    conv2 = c.list_comp(cached_branch).gen_converter(debug=False)
    uncached_branch = c.if_(
        c.this() % 2 == 0,
        c.this() * 10,
        c.this() * 100,
        no_input_caching=True,
    )
    conv3 = c.list_comp(uncached_branch).gen_converter(debug=False)
    numbers = [1, 2, 3, 4]
    assert conv2(numbers) == [100, 20, 300, 40]
    assert conv3(numbers) == [100, 20, 300, 40]

    # The branching sees the piped value (item - 5), not the original item.
    shifted_branch = (c.this() - 5).pipe(
        c.if_(c.this() % 2 == 0, c.this() * 10, c.this() * 100)
    )
    conv4 = c.list_comp(shifted_branch).gen_converter(debug=False)
    assert conv4([1, 2, 3, 4]) == [-40, -300, -20, -100]

    # An argument-less c.if_ hands the input back for both 0 and 1.
    conv5 = c.if_().gen_converter(debug=False)
    assert conv5(0) == 0
    assert conv5(1) == 1

    # Truthy items become None, falsy items are returned unchanged.
    none_if_truthy = c.if_(c.this(), None, c.this(), no_input_caching=True)
    conv6 = c.list_comp(none_if_truthy).gen_converter(debug=False)
    mixed_items = [1, False, 2, None, 3, 0]
    assert conv6(mixed_items) == [None, False, None, None, None, 0]

    # Code snippets PipeConversion is expected to classify as simple ...
    simple_snippets = (
        "'abc'",
        "0",
        "None",
        "True",
        "False",
        "a[1]",
        "a['1']",
        "a[1][2]",
    )
    for snippet in simple_snippets:
        assert PipeConversion.input_is_simple(snippet)

    # ... and the ones it is expected to reject.
    complex_snippets = ("a[1][2][3]", "1 + 1", "x.a", "x()")
    for snippet in complex_snippets:
        assert not PipeConversion.input_is_simple(snippet)
Example #8
0
def test_reducer_inlining(dict_series):
    """Check that the reducer's default factory runs at most once.

    ``f`` raises as soon as it is called more often than
    ``f.max_number_of_calls``, so the converter only returns ``f``'s empty
    list if the factory was called a single time.
    """

    def f():
        # Count the call first, then refuse once the budget is exceeded.
        f.number_of_calls += 1
        if f.number_of_calls <= f.max_number_of_calls:
            return []
        raise Exception

    f.number_of_calls = 0
    f.max_number_of_calls = 1

    names_with_negative_value = c.ReduceFuncs.Array(
        c.item("name"),
        default=f,
        where=c.item("value") < 0,
    )
    # Both branches return the input, so the reducer result is used twice.
    passthrough = c.if_(if_true=c.this(), if_false=c.this())
    converter = c.aggregate(
        names_with_negative_value.pipe(passthrough)
    ).gen_converter(debug=False)

    assert converter(dict_series) == []
Example #9
0
def test_pipe_conversion():
    """Build ``PipeConversion`` objects directly and through ``.pipe``."""
    from convtools import conversion as c
    from convtools.base import PipeConversion

    # The input is ignored: the naive list feeds the item lookup.
    second_of_naive = PipeConversion(c.naive([1, 2, 3]), c.item(1))
    assert second_of_naive.execute(None) == 2

    # Two lookups in a row: data["key1"]["key2"].
    nested_lookup = PipeConversion(c.item("key1"), c.item("key2"))
    looked_up = nested_lookup.execute({"key1": {"key2": 3}}, debug=False)
    assert looked_up == 3

    # Increment every item, then keep the ones above 3.
    incremented_above_three = c.this.pipe(c.list_comp(c.this + 1)).filter(
        c.this > 3
    )
    kept = incremented_above_three.execute([1, 2, 3, 4, 5, 6], debug=False)
    assert kept == [4, 5, 6, 7]

    # A reducer piped into c.if_ only has to compile without raising.
    keys_or_empty = c.ReduceFuncs.Array(c.item("key"), default=list).pipe(
        c.if_(
            c.call_func(any, c.generator_comp(c.this.is_(None))),
            c.call_func(list),
            c.this,
        )
    )
    c.aggregate(keys_or_empty).gen_converter(debug=False)
def test_group_by_key_edge_case():
    """Cover piping into reducers: rejected label usage and valid pipes."""
    # Labeling the input of a pipe that leads into a reducer must raise.
    with pytest.raises(ValueError):
        labeled_row = c.this.add_label("row")
        labeled_row.pipe(c.ReduceFuncs.Count())
    with pytest.raises(ValueError):
        labeled_row_plus_one = c.this.add_label("row") + 1
        labeled_row_plus_one.pipe(c.ReduceFuncs.Count() + 1)
    with pytest.raises(ValueError):
        c.this.pipe(c.ReduceFuncs.Count(), label_input="row")

    data = [(0, 1), (1, 2)]

    # Mean of the second items per group, plus 10: 1 + 10 and 2 + 10.
    second_item = c.if_(c.item(1), c.item(1), c.item(1))
    mean_plus_ten = (
        c.ReduceFuncs.Sum(c.this) / c.ReduceFuncs.Count(c.this)
    ).pipe(c.this + 10)
    mean_converter = (
        c.group_by(c.item(0))
        .aggregate(second_item.pipe(mean_plus_ten))
        .gen_converter(debug=False)
    )
    assert mean_converter(data) == [11, 12]

    # Labeling the output of a pipe into a reducer is accepted.
    labeled_sum = c.item(1).pipe(
        c.ReduceFuncs.Sum(c.this), label_output="count"
    )
    sum_converter = (
        c.group_by(c.item(0)).aggregate(labeled_sum).gen_converter(debug=False)
    )
    assert sum_converter(data) == [1, 2]
def test_group_by_key_edge_case():
    """Cover piping into reducers: rejected label usage and valid pipes."""
    # Labeling the input of a pipe that leads into a reducer must raise.
    with pytest.raises(ValueError):
        labeled_row = c.this().add_label("row")
        labeled_row.pipe(c.ReduceFuncs.Count())
    with pytest.raises(ValueError):
        labeled_row_plus_one = c.this().add_label("row") + 1
        labeled_row_plus_one.pipe(c.ReduceFuncs.Count() + 1)
    with pytest.raises(ValueError):
        c.this().pipe(c.ReduceFuncs.Count(), label_input="row")

    data = [(0, 1), (1, 2)]

    # TODO: try to test nested pipe (double overwrites)
    # TODO: reducer + label then pipe to somewhere

    # Mean of the second items per group, plus 10: 1 + 10 and 2 + 10.
    second_item = c.if_(c.item(1), c.item(1), c.item(1))
    mean_plus_ten = (
        c.ReduceFuncs.Sum(c.this()) / c.ReduceFuncs.Count(c.this())
    ).pipe(c.this() + 10)
    mean_converter = (
        c.group_by(c.item(0))
        .aggregate(second_item.pipe(mean_plus_ten))
        .gen_converter(debug=False)
    )
    assert mean_converter(data) == [11, 12]

    # Labeling the output of a pipe into a reducer is accepted.
    labeled_sum = c.item(1).pipe(
        c.ReduceFuncs.Sum(c.this()), label_output="count"
    )
    sum_converter = (
        c.group_by(c.item(0)).aggregate(labeled_sum).gen_converter(debug=False)
    )
    assert sum_converter(data) == [1, 2]
def test_doc__index_deserialization():
    """Turn raw employee dicts into ``Employee`` objects keyed by id."""

    class Employee:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    john = {
        "id": 1,
        "first_name": "john",
        "last_name": "black",
        "dob": None,
        "salary": "1,000.00",
        "department": "D1 ",
        "date": "2000-01-01",
    }
    bob = {
        "id": 2,
        "first_name": "bob",
        "last_name": "wick",
        "dob": "1900-01-01",
        "salary": "1,001.00",
        "department": "D3 ",
        "date": "2000-01-01",
    }
    input_data = {"objects": [john, bob]}

    # small reusable conversions, each working on "the current value"
    c_strip = c.this.call_method("strip")
    c_capitalize = c.this.call_method("capitalize")
    c_decimal = c.this.call_method("replace", ",", "").as_type(Decimal)
    c_date = c.call_func(datetime.strptime, c.this, "%Y-%m-%d").call_method(
        "date"
    )
    # c_date is reused here: parse only when the value is truthy
    c_optional_date = c.if_(c.this, c_date, None)

    first_name = c.item("first_name").pipe(c_capitalize)
    last_name = c.item("last_name").pipe(c_capitalize)
    # "{} {}".format(first_name, last_name)
    full_name = c("{} {}").call_method("format", first_name, last_name)

    # a hardcoded mapping, looked up by the stripped "department" value
    department_ids = c.naive({"D1": 10, "D2": 11, "D3": 12})

    row_schema = {
        "id": c.item("id"),
        "first_name": first_name,
        "last_name": last_name,
        "full_name": full_name,
        "date_of_birth": c.item("dob").pipe(c_optional_date),
        "salary": c.item("salary").pipe(c_decimal),
        "department_id": department_ids.item(
            c.item("department").pipe(c_strip)
        ),
        "date": c.item("date").pipe(c_date),
    }
    parsed_rows = c.item("objects").pipe(c.generator_comp(row_schema))

    employees_by_id = c.dict_comp(
        c.item("id"),  # key
        c.apply_func(Employee, args=(), kwargs=c.this),  # value
    )
    # debug=True so that the generated code gets printed
    conv = parsed_rows.pipe(employees_by_id).gen_converter(debug=True)

    result = conv(input_data)

    expected_john = {
        "date": date(2000, 1, 1),
        "date_of_birth": None,
        "department_id": 10,
        "first_name": "John",
        "full_name": "John Black",
        "id": 1,
        "last_name": "Black",
        "salary": Decimal("1000.00"),
    }
    assert result[1].kwargs == expected_john

    expected_bob = {
        "date": date(2000, 1, 1),
        "date_of_birth": date(1900, 1, 1),
        "department_id": 12,
        "first_name": "Bob",
        "full_name": "Bob Wick",
        "id": 2,
        "last_name": "Wick",
        "salary": Decimal("1001.00"),
    }
    assert result[2].kwargs == expected_bob
Example #13
0
def test_doc__index_deserialization():
    """Turn raw employee dicts into ``Employee`` objects keyed by id."""

    class Employee:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    john = {
        "id": 1,
        "first_name": "john",
        "last_name": "black",
        "dob": None,
        "salary": "1,000.00",
        "department": "D1 ",
        "date": "2000-01-01",
    }
    bob = {
        "id": 2,
        "first_name": "bob",
        "last_name": "wick",
        "dob": "1900-01-01",
        "salary": "1,001.00",
        "department": "D3 ",
        "date": "2000-01-01",
    }
    input_data = {"objects": [john, bob]}

    # item lookups followed by a method call on the looked-up value
    department = c.item("department").call_method("strip")
    first_name = c.item("first_name").call_method("capitalize")
    last_name = c.item("last_name").call_method("capitalize")

    # "{} {}".format(first_name, last_name)
    full_name = c("{} {}").call_method("format", first_name, last_name)
    date_of_birth = c.item("dob")

    # strptime with a fixed format, followed by ``.date()``
    parse_date = c.call_func(
        datetime.strptime, c.this(), "%Y-%m-%d"
    ).call_method("date")

    # a hardcoded mapping, looked up by the stripped "department" value
    department_ids = c.naive({"D1": 10, "D2": 11, "D3": 12})

    row_schema = {
        "id": c.item("id"),
        "first_name": first_name,
        "last_name": last_name,
        "full_name": full_name,
        # parse the date of birth only when it is truthy
        "date_of_birth": c.if_(
            date_of_birth,
            date_of_birth.pipe(parse_date),
            None,
        ),
        "salary": c.call_func(
            Decimal,
            c.item("salary").call_method("replace", ",", ""),
        ),
        "department_id": department_ids.item(department),
        "date": c.item("date").pipe(parse_date),
    }
    parsed_rows = c.item("objects").pipe(c.generator_comp(row_schema))

    # the value is a python code expression formatted with the passed
    # parameters
    build_employee = c.inline_expr("{employee_cls}(**{kwargs})").pass_args(
        employee_cls=Employee,
        kwargs=c.this(),
    )
    employees_by_id = c.dict_comp(c.item("id"), build_employee)
    conv = parsed_rows.pipe(employees_by_id).gen_converter(debug=True)

    result = conv(input_data)

    expected_john = {
        "date": date(2000, 1, 1),
        "date_of_birth": None,
        "department_id": 10,
        "first_name": "John",
        "full_name": "John Black",
        "id": 1,
        "last_name": "Black",
        "salary": Decimal("1000.00"),
    }
    assert result[1].kwargs == expected_john

    expected_bob = {
        "date": date(2000, 1, 1),
        "date_of_birth": date(1900, 1, 1),
        "department_id": 12,
        "first_name": "Bob",
        "full_name": "Bob Wick",
        "id": 2,
        "last_name": "Wick",
        "salary": Decimal("1001.00"),
    }
    assert result[2].kwargs == expected_bob
Example #14
0
def test_nested_group_by():
    """Nest aggregates inside ``group_by`` reducers and piped conversions.

    Covers a plain nested sum, the same sum wrapped in ``c.if_`` calls whose
    three arguments are identical, an aggregate piped after an ``Array``
    reducer, and a ``First`` reducer fed by a generator that raises after
    its first item.
    """
    data = [
        [0, [1, 2, 3]],
        [0, [4, 5, 6]],
        [1, [2, 3, 4]],
    ]
    # key 0: 1 + 2 + 3 + 4 + 5 + 6 == 21; key 1: 2 + 3 + 4 == 9
    assert c.group_by(c.item(0)).aggregate((
        c.item(0),
        c.ReduceFuncs.Sum(
            c.item(1).pipe(c.aggregate(c.ReduceFuncs.Sum(c.this())))),
    )).execute(data, debug=False) == [
        (0, 21),
        (1, 9),
    ]
    # Inner aggregate reused twice below, as both members of a tuple.
    agg_conv = c.aggregate(c.ReduceFuncs.Sum(c.this()))
    assert c.group_by(c.item(0)).aggregate((
        c.item(0),
        c.if_(
            c.item(1),
            c.item(1),
            c.item(1),
        ).pipe(
            c.if_(
                c.this(),
                c.this(),
                c.this(),
            ).pipe(
                c.ReduceFuncs.Sum(
                    c.if_(
                        c.this(),
                        c.this(),
                        c.this(),
                    ).pipe((agg_conv, agg_conv)).pipe(c.item(1))).pipe(
                        c.if_(
                            c.this(),
                            c.this(),
                            c.this(),
                        )), )),
    )).execute(data, debug=False) == [
        (0, 21),
        (1, 9),
    ]

    summer = c.aggregate(c.ReduceFuncs.Sum(c.this()))

    # Per group: the first "value1" / "value2" among rows containing that
    # key; a truthy "value2" is additionally summed.
    merger = c.aggregate({
        "value1":
        c.ReduceFuncs.First(c.item("value1"), where=c("value1").in_(c.this())),
        "value2":
        c.ReduceFuncs.First(c.item("value2"),
                            where=c("value2").in_(c.this())).pipe(
                                c.if_(c.this(),
                                      c.this().pipe(summer))),
    })
    # Collect each group's rows into an array, then run ``merger`` on it.
    converter = (c.group_by(c.item("id_")).aggregate({
        "id_":
        c.item("id_"),
        "data":
        c.ReduceFuncs.Array(c.this()).pipe(merger),
    }).gen_converter(debug=False))
    assert converter([
        {
            "id_": 1,
            "value1": 2
        },
        {
            "id_": 2,
            "value1": 3
        },
        {
            "id_": 2,
            "value2": [1, 2, 3]
        },
    ]) == [
        {
            "id_": 1,
            "data": {
                "value1": 2,
                "value2": None
            }
        },
        {
            "id_": 2,
            "data": {
                "value1": 3,
                "value2": 6
            }
        },
    ]

    def g():
        # Raises if anything is requested past the first item.
        yield 1
        raise Exception

    # Expecting 1 here means the generator is never advanced past its
    # first item.
    assert (c.aggregate(c.ReduceFuncs.First(c.this())).execute(
        g(), debug=False)) == 1
Example #15
0
def test_pipes():
    """Run ``pipe`` chains end to end and check the pipe-length limit.

    Besides comparing results, the test checks that exceeding
    ``options.max_pipe_length`` raises ``c.ConversionException`` and that
    piping into an existing conversion leaves that conversion's own chain
    length unchanged.
    """
    # 1 + 4 + 9 == 14, plus the 10 stored under "abc", duplicated in a list
    assert c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this())).pipe(
        c.call_func(sum, c.this())).pipe(
            c.call_func(
                lambda x, a: x + a,
                c.this(),
                c.naive({
                    "abc": 10
                }).item(c.input_arg("key_name")),
            )).pipe([c.this(), c.this()]).execute([1, 2, 3],
                                                  key_name="abc",
                                                  debug=False) == [
                                                      24,
                                                      24,
                                                  ]
    # Piping into a plain callable with extra arguments, date via lambda.
    assert c.item(0).pipe(datetime.strptime, "%Y-%m-%d").pipe(
        c.call_func(lambda dt: dt.date(),
                    c.this())).execute(["2019-01-01"],
                                       debug=False) == date(2019, 1, 1)

    # Same parsing, date taken through ``call_method``.
    assert c.item(0).pipe(datetime.strptime, "%Y-%m-%d").pipe(
        c.this().call_method("date")).execute(["2019-01-01"],
                                              debug=False) == date(2019, 1, 1)

    with c.OptionsCtx() as options:
        max_pipe_length = options.max_pipe_length = 10
        # One pipe more than the limit is expected to raise.
        with pytest.raises(c.ConversionException):
            conv = c.this()
            for i in range(max_pipe_length + 1):
                conv = c.this().pipe(conv)

        # ``conv`` is whatever the loop above built before it raised;
        # cloning it under a lower limit is expected to raise too.
        with c.OptionsCtx() as options2, pytest.raises(c.ConversionException):
            options2.max_pipe_length = 5
            conv.clone()

    # A pipe nested inside a dict comprehension value.
    conv = c.dict_comp(
        c.item("name"),
        c.item("transactions").pipe(
            c.list_comp({
                "id":
                c.item(0).as_type(str),
                "amount":
                c.item(1).pipe(c.if_(c.this(),
                                     c.this().as_type(Decimal), None)),
            })),
    ).gen_converter(debug=True)
    assert conv([{
        "name": "test",
        "transactions": [(0, 0), (1, 10)]
    }]) == {
        "test": [
            {
                "id": "0",
                "amount": None
            },
            {
                "id": "1",
                "amount": Decimal("10")
            },
        ]
    }

    with c.OptionsCtx() as options:
        max_pipe_length = options.max_pipe_length = 10
        conv1 = c.item(0).pipe(c.item(1).pipe(c.item(2)))

        def measure_pipe_length(conv):
            # Follow ``_predefined_input`` links, counting at most
            # ``max_pipe_length`` of them.
            length = 0
            for i in range(max_pipe_length):
                if conv._predefined_input is not None:
                    length += 1
                    conv = conv._predefined_input
                else:
                    break
            return length

        # Piping into conv1 many times must not change conv1's own length.
        pipe_length_before = measure_pipe_length(conv1)
        for i in range(max_pipe_length + 20):
            c.generator_comp(c.this().pipe(conv1))
        pipe_length_after = measure_pipe_length(conv1)
        assert pipe_length_after == pipe_length_before
Example #16
0
def test_group_by_with_pipes():
    """Group subscriptions by name and start date, piping reducer output."""

    def subscription(name, started_at, stopped_at, product):
        # One input row; the key order matches the literal rows it replaces.
        return {
            "name": name,
            "started_at": started_at,
            "stopped_at": stopped_at,
            "product": product,
        }

    january = date(2020, 1, 1)
    february = date(2020, 2, 1)
    input_data = [
        subscription("John", january, None, "A"),
        subscription("John", january, date(2020, 1, 2), "B"),
        subscription("John", january, None, "C"),
        subscription("Nick", january, None, "D"),
        subscription("Nick", february, None, "D"),
        subscription("Nick", february, None, "E"),
    ]

    # Products of rows that were never stopped; stopped rows contribute
    # None, which the filter step drops before sorting and joining.
    active_product = c.if_(
        c.item("stopped_at").is_(None),
        c.item("product"),
        None,
    )
    joined_products = (
        c.ReduceFuncs.ArrayDistinct(active_product)
        .pipe(c.filter(c.this()))
        .pipe(
            c.call_func(sorted, c.this()).pipe(
                c(", ").call_method("join", c.this())
            )
        )
        .pipe(c.this())
    )
    grouped = c.group_by(c.item("name"), c.item("started_at")).aggregate(
        {
            "name": c.item("name"),
            "started_at": c.item("started_at"),
            "products": joined_products,
        }
    )
    output = grouped.execute(input_data)
    assert output == [
        {"name": "John", "products": "A, C", "started_at": date(2020, 1, 1)},
        {"name": "Nick", "products": "D", "started_at": date(2020, 1, 1)},
        {"name": "Nick", "products": "D, E", "started_at": date(2020, 2, 1)},
    ]

    # Second variant: index syntax, a reducer on the receiving end of a
    # pipe, and a slice applied to the reducer's result.
    reducer = c.ReduceFuncs.Array(c.this(), default=list)
    grouped = c.group_by(
        c.this()["name"],
        c.this()["started_at"],
    ).aggregate(
        {
            "name": c.this()["name"],
            "started_at": c.this()["started_at"],
            "products": c.this()["product"].pipe(reducer)[:3],
        }
    )
    output = grouped.execute(input_data)
    assert output == [
        {
            "name": "John",
            "products": ["A", "B", "C"],
            "started_at": date(2020, 1, 1),
        },
        {
            "name": "Nick",
            "products": ["D"],
            "started_at": date(2020, 1, 1),
        },
        {
            "name": "Nick",
            "products": ["D", "E"],
            "started_at": date(2020, 2, 1),
        },
    ]
def test_doc__index_word_count():
    """Count words across a list of inputs and keep the ``top_n`` of them."""

    # The task: count words across all of these files.
    input_data = [
        "war-and-peace-1.txt",
        "war-and-peace-2.txt",
        "war-and-peace-3.txt",
        "war-and-peace-4.txt",
    ]

    # Reading the files for real would look like this:
    #
    # def read_file(filename):
    #     with open(filename) as f:
    #         for line in f:
    #             yield line
    # extract_strings = c.generator_comp(c.call_func(read_file, c.this()))

    # For the test each file name stands in for that file's only line.
    extract_strings = c.generator_comp(
        c.call_func(lambda filename: [filename], c.this())
    )

    # Word splitting, step by step:
    #  - make the compiled ``re`` pattern available to the generated code
    #  - call its ``finditer`` method on the incoming string
    #  - pipe the matches into a generator that takes ``.group(0)`` of each
    #    match and lower-cases it
    word_pattern = c.naive(re.compile(r"\w+"))
    word_matches = word_pattern.call_method("finditer", c.this())
    split_words = word_matches.pipe(
        c.generator_comp(
            c.this().call_method("group", 0).call_method("lower")
        )
    )

    # ``extract_strings`` yields strings, so every item is piped through
    # ``split_words``.
    vectorized_split_words = c.generator_comp(c.this().pipe(split_words))

    # Flattens one level of nesting (an iterable of iterables).
    flatten = c.call_func(chain.from_iterable, c.this())

    # Aggregates the whole input into a single dict: word -> count.
    dict_word_to_count = c.aggregate(
        c.ReduceFuncs.DictCount(c.this(), c.this(), default=dict)
    )

    # Top N words:
    #  - take ``.items()`` of the counts dict
    #  - sort them by count, descending
    #  - slice using the ``top_n`` input argument
    #  - cast back to a dict
    items_by_count = c.this().call_method("items").sort(
        key=lambda t: t[1],
        reverse=True,
    )
    take_top_n = items_by_count.pipe(
        c.this()[:c.input_arg("top_n")]
    ).as_type(dict)

    # ``c.if_`` checks its first argument and returns the second one when
    # it holds, otherwise the third one (the input data by default).
    limit_if_requested = c.if_(
        c.input_arg("top_n").is_not(None),
        c.this().pipe(take_top_n),
    )

    lines = extract_strings.pipe(flatten)
    words = lines.pipe(vectorized_split_words).pipe(flatten)
    counts = words.pipe(dict_word_to_count)

    # The signature is only needed here to give ``top_n`` a default value.
    pipeline = counts.pipe(limit_if_requested).gen_converter(
        debug=True, signature="data_, top_n=None"
    )

    assert pipeline(input_data, top_n=3) == {"war": 4, "and": 4, "peace": 4}
Example #18
0
def test_gen_converter():
    """Check ``gen_converter`` with method flags and custom signatures.

    Converters are generated inside a class body as instance methods, class
    methods and a static method, then called through the class or an
    instance. The last part covers ``*data_`` / ``**data_`` signatures and
    invalid flag combinations.
    """
    class A:
        x = 10

        def __init__(self):
            # Instances shadow the class-level ``x`` (10) with 20.
            self.x = 20

        # method=True with "self": the instance attribute is reachable.
        conv1 = (c.this() +
                 c.input_arg("self").attr("x")).gen_converter(method=True)
        # method=True with "cls": expected to fail with NameError when called.
        conv2 = (c.this +
                 c.input_arg("cls").attr("x")).gen_converter(method=True)

        # class_method=True with "cls": the class attribute is reachable.
        conv3 = classmethod(
            (c.this +
             c.input_arg("cls").attr("x")).gen_converter(class_method=True))
        # class_method=True with "self": expected to fail with NameError.
        conv4 = classmethod(
            (c.this +
             c.input_arg("self").attr("x")).gen_converter(class_method=True))

        # Explicit signature; ``data_`` comes last and has a default of 15.
        conv5 = (c.this + c.input_arg("self").attr("x") +
                 c.input_arg("n")).gen_converter(
                     signature="self, n=1000, data_=15")

        # Var-positional and var-keyword arguments read via ``input_arg``.
        conv6 = staticmethod(
            ((c.this + c.call_func(sum, c.input_arg("args"))) *
             c.input_arg("kwargs").call_method("get", "multiplicator", 1)
             ).gen_converter(signature="data_, *args, **kwargs"))

    # The name "abc" is used both as a namespace name and as an input
    # argument; this is expected to raise ValueError.
    with pytest.raises(ValueError):
        (Namespace(
            c.call_func(list).pipe(
                c.if_(
                    LazyEscapedString("abc"),
                    c.this() * LazyEscapedString("abc") * c.input_arg("abc"),
                    c.this,
                )),
            {"abc": "(0 + 1)"},
        )).execute(1, abc=10)

    assert A().conv1(100) == 120  # 100 + instance x (20)
    assert A.conv3(100) == 110  # 100 + class x (10)

    with pytest.raises(NameError):
        A().conv2(100)
    with pytest.raises(NameError):
        A.conv4(100)

    assert A().conv5() == 1035  # 15 + 20 + 1000
    assert A().conv5(data_=7) == 1027
    assert A().conv5(n=100) == 135

    assert A.conv6(20) == 20
    assert A.conv6(20, 1, 2, 3) == 26
    assert A.conv6(20, 1, 2, 3, multiplicator=10) == 260

    # With "*data_" the positional arguments arrive as the input data.
    assert (c.call_func(sum, c.this).gen_converter(signature="*data_")(1, 2,
                                                                       3) == 6)
    # The lambda writes "A" into this module's globals before summing; the
    # result must still be 6.
    assert (c.call_func(lambda i: globals().__setitem__("A", 1) or sum(i),
                        c.this).gen_converter(signature="*data_")(1, 2,
                                                                  3) == 6)
    # With "**data_" the keyword arguments arrive as the input data.
    assert c({
        c.naive("-").call_method("join", c.this.call_method("keys")):
        c.call_func(sum, c.this.call_method("values"))
    }).gen_converter(signature="**data_")(a=1, b=2, c=3) == {
        "a-b-c": 6
    }
    # "x" is not part of the "*data_" signature.
    with pytest.raises(c.ConversionException):
        c.call_func(sum,
                    c.input_arg("x")).gen_converter(signature="*data_")(1, 2,
                                                                        3)
    # ``method`` and ``class_method`` cannot be requested together.
    with pytest.raises(c.ConversionException):
        c.this.gen_converter(method=True, class_method=True)