def test_complex_labeling():
    """Labels attached at different pipe stages stay readable downstream."""
    labeled_source = c.this().add_label("input")

    # keep multiples of three; label the type of the not-yet-filtered input
    multiples_of_three = labeled_source.pipe(
        c.filter(c.this() % 3 == 0),
        label_input={"input_type": c.call_func(type, c.this())},
    )

    # stringify; the "separator" label is derived from the "list_length" one
    as_strings = multiples_of_three.pipe(
        c.list_comp(c.this().as_type(str)),
        label_output={
            "list_length": c.call_func(len, c.this()),
            "separator": c.if_(c.label("list_length") > 10, ",", ";"),
        },
    )

    conv1 = as_strings.pipe(
        {
            "result": c.label("separator").call_method("join", c.this()),
            "input_type": c.label("input_type"),
            "input_data": c.label("input"),
        }
    ).gen_converter(debug=False)

    cases = (
        (range(30), "0;3;6;9;12;15;18;21;24;27"),
        (range(40), "0,3,6,9,12,15,18,21,24,27,30,33,36,39"),
    )
    for data, joined in cases:
        assert conv1(data) == {
            "result": joined,
            "input_type": range,
            "input_data": data,
        }
def test_caching_conversion():
    """A piped one-shot function must be evaluated once per converter call."""

    class CustomException(Exception):
        pass

    def f(number):
        # works exactly once after ``f.first_time`` is (re)armed
        if f.first_time:
            f.first_time = False
            return number
        raise CustomException

    f.first_time = True

    piped_if = c.if_(c.this(), c.this() + 1, c.this() + 2)
    conv = c.call_func(f, c.this()).pipe(piped_if).gen_converter()

    # falsy input -> third branch: 0 + 2
    assert conv(0) == 2

    # ``f`` is spent now, so the next call fails inside the converter
    with pytest.raises(CustomException):
        conv(0)

    f.first_time = True
    # truthy input -> second branch: 1 + 1
    assert conv(1) == 2

    # NOTE(review): ``f.first_time`` is already False at this point, so the
    # very first call to ``f`` raises whether or not ``no_input_caching``
    # has any effect -- confirm this is the intended check.
    uncached_if = c.if_(
        c.this(), c.this() + 1, c.this() + 2, no_input_caching=True
    )
    with pytest.raises(CustomException):
        c.call_func(f, c.this()).pipe(uncached_if).execute(0)
def test_pipes():
    """Chain ``pipe`` calls, pipe into plain callables and reject misuse."""
    squares = c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this()))
    total = squares.pipe(c.call_func(sum, c.this()))
    # 1 + 4 + 9 = 14, plus the 10 looked up through the ``key_name`` argument
    shifted = total.pipe(
        c.call_func(
            lambda x, a: x + a,
            c.this(),
            c.naive({"abc": 10}).item(c.input_arg("key_name")),
        )
    )
    doubled = shifted.pipe([c.this(), c.this()])
    assert doubled.execute([1, 2, 3], key_name="abc", debug=False) == [24, 24]

    def parse_first():
        # a fresh conversion per use, as in two independent expressions
        return c.item(0).pipe(datetime.strptime, "%Y-%m-%d")

    raw_dates = ["2019-01-01"]
    expected_day = date(2019, 1, 1)

    via_func = parse_first().pipe(c.call_func(lambda dt: dt.date(), c.this()))
    assert via_func.execute(raw_dates, debug=False) == expected_day

    via_method = parse_first().pipe(c.this().call_method("date"))
    assert via_method.execute(raw_dates, debug=False) == expected_day

    # piping into a conversion which already has a predefined input fails
    with pytest.raises(c.ConversionException):
        c.naive(True).pipe(c.item("key1", _predefined_input={"key1": 777}))
def test_pipes():
    """Chain ``pipe`` calls: arithmetic, date parsing, nested comprehensions."""
    squares = c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this))
    total = squares.pipe(c.call_func(sum, c.this))
    # 1 + 4 + 9 = 14, plus the 10 looked up through the ``key_name`` argument
    shifted = total.pipe(
        c.call_func(
            lambda x, a: x + a,
            c.this,
            c.naive({"abc": 10}).item(c.input_arg("key_name")),
        )
    )
    doubled = shifted.pipe([c.this, c.this])
    assert doubled.execute([1, 2, 3], key_name="abc", debug=False) == [24, 24]

    def parse_first():
        # a fresh conversion per use, as in two independent expressions
        return c.item(0).pipe(datetime.strptime, "%Y-%m-%d")

    raw_dates = ["2019-01-01"]
    expected_day = date(2019, 1, 1)

    via_func = parse_first().pipe(c.call_func(lambda dt: dt.date(), c.this))
    assert via_func.execute(raw_dates, debug=False) == expected_day

    via_method = parse_first().pipe(c.this.call_method("date"))
    assert via_method.execute(raw_dates, debug=False) == expected_day

    # name -> list of transactions; falsy amounts become None
    transaction = {
        "id": c.item(0).as_type(str),
        "amount": c.item(1).pipe(c.if_(c.this, c.this.as_type(Decimal), None)),
    }
    conv = c.dict_comp(
        c.item("name"),
        c.item("transactions").pipe(c.list_comp(transaction)),
    ).gen_converter(debug=False)

    accounts = [{"name": "test", "transactions": [(0, 0), (1, 10)]}]
    assert conv(accounts) == {
        "test": [
            {"id": "0", "amount": None},
            {"id": "1", "amount": Decimal("10")},
        ]
    }

    # a plain callable as the pipe target, followed by filter/sort/cast
    cleaned = c.this.pipe(lambda it: it).filter(c.this).sort().as_type(list)
    assert cleaned.execute((2, 1, 0)) == [1, 2]
class A:
    """Holder of generated converters exposed as instance/class/static methods.

    Each ``convN`` attribute is the result of ``gen_converter`` called with a
    different ``method`` / ``class_method`` / ``signature`` setting.
    """

    # class-level value; ``__init__`` shadows it on instances with 20
    x = 10

    def __init__(self):
        self.x = 20

    # generated with method=True; reads ``x`` through the "self" input arg
    conv1 = (c.this() + c.input_arg("self").attr("x")).gen_converter(method=True)
    # generated with method=True but refers to a "cls" input arg
    # NOTE(review): presumably "cls" is not defined for method=True -- confirm
    conv2 = (c.this() + c.input_arg("cls").attr("x")).gen_converter(method=True)
    # generated with class_method=True and wrapped in ``classmethod``
    conv3 = classmethod(
        (c.this() + c.input_arg("cls").attr("x")).gen_converter(class_method=True))
    # generated with class_method=True but refers to a "self" input arg
    # NOTE(review): presumably "self" is not defined here -- confirm
    conv4 = classmethod(
        (c.this() + c.input_arg("self").attr("x")).gen_converter(class_method=True))
    # explicit signature: the input data (``data_``) is the last parameter
    # and both ``n`` and ``data_`` have defaults
    conv5 = (c.this() + c.input_arg("self").attr("x") +
             c.input_arg("n")).gen_converter(
                 signature="self, n=1000, data_=15")
    # explicit signature with *args/**kwargs, wrapped in ``staticmethod``:
    # (data_ + sum(args)) * kwargs.get("multiplicator", 1)
    conv6 = staticmethod(
        ((c.this() + c.call_func(sum, c.input_arg("args"))) *
         c.input_arg("kwargs").call_method("get", "multiplicator", 1)
         ).gen_converter(signature="data_, *args, **kwargs"))
def test_take_while():
    """``take_while`` yields items until the condition first fails."""
    below_three = c.take_while(c.this < 3).as_type(list)
    assert below_three.execute(range(5)) == [0, 1, 2]

    # same thing as a method on another conversion
    from_range = c.call_func(range, c.this).take_while(c.this < 3)
    assert from_range.as_type(list).execute(5) == [0, 1, 2]

    def f():
        # raises if the generator is consumed past its last item
        yield from range(5)
        raise Exception

    bounded = (
        c.take_while(c.this < c.input_arg("stop_before"))
        .filter(c.this >= c.input_arg("min_value"))
        .filter(c.this < 3, cast=list)
    )
    assert bounded.execute(f(), min_value=2, stop_before=4) == [2]

    # condition fails on the very first item
    nothing = c.take_while(c.this < 0).as_type(list)
    assert nothing.execute(range(10)) == []
def test_naive_conversion_call():
    """Calling naive objects, their attributes and the input itself."""
    ignored_input = 100

    lower = c.naive("TEST").attr("lower").call().gen_converter()
    assert lower(ignored_input) == "test"

    lower_via_func = c.call_func(str.lower, c.this()).gen_converter()
    assert lower_via_func("TEST") == "test"

    strip_spaces = c.naive("TE ST").attr("replace").call(" ", "").gen_converter()
    assert strip_spaces(ignored_input) == "TEST"

    f = MagicMock(return_value=1)

    # the mock is embedded into the conversion as a naive value
    c.naive(f).call(1, 2, test1=True, test2="test3").gen_converter()(
        ignored_input
    )
    f.assert_called_with(1, 2, test1=True, test2="test3")

    # the mock is the input data being called
    c.call(10, test="abc").gen_converter()(f)
    f.assert_called_with(10, test="abc")
def test_comprehension_filter_cast_assumptions():
    """``filter`` after a comprehension keeps (or casts) the container type."""
    lazy = c.generator_comp(c.this).filter(c.this).execute(range(10))
    assert isinstance(lazy, GeneratorType)

    lazy_no_cast = (
        c.generator_comp(c.this).filter(c.this, cast=None).execute(range(10))
    )
    assert isinstance(lazy_no_cast, GeneratorType)

    assert c.list_comp(c.this).filter(c.this).execute(range(3)) == [1, 2]

    def f(x):
        # fails on the third call; the input below has three items but only
        # two distinct values
        f.number_of_calls += 1
        if f.number_of_calls > f.max_number_of_calls:
            raise ValueError
        return bool(x)

    f.max_number_of_calls = 2
    f.number_of_calls = 0

    with_duplicates = [0, 0, 1]
    counted = c.set_comp(c.this).filter(c.call_func(f, c.this))
    assert counted.execute(with_duplicates) == {1}

    set_to_list = c.set_comp(c.this).filter(c.this, cast=list)
    assert set_to_list.execute(with_duplicates) == [1]

    assert c.set_comp(c.this).filter(c.this).execute(range(3)) == {1, 2}

    assert c.tuple_comp(c.this).filter(c.this).execute(range(3)) == (1, 2)
    assert c.tuple_comp(c.this).filter(c.this, list).execute(range(3)) == [1, 2]

    expected_mapping = {1: 1, 2: 2}
    plain_dict = c.dict_comp(c.this, c.this).filter(c.item(0))
    assert plain_dict.execute(range(3)) == expected_mapping

    cast_dict = c.dict_comp(c.this, c.this).filter(c.item(0), dict)
    assert cast_dict.execute(range(3)) == expected_mapping
def test_pipe_conversion():
    """``PipeConversion`` used directly and through ``.pipe``."""
    from convtools import conversion as c
    from convtools.base import PipeConversion

    second_of_naive = PipeConversion(c.naive([1, 2, 3]), c.item(1))
    assert second_of_naive.execute(None) == 2

    nested_lookup = PipeConversion(c.item("key1"), c.item("key2"))
    assert nested_lookup.execute({"key1": {"key2": 3}}, debug=False) == 3

    incremented = c.this.pipe(c.list_comp(c.this + 1)).filter(c.this > 3)
    assert incremented.execute([1, 2, 3, 4, 5, 6], debug=False) == [4, 5, 6, 7]

    # only has to compile: a reducer piped into a conditional
    replace_if_any_none = c.if_(
        c.call_func(any, c.generator_comp(c.this.is_(None))),
        c.call_func(list),
        c.this,
    )
    c.aggregate(
        c.ReduceFuncs.Array(c.item("key"), default=list).pipe(
            replace_if_any_none
        )
    ).gen_converter(debug=False)
def test_chunks_by_condition(data_for_chunking):
    """``chunk_by_condition`` splits whenever the condition turns false."""
    chunk_is_short = c.call_func(len, c.CHUNK) < 5

    z_by_chunk = (
        c.chunk_by_condition(chunk_is_short)
        .iter(c.list_comp(c.item("z")))
        .as_type(list)
    )
    assert z_by_chunk.execute(data_for_chunking) == [
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18],
    ]

    median_by_chunk = (
        c.chunk_by_condition(
            c.and_(c.call_func(len, c.CHUNK) < 5, c.item("z") < 18)
        )
        .aggregate(c.ReduceFuncs.Median(c.item("z")))
        .as_type(list)
    )
    assert median_by_chunk.execute(data_for_chunking) == [12, 16, 18]

    # a constant False condition puts every item into its own chunk
    singles = c.chunk_by_condition(False).as_type(list)
    assert singles.execute(range(3)) == [[0], [1], [2]]
def test_group_by_percentile():
    """Check ``ReduceFuncs.Percentile`` per group, with and without ``where``.

    Groups "a", "b" and "c" hold the integers 0..90, 0..91 and 0..92
    respectively, fed in descending order.
    """
    input_data = [
        {"key": key, "value": value}
        for index, key in enumerate("abc")
        for value in range(index + 90, -1, -1)
    ]
    c_round = c.call_func(round, c.this, 2)

    # The unconditional and the ``where`` variants of the 0th percentile used
    # to share the "min" key.  A dict literal keeps only the last duplicate,
    # so the unconditional reducer was silently dropped and never exercised.
    # Distinct keys make both of them part of the aggregate.
    result = (
        c.group_by(c.item("key"))
        .aggregate(
            {
                "key": c.item("key"),
                "min": c.ReduceFuncs.Percentile(0, c.item("value")).pipe(
                    c_round
                ),
                "min_with_where": c.ReduceFuncs.Percentile(
                    0, c.item("value"), where=c.and_(default=True)
                ).pipe(c_round),
                "percentile_5": c.ReduceFuncs.Percentile(
                    5, c.item("value")
                ).pipe(c_round),
                "median": c.ReduceFuncs.Percentile(50, c.item("value")).pipe(
                    c_round
                ),
                "percentile_95": c.ReduceFuncs.Percentile(
                    95, c.item("value")
                ).pipe(c_round),
                "max": c.ReduceFuncs.Percentile(100, c.item("value")).pipe(
                    c_round
                ),
            }
        )
        .execute(input_data)
    )

    assert result == [
        {
            "key": "a",
            "max": 90,
            "median": 45.0,
            "min": 0.0,
            "min_with_where": 0.0,
            "percentile_5": 4.5,
            "percentile_95": 85.5,
        },
        {
            "key": "b",
            "max": 91,
            "median": 45.5,
            "min": 0.0,
            "min_with_where": 0.0,
            "percentile_5": 4.55,
            "percentile_95": 86.45,
        },
        {
            "key": "c",
            "max": 92,
            "median": 46.0,
            "min": 0.0,
            "min_with_where": 0.0,
            "percentile_5": 4.6,
            "percentile_95": 87.4,
        },
    ]
def test_generator_exception_handling():
    """Exceptions raised while a generated generator runs reach the caller."""

    class CustomException(Exception):
        pass

    calls = []

    def f_second_call_raises():
        # the first call succeeds (returns None); any later call raises
        if calls:
            raise CustomException
        calls.append(True)

    lazy_calls = c.generator_comp(c.call_func(f_second_call_raises))
    conv = lazy_calls.gen_converter()

    # two input items -> two calls -> the second one raises on consumption
    with pytest.raises(CustomException):
        list(conv([1, 2]))
def test_base_zip():
    """``c.zip`` with positional args, keyword args and ``c.repeat``."""
    meta = {1: "a", 2: "b", 3: "c"}
    input_data = {"items": [1, 2, 3], "meta": meta}

    # positional arguments produce tuples
    to_tuples = c.zip(c.item("items"), c.repeat(c.item("meta")))
    converter = to_tuples.as_type(list).gen_converter(debug=False)
    assert converter(input_data) == [(number, meta) for number in (1, 2, 3)]

    # keyword arguments produce dicts
    to_dicts = c.zip(item=c.item("items"), meta=c.repeat(c.item("meta")))
    converter = to_dicts.as_type(list).gen_converter(debug=False)
    assert converter(input_data) == [
        {"item": number, "meta": meta} for number in (1, 2, 3)
    ]

    # pairs of (keys, mapping): look every key up in its own mapping
    input_data = [
        ([1, 2, 3], {1: "a", 2: "b", 3: "c"}),
        ([4, 5, 6], {4: "a", 5: "b", 6: "c"}),
    ]
    key_with_mapping = c.iter(c.zip(c.item(0), c.repeat(c.item(1)))).flatten()
    looked_up = key_with_mapping.iter(c.item(1, c.item(0)))
    converter = looked_up.pipe(c.call_func(",".join, c.this)).gen_converter(
        debug=False
    )
    assert converter(input_data) == "a,b,c,a,b,c"

    # mixing positional and keyword arguments is rejected
    with pytest.raises(ValueError):
        c.zip(1, 2, a=1)
def test_drop_while():
    """``drop_while`` skips leading items for which the condition holds."""
    skip_small = c.drop_while(c.this < 3).as_type(list)
    assert skip_small.execute(range(5)) == [3, 4]

    # as a method, with the threshold coming from an input argument
    from_range = c.call_func(range, c.this).drop_while(
        c.this < c.input_arg("min_value")
    )
    assert from_range.as_type(list).execute(5, min_value=3) == [3, 4]

    # the condition holds for every item, so nothing is left
    skip_all = c.drop_while(c.this >= 0).as_type(list)
    assert skip_all.execute(range(10)) == []
def test_mutation_item():
    """``c.Mut`` item mutations applied through ``tap``."""
    now = datetime.now()

    prepared = c.list_comp(
        {
            "name": c.item("fullName"),
            "age": c.item("age").as_type(int),
            "to_del": 1,
        }
    )

    # "_input" labels the list produced by ``prepared``; its first dict is
    # the object being mutated, so "name_after" sees the lowered name
    mutated_item = c.call_func(lambda d: d, c.this).tap(
        c.Mut.set_item("name_before", c.label("_input").item(0, "name")),
        c.Mut.set_item("name", c.item("name").call_method("lower")),
        c.Mut.set_item("name_after", c.label("_input").item(0, "name")),
        c.Mut.set_item("_updated", c.input_arg("now")),
        c.Mut.set_item(c.item("age"), c.item("age") >= 18),
        c.Mut.del_item("to_del"),
        c.Mut.custom(c.this.call_method("update", {"to_add": 2})),
        c.this.call_method("update", {"to_add2": 4}),
    )
    conversion = prepared.pipe(c.list_comp(mutated_item), label_input="_input")

    people = [{"fullName": "John", "age": "28"}]
    assert conversion.execute(people, debug=False, now=now) == [
        {
            "name": "john",
            "name_after": "john",
            "name_before": "John",
            "age": 28,
            "_updated": now,
            28: True,
            "to_add": 2,
            "to_add2": 4,
        }
    ]

    # mutations are not accepted as arguments of other conversions
    with pytest.raises(Exception):
        c.item(c.Mut.set_item("abc", "cde"))

    with pytest.raises(Exception):
        conversion = c.item(1)
        conversion.ensure_conversion(
            c.Mut.set_item("abc", "cde"), explicitly_allowed_cls=GetItem
        )
def test_iter_mut_method():
    """``iter`` / ``iter_mut`` as functions, as methods and on aggregates."""
    nested = [[1], [2]]
    assert c.iter(c.item(0)).as_type(list).execute(nested) == [1, 2]

    appended = c.iter_mut(c.Mut.custom(c.this.call_method("append", 7)))
    assert appended.as_type(list).execute([[1], [2]]) == [[1, 7], [2, 7]]

    # chained ``iter_mut`` calls add keys to each dict one after another
    extended = (
        c.this.iter({"a": c.this})
        .iter_mut(
            c.Mut.set_item("b", c.item("a") + 1),
            c.Mut.set_item("c", c.item("a") + 2),
        )
        .iter_mut(c.Mut.set_item("d", c.item("a") + 3))
        .as_type(list)
    )
    assert extended.execute([1, 2, 3], debug=False) == [
        {"a": 1, "b": 2, "c": 3, "d": 4},
        {"a": 2, "b": 3, "c": 4, "d": 5},
        {"a": 3, "b": 4, "c": 5, "d": 6},
    ]

    # ``iter_mut`` applied to a collection built inside an aggregate
    pair_of_dicts = c(
        [
            {c.item(0): c.item(1).pipe(c.ReduceFuncs.Max(c.this))},
            {c.item(1).pipe(c.ReduceFuncs.Max(c.this)): c.item(0)},
        ]
    )
    with_totals = pair_of_dicts.iter_mut(
        c.Mut.set_item(
            "x",
            c.call_func(sum, c.this.call_method("values"))
            + c.input_arg("base"),
        )
    ).as_type(tuple)
    grouped = c.group_by(c.item(0)).aggregate(with_totals)

    result = grouped.execute([(0, 1), (0, 2), (1, 7)], base=100, debug=False)
    assert result == [
        ({0: 2, "x": 102}, {2: 0, "x": 100}),
        ({1: 7, "x": 107}, {7: 1, "x": 101}),
    ]
def test_namespaces():
    """``Namespace`` resolves ``LazyEscapedString`` names, nested ones too."""
    # an unresolved lazy name cannot be executed
    with pytest.raises(ValueError):
        LazyEscapedString("abc").execute([1])

    with pytest.raises(ValueError):
        unresolved = Namespace(
            LazyEscapedString("abc"), name_to_code={"abc": None}
        )
        unresolved.execute([1])

    resolved = Namespace(LazyEscapedString("abc"), name_to_code={"abc": True})
    assert resolved.execute(1) == 1

    mapped_to_arg = Namespace(
        c.input_arg("abc") + LazyEscapedString("abc"),
        name_to_code={"abc": "abc"},
    )
    assert mapped_to_arg.execute(0.1, abc=2) == 4

    assert Namespace(c.item(1), {}).execute([0, 10]) == 10

    # Three nesting levels; the addition order is kept as-is because the
    # result is compared as a float.
    innermost = Namespace(
        LazyEscapedString("abc"), name_to_code={"abc": True}
    )  # 1
    total = (
        innermost
        + LazyEscapedString("abc")  # 10
        + LazyEscapedString("foo")  # 1000
        + c.item() * 0.1  # 0.1
    )
    middle = Namespace(total, name_to_code={"foo": "arg_foo2"})
    outer = Namespace(
        middle, name_to_code={"abc": "arg_abc", "foo": "arg_foo"}
    )
    converter = outer.gen_converter(
        debug=False,
        signature="data_, arg_abc=10, arg_foo=100, arg_foo2=1000",
    )
    assert converter(1) == 1011.1

    # a lazy name used inside a piped conditional
    repeated = Namespace(
        c.call_func(list, (1,)).pipe(
            c.if_(
                c.this,
                c.this * LazyEscapedString("number"),
                c.this,
            )
        ),
        {"number": "3"},
    )
    assert repeated.execute(None) == [1, 1, 1]
def test_pipe_single_call_functions():
    """A piped function call is evaluated once even if used twice."""

    class CustomException(Exception):
        pass

    def one_off_func():
        # returns 1 on the first call only; any further call raises
        if not one_off_func.first:
            raise CustomException
        one_off_func.first = False
        return 1

    one_off_func.first = True

    # ``c.this`` is referenced twice downstream of the single call
    both_sums = c.call_func(one_off_func).pipe((c.this + 1, c.this + 2))
    converter = c.list_comp(both_sums).gen_converter(debug=False)

    assert converter([1]) == [(2, 3)]
def test_join_with_complex_pipe():
    """Join an aggregated array with a derived version of itself."""

    def f(values):
        return values + [1, 3]

    collected = c.aggregate(c.ReduceFuncs.Array(c.item("a")))
    joined = collected.pipe(
        c.join(c.this(), c.call_func(f, c.this()), c.LEFT == c.RIGHT)
    )
    # keep only the right-hand element of every joined pair
    pipeline = joined.iter(c.item(1)).as_type(list)

    rows = [{"a": 1}, {"a": 2}, {"a": 3}]
    assert pipeline.execute(rows) == [1, 1, 2, 3, 3]
def test_callfunc():
    """``call_func`` forwards positional/keyword args; ``len`` shortcut."""

    def func(i, abc=None):
        assert i == 1 and abc == 2

    # the converter input (100) is ignored; only the fixed arguments matter
    converter = c.call_func(func, 1, abc=2).gen_converter()
    converter(100)

    length = c.this.len()
    assert length.execute([1, 2]) == 2
def test_callfunc():
    """``call_func`` forwards positional and keyword arguments."""

    def func(i, abc=None):
        assert i == 1 and abc == 2

    # the converter input (100) is ignored; only the fixed arguments matter
    converter = c.call_func(func, 1, abc=2).gen_converter()
    converter(100)
def test_grouping():
    """``group_by`` / ``aggregate`` with ``c.reduce`` based reducers."""
    columns = ("name", "category", "debit", "balance")
    rows = (
        ("John", "Games", 10, 90),
        ("John", "Games", 200, -110),
        ("John", "Food", 30, -140),
        ("John", "Games", 300, 0),
        ("Nick", "Food", 7, 50),
        ("Nick", "Games", 18, 32),
        ("Bill", "Games", 18, 120),
    )
    data = [dict(zip(columns, row)) for row in rows]

    # --- one tuple per name ----------------------------------------------
    sum_from_arg = c.reduce(
        lambda a, b: a + b,
        c.item("debit"),
        initial=c.input_arg("arg1"),
    )
    sum_from_callable = c.reduce(
        c.inline_expr("{0} + {1}"),
        c.item("debit"),
        initial=lambda: 100,
    )
    max_debit_in_red = c.reduce(
        max, c.item("debit"), default=c.input_arg("arg1")
    ).filter(c.call_func(lambda x: x < 0, c.item("balance")))
    scaled_max_in_red = c.call_func(
        lambda max_debit, n: max_debit * n,
        c.reduce(max, c.item("debit"), default=0).filter(
            c.call_func(lambda x: x < 0, c.item("balance"))
        ),
        1000,
    )
    negated_max_in_black = c.call_func(
        lambda max_debit, n: max_debit * n,
        c.reduce(
            c.ReduceFuncs.Max,
            c.item("debit"),
            default=1000,
        ).filter(c.inline_expr("{0} > 0").pass_args(c.item("balance"))),
        -1,
    )
    balance_of_max_debit = c.reduce(
        c.ReduceFuncs.MaxRow,
        c.item("debit"),
    ).item("balance")
    balance_of_min_debit = c.reduce(
        c.ReduceFuncs.MinRow,
        c.item("debit"),
    ).item("balance")

    per_name = c.group_by(c.item("name")).aggregate(
        (
            c.item("name"),
            c.item("name").call_method("lower"),
            c.call_func(str.lower, c.item("name")),
            sum_from_arg,
            sum_from_callable,
            max_debit_in_red,
            scaled_max_in_red,
            negated_max_in_black,
            balance_of_max_debit,
            balance_of_min_debit,
        )
    )
    result = per_name.sort(key=lambda t: t[0].lower(), reverse=True).execute(
        data, arg1=100, debug=False
    )
    # fmt: off
    assert result == [
        ("Nick", "nick", "nick", 125, 125, 100, 0, -18, 32, 50),
        ("John", "john", "john", 640, 640, 200, 200000, -10, 0, 90),
        ("Bill", "bill", "bill", 118, 118, 100, 0, -18, 120, 120),
    ]
    # fmt: on

    # --- one dict per category; a conversion is used as a dict key ---------
    aggregation = {
        c.call_func(
            tuple,
            c.reduce(c.ReduceFuncs.Array, c.item("name"), default=None),
        ): c.item("category").call_method("lower"),
        "count": c.reduce(c.ReduceFuncs.Count),
        "count_distinct": c.reduce(
            c.ReduceFuncs.CountDistinct, c.item("name")
        ),
        "array_agg_distinct": c.reduce(
            c.ReduceFuncs.ArrayDistinct,
            c.item("name"),
        ),
        "dict": c.reduce(
            c.ReduceFuncs.Dict, (c.item("debit"), c.item("name"))
        ),
    }
    by_category = c.group_by(c.item("category"))
    result = by_category.aggregate(aggregation).execute(data, debug=False)
    result2 = (
        c.group_by(c.item("category"))
        .aggregate(c.dict(*aggregation.items()))
        .execute(data, debug=False)
    )
    # fmt: off
    expected_by_category = [
        {
            "array_agg_distinct": ["John", "Nick", "Bill"],
            "count": 5,
            "count_distinct": 3,
            "dict": {10: "John", 18: "Bill", 200: "John", 300: "John"},
            ("John", "John", "John", "Nick", "Bill"): "games",
        },
        {
            "array_agg_distinct": ["John", "Nick"],
            "count": 2,
            "count_distinct": 2,
            "dict": {7: "Nick", 30: "John"},
            ("John", "Nick"): "food",
        },
    ]
    # fmt: on
    assert result == result2 == expected_by_category

    # --- aggregate piped further -----------------------------------------
    total_debit = c.aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit")))
    result3 = total_debit.pipe(
        c.inline_expr("{0} + {1}").pass_args(c.this(), c.this())
    ).execute(data, debug=False)
    assert result3 == 583 * 2

    # --- grouping by several fields --------------------------------------
    by = c.item("name"), c.item("category")
    result4 = (
        c.group_by(*by)
        .aggregate(by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")),))
        .execute(data, debug=False)
    )
    # fmt: off
    assert result4 == [
        ("John", "Games", 510),
        ("John", "Food", 30),
        ("Nick", "Food", 7),
        ("Nick", "Games", 18),
        ("Bill", "Games", 18),
    ]
    # fmt: on

    # --- grouping by nothing ---------------------------------------------
    result5 = (
        c.group_by()
        .aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit")))
        .execute(data, debug=False)
    )
    assert result5 == 583
def test_table_base_init():
    """``Table.from_rows`` header handling and row output modes."""

    def rows_with_header(table):
        return list(table.into_iter_rows(include_header=True))

    def rows_as_dicts(table):
        return list(table.into_iter_rows(dict))

    # only has to run: add column "c", then overwrite it
    rows_as_dicts(
        Table.from_rows([(1, -2), (2, -3)], ["a", "b"])
        .update(c=c.col("a") + c.col("b"))  # adding new column: "c"
        .update(c=c.call_func(abs, c.col("c")))  # updating new column: "c"
    )

    # header passed as a list of names
    result = rows_with_header(
        Table.from_rows([(1, 2, 3), (2, 3, 4)], ["a", "b", "c"])
    )
    assert result == [
        ("a", "b", "c"),
        (1, 2, 3),
        (2, 3, 4),
    ]

    # header passed as a name -> index mapping
    result = rows_as_dicts(
        Table.from_rows([(1, 2, 3), (2, 3, 4)], {"a": 2, "b": 1, "c": 0})
    )
    assert result == [
        {"a": 3, "b": 2, "c": 1},
        {"a": 4, "b": 3, "c": 2},
    ]

    # duplicate column names in a header taken from the first row
    input_data = [("a", "a", "b"), (1, 2, 3)]
    with pytest.raises(ValueError):
        Table.from_rows(input_data, True)
    with pytest.raises(ValueError):
        Table.from_rows(input_data, True, duplicate_columns="raise")

    expected_by_mode = (
        ("keep", input_data),
        ("drop", [("a", "b"), (1, 3)]),
        ("mangle", [("a", "a_1", "b"), (1, 2, 3)]),
    )
    for mode, expected in expected_by_mode:
        result = rows_with_header(
            Table.from_rows(input_data, True, duplicate_columns=mode)
        )
        assert result == expected

    # no header: column names are generated
    result = rows_with_header(Table.from_rows(input_data, None))
    assert result == [
        ("COLUMN_0", "COLUMN_1", "COLUMN_2"),
        ("a", "a", "b"),
        (1, 2, 3),
    ]

    result = rows_with_header(
        Table.from_rows(input_data, {"a": 0, "b": 1, "c": 2})
    )
    assert result == [("a", "b", "c"), ("a", "a", "b"), (1, 2, 3)]

    result = rows_as_dicts(
        Table.from_rows(input_data, {"a": 1, "b": 0, "c": 2}, skip_rows=1)
    )
    assert result == [{"a": 2, "b": 1, "c": 3}]

    # rows given as dicts
    result = rows_as_dicts(Table.from_rows([{"a": 1, "b": 2, "c": 3}]))
    assert result == [{"a": 1, "b": 2, "c": 3}]

    result = rows_as_dicts(
        Table.from_rows([{"a": 1, "b": 2, "c": 3}], header=False)
    )
    assert result == [{"COLUMN_0": 1, "COLUMN_1": 2, "COLUMN_2": 3}]

    # a non-tuple first row used as the header
    single_column = (
        Table.from_rows([1, (1,), (2,)], header=True)
        .update(**{"abc": c.col("1").item(0)})
        .take("abc")
    )
    assert rows_as_dicts(single_column) == [{"abc": 1}, {"abc": 2}]

    # only has to be accepted: scalar rows without a header
    Table.from_rows(range(3), header=False).update(a=c.col("COLUMN_0"))

    # rows which are plain strings
    assert rows_as_dicts(Table.from_rows(["name", "cde"], header=True)) == [
        {"name": "cde"}
    ]
    assert rows_as_dicts(Table.from_rows(["name", "cde"], header=False)) == [
        {"COLUMN_0": "name"},
        {"COLUMN_0": "cde"},
    ]
def test_group_by_with_pipes():
    """Reducers can be piped further and can be the target of a pipe."""
    first_of_jan = date(2020, 1, 1)
    first_of_feb = date(2020, 2, 1)
    fields = ("name", "started_at", "stopped_at", "product")
    # fmt: off
    records = (
        ("John", first_of_jan, None, "A"),
        ("John", first_of_jan, date(2020, 1, 2), "B"),
        ("John", first_of_jan, None, "C"),
        ("Nick", first_of_jan, None, "D"),
        ("Nick", first_of_feb, None, "D"),
        ("Nick", first_of_feb, None, "E"),
    )
    # fmt: on
    input_data = [dict(zip(fields, record)) for record in records]

    # products which are not stopped, deduplicated, sorted and joined
    active_products = (
        c.ReduceFuncs.ArrayDistinct(
            c.if_(
                c.item("stopped_at").is_(None),
                c.item("product"),
                None,
            ),
        )
        .pipe(c.filter(c.this()))
        .pipe(
            c.call_func(sorted, c.this()).pipe(
                c(", ").call_method("join", c.this())
            )
        )
        .pipe(c.this())
    )
    output = (
        c.group_by(c.item("name"), c.item("started_at"))
        .aggregate(
            {
                "name": c.item("name"),
                "started_at": c.item("started_at"),
                "products": active_products,
            }
        )
        .execute(input_data)
    )
    # fmt: off
    assert output == [
        {"name": "John", "products": "A, C", "started_at": first_of_jan},
        {"name": "Nick", "products": "D", "started_at": first_of_jan},
        {"name": "Nick", "products": "D, E", "started_at": first_of_feb},
    ]
    # fmt: on

    # a field piped INTO a reducer, with the reduced value sliced afterwards
    reducer = c.ReduceFuncs.Array(c.this(), default=list)
    grouped = c.group_by(c.this()["name"], c.this()["started_at"])
    output = grouped.aggregate(
        {
            "name": c.this()["name"],
            "started_at": c.this()["started_at"],
            "products": c.this()["product"].pipe(reducer)[:3],
        }
    ).execute(input_data)
    assert output == [
        {
            "name": "John",
            "products": ["A", "B", "C"],
            "started_at": first_of_jan,
        },
        {
            "name": "Nick",
            "products": ["D"],
            "started_at": first_of_jan,
        },
        {
            "name": "Nick",
            "products": ["D", "E"],
            "started_at": first_of_feb,
        },
    ]
def test_doc__index_word_count():
    """Documentation example: count words across several "files"."""
    # Let's say we need to count words across all files
    input_data = [
        "war-and-peace-1.txt",
        "war-and-peace-2.txt",
        "war-and-peace-3.txt",
        "war-and-peace-4.txt",
    ]

    # The documented version iterates the input and reads file lines:
    #
    # def read_file(filename):
    #     with open(filename) as f:
    #         for line in f:
    #             yield line
    # extract_strings = c.generator_comp(c.call_func(read_file, c.this()))

    # to simplify testing, every "file" yields its own name as the only line
    extract_strings = c.generator_comp(
        c.call_func(lambda filename: [filename], c.this())
    )

    # 1. make the ``re`` pattern available to the code to be generated
    # 2. call the ``finditer`` method of the pattern, passing the string
    # 3. pass the result to the next conversion
    # 4. iterate the matches, call ``.group(0)`` on each ``re.Match``
    #    and ``.lower()`` on each result
    word_pattern = re.compile(r"\w+")
    lowered_matches = c.generator_comp(
        c.this().call_method("group", 0).call_method("lower")
    )
    split_words = (
        c.naive(word_pattern)
        .call_method("finditer", c.this())
        .pipe(lowered_matches)
    )

    # ``extract_strings`` is a generator of strings, so we iterate it and
    # pass each item to the ``split_words`` conversion
    vectorized_split_words = c.generator_comp(c.this().pipe(split_words))

    # flattens one level of nesting (e.g. a generator of generators)
    flatten = c.call_func(
        chain.from_iterable,
        c.this(),
    )

    # aggregate the input into a single dict: word -> number of occurrences
    dict_word_to_count = c.aggregate(
        c.ReduceFuncs.DictCount(c.this(), c.this(), default=dict)
    )

    # take top N words by:
    #  - calling the ``.items()`` method of the dict
    #  - sorting the pairs by count, descending
    #  - slicing with the input argument named ``top_n``
    #  - casting back to a dict
    take_top_n = (
        c.this()
        .call_method("items")
        .sort(key=lambda t: t[1], reverse=True)
        .pipe(c.this()[: c.input_arg("top_n")])
        .as_type(dict)
    )

    # ``c.if_`` checks the condition (first argument) and returns the 2nd
    # if True OR the 3rd (input data by default) otherwise
    counted = (
        extract_strings.pipe(flatten)
        .pipe(vectorized_split_words)
        .pipe(flatten)
        .pipe(dict_word_to_count)
        .pipe(
            c.if_(
                c.input_arg("top_n").is_not(None),
                c.this().pipe(take_top_n),
            )
        )
    )
    # Define the resulting converter function signature. In fact this
    # isn't necessary if you don't need to specify default values
    pipeline = counted.gen_converter(
        debug=True, signature="data_, top_n=None"
    )

    assert pipeline(input_data, top_n=3) == {"war": 4, "and": 4, "peace": 4}
def test_pipes():
    """Pipe chaining, the ``max_pipe_length`` option and pipe immutability."""
    squares = c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this()))
    total = squares.pipe(c.call_func(sum, c.this()))
    # 1 + 4 + 9 = 14, plus the 10 looked up through the ``key_name`` argument
    shifted = total.pipe(
        c.call_func(
            lambda x, a: x + a,
            c.this(),
            c.naive({"abc": 10}).item(c.input_arg("key_name")),
        )
    )
    doubled = shifted.pipe([c.this(), c.this()])
    assert doubled.execute([1, 2, 3], key_name="abc", debug=False) == [24, 24]

    def parse_first():
        # a fresh conversion per use, as in two independent expressions
        return c.item(0).pipe(datetime.strptime, "%Y-%m-%d")

    raw_dates = ["2019-01-01"]
    expected_day = date(2019, 1, 1)

    via_func = parse_first().pipe(c.call_func(lambda dt: dt.date(), c.this()))
    assert via_func.execute(raw_dates, debug=False) == expected_day

    via_method = parse_first().pipe(c.this().call_method("date"))
    assert via_method.execute(raw_dates, debug=False) == expected_day

    with c.OptionsCtx() as options:
        max_pipe_length = options.max_pipe_length = 10

        # nesting pipes deeper than the limit must fail while building
        with pytest.raises(c.ConversionException):
            conv = c.this()
            for _ in range(max_pipe_length + 1):
                conv = c.this().pipe(conv)

        # ``conv`` is the last chain which was built successfully; cloning
        # it under a stricter limit must fail as well
        with c.OptionsCtx() as options2, pytest.raises(c.ConversionException):
            options2.max_pipe_length = 5
            conv.clone()

    # name -> list of transactions; falsy amounts become None
    transaction = {
        "id": c.item(0).as_type(str),
        "amount": c.item(1).pipe(
            c.if_(c.this(), c.this().as_type(Decimal), None)
        ),
    }
    conv = c.dict_comp(
        c.item("name"),
        c.item("transactions").pipe(c.list_comp(transaction)),
    ).gen_converter(debug=True)

    accounts = [{"name": "test", "transactions": [(0, 0), (1, 10)]}]
    assert conv(accounts) == {
        "test": [
            {"id": "0", "amount": None},
            {"id": "1", "amount": Decimal("10")},
        ]
    }

    with c.OptionsCtx() as options:
        max_pipe_length = options.max_pipe_length = 10
        conv1 = c.item(0).pipe(c.item(1).pipe(c.item(2)))

        def measure_pipe_length(conv):
            # follow ``_predefined_input`` links, at most ``max_pipe_length``
            hops = 0
            while (
                hops < max_pipe_length
                and conv._predefined_input is not None
            ):
                conv = conv._predefined_input
                hops += 1
            return hops

        pipe_length_before = measure_pipe_length(conv1)

        # using ``conv1`` as a pipe target many times must not grow it
        for _ in range(max_pipe_length + 20):
            c.generator_comp(c.this().pipe(conv1))

        pipe_length_after = measure_pipe_length(conv1)
        assert pipe_length_after == pipe_length_before
def test_gen_converter():
    """``gen_converter`` options: method, class_method and signature."""

    class A:
        x = 10

        def __init__(self):
            self.x = 20

        # ``self`` is available when method=True
        conv1 = (
            c.this() + c.input_arg("self").attr("x")
        ).gen_converter(method=True)
        # ``cls`` is NOT available when method=True
        conv2 = (
            c.this() + c.input_arg("cls").attr("x")
        ).gen_converter(method=True)

        # ``cls`` is available when class_method=True
        conv3 = classmethod(
            (c.this() + c.input_arg("cls").attr("x")).gen_converter(
                class_method=True
            )
        )
        # ``self`` is NOT available when class_method=True
        conv4 = classmethod(
            (c.this() + c.input_arg("self").attr("x")).gen_converter(
                class_method=True
            )
        )

        # custom signature where the data argument has a default
        conv5 = (
            c.this() + c.input_arg("self").attr("x") + c.input_arg("n")
        ).gen_converter(signature="self, n=1000, data_=15")

        # custom signature with *args / **kwargs
        conv6 = staticmethod(
            (
                (c.this() + c.call_func(sum, c.input_arg("args")))
                * c.input_arg("kwargs").call_method("get", "multiplicator", 1)
            ).gen_converter(signature="data_, *args, **kwargs")
        )

    instance = A()
    assert instance.conv1(100) == 120  # 100 + instance x (20)
    assert A.conv3(100) == 110  # 100 + class x (10)

    with pytest.raises(NameError):
        A().conv2(100)
    with pytest.raises(NameError):
        A.conv4(100)

    assert A().conv5() == 1035  # 15 + 20 + 1000
    assert A().conv5(data_=7) == 1027
    assert A().conv5(n=100) == 135

    assert A.conv6(20) == 20
    assert A.conv6(20, 1, 2, 3) == 26
    assert A.conv6(20, 1, 2, 3, multiplicator=10) == 260

    # "*data_": the positional arguments themselves are the input
    sum_of_args = c.call_func(sum, c.this()).gen_converter(signature="*data_")
    assert sum_of_args(1, 2, 3) == 6

    # same, with a function which writes to this module's globals first
    sum_after_global_write = c.call_func(
        lambda i: globals().__setitem__("A", 1) or sum(i), c.this()
    ).gen_converter(signature="*data_")
    assert sum_after_global_write(1, 2, 3) == 6

    # "**data_": the keyword arguments themselves are the input
    summarize_kwargs = c(
        {
            c.naive("-").call_method(
                "join", c.this().call_method("keys")
            ): c.call_func(sum, c.this().call_method("values"))
        }
    ).gen_converter(signature="**data_")
    assert summarize_kwargs(a=1, b=2, c=3) == {"a-b-c": 6}

    # an input arg missing from the signature
    with pytest.raises(c.ConversionException):
        c.call_func(sum, c.input_arg("x")).gen_converter(signature="*data_")(
            1, 2, 3
        )

    # method and class_method are mutually exclusive
    with pytest.raises(c.ConversionException):
        c.this().gen_converter(method=True, class_method=True)
def test_doc__index_deserialization():
    """Documentation example: deserialize raw records into ``Employee``."""

    class Employee:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    john = {
        "id": 1,
        "first_name": "john",
        "last_name": "black",
        "dob": None,
        "salary": "1,000.00",
        "department": "D1 ",
        "date": "2000-01-01",
    }
    bob = {
        "id": 2,
        "first_name": "bob",
        "last_name": "wick",
        "dob": "1900-01-01",
        "salary": "1,001.00",
        "department": "D3 ",
        "date": "2000-01-01",
    }
    input_data = {"objects": [john, bob]}

    # get by "department" key and then call method "strip"
    department = c.item("department").call_method("strip")
    first_name = c.item("first_name").call_method("capitalize")
    last_name = c.item("last_name").call_method("capitalize")

    # call "format" method of a string and pass first & last names as
    # parameters
    full_name = c("{} {}").call_method("format", first_name, last_name)
    date_of_birth = c.item("dob")

    # partially initialized "strptime"
    parse_date = c.call_func(
        datetime.strptime, c.this(), "%Y-%m-%d"
    ).call_method("date")

    employee_fields = {
        "id": c.item("id"),
        "first_name": first_name,
        "last_name": last_name,
        "full_name": full_name,
        "date_of_birth": c.if_(
            date_of_birth,
            date_of_birth.pipe(parse_date),
            None,
        ),
        "salary": c.call_func(
            Decimal,
            c.item("salary").call_method("replace", ",", ""),
        ),
        # pass a hardcoded dict and to get value by "department"
        # key
        "department_id": c.naive(
            {
                "D1": 10,
                "D2": 11,
                "D3": 12,
            }
        ).item(department),
        "date": c.item("date").pipe(parse_date),
    }

    # write a python code expression, format with passed parameters
    build_employee = c.inline_expr("{employee_cls}(**{kwargs})").pass_args(
        employee_cls=Employee,
        kwargs=c.this(),
    )

    conv = (
        c.item("objects")
        .pipe(c.generator_comp(employee_fields))
        .pipe(
            c.dict_comp(
                c.item("id"),  # key
                build_employee,  # value
            )
        )
        .gen_converter(debug=True)
    )

    result = conv(input_data)

    assert result[1].kwargs == {
        "date": date(2000, 1, 1),
        "date_of_birth": None,
        "department_id": 10,
        "first_name": "John",
        "full_name": "John Black",
        "id": 1,
        "last_name": "Black",
        "salary": Decimal("1000.00"),
    }
    assert result[2].kwargs == {
        "date": date(2000, 1, 1),
        "date_of_birth": date(1900, 1, 1),
        "department_id": 12,
        "first_name": "Bob",
        "full_name": "Bob Wick",
        "id": 2,
        "last_name": "Wick",
        "salary": Decimal("1001.00"),
    }
def test_grouping():
    """``group_by`` / ``aggregate``: custom reducers and ``ReduceFuncs``."""
    columns = ("name", "category", "debit", "balance")
    rows = (
        ("John", "Games", 10, 90),
        ("John", "Games", 200, -110),
        ("John", "Food", 30, -140),
        ("John", "Games", 300, 0),
        ("Nick", "Food", 7, 50),
        ("Nick", "Games", 18, 32),
        ("Bill", "Games", 18, 120),
    )
    data = [dict(zip(columns, row)) for row in rows]

    # --- one tuple per name ----------------------------------------------
    sum_from_arg = c.reduce(
        lambda a, b: a + b,
        c.item("debit"),
        initial=c.input_arg("arg1"),
        unconditional_init=True,
    )
    sum_from_callable = c.reduce(
        c.inline_expr("{0} + {1}"),
        c.item("debit"),
        initial=lambda: 100,
        unconditional_init=True,
    )
    max_debit_in_red = c.reduce(
        max,
        c.item("debit"),
        prepare_first=lambda a: a,
        default=c.input_arg("arg1"),
        where=c.call_func(lambda x: x < 0, c.item("balance")),
    )
    scaled_max_in_red = c.call_func(
        lambda max_debit, n: max_debit * n,
        c.reduce(
            max,
            c.item("debit"),
            prepare_first=lambda a: a,
            default=0,
            where=c.call_func(lambda x: x < 0, c.item("balance")),
        ),
        1000,
    )
    negated_max_in_black = c.call_func(
        lambda max_debit, n: max_debit * n,
        c.reduce(
            c.ReduceFuncs.Max,
            c.item("debit"),
            default=1000,
            where=c.inline_expr("{0} > {1}").pass_args(
                c.item("balance"),
                c.input_arg("arg2"),
            ),
        ),
        -1,
    )
    balance_of_max_debit = c.reduce(
        c.ReduceFuncs.MaxRow, c.item("debit")
    ).item("balance")
    balance_of_min_debit = c.reduce(
        c.ReduceFuncs.MinRow, c.item("debit")
    ).item("balance")

    per_name = c.group_by(c.item("name")).aggregate(
        (
            c.item("name"),
            c.item("name").call_method("lower"),
            c.call_func(str.lower, c.item("name")),
            sum_from_arg,
            sum_from_callable,
            max_debit_in_red,
            scaled_max_in_red,
            negated_max_in_black,
            balance_of_max_debit,
            balance_of_min_debit,
        )
    )
    result = per_name.sort(key=lambda t: t[0].lower(), reverse=True).execute(
        data, arg1=100, arg2=0, debug=False
    )
    # fmt: off
    assert result == [
        ("Nick", "nick", "nick", 125, 125, 100, 0, -18, 32, 50),
        ("John", "john", "john", 640, 640, 200, 200000, -10, 0, 90),
        ("Bill", "bill", "bill", 118, 118, 100, 0, -18, 120, 120),
    ]
    # fmt: on

    with pytest.raises(c.ConversionException):
        # there's a single group by field, while we use separate items
        # of this tuple in aggregate
        c.group_by(c.item("name")).aggregate(
            (
                c.item("category"),
                c.reduce(c.ReduceFuncs.Sum, c.item("debit")),
            )
        ).execute(data, debug=False)

    # --- one dict per category; a conversion is used as a dict key ---------
    aggregation = {
        c.call_func(
            tuple,
            c.ReduceFuncs.Array(c.item("name"), default=None),
        ): c.item("category").call_method("lower"),
        "count": c.ReduceFuncs.Count(),
        "max": c.ReduceFuncs.Max(c.item("debit")),
        "min": c.ReduceFuncs.Min(c.item("debit")),
        "count_distinct": c.ReduceFuncs.CountDistinct(c.item("name")),
        "array_agg_distinct": c.ReduceFuncs.ArrayDistinct(c.item("name")),
        "dict": c.ReduceFuncs.Dict(c.item("debit"), c.item("name")),
    }
    by_category = c.group_by(c.item("category"))
    result = by_category.aggregate(aggregation).execute(data, debug=False)
    result2 = (
        c.group_by(c.item("category"))
        .aggregate(c.dict(*aggregation.items()))
        .execute(data, debug=False)
    )
    # fmt: off
    expected_by_category = [
        {
            "array_agg_distinct": ["John", "Nick", "Bill"],
            "count": 5,
            "count_distinct": 3,
            "dict": {10: "John", 18: "Bill", 200: "John", 300: "John"},
            "max": 300,
            "min": 10,
            ("John", "John", "John", "Nick", "Bill"): "games",
        },
        {
            "array_agg_distinct": ["John", "Nick"],
            "count": 2,
            "count_distinct": 2,
            "dict": {7: "Nick", 30: "John"},
            "max": 30,
            "min": 7,
            ("John", "Nick"): "food",
        },
    ]
    # fmt: on
    assert result == result2 == expected_by_category

    # --- aggregate piped further -----------------------------------------
    total_debit = c.aggregate(c.ReduceFuncs.Sum(c.item("debit")))
    result3 = total_debit.pipe(
        c.inline_expr("{0} + {1}").pass_args(c.this(), c.this())
    ).execute(data, debug=False)
    assert result3 == 583 * 2

    # --- grouping by several fields --------------------------------------
    by = c.item("name"), c.item("category")
    result4 = (
        c.group_by(*by)
        .aggregate(by + (c.ReduceFuncs.Sum(c.item("debit")),))
        .execute(data, debug=False)
    )
    # fmt: off
    assert result4 == [
        ("John", "Games", 510),
        ("John", "Food", 30),
        ("Nick", "Food", 7),
        ("Nick", "Games", 18),
        ("Bill", "Games", 18),
    ]
    # fmt: on

    # --- grouping by nothing ---------------------------------------------
    result5 = (
        c.group_by()
        .aggregate(c.ReduceFuncs.Sum(c.item("debit")))
        .execute(data, debug=False)
    )
    assert result5 == 583

    with pytest.raises(c.ConversionException):
        # there's a single group by field, while we use separate items
        # of this tuple in aggregate
        c.group_by(by).aggregate(
            by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")),)
        ).execute(data, debug=False)
def test_doc__index_deserialization():
    """Documentation example: deserialize raw records into ``Employee``."""

    class Employee:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    john = {
        "id": 1,
        "first_name": "john",
        "last_name": "black",
        "dob": None,
        "salary": "1,000.00",
        "department": "D1 ",
        "date": "2000-01-01",
    }
    bob = {
        "id": 2,
        "first_name": "bob",
        "last_name": "wick",
        "dob": "1900-01-01",
        "salary": "1,001.00",
        "department": "D3 ",
        "date": "2000-01-01",
    }
    input_data = {"objects": [john, bob]}

    # prepare a few conversions to reuse
    c_strip = c.this.call_method("strip")
    c_capitalize = c.this.call_method("capitalize")
    c_decimal = c.this.call_method("replace", ",", "").as_type(Decimal)
    c_date = c.call_func(
        datetime.strptime, c.this, "%Y-%m-%d"
    ).call_method("date")
    # reusing c_date
    c_optional_date = c.if_(c.this, c_date, None)

    first_name = c.item("first_name").pipe(c_capitalize)
    last_name = c.item("last_name").pipe(c_capitalize)
    # call "format" method of a string and pass first & last names as
    # parameters
    full_name = c("{} {}").call_method("format", first_name, last_name)

    employee_fields = {
        "id": c.item("id"),
        "first_name": first_name,
        "last_name": last_name,
        "full_name": full_name,
        "date_of_birth": c.item("dob").pipe(c_optional_date),
        "salary": c.item("salary").pipe(c_decimal),
        # pass a hardcoded dict and to get value by "department"
        # key
        "department_id": c.naive(
            {
                "D1": 10,
                "D2": 11,
                "D3": 12,
            }
        ).item(c.item("department").pipe(c_strip)),
        "date": c.item("date").pipe(c_date),
    }

    conv = (
        c.item("objects")
        .pipe(c.generator_comp(employee_fields))
        .pipe(
            c.dict_comp(
                c.item("id"),  # key
                c.apply_func(  # value
                    Employee,
                    args=(),
                    kwargs=c.this,
                ),
            )
        )
        .gen_converter(debug=True)  # to see print generated code
    )

    result = conv(input_data)

    assert result[1].kwargs == {
        "date": date(2000, 1, 1),
        "date_of_birth": None,
        "department_id": 10,
        "first_name": "John",
        "full_name": "John Black",
        "id": 1,
        "last_name": "Black",
        "salary": Decimal("1000.00"),
    }
    assert result[2].kwargs == {
        "date": date(2000, 1, 1),
        "date_of_birth": date(1900, 1, 1),
        "department_id": 12,
        "first_name": "Bob",
        "full_name": "Bob Wick",
        "id": 2,
        "last_name": "Wick",
        "salary": Decimal("1001.00"),
    }