def test_nested_loop_joins(): from convtools.conversion import join join1 = ( join( c.item(0), c.item(1), c.and_( c.LEFT.item("id") - 1 == c.RIGHT.item("ID"), c.LEFT.item("value") + 100 < c.RIGHT.item("value"), c.LEFT.item("value") > 101, c.RIGHT.item("value") < 209, c.input_arg("flag"), ), ) .as_type(list) .gen_converter(debug=True) ) assert join1( [ [{"id": i, "value": i + 100} for i in range(10)], [{"ID": i, "value": 210 - i} for i in range(10)], ], flag=True, ) == [ ({"id": 3, "value": 103}, {"ID": 2, "value": 208}), ({"id": 4, "value": 104}, {"ID": 3, "value": 207}), ({"id": 5, "value": 105}, {"ID": 4, "value": 206}), ] join2 = ( join( c.item(0), c.item(1), c.and_(c.LEFT * c.RIGHT < 30, c.LEFT + c.RIGHT > 8), ) .as_type(list) .gen_converter(debug=False) ) assert join2([range(4, 10), range(4, 10)]) == [ (4, 5), (4, 6), (4, 7), (5, 4), (5, 5), (6, 4), (7, 4), ] join3 = ( join(c.item(0), c.item(1), Eq(c.LEFT + c.RIGHT, 1)) .as_type(list) .gen_converter(debug=True) ) assert join3(([-1, 0, 1], [2, 1, 1])) == [(-1, 2), (0, 1), (0, 1)]
def test_naive_conversion_or_and(): assert c.naive(False).or_(c.naive(False)).gen_converter()(100) is False assert (c.naive(False) | c.naive(False)).gen_converter()(100) is False assert c.naive(0).or_(c.naive(10)).gen_converter()(100) == 10 assert c.naive(10).and_(c.naive(0)).gen_converter()(100) == 0 assert (c.naive(10) & c.naive(0)).gen_converter()(100) == 0 assert (c.this.and_(1).and_(2).execute(1) == c.and_(c.this, 1, 2).execute(1) == 2) assert (c.this.or_(1).or_(2).execute(1) == c.or_(c.this, 1, 2).execute(1) == 1) assert (c.this.and_(1).and_(2).or_(3).execute(1) == c.and_( c.this, 1, 2).or_(3).execute(1) == 2)
def test_right_join(): join1 = ( c.join( c.item(0), c.item(1), c.and_( c.LEFT == c.RIGHT, c.LEFT + c.RIGHT < 10, c.LEFT > 0, ), how="right", ) .as_type(list) .gen_converter(debug=False) ) assert join1([(0, 1, 2, 3, 3), (3, 3, 4, 5)]) == [ (3, 3), (3, 3), (3, 3), (3, 3), (None, 4), (None, 5), ] conv = ( c.join( c.item("first"), c.item("second"), ( c.LEFT.item("name").call_method("lower") == c.RIGHT.item("full_name").call_method("lower") ), how="right", ) .as_type(list) .gen_converter(debug=False) ) # fmt: off assert conv( { "first": [ {"name": "JOHN"}, {"name": "bob"}, {"name": "ron"}, ], "second": [ {"full_name": "BOB"}, {"full_name": "BOB"}, {"full_name": "John"}, {"full_name": "Nick"}, {"full_name": "Nick"}, ], } ) == [ ({"name": "bob"}, {"full_name": "BOB"},), ({"name": "bob"}, {"full_name": "BOB"},), ({"name": "JOHN"}, {"full_name": "John"},), (None, {"full_name": "Nick"}), (None, {"full_name": "Nick"}), ]
def test_or_and_not(): assert c.or_(None, 0).gen_converter()(100) == 0 assert c.and_(None, 0).gen_converter()(100) is None assert c.not_(True).gen_converter()(100) is False assert (~c.this).gen_converter()(True) is False assert c.naive(None).not_().execute(100) is True with pytest.raises(ValueError): c.or_()
def test_comprehension_where(): assert (c.generator_comp( c.this.neg(), where=c.this > 6).as_type(list).filter(c.this > -9).execute( range(10), debug=False)) == [-7, -8] assert (c.this.iter(c.this.neg(), where=c.this > 6).as_type(list).filter( c.this > -9).execute(range(10), debug=False)) == [-7, -8] assert (c.iter(c.this.neg(), where=c.this > 6).as_type(list).filter(c.this > -9).execute( range(10), debug=False)) == [-7, -8] assert c.iter(c.this, where=c.and_(default=True)).as_type(list).execute( range(3)) == [0, 1, 2] assert c.iter(c.this, where=True).as_type(list).execute(range(3)) == [ 0, 1, 2, ] assert (c.iter(c.this, where=c.and_(default=False)).as_type(list).execute( range(3)) == [])
def test_group_by_percentile(): input_data = [{ "key": key, "value": value } for index, key in enumerate("abc") for value in range(index + 90, -1, -1)] c_round = c.call_func(round, c.this, 2) result = (c.group_by(c.item("key")).aggregate({ "key": c.item("key"), "min": c.ReduceFuncs.Percentile(0, c.item("value")).pipe(c_round), "min": c.ReduceFuncs.Percentile(0, c.item("value"), where=c.and_(default=True)).pipe(c_round), "percentile_5": c.ReduceFuncs.Percentile(5, c.item("value")).pipe(c_round), "median": c.ReduceFuncs.Percentile(50, c.item("value")).pipe(c_round), "percentile_95": c.ReduceFuncs.Percentile(95, c.item("value")).pipe(c_round), "max": c.ReduceFuncs.Percentile(100, c.item("value")).pipe(c_round), }).execute(input_data)) assert result == [ { "key": "a", "max": 90, "median": 45.0, "min": 0.0, "percentile_5": 4.5, "percentile_95": 85.5, }, { "key": "b", "max": 91, "median": 45.5, "min": 0.0, "percentile_5": 4.55, "percentile_95": 86.45, }, { "key": "c", "max": 92, "median": 46.0, "min": 0.0, "percentile_5": 4.6, "percentile_95": 87.4, }, ]
def test_outer_join(): join1 = ( c.join( c.item(0), c.item(1), Eq(c.LEFT, c.RIGHT, 2), how="full", ) .as_type(list) .gen_converter(debug=False) ) assert join1(([0, 1, 2, 5], [2, 3, 4, 5])) == [ (0, None), (1, None), (2, 2), (5, None), (None, 3), (None, 4), (None, 5), ] join2 = ( c.join( c.item(0), c.item(1), c.and_( c.LEFT == c.RIGHT, c.LEFT + c.RIGHT < 10, c.LEFT > 0, ), how="outer", ) .as_type(list) .gen_converter(debug=False) ) assert join2([(10, 7, 8, 0, 1, 2, 3, 3), (3, 3, 4, 5, 8)]) == [ (10, None), (7, None), (8, None), (0, None), (1, None), (2, None), (3, 3), (3, 3), (3, 3), (3, 3), (None, 4), (None, 5), (None, 8), ]
def test_right_join(): join1 = (c.join( c.item(0), c.item(1), c.and_( c.LEFT == c.RIGHT, c.LEFT + c.RIGHT < 10, c.LEFT > 0, ), how="right", ).as_type(list).gen_converter(debug=False)) assert join1([(0, 1, 2, 3, 3), (3, 3, 4, 5)]) == [ (3, 3), (3, 3), (3, 3), (3, 3), (None, 4), (None, 5), ]
def test_left_join(): join1 = ( join( c.item(0), c.item(1), c.and_(c.LEFT == c.RIGHT, c.LEFT + c.RIGHT < 10, c.LEFT > 0,), how="left", ) .as_type(list) .gen_converter(debug=True) ) assert join1([(0, 1, 2, 3, 3), (3, 3, 4, 5)]) == [ (0, None), (1, None), (2, None), (3, 3), (3, 3), (3, 3), (3, 3), ]
def test_chunks_by_condition(data_for_chunking): assert c.chunk_by_condition(c.call_func(len, c.CHUNK) < 5).iter( c.list_comp(c.item("z")) ).as_type(list).execute(data_for_chunking) == [ [10, 11, 12, 13, 14], [15, 16, 17, 18], ] assert c.chunk_by_condition( c.and_(c.call_func(len, c.CHUNK) < 5, c.item("z") < 18) ).aggregate(c.ReduceFuncs.Median(c.item("z"))).as_type(list).execute( data_for_chunking, ) == [ 12, 16, 18, ] assert c.chunk_by_condition(False).as_type(list).execute(range(3)) == [ [0], [1], [2], ]
def test_join_conditions(): join_conditions = _JoinConditions.from_condition(c.LEFT == c.RIGHT) assert (True and join_conditions.inner_loop_conditions == [] and join_conditions.left_collection_filters == [] and join_conditions.left_row_filters == [] and join_conditions.left_row_hashers == [c.LEFT] and join_conditions.pre_filter == [] and join_conditions.right_collection_filters == [] and join_conditions.right_row_filters == [] and join_conditions.right_row_hashers == [c.RIGHT]) join_conditions = _JoinConditions.from_condition( c.or_(c.LEFT == c.RIGHT, c.LEFT == c.RIGHT)) c11 = c.LEFT.item(0) c12 = c.RIGHT.item(1) c21 = c.LEFT.item(1) c22 = c.RIGHT.item(0) c13 = c.LEFT.item(2) > 10 c23 = c.RIGHT.item(2) < 10 c01 = c.input_arg("x") > 100 join_conditions = _JoinConditions.from_condition( c.and_(c11 == c12, c22 == c21, c13).and_(c23, c01)) assert (True and join_conditions.inner_loop_conditions == [] and join_conditions.left_collection_filters == [c13] and join_conditions.left_row_filters == [] and join_conditions.left_row_hashers == [c11, c21] and join_conditions.pre_filter == [c01] and join_conditions.right_collection_filters == [c23] and join_conditions.right_row_filters == [] and join_conditions.right_row_hashers == [c12, c22]) join_conditions = _JoinConditions.from_condition(c.and_( c11 == c12, c22 == c21, c13).and_(c23, c01), how="left") assert (True and join_conditions.inner_loop_conditions == [] and join_conditions.left_collection_filters == [] and join_conditions.left_row_filters == [c13] and join_conditions.left_row_hashers == [c11, c21] and join_conditions.pre_filter == [c01] and join_conditions.right_collection_filters == [c23] and join_conditions.right_row_filters == [] and join_conditions.right_row_hashers == [c12, c22]) join_conditions = _JoinConditions.from_condition(c.and_( c11 == c12, c22 == c21, c13).and_(c23, c01), how="right") assert (True and join_conditions.inner_loop_conditions == [] and join_conditions.left_collection_filters == [c13] and join_conditions.left_row_filters == [] and join_conditions.left_row_hashers == [c11, c21] and join_conditions.pre_filter == [c01] and join_conditions.right_collection_filters == [] and join_conditions.right_row_filters == [c23] and join_conditions.right_row_hashers == [c12, c22]) join_conditions = _JoinConditions.from_condition(c.and_( c11 == c12, c22 == c21, c13).and_(c23, c01), how="outer") assert (True and join_conditions.inner_loop_conditions == [] and join_conditions.left_collection_filters == [] and join_conditions.left_row_filters == [c13] and join_conditions.left_row_hashers == [c11, c21] and join_conditions.pre_filter == [c01] and join_conditions.right_collection_filters == [] and join_conditions.right_row_filters == [c23] and join_conditions.right_row_hashers == [c12, c22]) with pytest.raises(AssertionError): _JoinConditions.from_condition(c.and_(True, False), how="abc") c1 = c.LEFT != c.RIGHT join_conditions = _JoinConditions.from_condition(c1) assert (True and join_conditions.inner_loop_conditions == [c1] and join_conditions.left_collection_filters == [] and join_conditions.left_row_filters == [] and join_conditions.left_row_hashers == [] and join_conditions.pre_filter == [] and join_conditions.right_collection_filters == [] and join_conditions.right_row_filters == [] and join_conditions.right_row_hashers == []) cond = c.LEFT > c.RIGHT join_conditions = _JoinConditions.from_condition(cond) assert (True and join_conditions.inner_loop_conditions == [cond] and join_conditions.left_collection_filters == [] and join_conditions.left_row_filters == [] and join_conditions.left_row_hashers == [] and join_conditions.pre_filter == [] and join_conditions.right_collection_filters == [] and join_conditions.right_row_filters == [] and join_conditions.right_row_hashers == []) c1 = c.LEFT == 1 c2 = c.RIGHT == 1 c3 = c.input_arg("x") == 1 join_conditions = _JoinConditions.from_condition(c1.and_(c2, c3), how="outer") assert (True and join_conditions.inner_loop_conditions == [] and join_conditions.left_collection_filters == [] and join_conditions.left_row_filters == [c1] and join_conditions.left_row_hashers == [] and join_conditions.pre_filter == [c3] and join_conditions.right_collection_filters == [] and join_conditions.right_row_filters == [c2] and join_conditions.right_row_hashers == []) c1 = c.LEFT + c.RIGHT + 10 c2 = c.LEFT + 1 c3 = c.RIGHT + 1 join_conditions = _JoinConditions.from_condition(c.and_(c1, c2, c3)) assert (True and join_conditions.inner_loop_conditions == [c1] and join_conditions.left_collection_filters == [c2] and join_conditions.left_row_filters == [] and join_conditions.left_row_hashers == [] and join_conditions.pre_filter == [] and join_conditions.right_collection_filters == [c3] and join_conditions.right_row_filters == [] and join_conditions.right_row_hashers == [])
def test_hash_joins(): join1 = (c.join( c.item(0), c.item(1), c.LEFT.item("id") == c.RIGHT.item("id")).as_type(list).gen_converter( debug=False)) join1([ [{ "id": i, "value": i + 100 } for i in range(3)], [{ "id": i, "value": i + 200 } for i in range(3)], ]) == [ ({ "id": 0, "value": 100 }, { "id": 0, "value": 200 }), ({ "id": 1, "value": 101 }, { "id": 1, "value": 201 }), ({ "id": 2, "value": 102 }, { "id": 2, "value": 202 }), ] join2 = (c.join( c.item(0), c.item(1), c.and_( c.LEFT.item("id") == c.RIGHT.item("ID"), c.LEFT.item("value") > 105, c.RIGHT.item("value") < 209, c.input_arg("flag"), ), ).as_type(list).gen_converter(debug=False)) assert join2( [ [{ "id": i, "value": i + 100 } for i in range(10)], [{ "ID": i, "value": i + 200 } for i in range(10)], ], flag=True, ) == [ ({ "id": 6, "value": 106 }, { "ID": 6, "value": 206 }), ({ "id": 7, "value": 107 }, { "ID": 7, "value": 207 }), ({ "id": 8, "value": 108 }, { "ID": 8, "value": 208 }), ] assert (join2( [ [{ "id": i, "value": i + 100 } for i in range(10)], [{ "ID": i, "value": i + 200 } for i in range(10)], ], flag=False, ) == []) join2 = (c.join( c.item(0), c.item(1), c.and_( c.LEFT.item("id") == c.RIGHT.item("ID"), c.LEFT.item("value") > 105, c.RIGHT.item("value") < 209, c.input_arg("flag"), ), ).as_type(list).gen_converter(debug=False)) assert join2( [ [{ "id": i, "value": i + 100 } for i in range(10)], [{ "ID": i, "value": i + 200 } for i in range(10)], ], flag=True, ) == [ ({ "id": 6, "value": 106 }, { "ID": 6, "value": 206 }), ({ "id": 7, "value": 107 }, { "ID": 7, "value": 207 }), ({ "id": 8, "value": 108 }, { "ID": 8, "value": 208 }), ]
def test_or_and_not(): assert c.or_(None, 0).gen_converter()(100) == 0 assert c.and_(None, 0).gen_converter()(100) is None assert c.not_(True).gen_converter()(100) is False assert (~c.this()).gen_converter()(True) is False assert c.naive(None).not_().execute(100) is True
def test_doc__index_intro(): # ======== # # GROUP BY # # ======== # input_data = [ { "a": 5, "b": "foo" }, { "a": 10, "b": "foo" }, { "a": 10, "b": "bar" }, { "a": 10, "b": "bar" }, { "a": 20, "b": "bar" }, ] conv = (c.group_by(c.item("b")).aggregate({ "b": c.item("b"), "a_first": c.ReduceFuncs.First(c.item("a")), "a_max": c.ReduceFuncs.Max(c.item("a")), }).gen_converter(debug=True)) assert conv(input_data) == [ { "b": "foo", "a_first": 5, "a_max": 10 }, { "b": "bar", "a_first": 10, "a_max": 20 }, ] # ========= # # AGGREGATE # # ========= # conv = c.aggregate({ # list of "a" values where "b" equals to "bar" "a": c.ReduceFuncs.Array(c.item("a"), where=c.item("b") == "bar"), # "b" value of a row where "a" has Max value "b": c.ReduceFuncs.MaxRow(c.item("a"), ).item("b", default=None), }).gen_converter(debug=True) assert conv(input_data) == {"a": [10, 10, 20], "b": "bar"} # ==== # # JOIN # # ==== # collection_1 = [ { "id": 1, "name": "Nick" }, { "id": 2, "name": "Joash" }, { "id": 3, "name": "Bob" }, ] collection_2 = [ { "ID": "3", "age": 17, "country": "GB" }, { "ID": "2", "age": 21, "country": "US" }, { "ID": "1", "age": 18, "country": "CA" }, ] input_data = (collection_1, collection_2) conv = (c.join( c.item(0), c.item(1), c.and_( c.LEFT.item("id") == c.RIGHT.item("ID").as_type(int), c.RIGHT.item("age") >= 18, ), how="left", ).pipe( c.list_comp({ "id": c.item(0, "id"), "name": c.item(0, "name"), "age": c.item(1, "age", default=None), "country": c.item(1, "country", default=None), })).gen_converter(debug=True)) assert conv(input_data) == [ { "id": 1, "name": "Nick", "age": 18, "country": "CA" }, { "id": 2, "name": "Joash", "age": 21, "country": "US" }, { "id": 3, "name": "Bob", "age": None, "country": None }, ]