def test_nested_dataframe(): df1 = DataFrame() df2 = DataFrame(expr=ast_DataFrame(df1)) r = dumps(df1 + df2) assert '\n'.join(r) == '''df_1 = DataFrame()
def test_callback(): df = DataFrame() df1 = df.map(lambda e: e.x) r = dumps(df1) assert '\n'.join(r) == '''df_1 = DataFrame()
def test_computed_col_def_updated():
    """Render a lambda-backed computed column, redefine it, and render again.

    After each definition of ``ptgev`` the rendered expression must be a
    one-argument ``ast.Call`` whose function is an ``ast_Callable``, and
    ``render_callable`` must be able to expand that callable.
    """
    def render_and_expand(column):
        # Shared checks for both passes: top-level shape, then expansion of
        # the callable against the dataframe it is attached to.
        rendered, context = render(column)
        assert isinstance(rendered, ast.Call)
        assert isinstance(rendered.func, ast_Callable)
        assert len(rendered.args) == 1
        _expanded, _ = render_callable(
            rendered.func, context, rendered.func.dataframe)  # type: ignore

    df = DataFrame()

    # First definition of the computed column.
    df.jets['ptgev'] = lambda j: j.pt / 1000
    render_and_expand(df.jets.ptgev)

    # Run the render again after replacing the definition.
    df.jets['ptgev'] = lambda j: j.pt / 1001
    render_and_expand(df.jets.ptgev)
def test_second_dr_returns_filtered():
    """Recursively render a computed column that chains several lambdas.

    Electrons get a ``near_mcs`` column (truth particles selected through a
    back-end ``DeltaR`` user function), a ``hasMC`` flag, and - on the
    filtered electrons - an ``mc`` column. The top-level render must be an
    ``ast.Call``, and expanding every ``ast_Callable`` call found inside it
    must complete and return a non-``None`` tree.
    """
    df = DataFrame()

    @user_func
    def DeltaR(p1_eta: float) -> float:
        '''
        Calculate the DeltaR between two particles given their `eta` and `phi` locations.
        Implemented on the back end.
        '''
        assert False, 'This should never be called'

    mc_part = df.TruthParticles('TruthParticles')
    electrons = df.Electrons('Electrons')

    def dr(e, mc):
        '''Make calculating DR easier as I have a hard-to-use DR calculation function
        on the back end. Only `e.eta()` is forwarded; `mc` is not used.'''
        return DeltaR(e.eta())

    def very_near2(mcs, e):
        'Return all particles in mcs whose `dr` to `e` is less than 0.1'
        return mcs[lambda m: dr(e, m) < 0.1]

    electrons['near_mcs'] = lambda reco_e: very_near2(mc_part, reco_e)
    electrons['hasMC'] = lambda e: e.near_mcs.Count() > 0

    matched_electrons = electrons[electrons.hasMC]
    matched_electrons['mc'] = lambda e: e.near_mcs.First().ptgev

    top_expr, top_context = render(matched_electrons.mc)

    class DeepRenderer(ast.NodeTransformer):
        """Expand every single-argument ``ast_Callable`` call it visits."""

        def __init__(self, context):
            super().__init__()
            self._context = context

        def visit_Call(self, node: ast.Call):
            func = node.func
            if not isinstance(func, ast_Callable):
                return self.generic_visit(node)

            assert len(node.args) == 1
            expanded, inner_context = render_callable(
                cast(ast_Callable, func), self._context, func.dataframe)  # type: ignore

            # Visit the expansion under the context it produced, then put
            # the previous context back even if the visit raises.
            saved_context = self._context
            try:
                self._context = inner_context
                return self.visit(expanded)
            finally:
                self._context = saved_context

    assert isinstance(top_expr, ast.Call)
    fully_rendered = DeepRenderer(top_context).visit(top_expr)
    assert fully_rendered is not None
def test_fluent_function_kwarg():
    """A method call with a keyword argument is recorded as an ``ast.Call`` keyword."""
    frame = DataFrame()
    counted = frame.count(dude=22.0)

    assert counted.filter is None
    assert counted.child_expr is not None

    expected_dump = (
        "Call(func=Attribute(value=ast_DataFrame(), attr='count', ctx=Load()), args=[], "
        "keywords=[keyword(arg='dude', value=Num(n=22.0))])"
    )
    assert ast.dump(counted.child_expr) == expected_dump
def test_lambda_function(): df = DataFrame() df.jets['ptgev'] = lambda j: j.pt / 1000 df1 = df.jets.ptgev r = dumps(df1) assert '\n'.join(r) == '''df_1 = DataFrame()
def test_repeated_use(): df = DataFrame() df1 = df.jets("Hi") df2 = df1 + df1 r = dumps(df2) assert '\n'.join(r) == '''df_1 = DataFrame()
def test_lambda_in_filter():
    """Indexing a DataFrame with an ``apply`` result stores it as a ``Column`` filter."""
    frame = DataFrame()
    selected = frame[frame.apply(lambda e: e == 1)]

    # The selection wraps the parent dataframe directly ...
    assert isinstance(selected.child_expr, ast_DataFrame)

    # ... and its filter is a Column whose expression is the apply call.
    assert selected.filter is not None
    assert isinstance(selected.filter, Column)
    assert isinstance(selected.filter.child_expr, ast.Call)
def test_create_col_no_confusion():
    """A column defined on ``df.jets`` is not substituted when read off ``df.jets.pt``.

    ``ptgev`` is attached to ``df.jets``; looking it up on ``df.jets.pt``
    must still yield a plain attribute access rather than the stored
    expression.
    """
    df = DataFrame()
    df.jets['ptgev'] = df.jets.pt / 1000.0

    unrelated = df.jets.pt.ptgev

    assert unrelated.child_expr is not None
    dumped = ast.dump(unrelated.child_expr)
    assert dumped == "Attribute(value=ast_DataFrame(), attr='ptgev', ctx=Load())"
def test_fluent_function_pos_arg():
    """A method call with a positional argument is recorded in the ``ast.Call`` args."""
    frame = DataFrame()
    counted = frame.count(22.0)

    assert counted.filter is None
    assert counted.child_expr is not None

    expected_dump = (
        "Call(func=Attribute(value=ast_DataFrame(), attr='count', ctx=Load()), "
        "args=[Num(n=22.0)], keywords=[])"
    )
    assert ast.dump(counted.child_expr) == expected_dump
def test_create_col_access_with_text():
    """A computed column read back with ``['name']`` yields its stored expression."""
    df = DataFrame()
    df.jets['ptgev'] = df.jets.pt / 1000

    by_key = df.jets['ptgev']

    assert by_key.child_expr is not None
    dumped = ast.dump(by_key.child_expr)
    assert dumped == "BinOp(left=ast_DataFrame(), op=Div(), right=Num(n=1000))"
def test_lambda_argument():
    """A lambda passed to ``apply`` becomes the call's single ``ast_Callable`` argument."""
    frame = DataFrame()
    applied = frame.apply(lambda e: e)

    call_expr = applied.child_expr
    assert call_expr is not None
    assert isinstance(call_expr, ast.Call)
    assert len(call_expr.args) == 1

    only_arg = call_expr.args[0]
    assert isinstance(only_arg, ast_Callable)
def test_render_func_with_df_arg():
    """A DataFrame passed as a call argument renders as an ``ast_DataFrame`` pointing at it."""
    root = DataFrame()
    rendered, _ = render(root.count(root))

    assert isinstance(rendered, ast.Call)
    assert len(rendered.args) == 1

    first_arg = rendered.args[0]  # type: ast.AST
    assert isinstance(first_arg, ast_DataFrame)
    assert first_arg.dataframe is root
def test_render_func_with_args():
    """A literal call argument survives rendering as an ``ast.Num`` with the same value."""
    root = DataFrame()
    rendered, _ = render(root.count(10))

    assert isinstance(rendered, ast.Call)
    assert len(rendered.args) == 1

    first_arg = rendered.args[0]
    assert isinstance(first_arg, ast.Num)
    assert first_arg.n == 10
def test_computed_reference():
    """An expression-backed computed column renders as its binary operation."""
    df = DataFrame()
    df.jets['ptgev'] = df.jets.pt / 1000

    rendered, _context = render(df.jets.ptgev)

    # Expect `<attribute> / <number>`, i.e. the stored definition.
    assert isinstance(rendered, ast.BinOp)
    assert isinstance(rendered.left, ast.Attribute)
    assert isinstance(rendered.right, ast.Num)
def test_render_base_call():
    """A no-argument method call renders as an attribute call on the source DataFrame."""
    root = DataFrame()
    rendered, _ = render(root.count())

    assert isinstance(rendered, ast.Call)
    assert len(rendered.args) == 0

    callee = rendered.func
    assert isinstance(callee, ast.Attribute)

    receiver = callee.value  # type: ast.AST
    assert isinstance(receiver, ast_DataFrame)
    assert receiver.dataframe is root
def test_callable_wrong_number_args():
    """``render_callable`` raises when given two arguments for a one-parameter lambda."""
    root = DataFrame()
    rendered, context = render(root.apply(lambda b: b))
    assert isinstance(rendered, ast.Call)

    callable_arg = rendered.args[0]  # type: ast.AST
    assert isinstance(callable_arg, ast_Callable)

    # One extra positional argument beyond what the lambda accepts.
    with pytest.raises(Exception):
        render_callable(callable_arg, context, root, root)
def test_callable_returns_const():
    """Expanding a lambda that returns a constant yields an ``ast.Num`` of that value."""
    root = DataFrame()
    rendered, context = render(root.apply(lambda b: 20))
    assert isinstance(rendered, ast.Call)

    callable_arg = rendered.args[0]  # type: ast.AST
    assert isinstance(callable_arg, ast_Callable)

    expanded, _new_context = render_callable(callable_arg, context, root)
    assert isinstance(expanded, ast.Num)
    assert expanded.n == 20
def test_callable_simple_call():
    """Expanding an identity lambda returns an ``ast_DataFrame`` for the argument passed in."""
    root = DataFrame()
    rendered, context = render(root.apply(lambda b: b))
    assert isinstance(rendered, ast.Call)

    callable_arg = rendered.args[0]  # type: ast.AST
    assert isinstance(callable_arg, ast_Callable)

    expanded, _new_context = render_callable(callable_arg, context, root)
    assert isinstance(expanded, ast_DataFrame)
    assert expanded.dataframe is root
def test_create_col_twice():
    """Assigning a computed column twice leaves the second definition in effect."""
    df = DataFrame()
    df.jets['ptgev'] = df.jets.pt / 1000.0

    # This should generate a warning, but nothing else.
    df.jets['ptgev'] = df.jets.pt / 1001

    column = df.jets.ptgev

    assert column.child_expr is not None
    dumped = ast.dump(column.child_expr)
    assert dumped == "BinOp(left=ast_DataFrame(), op=Div(), right=Num(n=1001))"
def test_create_col_with_lambda():
    """A lambda-backed column reads back as a call of an ``ast_Callable`` on its parent.

    The call takes one ``ast_DataFrame`` argument; the dataframe it wraps has
    an attribute-access expression, and the callable is bound to that same
    dataframe.
    """
    df = DataFrame()
    df.jets['ptgev'] = lambda j: j.pt / 1000

    column = df.jets.ptgev

    assert column.child_expr is not None
    assert isinstance(column.child_expr, ast.Call)

    call_args = column.child_expr.args
    assert len(call_args) == 1
    assert isinstance(call_args[0], ast_DataFrame)

    parent = cast(ast_DataFrame, call_args[0]).dataframe
    assert isinstance(parent.child_expr, ast.Attribute)

    callee = column.child_expr.func
    assert isinstance(callee, ast_Callable)
    assert cast(ast_Callable, callee).dataframe is parent
def test_create_col_yuck_doesnot_track():
    """A column defined from ``df.met`` stays rooted at ``df`` when read through a filter.

    ``ptgev`` on ``df.jets`` is set to ``df.met``; reading it from filtered
    jets must give the ``met`` attribute whose parent dataframe is ``df``
    itself.
    """
    df = DataFrame()
    df.jets['ptgev'] = df.met

    column = df.jets[df.jets.eta < 2.4].ptgev

    assert column.child_expr is not None
    dumped = ast.dump(column.child_expr)
    assert dumped == "Attribute(value=ast_DataFrame(), attr='met', ctx=Load())"
    assert isinstance(column.child_expr, ast.Attribute)

    parent_node = column.child_expr.value
    assert isinstance(parent_node, ast_DataFrame)

    parent_df = cast(ast_DataFrame, parent_node).dataframe
    assert parent_df is df
def test_callable_function():
    """A named function passed to ``apply`` expands the same way as an identity lambda."""
    def test_func(b):
        return b

    root = DataFrame()
    rendered, context = render(root.apply(test_func))
    assert isinstance(rendered, ast.Call)

    callable_arg = rendered.args[0]  # type: ast.AST
    assert isinstance(callable_arg, ast_Callable)

    expanded, _new_context = render_callable(callable_arg, context, root)
    assert isinstance(expanded, ast_DataFrame)
    assert expanded.dataframe is root
def test_np_func_where(): import numpy as np d = DataFrame() f1 = np.where(d.x > 0, d.x, d.y) from dataframe_expressions import dumps assert '\n'.join(dumps(cast(DataFrame, f1))) == '''df_1 = DataFrame()
def test_simple_slice():
    """Integer indexing renders as an ``ast.Subscript`` over the source DataFrame."""
    root = DataFrame()
    rendered, _ = render(root[0])

    assert isinstance(rendered, ast.Subscript)
    assert isinstance(rendered.value, ast_DataFrame)
def test_mask_operator_and():
    """Combining two comparison masks with ``&`` produces a two-operand ``BoolOp(And)``."""
    root = DataFrame()
    not_ten = root.x != 10
    not_eight = root.x != 8

    combined = not_ten & not_eight

    dumped = ast.dump(combined.child_expr)
    assert dumped == "BoolOp(op=And(), values=[ast_Column(), ast_Column()])"
def test_resolve_hidden_alias():
    """An alias registered for ``jets.pt`` is applied when ``df.jets.pt`` is accessed.

    The alias divides by ``1000.0``; the test only checks that ``1000``
    appears in the dumped expression and that no filter is attached.
    """
    define_alias("jets", "pt", lambda j: j.pt / 1000.0)

    frame = DataFrame()
    aliased = frame.jets.pt

    assert aliased.filter is None
    assert aliased.child_expr is not None

    dumped = ast.dump(aliased.child_expr)
    assert '1000' in dumped
def test_render_twice_for_same_results():
    """Rendering the same expression twice gives identical trees and same-sized contexts."""
    df = DataFrame()
    electrons = df.Electrons()
    truth = df.TruthParticles()

    # Two stacked filters on the truth particles, then a map that ignores
    # its argument and returns the filtered truth collection.
    truth_electrons = truth[truth.pdgId == 11]
    good_mc_ele = truth_electrons[truth_electrons.ptgev > 20]
    ele_mcs = electrons.map(lambda reco_e: good_mc_ele)

    first_expr, first_context = render(ele_mcs)
    second_expr, second_context = render(ele_mcs)

    assert ast.dump(first_expr) == ast.dump(second_expr)
    assert len(first_context._resolved) == len(second_context._resolved)
    assert len(first_context._seen_datasources) == len(second_context._seen_datasources)
def test_filter_with_attribute():
    """An attribute read off a filtered collection renders as an attribute of an ``ast_Filter``."""
    root = DataFrame()
    filtered_pt = root.jets[root.jets.pt > 30].pt

    rendered, _ = render(filtered_pt)

    assert isinstance(rendered, ast.Attribute)
    assert isinstance(rendered.value, ast_Filter)
def test_np_func_histogram(): import numpy as np d = DataFrame() f1 = np.histogram(d.x, bins=50, range=(-0.5, 10.0)) from dataframe_expressions import dumps assert '\n'.join(dumps(cast(DataFrame, f1))) == '''df_1 = DataFrame()