def test_computed_col_def_updated():
    """A lambda-defined computed column renders as a callable, before and after redefinition."""
    df = DataFrame()

    def render_ptgev_and_check():
        # Render `df.jets.ptgev` and verify it is a one-argument call on an
        # ast_Callable, then make sure the callable itself can be rendered.
        column = df.jets.ptgev
        call_node, ctx = render(column)
        assert isinstance(call_node, ast.Call)
        assert isinstance(call_node.func, ast_Callable)
        assert len(call_node.args) == 1
        _body, _inner_ctx = render_callable(call_node.func, ctx, call_node.func.dataframe)  # type: ignore

    df.jets['ptgev'] = lambda j: j.pt / 1000
    render_ptgev_and_check()

    # Replace the column definition and run the render again.
    df.jets['ptgev'] = lambda j: j.pt / 1001
    render_ptgev_and_check()
def test_render_twice_with_filter():
    """Rendering the same filtered expression twice gives identical AST dumps."""
    d = DataFrame()
    filtered_pt = d.jets[d.jets.pt > 10].pt

    first_ast, _first_ctx = render(filtered_pt)
    second_ast, _second_ctx = render(filtered_pt)

    first_dump = ast.dump(first_ast)
    second_dump = ast.dump(second_ast)
    assert first_dump == second_dump
def test_computed_reference_updated():
    """A computed column given as an expression still renders after being redefined."""
    df = DataFrame()

    # First definition: render it once; the result itself is not inspected.
    df.jets['ptgev'] = df.jets.pt / 1000
    first_column = df.jets.ptgev
    _first_ast, _first_ctx = render(first_column)

    # Second definition: the render must come back as `<attribute> / <number>`.
    df.jets['ptgev'] = df.jets.pt / 1001
    second_column = df.jets.ptgev
    division, _ctx = render(second_column)

    assert isinstance(division, ast.BinOp)
    assert isinstance(division.left, ast.Attribute)
    assert isinstance(division.right, ast.Num)
def test_filter_with_attribute():
    """An attribute taken from a filtered collection renders as Attribute-over-ast_Filter."""
    d = DataFrame()
    pt_of_selected = d.jets[d.jets.pt > 30].pt

    attr_node, _ctx = render(pt_of_selected)

    assert isinstance(attr_node, ast.Attribute)
    # The attribute hangs off the filter node.
    assert isinstance(attr_node.value, ast_Filter)
def test_simple_slice():
    """Indexing a DataFrame with an integer renders as a Subscript on the ast_DataFrame."""
    d = DataFrame()
    first_item = d[0]

    subscript, _ctx = render(first_item)

    assert isinstance(subscript, ast.Subscript)
    assert isinstance(subscript.value, ast_DataFrame)
def test_render_twice_for_same_results():
    """Two renders of one map-over-filtered expression agree in AST and context sizes."""
    df = DataFrame()
    eles = df.Electrons()
    mc_part = df.TruthParticles()
    mc_ele = mc_part[mc_part.pdgId == 11]
    good_mc_ele = mc_ele[mc_ele.ptgev > 20]
    ele_mcs = eles.map(lambda reco_e: good_mc_ele)

    first_ast, first_ctx = render(ele_mcs)
    second_ast, second_ctx = render(ele_mcs)

    # The rendered trees must dump to the same text ...
    assert ast.dump(first_ast) == ast.dump(second_ast)
    # ... and both contexts must have tracked the same number of entries.
    assert len(first_ctx._resolved) == len(second_ctx._resolved)
    assert len(first_ctx._seen_datasources) == len(second_ctx._seen_datasources)
def test_second_dr_returns_filtered():
    """Nested computed columns built from a user function can be rendered all the way down.

    Builds `near_mcs`, `hasMC` and `mc` computed columns on electrons, renders
    the final column, and then recursively renders every ast_Callable found in
    the resulting tree.
    """
    df = DataFrame()

    @user_func
    def DeltaR(p1_eta: float) -> float:
        '''
        Stand-in for a DeltaR calculation that lives on the back end; the body
        here must never actually run.
        '''
        assert False, 'This should never be called'

    mc_part = df.TruthParticles('TruthParticles')
    eles = df.Electrons('Electrons')

    def delta_r(e, mc):
        '''Thin wrapper around the back-end DeltaR function (only `e.eta()` is passed on).'''
        return DeltaR(e.eta())

    def nearby_particles(mcs, e):
        '''Select the entries of `mcs` whose delta_r to `e` is below 0.1.'''
        return mcs[lambda m: delta_r(e, m) < 0.1]

    eles['near_mcs'] = lambda reco_e: nearby_particles(mc_part, reco_e)
    eles['hasMC'] = lambda e: e.near_mcs.Count() > 0

    good_eles_with_mc = eles[eles.hasMC]
    good_eles_with_mc['mc'] = lambda e: e.near_mcs.First().ptgev

    top_ast, top_ctx = render(good_eles_with_mc.mc)

    class DeepRenderer(ast.NodeTransformer):
        '''Expand every call on an ast_Callable by rendering the callable in place.'''

        def __init__(self, context):
            super().__init__()
            self._context = context

        def visit_Call(self, node: ast.Call):
            # Ordinary calls are simply walked.
            if not isinstance(node.func, ast_Callable):
                return self.generic_visit(node)

            assert len(node.args) == 1

            body, inner_ctx = render_callable(
                cast(ast_Callable, node.func), self._context, node.func.dataframe)  # type: ignore

            # Visit the rendered body under its own context, then restore ours
            # even if the visit raises.
            saved_ctx = self._context
            self._context = inner_ctx
            try:
                return self.visit(body)
            finally:
                self._context = saved_ctx

    assert isinstance(top_ast, ast.Call)
    fully_rendered = DeepRenderer(top_ctx).visit(top_ast)
    assert fully_rendered is not None
def test_collection_object_excl():
    """`x1` on a multi_leaf_object_excl wrapper renders as `x_new_1` on the DataFrame."""
    df = DataFrame()
    wrapped = multi_leaf_object_excl(df)

    rendered, _ctx = render(wrapped.x1)

    expected = "Attribute(value=ast_DataFrame(), attr='x_new_1', ctx=Load())"
    assert ast.dump(rendered) == expected
def test_render_single_collection():
    """`d.jets` renders as an Attribute named 'jets' whose value wraps `d` itself."""
    d = DataFrame()
    jets_column = d.jets

    attr_node, _ctx = render(jets_column)

    assert isinstance(attr_node, ast.Attribute)
    assert attr_node.attr == 'jets'

    root = attr_node.value  # type: ast.AST
    assert isinstance(root, ast_DataFrame)
    # The wrapped DataFrame must be the very object we started from.
    assert root.dataframe is d
def test_render_callable_captured():
    """A filter whose mask comes from a `map` lambda capturing another column renders as ast_Filter."""
    d = DataFrame()
    jets = d.jets
    mcs = d.mcs

    # The lambda ignores its argument and closes over `jets` instead.
    selected_mcs = mcs[mcs.map(lambda mc: jets.pt.Count() == 2)]

    filter_node, _ctx = render(selected_mcs)

    assert filter_node is not None
    assert isinstance(filter_node, ast_Filter)
def test_collection_nested():
    """`x2` on a leaf_object wrapping a multi_leaf_object renders as `x_new_1` on the DataFrame."""
    df = DataFrame()
    outer = multi_leaf_object(df)
    inner = leaf_object(outer)

    rendered, _ctx = render(inner.x2)

    expected = "Attribute(value=ast_DataFrame(), attr='x_new_1', ctx=Load())"
    assert ast.dump(rendered) == expected
def test_collection_subtract():
    """Subtracting two multi_leaf_object wrappers renders as `m1 - m2` on the DataFrame."""
    df = DataFrame()
    lhs = multi_leaf_object(df.m1)
    rhs = multi_leaf_object(df.m2)

    rendered, _ctx = render(lhs - rhs)

    expected = (
        "BinOp(left=Attribute(value=ast_DataFrame(), attr='m1', "
        "ctx=Load()), op=Sub(), right=Attribute(value=ast_DataFrame(), attr='m2', ctx=Load()))"
    )
    assert ast.dump(rendered) == expected
def test_render_func_with_args():
    """A method call with a literal argument renders as a Call carrying that number."""
    d = DataFrame()
    counted = d.count(10)

    call_node, _ctx = render(counted)

    assert isinstance(call_node, ast.Call)
    assert len(call_node.args) == 1

    only_arg = call_node.args[0]
    assert isinstance(only_arg, ast.Num)
    assert only_arg.n == 10
def test_collection_object_other_excl():
    """Rendering `x12` on a multi_leaf_object_excl wrapper fails with 'No such attribute'."""
    with pytest.raises(Exception) as excinfo:
        df = DataFrame()
        wrapped = multi_leaf_object_excl(df)
        missing = wrapped.x12
        _rendered, _ctx = render(missing)

    # The message is inspected once the `with` block has captured the
    # exception; anything placed after the raising statement inside the
    # block would never execute.
    assert 'No such attribute' in str(excinfo.value)
def test_render_func_with_df_arg():
    """A DataFrame passed as a call argument renders as an ast_DataFrame wrapping that object."""
    d = DataFrame()
    counted = d.count(d)

    call_node, _ctx = render(counted)

    assert isinstance(call_node, ast.Call)
    assert len(call_node.args) == 1

    only_arg = call_node.args[0]  # type: ast.AST
    assert isinstance(only_arg, ast_DataFrame)
    assert only_arg.dataframe is d
def test_callable_reference():
    """A lambda passed to `apply` shows up in the rendered Call as an ast_Callable argument."""
    d = DataFrame()
    applied = d.jets.apply(lambda b: b)

    call_node, _ctx = render(applied)

    assert isinstance(call_node, ast.Call)
    assert len(call_node.args) == 1

    callable_arg = call_node.args[0]  # type: ast.AST
    assert isinstance(callable_arg, ast_Callable)
def test_multilevel_subexpr():
    """The column being filtered and the column inside the filter condition share one AST node."""
    d = DataFrame()
    # `d.jets.pt` is deliberately spelled out twice: the renderer has to
    # recognise the two occurrences as the same sub-expression.
    high_pt = d.jets.pt[d.jets.pt > 30.0]

    filter_node, _ctx = render(high_pt)

    assert isinstance(filter_node, ast_Filter)
    assert isinstance(filter_node.filter, ast.Compare)

    node_in_condition = filter_node.filter.left
    node_being_filtered = filter_node.expr
    assert node_in_condition is node_being_filtered
def test_render_base_call():
    """A no-argument method call renders as a Call on an Attribute of the original DataFrame."""
    d = DataFrame()
    counted = d.count()

    call_node, _ctx = render(counted)

    assert isinstance(call_node, ast.Call)
    assert len(call_node.args) == 0

    method = call_node.func
    assert isinstance(method, ast.Attribute)

    receiver = method.value  # type: ast.AST
    assert isinstance(receiver, ast_DataFrame)
    assert receiver.dataframe is d
def test_simple_filter_lambda():
    """Filtering with a lambda renders as an ast_Filter whose condition reuses the filtered node."""
    d = DataFrame()
    positive_x = d[lambda j: j.x > 0]

    filter_node, _ctx = render(positive_x)
    assert isinstance(filter_node, ast_Filter)

    # check_col_comp hands back the DataFrame node found in the condition.
    condition_root = check_col_comp(filter_node.filter)
    assert condition_root.dataframe is d

    filtered = filter_node.expr
    assert isinstance(filtered, ast_DataFrame)
    assert filtered.dataframe is d
    assert filtered is condition_root
def test_callable_wrong_number_args():
    """render_callable raises when a one-parameter lambda is given two arguments."""
    d = DataFrame()
    applied = d.apply(lambda b: b)

    call_node, ctx = render(applied)
    assert isinstance(call_node, ast.Call)

    callable_arg = call_node.args[0]  # type: ast.AST
    assert isinstance(callable_arg, ast_Callable)

    # Two DataFrames for a single-parameter lambda.
    with pytest.raises(Exception):
        render_callable(callable_arg, ctx, d, d)
def test_render_func_with_dfattr_arg():
    """The receiver of a call and an identical column passed as its argument share one AST node."""
    d = DataFrame()
    # `d.jets` appears as both the receiver and the argument on purpose.
    counted = d.jets.count(d.jets)

    call_node, _ctx = render(counted)

    assert isinstance(call_node, ast.Call)
    assert len(call_node.args) == 1

    only_arg = call_node.args[0]  # type: ast.AST
    assert isinstance(only_arg, ast.Attribute)

    assert isinstance(call_node.func, ast.Attribute)
    receiver = call_node.func.value
    assert isinstance(receiver, ast.Attribute)

    assert only_arg is receiver
async def process(self, df: DataFrame) -> Union[DataFrame, Column, result]:
    """Process as much of the expression tree as can be processed here.

    The DataFrame is rendered to an AST and run through `inline_executor`.
    When the visit yields an `ast_awkward` node, its `awkward` payload is
    returned wrapped in `result`; otherwise `df` is returned unchanged.
    """
    # Render into an AST that the executor can walk.
    tree, ctx = render(df)
    outcome = inline_executor(ctx).visit(tree)

    if not isinstance(outcome, ast_awkward):
        return df
    return result(outcome.awkward)
def test_xy():
    """`vec(df).xy` renders as `sqrt(<a> + <b>)`: a Name call with one additive BinOp argument."""
    df = DataFrame()
    vector = vec(df)

    call_node, _ctx = render(vector.xy)

    assert isinstance(call_node, ast.Call)
    assert isinstance(call_node.func, ast.Name)
    assert call_node.func.id == 'sqrt'

    assert len(call_node.args) == 1
    assert isinstance(call_node.args[0], ast.BinOp)
    assert isinstance(cast(ast.BinOp, call_node.args[0]).op, ast.Add)
def test_callable_returns_const():
    """Rendering a lambda that returns a literal yields a number node with that value."""
    d = DataFrame()
    applied = d.apply(lambda b: 20)

    call_node, ctx = render(applied)
    assert isinstance(call_node, ast.Call)

    callable_arg = call_node.args[0]  # type: ast.AST
    assert isinstance(callable_arg, ast_Callable)

    body, _inner_ctx = render_callable(callable_arg, ctx, d)
    assert isinstance(body, ast.Num)
    assert body.n == 20
def test_callable_simple_call():
    """Rendering an identity lambda with `d` yields an ast_DataFrame wrapping `d`."""
    d = DataFrame()
    applied = d.apply(lambda b: b)

    call_node, ctx = render(applied)
    assert isinstance(call_node, ast.Call)

    callable_arg = call_node.args[0]  # type: ast.AST
    assert isinstance(callable_arg, ast_Callable)

    body, _inner_ctx = render_callable(callable_arg, ctx, d)
    assert isinstance(body, ast_DataFrame)
    assert body.dataframe is d
def test_simple_filter():
    """Filtering with a column comparison renders as an ast_Filter whose condition reuses the filtered node."""
    d = DataFrame()
    positive_x = d[d.x > 0]

    filter_node, _ctx = render(positive_x)
    assert isinstance(filter_node, ast_Filter)

    # check_col_comp hands back the DataFrame node found in the condition.
    condition_root = check_col_comp(filter_node.filter)
    assert condition_root.dataframe is d

    filtered = filter_node.expr
    assert isinstance(filtered, ast_DataFrame)
    assert filtered.dataframe is d

    # The filtered expression and the one inside the condition must be the
    # very same node, so that render code can take advantage of the sharing.
    assert filtered is condition_root
def test_df_user_render_args():
    """Calling a `user_func` with a column renders as a Call whose single argument is an Attribute."""
    @user_func
    def func1(x: float) -> float:
        # Placeholder body; this test only renders the call.
        assert False

    d = DataFrame()
    invoked = func1(d.jets)

    call_node, _ctx = render(invoked)

    assert call_node is not None
    assert isinstance(call_node, ast.Call)
    assert len(call_node.args) == 1

    only_arg = call_node.args[0]  # type: ast.AST
    assert isinstance(only_arg, ast.Attribute)
def test_simple_filter_func():
    """Filtering with a named function renders as an ast_Filter whose condition reuses the filtered node."""
    def x_is_positive(j):
        return j.x > 0

    d = DataFrame()
    positive_x = d[x_is_positive]

    filter_node, _ctx = render(positive_x)
    assert isinstance(filter_node, ast_Filter)

    # check_col_comp hands back the DataFrame node found in the condition.
    condition_root = check_col_comp(filter_node.filter)
    assert condition_root.dataframe is d

    filtered = filter_node.expr
    assert isinstance(filtered, ast_DataFrame)
    assert filtered.dataframe is d
    assert filtered is condition_root
def test_callable_captures_column():
    """A lambda that ignores its argument and compares a captured column renders to a Compare."""
    d = DataFrame()
    applied = d.jets.apply(lambda b: d.met > 20.0)

    call_node, ctx = render(applied)

    assert isinstance(call_node, ast.Call)
    assert isinstance(call_node.func, ast.Attribute)
    receiver = call_node.func.value
    assert isinstance(receiver, ast.Attribute)

    callable_arg = call_node.args[0]  # type: ast.AST
    assert isinstance(callable_arg, ast_Callable)

    body, _inner_ctx = render_callable(callable_arg, ctx, d.jets)
    assert isinstance(body, ast.Compare)
def test_callable_returns_matched_ast():
    """An identity lambda rendered with `d.jets` returns the same node as the call's receiver."""
    d = DataFrame()
    applied = d.jets.apply(lambda b: b)

    call_node, ctx = render(applied)

    assert isinstance(call_node, ast.Call)
    assert isinstance(call_node.func, ast.Attribute)
    receiver = call_node.func.value
    assert isinstance(receiver, ast.Attribute)

    callable_arg = call_node.args[0]  # type: ast.AST
    assert isinstance(callable_arg, ast_Callable)

    # A fresh `d.jets` is passed in; the render must map it to the existing node.
    body, _inner_ctx = render_callable(callable_arg, ctx, d.jets)
    assert receiver is body