def test_nested_dataframe():
    df1 = DataFrame()
    df2 = DataFrame(expr=ast_DataFrame(df1))

    r = dumps(df1 + df2)

    assert '\n'.join(r) == '''df_1 = DataFrame()
def test_callback():
    df = DataFrame()
    df1 = df.map(lambda e: e.x)

    r = dumps(df1)

    assert '\n'.join(r) == '''df_1 = DataFrame()
def test_computed_col_def_updated():
    """Define a lambda-backed column, render it, redefine it, and render again.

    Both rounds must produce a one-argument call whose function is an
    ``ast_Callable``, and that callable must render without raising.
    """

    def check_lambda_column(column):
        # Render the column and verify the shape of the resulting call.
        call, ctx = render(column)

        assert isinstance(call, ast.Call)
        assert isinstance(call.func, ast_Callable)
        assert len(call.args) == 1

        # Expand the callable itself; only the absence of an error is checked.
        render_callable(call.func, ctx, call.func.dataframe)  # type: ignore

    df = DataFrame()

    # First definition of the column.
    df.jets['ptgev'] = lambda j: j.pt / 1000
    first = df.jets.ptgev
    check_lambda_column(first)

    # Run the render again after replacing the definition.
    df.jets['ptgev'] = lambda j: j.pt / 1001
    second = df.jets.ptgev
    check_lambda_column(second)
def test_second_dr_returns_filtered():
    '''
    Build a chain of lambda-defined columns on electrons (`near_mcs`, the `hasMC`
    filter, then `mc`), render the final column, and recursively expand every
    `ast_Callable` call found in the result. The test only requires that the
    expansion completes and returns something.
    '''
    df = DataFrame()

    # NOTE(review): the docstring below talks about two particles and `eta`/`phi`,
    # but the signature only takes `p1_eta` - confirm which one is intended.
    @user_func
    def DeltaR(p1_eta: float) -> float:
        '''
        Calculate the DeltaR between two particles given their `eta` and `phi` locations.
        Implemented on the back end.
        '''
        assert False, 'This should never be called'

    mc_part = df.TruthParticles('TruthParticles')
    eles = df.Electrons('Electrons')

    def dr(e, mc):
        '''Make calculating DR easier as I have a hard-to-use DR calculation function on
        the back end'''
        # Only `e` is used here; `mc` is accepted but ignored.
        return DeltaR(e.eta())

    def very_near2(mcs, e):
        'Return the particles in mcs for which dr(e, m) is less than 0.1'
        return mcs[lambda m: dr(e, m) < 0.1]

    # Computed column defined by a lambda that closes over `mc_part`.
    eles['near_mcs'] = lambda reco_e: very_near2(mc_part, reco_e)

    # Second computed column, used as the filter, built on top of `near_mcs`.
    eles['hasMC'] = lambda e: e.near_mcs.Count() > 0
    good_eles_with_mc = eles[eles.hasMC]
    # Third computed column, defined on the filtered electrons.
    good_eles_with_mc['mc'] = lambda e: e.near_mcs.First().ptgev

    d1 = good_eles_with_mc.mc

    expr_1, context_1 = render(d1)

    class render_in_depth(ast.NodeTransformer):
        '''Expand every call whose function is an `ast_Callable`, recursing into
        the expression each expansion produces.'''

        def __init__(self, context):
            ast.NodeTransformer.__init__(self)
            # Render context used for the next `render_callable` call.
            self._context = context

        def visit_Call(self, a: ast.Call):
            # Calls to anything other than an `ast_Callable` are traversed normally.
            if not isinstance(a.func, ast_Callable):
                return self.generic_visit(a)

            assert len(a.args) == 1
            # arg = self.visit(a.args[0])

            # Render the callable against the dataframe recorded on it, using
            # the context that is current at this depth.
            expr, new_context = render_callable(
                cast(ast_Callable,
                     a.func), self._context, a.func.dataframe)  # type: ignore
            old_context = self._context
            try:
                # Visit the rendered expression under the context it came with ...
                self._context = new_context
                return self.visit(expr)
            finally:
                # ... and restore the previous context even if the visit raises.
                self._context = old_context

    assert isinstance(expr_1, ast.Call)

    rendered = render_in_depth(context_1).visit(expr_1)
    assert rendered is not None
def test_fluent_function_kwarg():
    """A keyword argument given to a fluent call is recorded on the ``ast.Call``.

    The tree is checked structurally rather than by comparing ``ast.dump``
    text: the dump spells a numeric literal ``Num(n=...)`` before Python 3.8
    and ``Constant(value=...)`` from 3.8 on, so a hard-coded dump string can
    only match one family of interpreters.
    """
    d = DataFrame()
    d1 = d.count(dude=22.0)
    assert d1.filter is None

    call = d1.child_expr
    assert call is not None
    assert isinstance(call, ast.Call)

    # The callee is the `count` attribute loaded from a DataFrame node.
    callee = call.func
    assert isinstance(callee, ast.Attribute)
    assert isinstance(callee.value, ast_DataFrame)
    assert callee.attr == 'count'
    assert isinstance(callee.ctx, ast.Load)

    # No positional arguments, exactly one keyword: dude=22.0.
    assert len(call.args) == 0
    assert len(call.keywords) == 1
    keyword = call.keywords[0]
    assert keyword.arg == 'dude'

    # literal_eval understands both the legacy Num node and the Constant node.
    value = ast.literal_eval(keyword.value)
    assert isinstance(value, float)
    assert value == 22.0
def test_lambda_function():
    df = DataFrame()
    df.jets['ptgev'] = lambda j: j.pt / 1000
    df1 = df.jets.ptgev

    r = dumps(df1)

    assert '\n'.join(r) == '''df_1 = DataFrame()
def test_repeated_use():
    df = DataFrame()
    df1 = df.jets("Hi")
    df2 = df1 + df1

    r = dumps(df2)

    assert '\n'.join(r) == '''df_1 = DataFrame()
def test_lambda_in_filter():
    """Filtering by the result of ``apply`` with a lambda records a Column filter."""
    source = DataFrame()
    filtered = source[source.apply(lambda e: e == 1)]

    # The filtered frame's expression is a DataFrame node ...
    assert isinstance(filtered.child_expr, ast_DataFrame)

    # ... and its filter is a Column whose expression is a call.
    assert filtered.filter is not None
    assert isinstance(filtered.filter, Column)
    filter_expr = filtered.filter.child_expr
    assert isinstance(filter_expr, ast.Call)
def test_create_col_no_confusion():
    """A column defined on ``jets`` is not substituted when reached via ``jets.pt``."""
    df = DataFrame()
    pt_scaled = df.jets.pt / 1000.0
    df.jets['ptgev'] = pt_scaled

    # Same attribute name, but looked up on `jets.pt` instead of `jets`.
    lookup = df.jets.pt.ptgev
    assert lookup.child_expr is not None

    # The result is a plain attribute access, not the scaled expression.
    expected = "Attribute(value=ast_DataFrame(), attr='ptgev', ctx=Load())"
    assert ast.dump(lookup.child_expr) == expected
def test_fluent_function_pos_arg():
    """A positional argument given to a fluent call is recorded on the ``ast.Call``.

    The tree is checked structurally rather than by comparing ``ast.dump``
    text: the dump spells a numeric literal ``Num(n=...)`` before Python 3.8
    and ``Constant(value=...)`` from 3.8 on, so a hard-coded dump string can
    only match one family of interpreters.
    """
    d = DataFrame()
    d1 = d.count(22.0)
    assert d1.filter is None

    call = d1.child_expr
    assert call is not None
    assert isinstance(call, ast.Call)

    # The callee is the `count` attribute loaded from a DataFrame node.
    callee = call.func
    assert isinstance(callee, ast.Attribute)
    assert isinstance(callee.value, ast_DataFrame)
    assert callee.attr == 'count'
    assert isinstance(callee.ctx, ast.Load)

    # Exactly one positional argument (22.0) and no keywords.
    assert len(call.keywords) == 0
    assert len(call.args) == 1

    # literal_eval understands both the legacy Num node and the Constant node.
    value = ast.literal_eval(call.args[0])
    assert isinstance(value, float)
    assert value == 22.0
def test_create_col_access_with_text():
    """A computed column can be read back with ``[...]`` as well as by attribute.

    The expression is checked structurally rather than through ``ast.dump``
    text, because the dump of the numeric literal differs between Python
    versions (``Num(n=1000)`` before 3.8, ``Constant(value=1000)`` after).
    """
    df = DataFrame()
    df.jets['ptgev'] = df.jets.pt / 1000
    d1 = df.jets['ptgev']

    expr = d1.child_expr
    assert expr is not None

    # Expect `<dataframe> / 1000`.
    assert isinstance(expr, ast.BinOp)
    assert isinstance(expr.left, ast_DataFrame)
    assert isinstance(expr.op, ast.Div)

    # literal_eval understands both the legacy Num node and the Constant node.
    divisor = ast.literal_eval(expr.right)
    assert isinstance(divisor, int)
    assert divisor == 1000
def test_lambda_argument():
    """A lambda handed to ``apply`` is stored as the call's single ``ast_Callable`` argument."""
    source = DataFrame()
    applied = source.apply(lambda e: e)

    call = applied.child_expr
    assert call is not None
    assert isinstance(call, ast.Call)

    # Exactly one argument, and it wraps the lambda.
    assert len(call.args) == 1
    wrapped = call.args[0]
    assert isinstance(wrapped, ast_Callable)
def test_render_func_with_df_arg():
    """A DataFrame passed as a call argument is rendered as a node referencing it."""
    frame = DataFrame()
    counted = frame.count(frame)
    rendered, _ = render(counted)

    assert isinstance(rendered, ast.Call)

    # One positional argument, wrapping the very same DataFrame object.
    call_args = rendered.args
    assert len(call_args) == 1
    only_arg = call_args[0]  # type: ast.AST
    assert isinstance(only_arg, ast_DataFrame)
    assert only_arg.dataframe is frame
def test_render_func_with_args():
    """A numeric positional argument survives rendering as the literal ``10``.

    The literal is read with ``ast.literal_eval`` instead of ``ast.Num`` and
    its ``.n`` attribute: those have been deprecated since Python 3.8 and are
    not available on the newest interpreters, whereas ``literal_eval`` handles
    both the legacy ``Num`` node and the ``Constant`` node.
    """
    d = DataFrame()
    d1 = d.count(10)
    expr, _ = render(d1)

    assert isinstance(expr, ast.Call)
    assert len(expr.args) == 1

    value = ast.literal_eval(expr.args[0])
    # Mirror the old `ast.Num` check: a number, and not a bool.
    assert isinstance(value, (int, float, complex))
    assert not isinstance(value, bool)
    assert value == 10
def test_computed_reference():
    """A computed column renders to the binary expression it was defined with.

    The right operand is validated with ``ast.literal_eval`` rather than an
    ``isinstance(..., ast.Num)`` check: ``ast.Num`` has been deprecated since
    Python 3.8 and is not available on the newest interpreters.
    """
    df = DataFrame()
    df.jets['ptgev'] = df.jets.pt / 1000
    d1 = df.jets.ptgev

    # Only the rendered expression is inspected; the context is not needed.
    expr_1, _ = render(d1)

    assert isinstance(expr_1, ast.BinOp)
    assert isinstance(expr_1.left, ast.Attribute)

    # Same acceptance rule the old `ast.Num` check had: a number, not a bool.
    right_value = ast.literal_eval(expr_1.right)
    assert isinstance(right_value, (int, float, complex))
    assert not isinstance(right_value, bool)
def test_render_base_call():
    """An argument-less fluent call renders as an attribute call on the DataFrame."""
    frame = DataFrame()
    counted = frame.count()
    rendered, _ = render(counted)

    assert isinstance(rendered, ast.Call)
    assert len(rendered.args) == 0

    # The callee is an attribute ...
    callee = rendered.func
    assert isinstance(callee, ast.Attribute)

    # ... looked up on a node that wraps the original DataFrame object.
    owner = callee.value  # type: ast.AST
    assert isinstance(owner, ast_DataFrame)
    assert owner.dataframe is frame
def test_callable_wrong_number_args():
    """Rendering a one-parameter lambda with two arguments must raise."""
    frame = DataFrame()
    applied = frame.apply(lambda b: b)
    rendered, context = render(applied)

    assert isinstance(rendered, ast.Call)
    wrapped = rendered.args[0]  # type: ast.AST
    assert isinstance(wrapped, ast_Callable)

    # The lambda takes a single parameter; two arguments are supplied here.
    with pytest.raises(Exception):
        render_callable(wrapped, context, frame, frame)
def test_callable_returns_const():
    """A lambda that returns a plain number renders to the literal ``20``.

    The literal is read with ``ast.literal_eval`` instead of ``ast.Num`` and
    its ``.n`` attribute: those have been deprecated since Python 3.8 and are
    not available on the newest interpreters, whereas ``literal_eval`` handles
    both the legacy ``Num`` node and the ``Constant`` node.
    """
    d = DataFrame()
    d1 = d.apply(lambda b: 20)
    expr, ctx = render(d1)

    assert isinstance(expr, ast.Call)
    arg1 = expr.args[0]  # type: ast.AST
    assert isinstance(arg1, ast_Callable)

    # Only the rendered expression matters; the returned context is unused.
    expr1, _ = render_callable(arg1, ctx, d)

    value = ast.literal_eval(expr1)
    # Mirror the old `ast.Num` check: a number, and not a bool.
    assert isinstance(value, (int, float, complex))
    assert not isinstance(value, bool)
    assert value == 20
def test_callable_simple_call():
    """Rendering an identity lambda against a DataFrame gives back that DataFrame."""
    frame = DataFrame()
    applied = frame.apply(lambda b: b)
    rendered, context = render(applied)

    assert isinstance(rendered, ast.Call)
    wrapped = rendered.args[0]  # type: ast.AST
    assert isinstance(wrapped, ast_Callable)

    # Invoke the wrapped lambda with the frame as its only argument.
    result, _ = render_callable(wrapped, context, frame)
    assert isinstance(result, ast_DataFrame)
    assert result.dataframe is frame
def test_create_col_twice():
    """Redefining a computed column makes the latest definition the one returned.

    The expression is checked structurally rather than through ``ast.dump``
    text, because the dump of the numeric literal differs between Python
    versions (``Num(n=1001)`` before 3.8, ``Constant(value=1001)`` after).
    """
    df = DataFrame()
    df.jets['ptgev'] = df.jets.pt / 1000.0

    # This should generate a warning, but nothing else.
    df.jets['ptgev'] = df.jets.pt / 1001
    d1 = df.jets.ptgev

    expr = d1.child_expr
    assert expr is not None

    # Expect `<dataframe> / 1001`, i.e. the second definition.
    assert isinstance(expr, ast.BinOp)
    assert isinstance(expr.left, ast_DataFrame)
    assert isinstance(expr.op, ast.Div)

    # literal_eval understands both the legacy Num node and the Constant node.
    divisor = ast.literal_eval(expr.right)
    assert isinstance(divisor, int)
    assert divisor == 1001
def test_create_col_with_lambda():
    """A lambda-defined column is recorded as a call of an ``ast_Callable``."""
    df = DataFrame()
    df.jets['ptgev'] = lambda j: j.pt / 1000
    column = df.jets.ptgev

    call = column.child_expr
    assert call is not None
    assert isinstance(call, ast.Call)

    # One argument: a DataFrame node whose frame is itself an attribute access.
    assert len(call.args) == 1
    first_arg = call.args[0]
    assert isinstance(first_arg, ast_DataFrame)
    parent = cast(ast_DataFrame, first_arg).dataframe
    assert isinstance(parent.child_expr, ast.Attribute)

    # The callable being invoked is tied to that same parent frame.
    callee = call.func
    assert isinstance(callee, ast_Callable)
    assert cast(ast_Callable, callee).dataframe is parent
def test_create_col_yuck_doesnot_track():
    """A column defined from ``df.met`` stays rooted at ``df`` when read via filtered jets."""
    df = DataFrame()
    df.jets['ptgev'] = df.met

    # Read the column back through a filtered view of the jets.
    jets = df.jets
    central = jets[df.jets.eta < 2.4]
    column = central.ptgev

    node = column.child_expr
    assert node is not None
    expected = "Attribute(value=ast_DataFrame(), attr='met', ctx=Load())"
    assert ast.dump(node) == expected
    assert isinstance(node, ast.Attribute)

    # The attribute hangs off the root DataFrame, not the filtered jets.
    owner = node.value
    assert isinstance(owner, ast_DataFrame)
    root = cast(ast_DataFrame, owner).dataframe
    assert root is df
def test_callable_function():
    """A named function passed to ``apply`` renders the same way an identity lambda does."""

    def passthrough(b):
        return b

    frame = DataFrame()
    applied = frame.apply(passthrough)
    rendered, context = render(applied)

    assert isinstance(rendered, ast.Call)
    wrapped = rendered.args[0]  # type: ast.AST
    assert isinstance(wrapped, ast_Callable)

    # Invoking the wrapped function on the frame hands the frame straight back.
    result, _ = render_callable(wrapped, context, frame)
    assert isinstance(result, ast_DataFrame)
    assert result.dataframe is frame
def test_np_func_where():
    import numpy as np
    d = DataFrame()
    f1 = np.where(d.x > 0, d.x, d.y)

    from dataframe_expressions import dumps
    assert '\n'.join(dumps(cast(DataFrame, f1))) == '''df_1 = DataFrame()
def test_simple_slice():
    """Indexing a DataFrame with an integer renders as a subscript of that frame."""
    frame = DataFrame()
    first_item = frame[0]
    rendered, _ = render(first_item)

    assert isinstance(rendered, ast.Subscript)
    subscripted = rendered.value
    assert isinstance(subscripted, ast_DataFrame)
def test_mask_operator_and():
    """Combining two masks with ``&`` records a boolean AND of two columns."""
    d = DataFrame()
    not_ten = d.x != 10
    not_eight = d.x != 8

    combined = not_ten & not_eight

    expected = "BoolOp(op=And(), values=[ast_Column(), ast_Column()])"
    assert ast.dump(combined.child_expr) == expected
def test_resolve_hidden_alias():
    """An alias registered under the name of an existing attribute is applied on access."""
    # Register the alias before any DataFrame is created.
    define_alias("jets", "pt", lambda j: j.pt / 1000.0)

    source = DataFrame()
    aliased = source.jets.pt

    assert aliased.filter is None
    assert aliased.child_expr is not None

    # The alias body divides by 1000, so that literal must show up in the tree.
    dumped = ast.dump(aliased.child_expr)
    assert '1000' in dumped
def test_render_twice_for_same_results():
    """Rendering one expression twice gives the same dump and same-sized contexts."""
    df = DataFrame()
    eles = df.Electrons()
    mc_part = df.TruthParticles()

    # Two stacked filters on the truth particles.
    mc_ele = mc_part[mc_part.pdgId == 11]
    good_mc_ele = mc_ele[mc_ele.ptgev > 20]

    # The lambda ignores its argument and returns the captured, filtered frame.
    ele_mcs = eles.map(lambda reco_e: good_mc_ele)

    first_expr, first_ctx = render(ele_mcs)
    second_expr, second_ctx = render(ele_mcs)

    assert ast.dump(first_expr) == ast.dump(second_expr)

    # The bookkeeping collections on both contexts must have equal sizes.
    assert len(first_ctx._resolved) == len(second_ctx._resolved)
    assert len(first_ctx._seen_datasources) == len(
        second_ctx._seen_datasources)
def test_filter_with_attribute():
    """Reading an attribute off a filtered frame renders as Attribute-of-Filter."""
    d = DataFrame()
    jets = d.jets
    hard_jets = jets[d.jets.pt > 30]
    hard_jet_pt = hard_jets.pt

    rendered, _ = render(hard_jet_pt)

    assert isinstance(rendered, ast.Attribute)
    owner = rendered.value
    assert isinstance(owner, ast_Filter)
def test_np_func_histogram():
    import numpy as np
    d = DataFrame()
    f1 = np.histogram(d.x, bins=50, range=(-0.5, 10.0))

    from dataframe_expressions import dumps
    assert '\n'.join(dumps(cast(DataFrame, f1))) == '''df_1 = DataFrame()