def get_pandas_from_root_file(file_name, tree_name, kwargs):
    """Load a pandas DataFrame from a ROOT file.

    Optional keys in `kwargs` are:
        + `variables`: List of variables to load. When missing or empty, the
          column list handed to `read_root` is empty, with or without a
          selection.
        + `selection`: Selection to apply.

    Arguments:
        file_name (str): File to load.
        tree_name (str): Tree to load.
        kwargs (dict): Extra configuration.

    Return:
        pandas.DataFrame: ROOT file converted to pandas.

    Raise:
        OSError: If the input file does not exist.

    """
    logger.debug("Loading ROOT file in pandas format -> %s:%s", file_name, tree_name)
    if not os.path.exists(file_name):
        raise OSError("Cannot find input file -> {}".format(file_name))
    selection = kwargs.get('selection')
    # `or []` also covers an explicit `variables=None`, which used to crash
    # on `None + list` whenever a selection was given.
    variables = kwargs.get('variables') or []
    if not selection:
        return read_root(file_name, tree_name, columns=variables)
    # A single name given as a string must not be split into characters.
    requested = [variables] if isinstance(variables, str) else list(variables)
    if requested:
        # The selection may need branches the caller did not ask for; read
        # them as well, skipping those already requested so that no column
        # is asked for twice.
        selection_expr = formulate.from_numexpr(selection)
        extra = [name for name in sorted(selection_expr.variables)
                 if name not in requested]
        columns = requested + extra
    else:
        # No explicit variable list: hand `read_root` the same empty column
        # list as the selection-free path instead of restricting the load to
        # the selection's own variables.
        columns = requested
    output_data = read_root(file_name, tree_name, columns=columns).query(selection)
    if requested:
        # Drop the helper columns that were only needed for the selection.
        output_data = output_data[requested]
    return output_data
def test_readme():
    """Tour of parsing, conversion, introspection and evaluation in one test."""
    momentum = from_root('TMath::Sqrt(X_PX**2 + X_PY**2 + X_PZ**2)')
    assert momentum.to_numexpr() == 'sqrt(((X_PX ** 2) + (X_PY ** 2) + (X_PZ ** 2)))'
    assert momentum.to_root() == 'TMath::Sqrt(((X_PX ** 2) + (X_PY ** 2) + (X_PZ ** 2)))'

    my_selection = from_numexpr('X_PT > 5 & (Mu_NHits > 3 | Mu_PT > 10)')
    assert my_selection.to_root() == '(X_PT > 5) && ((Mu_NHits > 3) || (Mu_PT > 10))'
    assert my_selection.to_numexpr() == '(X_PT > 5) & ((Mu_NHits > 3) | (Mu_PT > 10))'

    my_sum = from_auto('True + False')
    assert my_sum.to_root() == 'true + false'
    assert my_sum.to_numexpr() == 'True + False'

    my_check = from_auto('(X_THETA*TMath::DegToRad() > pi/4) && D_PE > 9.2')
    assert my_check.variables == {'D_PE', 'X_THETA'}
    assert my_check.named_constants == {'DEG2RAD', 'PI'}
    assert my_check.unnamed_constants == {'4', '9.2'}

    # NOTE(review): Python's `and`/`or` cannot be overloaded, so this picks
    # one of the operands instead of building a combined expression. The
    # reference string below is exactly `my_check` -- confirm that this is
    # what the example is meant to show.
    new_selection = (momentum > 100) and (my_check or (np.sqrt(my_sum) < 1))

    def numexpr_eval(string):
        return numexpr.evaluate(string, local_dict=dict(X_THETA=1234, D_PE=678))

    actual = numexpr_eval(new_selection.to_numexpr())
    expected = numexpr_eval(
        '((X_THETA * 0.017453292519943295) > (3.141592653589793 / 4)) & (D_PE > 9.2)'
    )
    # `pytest.approx(a, b)` would treat `b` as the relative tolerance and the
    # bare object says nothing about equality; the comparison has to be
    # spelled out with `==`.
    assert actual == pytest.approx(expected)
def test():
    """Check that the ROOT and numexpr inputs describe the same expression.

    `root_input` and `numexpr_input` are taken from the enclosing scope.
    """
    parsed_from_root = from_root(root_input)
    parsed_from_numexpr = from_numexpr(numexpr_input)
    assert_equal_expressions(parsed_from_root, parsed_from_numexpr)

    # Module-level converters must agree for both parses...
    for convert in (to_numexpr, to_root):
        assert convert(parsed_from_root) == convert(parsed_from_numexpr)

    # ...and so must the equivalent methods on the expression objects.
    for method_name in ('to_numexpr', 'to_root'):
        root_side = getattr(parsed_from_root, method_name)()
        numexpr_side = getattr(parsed_from_numexpr, method_name)()
        assert root_side == numexpr_side
def test_unnamed_constants():
    """Check `unnamed_constants` on parsed and hand-built expressions."""
    assert from_root('pi').unnamed_constants == set()
    assert from_numexpr('2').unnamed_constants == {'2'}
    assert from_numexpr('2e-3').unnamed_constants == {'2e-3'}
    assert from_numexpr('A').unnamed_constants == set()
    assert from_numexpr('A + A').unnamed_constants == set()
    assert from_numexpr('A + B').unnamed_constants == set()
    assert from_numexpr('A + A*A - 3e7').unnamed_constants == {'3e7'}
    assert from_numexpr('arctan2(A, A)').unnamed_constants == set()
    assert from_numexpr('arctan2(A, B)').unnamed_constants == set()
    assert from_root('arctan2(A, pi)').unnamed_constants == set()
    assert from_numexpr('arctan2(arctan2(A, B), C)').unnamed_constants == set()

    # The property must still report the innermost leaf through 100 nested
    # SQRT wrappers.
    for base, expect in [(UC('2'), {'2'}),
                         (Variable('A'), set()),
                         (NC(ConstantIDs.PI), set())]:
        expr = base
        for _ in range(100):
            expr = Expression(IDs.SQRT, expr)
        assert expr.unnamed_constants == expect
def test_get_variables():
    """Check `variables` on parsed and hand-built expressions."""
    assert from_root('pi').variables == set()
    assert from_numexpr('2').variables == set()
    assert from_numexpr('2e-3').variables == set()
    assert from_numexpr('A').variables == {'A'}
    assert from_numexpr('A + A').variables == {'A'}
    assert from_numexpr('A + B').variables == {'A', 'B'}
    assert from_numexpr('A + A*A - 3e7').variables == {'A'}
    assert from_numexpr('arctan2(A, A)').variables == {'A'}
    assert from_numexpr('arctan2(A, B)').variables == {'A', 'B'}
    assert from_root('arctan2(A, pi)').variables == {'A'}
    assert from_numexpr('arctan2(arctan2(A, B), C)').variables == {'A', 'B', 'C'}

    # The property must still report the innermost leaf through 100 nested
    # SQRT wrappers.
    for base, expect in [(UC('2'), set()),
                         (Variable('A'), {'A'}),
                         (NC(ConstantIDs.PI), set())]:
        expr = base
        for _ in range(100):
            expr = Expression(IDs.SQRT, expr)
        assert expr.variables == expect
def parse_args(args):
    """Convert a formula as requested on the command line.

    Exactly one input option (`--from-root` / `--from-numexpr`) and exactly
    one output option (`--to-root`, `--to-numexpr`, `--variables`,
    `--named-constants`, `--unnamed-constants`) must be supplied.

    Arguments:
        args (list): Command line arguments, without the program name.

    Return:
        str: The converted expression, or the requested names sorted and
            joined with newlines.

    """
    cli = argparse.ArgumentParser(
        description='Convert between different types of formulae')

    source_group = cli.add_mutually_exclusive_group(required=True)
    for flag in ('--from-root', '--from-numexpr'):
        source_group.add_argument(flag)

    target_group = cli.add_mutually_exclusive_group(required=True)
    for flag in ('--to-root', '--to-numexpr', '--variables',
                 '--named-constants', '--unnamed-constants'):
        target_group.add_argument(flag, action='store_true')

    options = cli.parse_args(args)

    # Parse the input formula with the matching front end.
    if options.from_root is not None:
        expression = from_root(options.from_root)
    elif options.from_numexpr is not None:
        expression = from_numexpr(options.from_numexpr)
    else:
        raise NotImplementedError()

    # Produce the requested output.
    if options.to_root:
        return to_root(expression)
    if options.to_numexpr:
        return to_numexpr(expression)
    for attribute in ('variables', 'named_constants', 'unnamed_constants'):
        if getattr(options, attribute):
            return '\n'.join(sorted(getattr(expression, attribute)))
    raise NotImplementedError()
def test_too_many_function_arguments():
    """Passing two arguments to `sqrt` must be rejected by the parser."""
    pytest.raises(ParsingException, from_numexpr, 'sqrt(2, 3)')
def test_invalid_arg_parse():
    """Handing `from_numexpr` an Expression object must raise ValueError."""
    with pytest.raises(ValueError):
        not_a_string = Expression(IDs.SQRT, UC('2'))
        from_numexpr(not_a_string)
def test_parse_invalid_expression():
    """A malformed formula must raise ParsingException."""
    pytest.raises(ParsingException, from_numexpr, 'saadasd()&+|()')