async def execute_result_async(self, a: ast.AST):
    '''
    Generate the qastle query text for an AST instead of running it.

    WARNING: this code is fragile - the ast must end with an invocation of
    AsROOTTTree, which shows up here as a top level call to `ResultTTree`.

    WARNING: Really will only work for the xAOD backend due to the separate
    logic required for each backend.

    This code was taken from the `ServiceX.py` file located in
    `func_adl_servicex`.

    Arguments:
        a       The query ast; its top level node must be a call to the
                name `ResultTTree`.

    Returns:
        Whatever `python_ast_to_text_ast` produces for `a`.

    Raises:
        Exception   If `a` is not a call to the name `ResultTTree`.
    '''
    # `typing.cast` performs no runtime check, so the shape of the node is
    # verified explicitly. Anything that is not `ResultTTree(...)` - including
    # nodes that are not calls at all - gets the same, intended error rather
    # than an AttributeError from a missing `.func`/`.id`.
    is_ttree_call = (
        isinstance(a, ast.Call)
        and isinstance(a.func, ast.Name)
        and a.func.id == 'ResultTTree'
    )
    if not is_ttree_call:
        raise Exception('Must be a call to AsROOTTtree at end of query for now')

    # Get the qastle we are going to use!
    return python_ast_to_text_ast(a)
def translate(tree_name: str, selected_columns: str = "", tcut_selection: str = "", verbose: bool = False):
    '''
    Build a func-adl query from TCut style inputs and return it as qastle text.

    Arguments:
        tree_name           Name of the tree to read; must not be empty.
        selected_columns    Column specification handed to
                            `_translate_selected_columns`.
        tcut_selection      TCut selection handed to `_translate_selection`.
                            When empty, no `Where` clause is generated.
        verbose             When true, print the TCut selection, the func-adl
                            query and the qastle query.

    Returns:
        The qastle text of the generated query.

    Raises:
        Exception   If `tree_name` is the empty string.
    '''
    if verbose:
        print(f'\033[32mTCut selection syntax:\033[0m\n{tcut_selection}\n\n')

    # NOTE(review): presumably raises on unbalanced parentheses - confirm
    # against the helper.
    _check_parentheses(tcut_selection)

    # Compare by value: `is ""` tests object identity, which only works when
    # the interpreter happens to reuse one empty-string object (and is a
    # SyntaxWarning on Python 3.8+).
    if tree_name == "":
        raise Exception("Tree name is missing")

    if tcut_selection == "":
        query = f"EventDataset(\"ServiceXDatasetSource\", \"{tree_name}\").Select(\"lambda event: {_translate_selected_columns(selected_columns)} \")"
    else:
        query = f"EventDataset(\"ServiceXDatasetSource\", \"{tree_name}\").Where(\"lambda event: {_translate_selection(tcut_selection, verbose)} \").Select(\"lambda event: {_translate_selected_columns(selected_columns)} \")"

    query_qastle = qastle.python_ast_to_text_ast(
        qastle.insert_linq_nodes(ast.parse(query)))

    if verbose:
        print(f'\033[32mFull func-adl query:\033[0m\n{query}\n\n')
        print(f'\033[32mFull qastle query:\033[0m\n{query_qastle}\n\n')

    return query_qastle
async def execute_result_async(self, a: ast.AST, title: str) -> Any:
    '''
    Dummy executor: evaluate the ast with the dummy executor object and return
    that object.

    If `self._q_roundtrip` is set, the ast is first turned into qastle text and
    parsed back, and the round-tripped ast is what gets evaluated.

    Arguments:
        a       The ast to evaluate
        title   Not used by this implementation

    Returns:
        The object obtained from `self.get_dummy_executor_obj()`, after its
        `evaluate` method has been called with the ast.
    '''
    tree = a

    # Optional qastle round trip; the before/after dumps are printed so the
    # two forms can be compared.
    if self._q_roundtrip:
        import qastle
        print(f'before: {ast.dump(tree)}')
        qastle_text = qastle.python_ast_to_text_ast(tree)
        tree = qastle.text_ast_to_python_ast(qastle_text).body[0].value
        print(f'after: {ast.dump(tree)}')

    # Attach a placeholder sequence representation to the dataset node.
    from func_adl import find_EventDataset
    dataset_node = find_EventDataset(tree)
    placeholder = cpp_variable("bogus-do-not-use",
                               top_level_scope(),
                               cpp_type=None)
    set_rep(dataset_node,
            cpp_sequence(placeholder, placeholder, top_level_scope()))

    # Let the dummy executor process the ast, then hand the executor back.
    executor = self.get_dummy_executor_obj()
    executor.evaluate(tree)
    return executor
def test_as_qastle_uproot():
    '''The query ast of a freshly built dataset source renders as a bare EventDataset call.'''
    from qastle import python_ast_to_text_ast

    dataset = ServiceXDatasetSource("junk.root", 'MainTree')
    rendered = python_ast_to_text_ast(dataset.query_ast)

    assert rendered == "(call EventDataset 'ServiceXDatasetSource' 'MainTree')"
async def execute_result_async(self, a: ast.AST) -> Any:
    r'''
    Run a query against a func-adl ServiceX backend. The relevant part of the
    ast is converted to qastle text and passed to the dataset object.

    Arguments:
        a       The ast to evaluate. It must be a call whose function is a
                plain name (`ResultParquet`, `ResultPandasDF`,
                `ResultAwkwardArray` or `ResultTTree`).

    Returns:
        Whatever the matching `self._ds.get_data_*_async` call returns.

    Raises:
        FuncADLServerException  The ast is not a call to a plain name, or the
                                requested result form is not known for the
                                backend.
        NotImplementedError     The result form is recognized but not yet
                                supported for the backend.
    '''
    # Make sure the ast is formed in a way we can deal with.
    if not isinstance(a, ast.Call):
        raise FuncADLServerException(
            f'Unable to use ServiceX to fetch a {a}.')
    target = a.func
    if not isinstance(target, ast.Name):
        raise FuncADLServerException(
            f'Unable to use ServiceX to fetch a call from {target}')
    result_kind = target.id

    if self._is_uproot:
        # The uproot transformer only returns parquet files at the moment.
        if result_kind == 'ResultParquet':
            # Strip off the ResultParquet and send the rest down to uproot.
            query_text = python_ast_to_text_ast(
                qastle.insert_linq_nodes(a.args[0]))
            logging.debug(f'Qastle string sent to uproot query: {query_text}')
            return await self._ds.get_data_parquet_async(query_text)
        if result_kind in ('ResultPandasDF', 'ResultAwkwardArray'):
            raise NotImplementedError()
        raise FuncADLServerException(
            f'Unable to use ServiceX to fetch a result in the form {result_kind} - Only ResultParquet, ResultPandasDF and ResultAwkwardArray are supported'
        )

    # xAOD: pandas and awkward requests are both sent as a ResultTTree query;
    # only the fetch method used afterwards differs.
    # TODO: #2 Add root files as a legal return type here.
    if result_kind in ('ResultPandasDF', 'ResultAwkwardArray'):
        source = a.args[0]
        cols = a.args[1]
        # NOTE(review): `ast.Str` is deprecated in favor of `ast.Constant` -
        # confirm which node type the qastle version in use expects.
        ttree_request = ast.Call(func=ast.Name('ResultTTree'),
                                 args=[
                                     source, cols,
                                     ast.Str('treeme'),
                                     ast.Str('file.root')
                                 ])
        query_text = python_ast_to_text_ast(ttree_request)
        logging.debug(f'Qastle string sent to xAOD query: {query_text}')
        if result_kind == 'ResultPandasDF':
            return await self._ds.get_data_pandas_df_async(query_text)
        return await self._ds.get_data_awkward_async(query_text)

    if result_kind == 'ResultTTree':
        raise NotImplementedError()

    raise FuncADLServerException(
        f'Unable to use ServiceX to fetch a result in the form {result_kind} - Only ResultTTree, ResultPandasDF and ResultAwkwardArray are supported'
    )
async def execute_result_async(self, a: ast.AST) -> Any:
    '''
    Return the qastle text for the ast instead of executing it.

    Arguments:
        a       The ast to render

    Returns:
        The result of `python_ast_to_text_ast` for `a`.
    '''
    qastle_text = python_ast_to_text_ast(a)
    return qastle_text
def tcut_to_qastle(selection, variable):
    '''
    Translate a TCut style selection and a list of branches into qastle text.

    Arguments:
        selection   TCut selection expression, or "none" (any case) when no
                    selection should be applied.
        variable    Comma separated branch names, or "all" (any case) to
                    select the whole event.

    Returns:
        The qastle text of `EventDataset().Where(...).Select(...)`, or of
        `EventDataset().Select(...)` when there is no selection.
    '''
    has_selection = selection.lower() != "none"

    if has_selection:
        # 1st step: recognize all variable names. Everything that is left
        # after blanking out operators and is not a number is taken to be a
        # variable.
        ignore_patterns = {  # These are supported by Qastle
            "abs": " ",
            "(": " ",
            ")": " ",
            "*": " ",
            "/": " ",
            "+": " ",
            "-": " "
        }
        temp = multiple_replace(ignore_patterns, selection)
        output1 = re.sub('[<&>!=|-]', ' ', temp)
        variables = []
        for x in output1.split():
            try:
                float(x)
            except ValueError:
                variables.append(x)
        variables = list(dict.fromkeys(variables))  # Remove duplicates

        # 2nd step: replace variable names with event.<name>
        for x in variables:
            selection = re.sub(r'\b(%s)\b' % x, r'event.%s' % x, selection)

        # 3rd step: replace operators
        replace_patterns = {
            "&&": " and ",
            "||": " or ",
            "!=": " != ",
            ">=": " >= ",
            "<=": " <= ",
            ">": " > ",
            "<": " < "
        }
        output = multiple_replace(replace_patterns, selection)
        output = " ".join(output.split())  # Remove duplicate whitespace

        # 4th step: bool (!! Still missing many combinations!!)
        # A bare `variable` becomes `variable > 0` and `!variable` becomes
        # `variable == 0`, depending on what surrounds it. The expression is
        # wrapped in `and ... and` so the first and last terms have neighbors
        # to match against; the wrapper is removed again below.
        output = "and " + output + " and"

        # (pattern, replacement) pairs; `%s` is filled with the variable name.
        # They are applied in this order for each variable. `re.sub` leaves
        # the text alone when nothing matches, so no separate search is needed.
        bool_rewrites = (
            (r'and\s*event.%s\s*and', r'and event.%s > 0 and'),    # and variable and
            (r'and\s*!event.%s\s*and', r'and event.%s == 0 and'),  # and !variable and
            (r'and\s*event.%s\s*\)', r'and event.%s > 0)'),        # and variable )
            (r'and\s*!event.%s\s*\)', r'and event.%s == 0)'),      # and !variable )
            (r'\(\s*event.%s\s*and', r'(event.%s > 0 and'),        # ( variable and
            (r'\(\s*!event.%s\s*and', r'(event.%s == 0 and'),      # ( !variable and
            (r'or\s*event.%s\s*or', r'or event.%s > 0 or'),        # or variable or
            (r'or\s*!event.%s\s*or', r'or event.%s == 0 or'),      # or !variable or
            (r'and\s*event.%s\s*or', r'and event.%s > 0 or'),      # and variable or
            (r'and\s*!event.%s\s*or', r'and event.%s == 0 or'),    # and !variable or
            (r'or\s*event.%s\s*and', r'or event.%s > 0 and'),      # or variable and
            (r'or\s*!event.%s\s*and', r'or event.%s == 0 and'),    # or !variable and
            (r'\(\s*event.%s\s*or', r'(event.%s > 0 or'),          # ( variable or
            (r'\(\s*!event.%s\s*or', r'(event.%s == 0 or'),        # ( !variable or
            (r'or\s*event.%s\s*\)', r'or event.%s > 0)'),          # or variable )
            (r'or\s*!event.%s\s*\)', r'or event.%s == 0)'),        # or !variable )
        )
        for x in variables:
            for pattern, replacement in bool_rewrites:
                output = re.sub(pattern % x, replacement % x, output)

        # !(something) -> (something)==0. The backreference makes every
        # negated group keep its OWN contents; substituting the text of the
        # first match everywhere would overwrite later groups with the first.
        output = re.sub(r'!(\([^()]*\))', r'\1==0', output)

        # Delete `and` at the beginning and the last
        output = output.rsplit(' ', 1)[0].split(' ', 1)[1]

    # How the selected branches are handed to Select: as a dict keyed by
    # branch name (default) or as a tuple.
    pass_list = False
    pass_dict = True

    if variable.lower() == 'all':
        variable_text = 'event'
    elif pass_dict:
        variable = [num.strip() for num in variable.split(',')]
        variable_list_new = [f'\'{i}\': event.{i}' for i in variable]
        variable_text = ', '.join(variable_list_new)
        variable_text = '{' + variable_text + '}'
    elif pass_list:
        variable = [num.strip() for num in variable.split(',')]
        variable_list_new = [f'event.{i}' for i in variable]
        variable_text = ', '.join(variable_list_new)
        variable_text = '(' + variable_text + ')'

    # Add Func ADL wrapper
    if has_selection:
        query = "EventDataset().Where('lambda event: " + output + "').Select(\"lambda event: " + variable_text + "\")"
    else:
        query = "EventDataset().Select(\"lambda event: " + variable_text + "\")"

    text_ast = qastle.python_ast_to_text_ast(
        qastle.insert_linq_nodes(ast.parse(query)))
    return text_ast
async def translate(a: ast.AST):
    '''
    Convert a python ast into its qastle text form.

    Arguments:
        a       The ast to convert

    Returns:
        The result of `qastle.python_ast_to_text_ast` for `a`.
    '''
    # qastle is imported on first use rather than at module load.
    from qastle import python_ast_to_text_ast
    return python_ast_to_text_ast(a)