def test_merge_query_creation() -> None:
    """A Part derives merge queries from ancestor property paths and rewrites them."""
    nav_in = Navigation(1, Navigation.Max, direction=Direction.inbound)
    foo_query = Query([Part(IsTerm(["foo"])), Part(AllTerm(), navigation=nav_in)])
    expected = [MergeQuery("ancestors.foo", foo_query)]
    # the merge query is created automatically from the property path
    assert Part(AllTerm()).merge_queries_for(["ancestors.foo.reported.bla"]) == expected
    # an already existing merge query is reused, not added again
    assert Part(MergeTerm(AllTerm(), expected)).merge_queries_for(["ancestors.foo.reported.bla"]) == expected
    # only ancestors/descendants prefixes are understood
    with pytest.raises(Exception):
        Part(AllTerm()).merge_queries_for(["unknown.foo.reported.bla"])
    # a property path after the kind is required
    with pytest.raises(Exception):
        Part(AllTerm()).merge_queries_for(["ancestors.foo"])
    # rewrite for ancestors/descendants also works with additional properties
    rewritten = Query.by("test").rewrite_for_ancestors_descendants(["ancestors.kind.reported.prop", "test", "a"])
    assert str(rewritten) == 'is("test") {ancestors.kind: all <-default[1:]- is("kind")}'
    with_merge = (
        Query.by("test")
        .merge_with("ancestors.cloud", NavigateUntilRoot, IsTerm(["cloud"]))
        .rewrite_for_ancestors_descendants(["ancestors.kind.reported.prop", "test", "a"])
    )
    assert (
        str(with_merge)
        == 'is("test") {ancestors.kind: all <-default[1:]- is("kind"), ancestors.cloud: all <-default[1:]- is("cloud")}'
    )
def test_simplify() -> None:
    """Combining terms with `all` collapses to the simplest equivalent term."""
    # some_criteria | all => all
    assert str(IsTerm(["test"]) | AllTerm()) == "all"
    # some_criteria & all => some_criteria
    assert str(IsTerm(["test"]) & AllTerm()) == 'is("test")'
    # simplification also applies inside nested combinations
    nested = Query.by(AllTerm() & ((P("test") == True) & (IsTerm(["test"]) | AllTerm())))
    assert str(nested) == "test == true"
def merge_query_query(ud: UD) -> Query:
    """Draw a random query that is shaped like a valid merge query."""
    drawer = Drawer(ud)
    drawn_nav = drawer.draw(navigation())
    drawn_term = drawer.draw(term)
    # a merge query needs to end with a navigation-only part without additional props
    return Query([Part(drawn_term), Part(AllTerm(), navigation=drawn_nav)])
def test_merge_term() -> None:
    """A MergeTerm with pre-filter, merge query and post-filter round-trips the parser."""
    sub_query = Query.by(AllTerm()).traverse_in(until=Navigation.Max).filter("foo")
    merged = MergeTerm(Query.mk_term("bla"), [MergeQuery("foo123", sub_query)], Query.mk_term("bla"))
    assert_round_trip(term_parser, Query.by(merged))
def part_parser() -> Parser:
    # Parse one query part: [term] [with_clause] <tag> [sort] [limit] [navigation] [reverse].
    term = yield term_parser.optional()
    yield whitespace
    with_clause = yield with_clause_parser.optional()
    tag = yield tag_parser
    sort = yield sort_parser.optional()
    limit = yield limit_parser.optional()
    # navigation is optional if anything else was parsed; otherwise it is mandatory,
    # so an empty input does not parse as an empty part
    nav = yield navigation_parser.optional() if term or sort or limit else navigation_parser
    # default to the all-matching term when none was given
    term = term if term else AllTerm()
    reverse = yield reversed_p
    return Part(term, tag, with_clause, sort if sort else [], limit, nav, reverse)
def combine_fulltext(term: Term) -> Tuple[Term, Term]:
    """Split a term into (fulltext_part, remaining_part).

    The fulltext part can be served by the search index, the remaining part
    has to be evaluated as a normal filter. Either side may be AllTerm()
    when there is nothing to put there.

    :raises AttributeError: for term types this function can not handle.
    """
    if not term.contains_term_type(FulltextTerm):
        # no fulltext term anywhere: everything is "remaining"
        return AllTerm(), term
    elif isinstance(term, FulltextTerm):
        # the whole term is fulltext
        return term, AllTerm()
    elif isinstance(term, CombinedTerm):
        if (
            (term.left.contains_term_type(FulltextTerm) or term.right.contains_term_type(FulltextTerm))
            and term.op == "or"
            and term.find_term(lambda x: not isinstance(x, FulltextTerm) and not isinstance(x, CombinedTerm))
        ):
            # This term can not utilize the search index!
            # (an `or` between fulltext and a non-fulltext leaf can not be split)
            return AllTerm(), term
        left = isinstance(term.left, FulltextTerm)
        right = isinstance(term.right, FulltextTerm)
        if left and right:
            # both sides are fulltext: the whole combination is fulltext
            return term, AllTerm()
        elif left:
            # left is pure fulltext: recurse into the right side and merge fulltext parts
            ft, remaining = combine_fulltext(term.right)
            return ft.combine(term.op, term.left), remaining
        elif right:
            ft, remaining = combine_fulltext(term.left)
            return ft.combine(term.op, term.right), remaining
        else:
            # NOTE(review): lf/remaining_left come from term.right and vice versa —
            # presumably harmless since both halves are combined with the same op; confirm.
            lf, remaining_left = combine_fulltext(term.right)
            rf, remaining_right = combine_fulltext(term.left)
            return lf.combine(term.op, rf), remaining_left.combine(term.op, remaining_right)
    elif isinstance(term, NotTerm):
        ft, remaining = combine_fulltext(term.term)
        # precedence: the second element is the conditional expression
        return NotTerm(ft), remaining if isinstance(remaining, AllTerm) else NotTerm(remaining)
    elif isinstance(term, MergeTerm):
        # only the pre-filter can be split; the merge/post parts stay untouched
        ft, remaining = combine_fulltext(term.pre_filter)
        return ft, replace(term, pre_filter=remaining)
    else:
        raise AttributeError(f"Can not handle term of type: {type(term)} ({term})")
def add_merge_query(mq: MergeQuery, part_result: str) -> None:
    # Render one merge (sub) query into AQL and append a LET binding to merge_result.
    # NOTE: nested closure — relies on enclosing scope for merge_result, next_crs,
    # query_string, db, query_model, merge_cursor, with_edges, bind_vars, counters, unset_props.
    nonlocal merge_result
    # make sure the sub query is valid
    f = mq.query.parts[-1]
    assert (
        f.term == AllTerm() and not f.sort and not f.limit and not f.with_clause and not f.tag
    ), "Merge query needs to start with navigation!"
    merge_crsr = next_crs("merge_part")
    # make sure the limit only yields one element
    mg_crs, mg_query = query_string(
        db, mq.query, query_model, merge_cursor, with_edges, bind_vars, counters, merge_crsr
    )
    if mq.only_first:
        # only_first: take a single element via FIRST(... LIMIT 1 ...)
        merge_result += (
            f"LET {part_result}=FIRST({mg_query} FOR r in {mg_crs} LIMIT 1 RETURN UNSET(r, {unset_props}))"
        )
    else:
        # otherwise: collect all distinct results as a list
        merge_result += (
            f"LET {part_result}=({mg_query} FOR r in {mg_crs} RETURN DISTINCT UNSET(r, {unset_props}))"
        )
@make_parser
def not_term() -> Parser:
    # negation of a filter term: `not <term>`
    yield not_p
    term = yield filter_term_parser
    return NotTerm(term)


# A fulltext term should not read any keywords of the language
fulltext_term = quoted_string_p.map(FulltextTerm)

# one or more literals separated by commas, optionally wrapped in square brackets
literal_list_comma_separated_p = (quoted_string_p | literal_p).sep_by(comma_p, min=1)
literal_list_in_square_brackets = l_bracket_p >> literal_list_comma_separated_p << r_bracket_p
literal_list_optional_brackets = literal_list_in_square_brackets | literal_list_comma_separated_p

# is(<kind>[, <kind> ...]) — filter by kind
is_term = lexeme(string("is") >> lparen_p >> literal_list_optional_brackets << rparen_p).map(IsTerm)
# id(<identifier>) — filter by node id
id_term = lexeme(string("id") >> lparen_p >> (quoted_string_p | literal_p) << rparen_p).map(IdTerm)
# `all` matches every node
match_all_term = lexeme(string("all")).map(lambda _: AllTerm())
# a leaf term is any non-combined term; fulltext comes last so keywords win
leaf_term_p = is_term | id_term | match_all_term | function_term | predicate_term | not_term | fulltext_term
bool_op_p = lexeme(string("and") | string("or"))
not_p = lexeme(string("not"))


@make_parser
def combined_term() -> Parser:
    # left-associative chain of simple terms combined with and/or
    left = yield simple_term_p
    result = left
    while True:
        op = yield bool_op_p.optional()
        if op is None:
            break
        right = yield simple_term_p
        result = CombinedTerm(result, op, right)
    # NOTE(review): definition appears truncated in this chunk — a `return result`
    # is presumably expected here; confirm against the full file.
def part(p: Part, in_cursor: str, part_idx: int) -> Tuple[Part, str, str, str]:
    # Translate one query Part into AQL text.
    # Returns (part, final_cursor_name, filter_cursor_name, generated_aql).
    # NOTE: nested closure — relies on enclosing scope for next_crs, next_counter,
    # term, sort, merge, db, with_edges, outer_merge.
    query_part = ""
    filtered_out = ""

    def filter_statement(current_cursor: str, part_term: Term, limit: Optional[Limit]) -> str:
        # Emit a LET <filter> = (FOR ... FILTER ... [SORT] [LIMIT] RETURN ...) statement.
        # Pass-through (no statement) when there is nothing to filter, sort or limit.
        if isinstance(part_term, AllTerm) and limit is None and not p.sort:
            return current_cursor
        nonlocal query_part, filtered_out
        crsr = next_crs()
        filtered_out = next_crs("filter")
        md = f"NOT_NULL({crsr}.metadata, {{}})"
        # when the part is tagged, annotate each result document's metadata with the tag
        f_res = f'MERGE({crsr}, {{metadata:MERGE({md}, {{"query_tag": "{p.tag}"}})}})' if p.tag else crsr
        limited = f" LIMIT {limit.offset}, {limit.length} " if limit else " "
        sort_by = sort(crsr, p.sort) if p.sort else " "
        for_stmt = f"FOR {crsr} in {current_cursor} FILTER {term(crsr, part_term)}{sort_by}{limited}"
        return_stmt = f"RETURN {f_res}"
        reverse = "REVERSE" if p.reverse_result else ""
        query_part += f"LET {filtered_out} = {reverse}({for_stmt}{return_stmt})"
        return filtered_out

    def with_clause(in_crsr: str, clause: WithClause) -> str:
        nonlocal query_part
        # this is the general structure of the with_clause that is created
        #
        # FOR cloud in foo FILTER @0 in cloud.kinds
        # FOR account IN 0..1 OUTBOUND cloud foo_default
        # OPTIONS { bfs: true, uniqueVertices: 'global' }
        # FILTER (cloud._key==account._key) or (@1 in account.kinds)
        # FOR region in 0..1 OUTBOUND account foo_default
        # OPTIONS { bfs: true, uniqueVertices: 'global' }
        # FILTER (cloud._key==region._key) or (@2 in region.kinds)
        # FOR zone in 0..1 OUTBOUND region foo_default
        # OPTIONS { bfs: true, uniqueVertices: 'global' }
        # FILTER (cloud._key==zone._key) or (@3 in zone.kinds)
        # COLLECT l4_cloud = cloud, l4_account=account, l4_region=region WITH COUNT INTO counter3
        # FILTER (l4_cloud._key==l4_region._key) or (counter3>=0)
        # COLLECT l3_cloud = l4_cloud, l3_account=l4_account WITH COUNT INTO counter2
        # FILTER (l3_cloud._key==l3_account._key) or (counter2>=0) // ==2 regions
        # COLLECT l2_cloud = l3_cloud WITH COUNT INTO counter1
        # FILTER (counter1>=0) //counter is +1 since the node itself is always bypassed
        # RETURN ({cloud: l2_cloud._key, count:counter1})
        current = next_counter("with_clause")

        def cursor_in(depth: int) -> str:
            # cursor name for a given traversal depth of this with_clause
            return f"c{current}_{depth}"

        l0crsr = cursor_in(0)

        def traversal_filter(cl: WithClause, in_crs: str, depth: int) -> str:
            # Emit the nested FOR/FILTER traversal chain, one level per nested with_clause.
            nav = cl.navigation
            crsr = cursor_in(depth)
            direction = "OUTBOUND" if nav.direction == Direction.outbound else "INBOUND"
            unique = "uniqueEdges: 'path'" if with_edges else "uniqueVertices: 'global'"
            filter_clause = f"({term(crsr, cl.term)})" if cl.term else "true"
            inner = traversal_filter(cl.with_clause, crsr, depth + 1) if cl.with_clause else ""
            # at depth > 0 the start node itself always passes the filter
            filter_root = f"({l0crsr}._key=={crsr}._key) or " if depth > 0 else ""
            edge_type_traversals = f", {direction} ".join(db.edge_collection(et) for et in nav.edge_types)
            return (
                f"FOR {crsr} IN 0..{nav.until} {direction} {in_crs} "
                f"{edge_type_traversals} OPTIONS {{ bfs: true, {unique} }} "
                f"FILTER {filter_root}{filter_clause} "
            ) + inner

        def collect_filter(cl: WithClause, depth: int) -> str:
            # Emit the COLLECT/FILTER chain that counts matches per level, innermost first.
            fltr = cl.with_filter
            if cl.with_clause:
                collects = ", ".join(f"l{depth-1}_l{i}_res=l{depth}_l{i}_res" for i in range(0, depth))
            else:
                collects = ", ".join(f"l{depth-1}_l{i}_res={cursor_in(i)}" for i in range(0, depth))
            if depth == 1:
                # note: the traversal starts from 0 (only 0 and 1 is allowed)
                # when we start from 1: increase the count by one to not count the start node
                # when we start from 0: the start node is expected in the count already
                filter_term = f"FILTER counter1{fltr.op}{fltr.num + cl.navigation.start}"
            else:
                root_key = f"l{depth-1}_l0_res._key==l{depth-1}_l{depth-1}_res._key"
                filter_term = f"FILTER ({root_key}) or (counter{depth}{fltr.op}{fltr.num})"
            inner = collect_filter(cl.with_clause, depth + 1) if cl.with_clause else ""
            return inner + f"COLLECT {collects} WITH COUNT INTO counter{depth} {filter_term} "

        out = next_crs()
        query_part += (
            f"LET {out} =( FOR {l0crsr} in {in_crsr} "
            + traversal_filter(clause, l0crsr, 1)
            + collect_filter(clause, 1)
            + "RETURN l0_l0_res) "
        )
        return out

    def inout(in_crsr: str, start: int, until: int, edge_type: str, direction: str) -> str:
        # Emit one graph traversal (start..until hops) along a single edge type/direction.
        nonlocal query_part
        in_c = next_crs("io_in")
        out = next_crs("io_out")
        out_crsr = next_crs("io_crs")
        link = next_crs("io_link")
        unique = "uniqueEdges: 'path'" if with_edges else "uniqueVertices: 'global'"
        link_str = f", {link}" if with_edges else ""
        dir_bound = "OUTBOUND" if direction == Direction.outbound else "INBOUND"
        # when edges are requested, attach the traversed edge's endpoints and id to each vertex
        inout_result = (
            f"MERGE({out_crsr}, {{_from:{link}._from, _to:{link}._to, _link_id:{link}._id}})"
            if with_edges
            else out_crsr
        )
        if outer_merge and part_idx == 0:
            # first part of a merge query: traverse directly from the merge cursor
            graph_cursor = in_crsr
            outer_for = ""
        else:
            graph_cursor = in_c
            outer_for = f"FOR {in_c} in {in_crsr} "
        query_part += (
            f"LET {out} =({outer_for}"
            f"FOR {out_crsr}{link_str} IN {start}..{until} {dir_bound} {graph_cursor} "
            f"{db.edge_collection(edge_type)} OPTIONS {{ bfs: true, {unique} }} "
            f"RETURN DISTINCT {inout_result}) "
        )
        return out

    def navigation(in_crsr: str, nav: Navigation) -> str:
        # Emit all traversals for a Navigation; UNION_DISTINCT them when there are several.
        nonlocal query_part
        all_walks = []
        if nav.direction == Direction.any:
            # "any" direction: walk inbound and outbound separately and combine
            for et in nav.edge_types:
                all_walks.append(inout(in_crsr, nav.start, nav.until, et, Direction.inbound))
            for et in nav.maybe_two_directional_outbound_edge_type or nav.edge_types:
                all_walks.append(inout(in_crsr, nav.start, nav.until, et, Direction.outbound))
        else:
            for et in nav.edge_types:
                all_walks.append(inout(in_crsr, nav.start, nav.until, et, nav.direction))
        if len(all_walks) == 1:
            return all_walks[0]
        else:
            nav_crsr = next_crs()
            all_walks_combined = ",".join(all_walks)
            query_part += f"LET {nav_crsr} = UNION_DISTINCT({all_walks_combined})"
            return nav_crsr

    if isinstance(p.term, MergeTerm):
        # do not allow a limit in the prefilter
        filter_cursor = filter_statement(in_cursor, p.term.pre_filter, None)
        cursor, merge_part = merge(filter_cursor, p.term.merge)
        query_part += merge_part
        post = p.term.post_filter if p.term.post_filter else AllTerm()
        # always do the post filter in case of sort or limit
        cursor = filter_statement(cursor, post, p.limit)
    else:
        cursor = filter_statement(in_cursor, p.term, p.limit)
    cursor = with_clause(cursor, p.with_clause) if p.with_clause else cursor
    cursor = navigation(cursor, p.navigation) if p.navigation else cursor
    return p, cursor, filtered_out, query_part
"for", "insert", "let", "limit", "remove", "replace", "return", "search", "sort", "update", "upsert", "window", "with", } allowed_first_merge_part = Part(AllTerm()) unset_props = json.dumps(["flat"]) # This list of delimiter is also used in the arango delimiter index. # In case the definition is changed, also the index needs to change! fulltext_delimiter = [" ", "_", "-", "@", ":", "/", "."] fulltext_delimiter_regexp = re.compile("[" + "".join( re.escape(a) for a in fulltext_delimiter) + "]+") # All resolved ancestors attributes have to be treated explicitly. # Queries with /ancestors.kind.xxx have to be treated as merge query parameters. ancestor_merges = { f"ancestors.{p.to_path[1]}" for r in GraphResolver.to_resolve for p in r.resolve if p.to_path[0] == "ancestors" }
async def create_query(
    self, commands: List[ExecutableCommand], ctx: CLIContext
) -> Tuple[Query, Dict[str, Any], List[ExecutableCommand]]:
    """
    Takes a list of query part commands and combine them to a single executable query command.
    This process can also introduce new commands that should run after the query is finished.
    Therefore, a list of executable commands is returned.
    :param commands: the incoming executable commands, which actions are all instances of SearchCLIPart.
    :param ctx: the context to execute within.
    :return: the resulting list of commands to execute.
    """
    # Pass parsed options to execute query
    # Multiple query commands are possible - so the dict is combined with every parsed query.
    parsed_options: Dict[str, Any] = {}

    async def parse_query(query_arg: str) -> Query:
        # Parse one search argument, accumulating its options into parsed_options.
        nonlocal parsed_options
        parsed, query_part = ExecuteSearchCommand.parse_known(query_arg)
        parsed_options = {**parsed_options, **parsed}
        # section expansion is disabled here: it will happen on the final query after all parts have been combined
        return await self.dependencies.template_expander.parse_query(
            "".join(query_part), None, omit_section_expansion=True, **ctx.env
        )

    query: Query = Query.by(AllTerm())
    additional_commands: List[ExecutableCommand] = []
    # We need to remember the first head/tail, since tail will reverse the sort order
    first_head_tail_in_a_row: Optional[CLICommand] = None
    head_tail_keep_order = True
    for command in commands:
        part = command.command
        arg = command.arg if command.arg else ""
        if isinstance(part, SearchPart):
            query = query.combine(await parse_query(arg))
        elif isinstance(part, PredecessorsPart):
            # one hop against the edge direction
            origin, edge = PredecessorsPart.parse_args(arg, ctx)
            query = query.traverse_in(origin, 1, edge)
        elif isinstance(part, SuccessorsPart):
            # one hop along the edge direction
            origin, edge = PredecessorsPart.parse_args(arg, ctx)
            query = query.traverse_out(origin, 1, edge)
        elif isinstance(part, AncestorsPart):
            # walk inbound until the root
            origin, edge = PredecessorsPart.parse_args(arg, ctx)
            query = query.traverse_in(origin, Navigation.Max, edge)
        elif isinstance(part, DescendantsPart):
            # walk outbound until the leaves
            origin, edge = PredecessorsPart.parse_args(arg, ctx)
            query = query.traverse_out(origin, Navigation.Max, edge)
        elif isinstance(part, AggregatePart):
            group_vars, group_function_vars = aggregate_parameter_parser.parse(arg)
            query = replace(query, aggregate=Aggregate(group_vars, group_function_vars))
        elif isinstance(part, CountCommand):
            # count command followed by a query: make it an aggregation
            # since the output of aggregation is not exactly the same as count
            # we also add the aggregate_to_count command after the query
            assert query.aggregate is None, "Can not combine aggregate and count!"
            group_by = [AggregateVariable(AggregateVariableName(arg), "name")] if arg else []
            aggregate = Aggregate(group_by, [AggregateFunction("sum", 1, [], "count")])
            # If the query should be explained, we want the output as is
            if "explain" not in parsed_options:
                additional_commands.append(self.command("aggregate_to_count", None, ctx))
            query = replace(query, aggregate=aggregate)
            query = query.add_sort(f"{PathRoot}count")
        elif isinstance(part, HeadCommand):
            size = HeadCommand.parse_size(arg)
            limit = query.parts[0].limit or Limit(0, size)
            if first_head_tail_in_a_row and head_tail_keep_order:
                # head after head: narrow the existing window from the front
                query = query.with_limit(Limit(limit.offset, min(limit.length, size)))
            elif first_head_tail_in_a_row and not head_tail_keep_order:
                # head after tail: the order is reversed, so take from the back of the window
                length = min(limit.length, size)
                query = query.with_limit(Limit(limit.offset + limit.length - length, length))
            else:
                query = query.with_limit(size)
        elif isinstance(part, TailCommand):
            size = HeadCommand.parse_size(arg)
            limit = query.parts[0].limit or Limit(0, size)
            if first_head_tail_in_a_row and head_tail_keep_order:
                # tail after head: take the last elements of the existing window
                query = query.with_limit(
                    Limit(limit.offset + max(0, limit.length - size), min(limit.length, size))
                )
            elif first_head_tail_in_a_row and not head_tail_keep_order:
                # tail after tail: order already reversed, narrow from the front
                query = query.with_limit(Limit(limit.offset, min(limit.length, size)))
            else:
                head_tail_keep_order = False
                query = query.with_limit(size)
                p = query.current_part
                # the limit might have created a new part - make sure there is a sort order
                p = p if p.sort else replace(p, sort=DefaultSort)
                # reverse the sort order -> limit -> reverse the result
                query.parts[0] = replace(p, sort=[s.reversed() for s in p.sort], reverse_result=True)
        else:
            raise AttributeError(f"Do not understand: {part} of type: {class_fqn(part)}")
        # Remember the first head tail in a row of head tails
        if isinstance(part, (HeadCommand, TailCommand)):
            if not first_head_tail_in_a_row:
                first_head_tail_in_a_row = part
        else:
            first_head_tail_in_a_row = None
            head_tail_keep_order = True

    # Define default sort order, if not already defined
    # A sort order is required to always return the result in a deterministic way to the user.
    # Deterministic order is required for head/tail to work
    parts = [pt if pt.sort else replace(pt, sort=DefaultSort) for pt in query.parts]
    query = replace(query, parts=parts)

    # If the last part is a navigation, we need to add sort which will ingest a new part.
    with_sort = query.set_sort(DefaultSort) if query.current_part.navigation else query
    # When all parts are combined, interpret the result on defined section.
    final_query = with_sort.on_section(ctx.env.get("section", PathRoot))
    options = ExecuteSearchCommand.argument_string(parsed_options)
    query_string = str(final_query)
    execute_search = self.command("execute_search", options + query_string, ctx)
    return final_query, parsed_options, [execute_search, *additional_commands]