# Example 1
def test_merge_query_creation() -> None:
    """Merge queries are derived from ancestor property paths."""
    nav_in = Navigation(1, Navigation.Max, direction=Direction.inbound)
    foo_query = Query([Part(IsTerm(["foo"])), Part(AllTerm(), navigation=nav_in)])
    expected = [MergeQuery("ancestors.foo", foo_query)]

    # a merge query for the path is synthesized automatically
    assert Part(AllTerm()).merge_queries_for(["ancestors.foo.reported.bla"]) == expected
    # an already existing merge query is reused, not duplicated
    assert Part(MergeTerm(AllTerm(), expected)).merge_queries_for(["ancestors.foo.reported.bla"]) == expected
    # only ancestors/descendants prefixes are supported
    with pytest.raises(Exception):
        Part(AllTerm()).merge_queries_for(["unknown.foo.reported.bla"])
    # a property path below the kind is required
    with pytest.raises(Exception):
        Part(AllTerm()).merge_queries_for(["ancestors.foo"])

    # rewrite for ancestors/descendants also works with additional properties
    props = ["ancestors.kind.reported.prop", "test", "a"]
    assert (
        str(Query.by("test").rewrite_for_ancestors_descendants(props))
        == 'is("test") {ancestors.kind: all <-default[1:]- is("kind")}'
    )
    merged = Query.by("test").merge_with("ancestors.cloud", NavigateUntilRoot, IsTerm(["cloud"]))
    assert (
        str(merged.rewrite_for_ancestors_descendants(props))
        == 'is("test") {ancestors.kind: all <-default[1:]- is("kind"), ancestors.cloud: all <-default[1:]- is("cloud")}'
    )
# Example 2
def test_simplify() -> None:
    """Terms combined with `all` simplify to the minimal equivalent form."""
    # some_criteria | all => all
    assert str(IsTerm(["test"]) | AllTerm()) == "all"
    # some_criteria & all => some_criteria
    assert str(IsTerm(["test"]) & AllTerm()) == 'is("test")'
    # simplification is applied recursively in nested terms
    # (note: `== True` builds a DSL predicate term, not a boolean comparison)
    nested = AllTerm() & ((P("test") == True) & (IsTerm(["test"]) | AllTerm()))
    assert str(Query.by(nested)) == "test == true"
# Example 3
def merge_query_query(ud: UD) -> Query:
    """Draw a random query whose last part is a pure navigation part."""
    drawer = Drawer(ud)
    drawn_nav = drawer.draw(navigation())
    drawn_term = drawer.draw(term)
    # merge query need to start with navigation part without additional props
    return Query([Part(drawn_term), Part(AllTerm(), navigation=drawn_nav)])
# Example 4
def test_merge_term() -> None:
    """A MergeTerm with pre filter, merge query and post filter round-trips."""
    next_foo = Query.by(AllTerm()).traverse_in(until=Navigation.Max).filter("foo")
    merge = MergeTerm(Query.mk_term("bla"), [MergeQuery("foo123", next_foo)], Query.mk_term("bla"))
    assert_round_trip(term_parser, Query.by(merge))
# Example 5
def part_parser() -> Parser:
    """Parse one query part: term, with-clause, tag, sort, limit, navigation."""
    term = yield term_parser.optional()
    yield whitespace
    with_clause = yield with_clause_parser.optional()
    tag = yield tag_parser
    sort = yield sort_parser.optional()
    limit = yield limit_parser.optional()
    # if nothing else was parsed, a navigation is mandatory; otherwise optional
    if term or sort or limit:
        nav = yield navigation_parser.optional()
    else:
        nav = yield navigation_parser
    reverse = yield reversed_p
    # an absent term means: match everything
    return Part(term if term else AllTerm(), tag, with_clause, sort if sort else [], limit, nav, reverse)
# Example 6
 def combine_fulltext(term: Term) -> Tuple[Term, Term]:
     """Split *term* into a (fulltext, remaining) pair.

     The fulltext part can be served by the search index, the remaining part
     has to be evaluated as a regular filter. Either element of the returned
     tuple may be AllTerm() if the respective side is empty.
     """
     if not term.contains_term_type(FulltextTerm):
         return AllTerm(), term
     elif isinstance(term, FulltextTerm):
         return term, AllTerm()
     elif isinstance(term, CombinedTerm):
         # An `or` that mixes fulltext with non-fulltext leaf criteria can not
         # be split - the whole term must be evaluated as a regular filter.
         if ((term.left.contains_term_type(FulltextTerm)
              or term.right.contains_term_type(FulltextTerm))
                 and term.op == "or"
                 and term.find_term(lambda x: not isinstance(
                     x, FulltextTerm) and not isinstance(x, CombinedTerm))):
             # This term can not utilize the search index!
             return AllTerm(), term
         left = isinstance(term.left, FulltextTerm)
         right = isinstance(term.right, FulltextTerm)
         if left and right:
             return term, AllTerm()
         elif left:
             # left side is pure fulltext: recurse into the right side
             ft, remaining = combine_fulltext(term.right)
             return ft.combine(term.op, term.left), remaining
         elif right:
             # right side is pure fulltext: recurse into the left side
             ft, remaining = combine_fulltext(term.left)
             return ft.combine(term.op, term.right), remaining
         else:
             # fulltext is nested on both sides: split each and recombine
             lf, remaining_left = combine_fulltext(term.right)
             rf, remaining_right = combine_fulltext(term.left)
             return lf.combine(term.op, rf), remaining_left.combine(
                 term.op, remaining_right)
     elif isinstance(term, NotTerm):
         ft, remaining = combine_fulltext(term.term)
         # note: the conditional below applies to the second tuple element
         # only - an AllTerm remainder is kept as-is instead of being negated
         return NotTerm(ft), remaining if isinstance(
             remaining, AllTerm) else NotTerm(remaining)
     elif isinstance(term, MergeTerm):
         # only the pre filter takes part in the fulltext split
         ft, remaining = combine_fulltext(term.pre_filter)
         return ft, replace(term, pre_filter=remaining)
     else:
         raise AttributeError(
             f"Can not handle term of type: {type(term)} ({term})")
# Example 7
 def add_merge_query(mq: MergeQuery, part_result: str) -> None:
     """Render the sub query of *mq* and bind its result to *part_result*.

     Appends a LET statement to the surrounding `merge_result` string.
     """
     nonlocal merge_result
     # make sure the sub query is valid: the checked part may only carry a
     # navigation (NOTE(review): parts[-1] is inspected, which suggests the
     # part list is stored in reverse execution order - confirm in Query)
     f = mq.query.parts[-1]
     assert (f.term == AllTerm() and not f.sort and not f.limit
             and not f.with_clause and
             not f.tag), "Merge query needs to start with navigation!"
     merge_crsr = next_crs("merge_part")
     # make sure the limit only yields one element
     mg_crs, mg_query = query_string(db, mq.query, query_model,
                                     merge_cursor, with_edges,
                                     bind_vars, counters, merge_crsr)
     if mq.only_first:
         # only_first: bind a single merged document (or null)
         merge_result += (
             f"LET {part_result}=FIRST({mg_query} FOR r in {mg_crs} LIMIT 1 RETURN UNSET(r, {unset_props}))"
         )
     else:
         # otherwise: bind the list of all distinct merged documents
         merge_result += (
             f"LET {part_result}=({mg_query} FOR r in {mg_crs} RETURN DISTINCT UNSET(r, {unset_props}))"
         )
# Example 8
@make_parser
def not_term() -> Parser:
    """Parse a negated filter term: `not <term>`."""
    yield not_p
    inner = yield filter_term_parser
    return NotTerm(inner)


# A fulltext term should not read any keywords of the language
fulltext_term = quoted_string_p.map(FulltextTerm)
# one or more literals separated by commas, e.g.: a, "b", c
literal_list_comma_separated_p = (quoted_string_p | literal_p).sep_by(comma_p, min=1)
# the same list wrapped in square brackets, e.g.: [a, "b", c]
literal_list_in_square_brackets = l_bracket_p >> literal_list_comma_separated_p << r_bracket_p
literal_list_optional_brackets = literal_list_in_square_brackets | literal_list_comma_separated_p
# is(kind) or is([kind1, kind2, ...])
is_term = lexeme(string("is") >> lparen_p >> literal_list_optional_brackets << rparen_p).map(IsTerm)
# id(node_id) - the id may be quoted or a bare literal
id_term = lexeme(string("id") >> lparen_p >> (quoted_string_p | literal_p) << rparen_p).map(IdTerm)
# the keyword `all` matches every element
match_all_term = lexeme(string("all")).map(lambda _: AllTerm())
# order matters: more specific parsers are tried before the fulltext fallback
leaf_term_p = is_term | id_term | match_all_term | function_term | predicate_term | not_term | fulltext_term
bool_op_p = lexeme(string("and") | string("or"))
not_p = lexeme(string("not"))


@make_parser
def combined_term() -> Parser:
    """Parse a left-associative chain of terms joined by and/or."""
    left = yield simple_term_p
    result = left
    while True:
        op = yield bool_op_p.optional()
        if op is None:
            break
        right = yield simple_term_p
        result = CombinedTerm(result, op, right)
    # NOTE(review): no `return result` is visible here - the final return
    # statement appears to be truncated in this excerpt; confirm against the
    # original source before relying on this parser.
# Example 9
    def part(p: Part, in_cursor: str,
             part_idx: int) -> Tuple[Part, str, str, str]:
        """Translate one query part into AQL statements.

        Returns the part itself, the name of the cursor holding the final
        result, the name of the filtered cursor, and the generated AQL text.
        """
        query_part = ""
        filtered_out = ""

        def filter_statement(current_cursor: str, part_term: Term,
                             limit: Optional[Limit]) -> str:
            # Emit a FOR/FILTER (plus optional SORT/LIMIT) statement and
            # return the name of the cursor holding the filtered result.
            if isinstance(part_term, AllTerm) and limit is None and not p.sort:
                return current_cursor
            nonlocal query_part, filtered_out
            crsr = next_crs()
            filtered_out = next_crs("filter")
            md = f"NOT_NULL({crsr}.metadata, {{}})"
            f_res = f'MERGE({crsr}, {{metadata:MERGE({md}, {{"query_tag": "{p.tag}"}})}})' if p.tag else crsr
            limited = f" LIMIT {limit.offset}, {limit.length} " if limit else " "
            sort_by = sort(crsr, p.sort) if p.sort else " "
            for_stmt = f"FOR {crsr} in {current_cursor} FILTER {term(crsr, part_term)}{sort_by}{limited}"
            return_stmt = f"RETURN {f_res}"
            reverse = "REVERSE" if p.reverse_result else ""
            query_part += f"LET {filtered_out} = {reverse}({for_stmt}{return_stmt})"
            return filtered_out

        def with_clause(in_crsr: str, clause: WithClause) -> str:
            nonlocal query_part
            # this is the general structure of the with_clause that is created
            #
            # FOR cloud in foo FILTER @0 in cloud.kinds
            #    FOR account IN 0..1 OUTBOUND cloud foo_default
            #    OPTIONS { bfs: true, uniqueVertices: 'global' }
            #    FILTER (cloud._key==account._key) or (@1 in account.kinds)
            #        FOR region in 0..1 OUTBOUND account foo_default
            #        OPTIONS { bfs: true, uniqueVertices: 'global' }
            #         FILTER (cloud._key==region._key) or (@2 in region.kinds)
            #             FOR zone in 0..1 OUTBOUND region foo_default
            #             OPTIONS { bfs: true, uniqueVertices: 'global' }
            #             FILTER (cloud._key==zone._key) or (@3 in zone.kinds)
            #         COLLECT l4_cloud = cloud, l4_account=account, l4_region=region WITH COUNT INTO counter3
            #         FILTER (l4_cloud._key==l4_region._key) or (counter3>=0)
            #     COLLECT l3_cloud = l4_cloud, l3_account=l4_account WITH COUNT INTO counter2
            #     FILTER (l3_cloud._key==l3_account._key) or (counter2>=0) // ==2 regions
            # COLLECT l2_cloud = l3_cloud WITH COUNT INTO counter1
            # FILTER (counter1>=0) //counter is +1 since the node itself is always bypassed
            # RETURN ({cloud: l2_cloud._key, count:counter1})
            current = next_counter("with_clause")

            def cursor_in(depth: int) -> str:
                return f"c{current}_{depth}"

            l0crsr = cursor_in(0)

            def traversal_filter(cl: WithClause, in_crs: str,
                                 depth: int) -> str:
                # Emit the nested FOR/FILTER traversal for one clause level.
                nav = cl.navigation
                crsr = cursor_in(depth)
                direction = "OUTBOUND" if nav.direction == Direction.outbound else "INBOUND"
                unique = "uniqueEdges: 'path'" if with_edges else "uniqueVertices: 'global'"
                filter_clause = f"({term(crsr, cl.term)})" if cl.term else "true"
                inner = traversal_filter(cl.with_clause, crsr, depth +
                                         1) if cl.with_clause else ""
                filter_root = f"({l0crsr}._key=={crsr}._key) or " if depth > 0 else ""
                edge_type_traversals = f", {direction} ".join(
                    db.edge_collection(et) for et in nav.edge_types)
                return (
                    f"FOR {crsr} IN 0..{nav.until} {direction} {in_crs} "
                    f"{edge_type_traversals} OPTIONS {{ bfs: true, {unique} }} "
                    f"FILTER {filter_root}{filter_clause} ") + inner

            def collect_filter(cl: WithClause, depth: int) -> str:
                # Emit the COLLECT/FILTER statements that count matches per level.
                fltr = cl.with_filter
                if cl.with_clause:
                    collects = ", ".join(
                        f"l{depth-1}_l{i}_res=l{depth}_l{i}_res"
                        for i in range(0, depth))
                else:
                    collects = ", ".join(f"l{depth-1}_l{i}_res={cursor_in(i)}"
                                         for i in range(0, depth))

                if depth == 1:
                    # note: the traversal starts from 0 (only 0 and 1 is allowed)
                    # when we start from 1: increase the count by one to not count the start node
                    # when we start from 0: the start node is expected in the count already
                    filter_term = f"FILTER counter1{fltr.op}{fltr.num + cl.navigation.start}"
                else:
                    root_key = f"l{depth-1}_l0_res._key==l{depth-1}_l{depth-1}_res._key"
                    filter_term = f"FILTER ({root_key}) or (counter{depth}{fltr.op}{fltr.num})"

                inner = collect_filter(cl.with_clause, depth +
                                       1) if cl.with_clause else ""
                return inner + f"COLLECT {collects} WITH COUNT INTO counter{depth} {filter_term} "

            out = next_crs()

            query_part += (f"LET {out} =( FOR {l0crsr} in {in_crsr} " +
                           traversal_filter(clause, l0crsr, 1) +
                           collect_filter(clause, 1) + "RETURN l0_l0_res) ")
            return out

        def inout(in_crsr: str, start: int, until: int, edge_type: str,
                  direction: str) -> str:
            # Emit one graph traversal over a single edge collection and
            # return the name of the cursor with the visited nodes.
            nonlocal query_part
            in_c = next_crs("io_in")
            out = next_crs("io_out")
            out_crsr = next_crs("io_crs")
            link = next_crs("io_link")
            unique = "uniqueEdges: 'path'" if with_edges else "uniqueVertices: 'global'"
            link_str = f", {link}" if with_edges else ""
            dir_bound = "OUTBOUND" if direction == Direction.outbound else "INBOUND"
            inout_result = (
                f"MERGE({out_crsr}, {{_from:{link}._from, _to:{link}._to, _link_id:{link}._id}})"
                if with_edges else out_crsr)
            if outer_merge and part_idx == 0:
                graph_cursor = in_crsr
                outer_for = ""
            else:
                graph_cursor = in_c
                outer_for = f"FOR {in_c} in {in_crsr} "

            query_part += (
                f"LET {out} =({outer_for}"
                f"FOR {out_crsr}{link_str} IN {start}..{until} {dir_bound} {graph_cursor} "
                f"{db.edge_collection(edge_type)} OPTIONS {{ bfs: true, {unique} }} "
                f"RETURN DISTINCT {inout_result}) ")
            return out

        def navigation(in_crsr: str, nav: Navigation) -> str:
            # Direction `any` is modeled as the UNION_DISTINCT of one inbound
            # and one outbound walk per edge type.
            nonlocal query_part
            all_walks = []
            if nav.direction == Direction.any:
                for et in nav.edge_types:
                    all_walks.append(
                        inout(in_crsr, nav.start, nav.until, et,
                              Direction.inbound))
                for et in nav.maybe_two_directional_outbound_edge_type or nav.edge_types:
                    all_walks.append(
                        inout(in_crsr, nav.start, nav.until, et,
                              Direction.outbound))
            else:
                for et in nav.edge_types:
                    all_walks.append(
                        inout(in_crsr, nav.start, nav.until, et,
                              nav.direction))

            if len(all_walks) == 1:
                return all_walks[0]
            else:
                nav_crsr = next_crs()
                all_walks_combined = ",".join(all_walks)
                query_part += f"LET {nav_crsr} = UNION_DISTINCT({all_walks_combined})"
                return nav_crsr

        if isinstance(p.term, MergeTerm):
            # do not allow a limit in the prefilter
            filter_cursor = filter_statement(in_cursor, p.term.pre_filter,
                                             None)
            cursor, merge_part = merge(filter_cursor, p.term.merge)
            query_part += merge_part
            post = p.term.post_filter if p.term.post_filter else AllTerm()
            # always do the post filter in case of sort or limit
            cursor = filter_statement(cursor, post, p.limit)
        else:
            cursor = filter_statement(in_cursor, p.term, p.limit)
        cursor = with_clause(cursor,
                             p.with_clause) if p.with_clause else cursor
        cursor = navigation(cursor, p.navigation) if p.navigation else cursor
        return p, cursor, filtered_out, query_part
# Example 10
    "for",
    "insert",
    "let",
    "limit",
    "remove",
    "replace",
    "return",
    "search",
    "sort",
    "update",
    "upsert",
    "window",
    "with",
}

# The first executed part of a merge query may only be a plain `all` part.
allowed_first_merge_part = Part(AllTerm())
# Properties stripped from merged documents before they are returned.
unset_props = json.dumps(["flat"])
# This list of delimiter is also used in the arango delimiter index.
# In case the definition is changed, also the index needs to change!
fulltext_delimiter = [" ", "_", "-", "@", ":", "/", "."]
# Matches one or more consecutive delimiter characters in a single pass.
fulltext_delimiter_regexp = re.compile("[" + "".join(
    re.escape(a) for a in fulltext_delimiter) + "]+")

# All resolved ancestors attributes have to be treated explicitly.
# Queries with /ancestors.kind.xxx have to be treated as merge query parameters.
ancestor_merges = {
    f"ancestors.{p.to_path[1]}"
    for r in GraphResolver.to_resolve for p in r.resolve
    if p.to_path[0] == "ancestors"
}
# Example 11
    async def create_query(
        self, commands: List[ExecutableCommand], ctx: CLIContext
    ) -> Tuple[Query, Dict[str, Any], List[ExecutableCommand]]:
        """
        Takes a list of query part commands and combine them to a single executable query command.
        This process can also introduce new commands that should run after the query is finished.
        Therefore, a list of executable commands is returned.
        :param commands: the incoming executable commands, which actions are all instances of SearchCLIPart.
        :param ctx: the context to execute within.
        :return: the resulting list of commands to execute.
        """

        # Pass parsed options to execute query
        # Multiple query commands are possible - so the dict is combined with every parsed query.
        parsed_options: Dict[str, Any] = {}

        async def parse_query(query_arg: str) -> Query:
            nonlocal parsed_options
            parsed, query_part = ExecuteSearchCommand.parse_known(query_arg)
            parsed_options = {**parsed_options, **parsed}
            # section expansion is disabled here: it will happen on the final query after all parts have been combined
            return await self.dependencies.template_expander.parse_query(
                "".join(query_part), None, omit_section_expansion=True, **ctx.env
            )

        query: Query = Query.by(AllTerm())
        additional_commands: List[ExecutableCommand] = []
        # We need to remember the first head/tail, since tail will reverse the sort order
        first_head_tail_in_a_row: Optional[CLICommand] = None
        # False once a tail has flipped the sort order of the current run
        head_tail_keep_order = True
        for command in commands:
            part = command.command
            arg = command.arg if command.arg else ""
            if isinstance(part, SearchPart):
                query = query.combine(await parse_query(arg))
            elif isinstance(part, PredecessorsPart):
                origin, edge = PredecessorsPart.parse_args(arg, ctx)
                query = query.traverse_in(origin, 1, edge)
            elif isinstance(part, SuccessorsPart):
                origin, edge = PredecessorsPart.parse_args(arg, ctx)
                query = query.traverse_out(origin, 1, edge)
            elif isinstance(part, AncestorsPart):
                origin, edge = PredecessorsPart.parse_args(arg, ctx)
                query = query.traverse_in(origin, Navigation.Max, edge)
            elif isinstance(part, DescendantsPart):
                origin, edge = PredecessorsPart.parse_args(arg, ctx)
                query = query.traverse_out(origin, Navigation.Max, edge)
            elif isinstance(part, AggregatePart):
                group_vars, group_function_vars = aggregate_parameter_parser.parse(arg)
                query = replace(query, aggregate=Aggregate(group_vars, group_function_vars))
            elif isinstance(part, CountCommand):
                # count command followed by a query: make it an aggregation
                # since the output of aggregation is not exactly the same as count
                # we also add the aggregate_to_count command after the query
                assert query.aggregate is None, "Can not combine aggregate and count!"
                group_by = [AggregateVariable(AggregateVariableName(arg), "name")] if arg else []
                aggregate = Aggregate(group_by, [AggregateFunction("sum", 1, [], "count")])
                # If the query should be explained, we want the output as is
                if "explain" not in parsed_options:
                    additional_commands.append(self.command("aggregate_to_count", None, ctx))
                query = replace(query, aggregate=aggregate)
                query = query.add_sort(f"{PathRoot}count")
            elif isinstance(part, HeadCommand):
                size = HeadCommand.parse_size(arg)
                limit = query.parts[0].limit or Limit(0, size)
                if first_head_tail_in_a_row and head_tail_keep_order:
                    # consecutive heads: intersect with the existing limit
                    query = query.with_limit(Limit(limit.offset, min(limit.length, size)))
                elif first_head_tail_in_a_row and not head_tail_keep_order:
                    # head after tail: the order is reversed, take from the end
                    length = min(limit.length, size)
                    query = query.with_limit(Limit(limit.offset + limit.length - length, length))
                else:
                    query = query.with_limit(size)
            elif isinstance(part, TailCommand):
                size = HeadCommand.parse_size(arg)
                limit = query.parts[0].limit or Limit(0, size)
                if first_head_tail_in_a_row and head_tail_keep_order:
                    # tail after head: shift the offset towards the end of the window
                    query = query.with_limit(Limit(limit.offset + max(0, limit.length - size), min(limit.length, size)))
                elif first_head_tail_in_a_row and not head_tail_keep_order:
                    query = query.with_limit(Limit(limit.offset, min(limit.length, size)))
                else:
                    head_tail_keep_order = False
                    query = query.with_limit(size)
                    p = query.current_part
                    # the limit might have created a new part - make sure there is a sort order
                    p = p if p.sort else replace(p, sort=DefaultSort)
                    # reverse the sort order -> limit -> reverse the result
                    query.parts[0] = replace(p, sort=[s.reversed() for s in p.sort], reverse_result=True)
            else:
                raise AttributeError(f"Do not understand: {part} of type: {class_fqn(part)}")

            # Remember the first head tail in a row of head tails
            if isinstance(part, (HeadCommand, TailCommand)):
                if not first_head_tail_in_a_row:
                    first_head_tail_in_a_row = part
            else:
                first_head_tail_in_a_row = None
                head_tail_keep_order = True

            # Define default sort order, if not already defined
            # A sort order is required to always return the result in a deterministic way to the user.
            # Deterministic order is required for head/tail to work
            parts = [pt if pt.sort else replace(pt, sort=DefaultSort) for pt in query.parts]
            query = replace(query, parts=parts)

        # If the last part is a navigation, we need to add sort which will ingest a new part.
        with_sort = query.set_sort(DefaultSort) if query.current_part.navigation else query
        # When all parts are combined, interpret the result on defined section.
        final_query = with_sort.on_section(ctx.env.get("section", PathRoot))
        options = ExecuteSearchCommand.argument_string(parsed_options)
        query_string = str(final_query)
        execute_search = self.command("execute_search", options + query_string, ctx)
        return final_query, parsed_options, [execute_search, *additional_commands]