async def test_query_aggregate(filled_graph_db: ArangoGraphDB, foo_model: Model) -> None:
    """Run three aggregation searches on the filled graph and compare the complete result lists."""
    # group by kind and count the identifiers
    by_kind = parse_query("aggregate(kind: count(identifier) as instances): is(foo)").on_section("reported")
    by_kind_model = QueryModel(by_kind, foo_model)
    async with await filled_graph_db.search_aggregation(by_kind_model) as cursor:
        rows = [row async for row in cursor]
        assert rows == [{"group": {"kind": "foo"}, "instances": 11}]

    # the group name is a template string; per the expected value the unknown variable renders as empty text
    by_template = parse_query(
        'aggregate("test_{kind}_{some_int}_{does_not_exist}" as kind: count(identifier) as instances): is("foo")'
    ).on_section("reported")
    by_template_model = QueryModel(by_template, foo_model)
    async with await filled_graph_db.search_aggregation(by_template_model) as cursor:
        rows = [row async for row in cursor]
        assert rows == [{"group": {"kind": "test_foo_0_"}, "instances": 11}]

    # two aggregation functions over the same property, no group variable
    two_functions = parse_query('aggregate(sum(f) as a, max(f) as b): is("bla")').on_section("reported")
    two_functions_model = QueryModel(two_functions, foo_model)
    async with await filled_graph_db.search_aggregation(two_functions_model) as cursor:
        rows = [row async for row in cursor]
        assert rows == [{"a": 2300, "b": 23}]
def test_special_cases() -> None:
    """Regression checks for two query strings the parser once handled incorrectly."""
    # parser was able to read: is(instance) and sort in "stance_cores"
    invalid_sort = "is(instance) and sort instance_cores"
    with pytest.raises(error.ParseError):
        parse_query(invalid_sort)

    # parser read the reversed option as separate part, so following query became 3 parts
    reversed_then_navigate = "all sort kind desc limit 1 reversed -default-> all sort kind asc"
    parsed = parse_query(reversed_then_navigate)
    assert len(parsed.parts) == 2
def test_query_with_preamble() -> None:
    """Check that the edge type can be set via preamble or keyword and that a preamble may follow an aggregation."""
    # a query without any preamble has to parse
    parse_query('id("root")')

    # edge type can be defined in preamble
    from_preamble = parse_query('(edge_type=delete): id("root") -[0:1]->')
    assert from_preamble.parts[0].navigation.edge_types == ["delete"]  # type: ignore

    # edge type can be defined via kwargs
    from_kwargs = parse_query('id("root") -[0:1]->', edge_type="delete")
    assert from_kwargs.parts[0].navigation.edge_types == ["delete"]  # type: ignore

    # aggregation and preamble
    with_aggregate = parse_query('aggregate(region: sum(cpu))(edge_type=delete): id("root") -[0:1]->')
    assert with_aggregate.aggregate.group_by[0].name == AggregateVariableName("region")  # type: ignore
    assert with_aggregate.aggregate.group_func[0].name == "cpu"  # type: ignore
def test_ancestors_kind_lookup(foo_model: Model, graph_db: GraphDB) -> None:
    """Check the second element returned by to_query for a predicate on an ancestor property."""
    # 1234 is coerced to a string
    parsed = parse_query("ancestors.account.reported.name==1234")
    model = QueryModel(parsed, foo_model)
    # presumably the bind variables of the generated query - only the value is checked here
    bind_vars = to_query(graph_db, model)[1]
    assert bind_vars == {"b0": "1234"}
def test_term_contains() -> None:
    """Check contains_term_type on the term of the first part of a parsed query."""
    parsed = parse_query('("test" or "fest") and (p>1 or p<2) {a: <-- is(foo)} not(a>23)')
    term = parsed.parts[0].term
    # term type -> expected answer of contains_term_type
    expectations = [
        (IdTerm, False),
        (IsTerm, True),
        (FulltextTerm, True),
        (Predicate, True),
        (NotTerm, True),
        (FunctionTerm, False),
    ]
    for term_type, expected in expectations:
        assert term.contains_term_type(term_type) is expected
def test_fulltext_term() -> None:
    """Check what fulltext_term_combine splits off as fulltext term for several queries."""
    # fulltext terms nested in and-clauses are collected, the rest is returned separately
    nested = parse_query('(a>0 and ("foo" and (b>1 and c>2 and "d")))').parts[0]
    ft, remaining = fulltext_term_combine(nested.term)
    assert str(remaining) == "((b > 1 and c > 2) and a > 0)"
    assert str(ft) == '("d" and "foo")'

    # queries where the fulltext index can not be utilized
    not_usable = [
        # there are 2 fulltext terms or combined with something else
        '(a>0 and "b") or ("c" and "d")',
        # fulltext term after a merge
        'a>0 {c: <--} "fulltext"',
        # fulltext term inside a merge query
        'a>0 {c: <-- "fulltext" }',
    ]
    for text in not_usable:
        ft, _ = fulltext_term_combine(parse_query(text).parts[0].term)
        assert ft is None

    # only fulltext terms
    only_fulltext = parse_query('"a" and "b" or "c" and "d"').parts[0].term
    ft, _ = fulltext_term_combine(only_fulltext)
    assert str(ft) == '((("a" and "b") or "c") and "d")'
async def parse_query(
    self, to_parse: str, on_section: Optional[str], *, omit_section_expansion: bool = False, **env: str
) -> Query:
    """
    Expand the given string via self.expand and parse the expanded text into a query.

    :param to_parse: the query string to expand and parse.
    :param on_section: passed to Query.on_section, unless omit_section_expansion is set.
    :param omit_section_expansion: if true, the parsed query is returned without calling on_section.
    :param env: additional keyword arguments handed to the query parser.
    :return: the parsed query.
    """
    # only the first element returned by expand is used
    expanded, _ = await self.expand(to_parse)
    parsed = query_parser.parse_query(expanded, **env)
    if omit_section_expansion:
        return parsed
    return parsed.on_section(on_section)
def test_combine() -> None:
    """Check the string form of combined queries, the combined limit and the rejected combinations."""

    def start_query() -> Query:
        # fresh query per use; `== True` is intentional: the comparison is handed to Query.by, not evaluated here
        return Query.by(P("test") == True).traverse_out()  # noqa: E712

    # queries without navigation
    query1 = start_query().combine(Query.by("foo")).combine(Query.by("bla"))
    assert str(query1) == 'test == true -default-> (is("foo") and is("bla"))'

    # queries that end with a navigation
    foo_out = Query.by("foo").traverse_out()
    bla_out = Query.by("bla").traverse_out()
    query2 = start_query().combine(foo_out).combine(bla_out)
    assert str(query2) == 'test == true -default-> is("foo") -default-> is("bla") -default->'

    # filters and navigations before the combine
    query3 = (
        start_query()
        .filter("boo")
        .traverse_out()
        .filter("bar")
        .combine(Query.by("foo"))
        .combine(Query.by("bla"))
    )
    assert str(query3) == 'test == true -default-> is("boo") -default-> ((is("bar") and is("foo")) and is("bla"))'

    # minimum is taken
    query4 = Query.by("a").with_limit(10).combine(Query.by("b").with_limit(2))
    assert query4.current_part.limit == Limit(0, 2)

    # can not combine 2 aggregations
    with pytest.raises(AttributeError):
        parse_query("aggregate(sum(1)): is(a)").combine(parse_query("aggregate(sum(1)): is(a)"))

    # can not combine 2 with statements
    with pytest.raises(AttributeError):
        parse_query("is(foo) with(empty, -default->)").combine(parse_query("is(bla) with(empty, -default->)"))
async def test_no_null_if_undefined(graph_db: ArangoGraphDB, foo_model: Model) -> None:
    """Merge a graph without desired and metadata sections and check that search results do not contain them."""
    await graph_db.wipe()

    # imported graph should not have any desired or metadata sections
    graph = create_graph("test", 0)
    for _, props in graph.nodes(True):
        for section in ("desired", "metadata"):
            del props[section]
    await graph_db.merge_graph(graph, foo_model)

    everything = QueryModel(parse_query("all"), foo_model)
    async with await graph_db.search_list(everything) as cursor:
        async for element in cursor:
            assert "reported" in element
            assert "desired" not in element
            assert "metadata" not in element
async def test_query_list(filled_graph_db: ArangoGraphDB, foo_model: Model) -> None:
    """Run two list searches on the filled graph and check the number of results."""
    # query built in code: one foo by identifier, then outbound to the blas with f == 23
    foo_nine = Query.by("foo", P("identifier") == "9")
    blas = foo_nine.traverse_out().filter("bla", P("f") == 23)
    blas_model = QueryModel(blas.on_section("reported"), foo_model)
    async with await filled_graph_db.search_list(blas_model) as cursor:
        # every reported section has to be readable as Bla
        result = [from_js(element["reported"], Bla) async for element in cursor]
        assert len(result) == 10

    # parsed query: everything that is a foo or a bla
    foos_or_blas = parse_query("is([foo, bla])")
    both_model = QueryModel(foos_or_blas.on_section("reported"), foo_model)
    async with await filled_graph_db.search_list(both_model) as cursor:
        result = [element async for element in cursor]
        assert len(result) == 111  # 113 minus 1 graph_root, minus one cloud
def test_on_section() -> None:
    """Check on_section and relative_to_section on a query with aggregate, with-clause, sort and navigation."""
    original = parse_query(
        "aggregate(foo, bla, bar: sum(a) as a, sum(b) as b, sum(1) as c):"
        '(cpu > 4 and (mem < 23 or mem < 59)) with(any, <-- name == "test") sort mem asc --> '
        "(a < 1 and /metadata.b == 23) sort foo asc"
    )
    expected = (
        "aggregate(r.foo, r.bla, r.bar: sum(r.a) as a, sum(r.b) as b, sum(1) as c):"
        '(r.cpu > 4 and (r.mem < 23 or r.mem < 59)) with(any, <-default- r.name == "test") sort r.mem asc -default-> '
        "(r.a < 1 and metadata.b == 23) sort r.foo asc"
    )
    in_section = original.on_section("r")

    # all variables are now prefixed with the section name
    assert str(in_section) == expected

    # all variables that are prefixed with the section name have the section name removed -> reverse operation
    back_again = in_section.relative_to_section("r")
    assert str(back_again) == str(original)

    # a query on section root does not change the query
    assert str(in_section.on_section(PathRoot)) == expected

    # a query relative to section root does not change the query
    assert str(in_section.relative_to_section(PathRoot)) == expected
async def test_query_merge(filled_graph_db: ArangoGraphDB, foo_model: Model) -> None:
    """Search with several merge queries attached and check the merged properties of every result."""
    merge_query = parse_query(
        "is(foo) --> is(bla) { "
        "foo.bar.parents[]: <-[1:]-, "
        "foo.child: -->, "
        "walk: <-- -->, "
        "bla.agg: aggregate(sum(1) as count): <-[0:]- "
        "}"
    )
    model = QueryModel(merge_query, foo_model)
    async with await filled_graph_db.search_list(model, with_count=True) as cursor:
        assert cursor.count() == 100
        async for element in cursor:
            merged = AccessJson(element)
            assert merged.reported.kind == "bla"
            # list merge: 4 parents are merged, each of one of the listed kinds
            parents = merged.foo.bar.parents
            assert len(parents) == 4
            for parent in parents:
                assert parent.reported.kind in ["foo", "cloud", "graph_root"]
            # walking in and out again yields a bla
            assert merged.walk.reported.kind == "bla"
            # nothing is merged under foo.child
            assert merged.foo.child == AccessNone()
            # aggregation as merge query
            assert merged.bla.agg == [{"count": 5}]
async def test_query_with_merge(filled_graph_db: ArangoGraphDB, foo_model: Model) -> None:
    """Search with merge_with_ancestors in the preamble and check the merged entries of all three sections."""
    with_ancestors = parse_query('(merge_with_ancestors="foo as foobar,bar"): is("bla")')
    model = QueryModel(with_ancestors, foo_model)
    async with await filled_graph_db.search_list(model) as cursor:
        async for element in cursor:
            js = AccessJson(element)
            sections = (js.reported, js.desired, js.metadata)
            # the bar key exists in every section
            for section in sections:
                assert "bar" in section
            # ... but holds no value in any of them
            for section in sections:
                assert section.bar.is_none
            # foobar is merged into reported, desired and metadata
            for section in sections:
                assert section.foobar is not None
            # make sure the correct parent is merged (foobar(1) -> bla(1_xxx))
            own_identifier = js.reported.identifier
            assert own_identifier.startswith(js.reported.foobar.identifier)
            assert own_identifier.startswith(js.desired.foobar.node_id)
            assert own_identifier.startswith(js.metadata.foobar.node_id)
async def query(q: str) -> List[Json]:
    """Parse q, apply it to the reported section and return all elements of the list search."""
    on_reported = parse_query(q).on_section("reported")
    model = QueryModel(on_reported, foo_model)
    async with await filled_graph_db.search_list(model) as cursor:
        found = []
        async for element in cursor:
            found.append(element)
        return found
async def search(query: str) -> List[JsonElement]:
    """Parse the query string as is and return all elements of the list search."""
    model = QueryModel(parse_query(query), foo_model)
    async with await filled_graph_db.search_list(model) as cursor:
        found = []
        async for element in cursor:
            found.append(element)
        return found
def test_marshal_query() -> None:
    """A query built in code renders to a string that parses back to the same string form."""
    built = Query.by("ec2", P("foo") > 23, P("test") >= "bummer", P("das") < "set")
    rendered = str(built)
    reparsed = parse_query(rendered)
    assert str(built) == str(reparsed)
async def assert_result(query: str, nodes: int, edges: int) -> None:
    """Run a graph search for the query and check the number of nodes and edges in the resulting graph."""
    model = QueryModel(parse_query(query), foo_model)
    found = await filled_graph_db.search_graph(model)
    assert len(found.nodes) == nodes
    assert len(found.edges) == edges
def test_escape_property_path(foo_model: Model, graph_db: GraphDB) -> None:
    """Check how a property path made of aql keywords shows up in the generated query string."""
    raw = "metadata.replace.with.filter.sort.bla==true"
    model = QueryModel(parse_query(raw), foo_model)
    generated = to_query(graph_db, model)[0]
    # aql keywords are escaped with backticks
    assert "m0.metadata.`replace`.`with`.`filter`.`sort`.bla" in generated
def query_string(query: str) -> str:
    """Return the first element of the pair that to_query creates for the parsed query."""
    model = QueryModel(parse_query(query), foo_model)
    generated, _ = to_query(graph_db, model)
    return generated
def test_special_queries() -> None:
    """Check the string form of a query that compares against an unquoted date."""
    # unquoted date like test_date < @YESTERDAY: the date shows up quoted in the string form
    parsed = parse_query("test_date < 2021-12-09")
    assert str(parsed) == 'test_date < "2021-12-09"'
async def cost(query_str: str) -> EstimatedSearchCost:
    """Parse the query string and return the result of query_cost for it."""
    model = QueryModel(parse_query(query_str), foo_model)
    estimate = await query_cost(graph_db, model, False)
    return estimate
def test_generated_query(q: Query) -> None:
    """The string form of the given query has to survive a parse round trip unchanged."""
    rendered = str(q)
    reparsed = parse_query(rendered)
    assert str(q) == str(reparsed)
def test_aggregation() -> None:
    """Check the property paths reported by an aggregate with a template group name, a group variable and a function."""
    parsed = parse_query('aggregate("{a.a}_{a.b}" as a, a.c.d as v: sum(a.c.e) as c): all')
    expected_paths = {"a.a", "a.b", "a.c.d", "a.c.e"}
    assert parsed.aggregate.property_paths() == expected_paths  # type: ignore
def test_rewrite_ancestors_descendants() -> None:
    """Check the string form of queries that reference ancestors, mostly after on_section has been applied."""

    def rewritten(text: str) -> str:
        # string form of the parsed query after on_section without arguments
        return str(parse_query(text).on_section())

    def in_reported(text: str) -> str:
        # string form of the parsed query in section reported
        return str(parse_query(text).on_section("reported"))

    # a query without ancestor/descendants is not changed
    assert str(parse_query("(a<1 and b>1) or c==3")) == "((a < 1 and b > 1) or c == 3)"

    # a query with resolved ancestor is not changed
    resolved = rewritten('a<1 and ancestors.cloud.reported.name=="test"')
    assert resolved == '(a < 1 and ancestors.cloud.reported.name == "test")'

    # the merge name is interpreted relative to the section
    relative_name = in_reported("a<1 {test: <-[1:]- is(account)}")
    assert relative_name == 'reported.a < 1 {reported.test: all <-default[1:]- is("account")}'

    # the merge name is not interpreted relative to the section, when defined absolute
    absolute_name = in_reported("a<1 {/test: <-[1:]- is(account)}")
    assert absolute_name == 'reported.a < 1 {test: all <-default[1:]- is("account")}'

    # a query with unknown ancestor creates a merge query
    unknown = rewritten('a<1 and ancestors.cloud.reported.kind=="cloud"')
    assert unknown == (
        'a < 1 {ancestors.cloud: all <-default[1:]- is("cloud")} ancestors.cloud.reported.kind == "cloud"'
    )

    # multiple ancestors are put into one merge query
    multiple = rewritten('a<1 and ancestors.cloud.reported.kind=="c" and ancestors.account.reported.kind=="a"')
    assert multiple == (
        'a < 1 {ancestors.cloud: all <-default[1:]- is("cloud"), '
        'ancestors.account: all <-default[1:]- is("account")} '
        '(ancestors.cloud.reported.kind == "c" and ancestors.account.reported.kind == "a")'
    )

    # existing merge queries are preserved
    existing = rewritten('a<1 {children[]: --> all} ancestors.cloud.reported.kind=="c"')
    assert existing == (
        'a < 1 {ancestors.cloud: all <-default[1:]- is("cloud"), children[]: all -default-> all} '
        'ancestors.cloud.reported.kind == "c"'
    )

    # predefined merge queries are preserved
    predefined = rewritten('a<1 {ancestors.cloud: --> is(region)} ancestors.cloud.reported.kind=="c"')
    assert predefined == (
        'a < 1 {ancestors.cloud: all -default-> is("region")} ancestors.cloud.reported.kind == "c"'
    )

    # This is an example of a horrible query: all entries have to be merged, before a filter can be applied
    horrible = rewritten("(a<1 and b>1) or ancestors.d.c<1")
    assert horrible == (
        'all {ancestors.d: all <-default[1:]- is("d")} ((a < 1 and b > 1) or ancestors.d.c < 1)'
    )

    # Test some special examples
    special = rewritten("ancestors.d.c<1 and (a<1 or b>1) and /ancestors.a.b>1")
    assert special == (
        '(a < 1 or b > 1) {ancestors.d: all <-default[1:]- is("d"), ancestors.a: all <-default[1:]- is("a")} '
        "(ancestors.d.c < 1 and ancestors.a.b > 1)"
    )

    # the independent query terms are always in the pre-filter before the merge is applied
    independent_first = rewritten("(a<1 and b>1) and (c<d or /ancestors.d.c<1)")
    independent_last = rewritten("(c<d or /ancestors.d.c<1) and (a<1 and b>1)")
    assert (
        independent_first
        == independent_last
        == '(a < 1 and b > 1) {ancestors.d: all <-default[1:]- is("d")} (c < "d" or ancestors.d.c < 1)'
    )

    # multiple filters to the same kind only create one merge query
    same_kind = rewritten(
        "/ancestors.a.b<1 and ancestors.a.c>1 and ancestors.a.d=3 and ancestors.b.c>1 and a==1"
    )
    assert same_kind == (
        'a == 1 {ancestors.a: all <-default[1:]- is("a"), ancestors.b: all <-default[1:]- is("b")} '
        "(((ancestors.a.b < 1 and ancestors.a.c > 1) and ancestors.a.d == 3) and ancestors.b.c > 1)"
    )

    # aggregation queries with ancestors in the group variable trigger a merge
    aggregated = rewritten("aggregate(/ancestors.a.reported.name as a: sum(1)): is(volume)")
    assert aggregated == (
        'aggregate(ancestors.a.reported.name as a: sum(1)):is("volume") {ancestors.a: all <-default[1:]- is("a")}'
    )