def es_deepop(es, query): schema = query.frum.schema query_path = schema.query_path[0] # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER: ES ALLOWS # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU # LOOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT post_expressions = {} es_query, es_filters = es_query_template(query_path) # SPLIT WHERE CLAUSE BY DEPTH wheres = split_expression_by_depth(query.where, schema) for f, w in zip_longest(es_filters, wheres): script = ES52[AndOp(w)].partial_eval().to_esfilter(schema) set_default(f, script) if not wheres[1]: # INCLUDE DOCS WITH NO NESTED DOCS more_filter = { "bool": { "filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)], "must_not": { "nested": { "path": query_path, "query": MATCH_ALL } } } } else: more_filter = None es_query.size = coalesce(query.limit, DEFAULT_LIMIT) map_to_es_columns = schema.map_to_es() query_for_es = query.map(map_to_es_columns) es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema) es_query.stored_fields = [] is_list = is_list_(query.select) selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)]) new_select = FlatList() put_index = 0 for select in selects: if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable): # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS leaves = schema.leaves(select.value.term.var) col_names = set() for c in leaves: if c.nested_path[0] == ".": if c.jx_type == NESTED: continue es_query.stored_fields += [c.es_column] c_name = untype_path(relative_field(c.name, query_path)) col_names.add(c_name) new_select.append({ "name": concat_field(select.name, c_name), "nested_path": c.nested_path[0], "put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."}, "pull": get_pull_function(c) }) put_index += 1 # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS for n in new_select: if n.name.startswith("..") and n.name.lstrip(".") not in col_names: n.put.name = n.name = n.name.lstrip(".") col_names.add(n.name) elif is_op(select.value, Variable): net_columns = schema.leaves(select.value.var) if not net_columns: new_select.append({ "name": select.name, "nested_path": ".", "put": {"name": select.name, "index": put_index, "child": "."}, "pull": NULL }) else: for n in net_columns: pull = get_pull_function(n) if n.nested_path[0] == ".": if n.jx_type == NESTED: continue es_query.stored_fields += [n.es_column] # WE MUST FIGURE OUT WHICH NAMESSPACE s.value.var IS USING SO WE CAN EXTRACT THE child for np in n.nested_path: c_name = untype_path(relative_field(n.name, np)) if startswith_field(c_name, select.value.var): # PREFER THE MOST-RELATIVE NAME child = relative_field(c_name, select.value.var) break else: continue new_select.append({ "name": select.name, "pull": pull, "nested_path": n.nested_path[0], "put": { "name": select.name, "index": put_index, "child": child } }) put_index += 1 else: expr = select.value for v in expr.vars(): for c in schema[v.var]: if c.nested_path[0] == ".": es_query.stored_fields += [c.es_column] # else: # Log.error("deep field not expected") pull_name = EXPRESSION_PREFIX + select.name map_to_local = MapToLocal(schema) pull = jx_expression_to_function(pull_name) post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local)) new_select.append({ "name": select.name if is_list else ".", "pull": pull, "value": expr.__data__(), "put": {"name": select.name, "index": put_index, "child": "."} }) put_index += 1 es_query.stored_fields = sorted(es_query.stored_fields) # <COMPLICATED> ES needs two calls to get all documents more = [] def get_more(please_stop): more.append(es_post( es, Data( query=more_filter, stored_fields=es_query.stored_fields ), query.limit )) if more_filter: need_more = Thread.run("get more", target=get_more) with Timer("call to ES") as call_timer: data = es_post(es, es_query, query.limit) # EACH A HIT IS RETURNED MULTIPLE TIMES FOR EACH INNER HIT, WITH INNER HIT INCLUDED def inners(): for t in data.hits.hits: for i in t.inner_hits[literal_field(query_path)].hits.hits: t._inner = i._source for k, e in post_expressions.items(): t[k] = e(t) yield t if more_filter: Thread.join(need_more) for t in more[0].hits.hits: yield t # </COMPLICATED> try: formatter, groupby_formatter, mime_type = format_dispatch[query.format] output = formatter(inners(), new_select, query) output.meta.timing.es = call_timer.duration output.meta.content_type = mime_type output.meta.es_query = es_query return output except Exception as e: Log.error("problem formatting", e)
def es_deepop(es, query): schema = query.frum.schema query_path = schema.query_path[0] # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER: ES ALLOWS # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU # LOOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT post_expressions = {} es_query, es_filters = es_query_template(query_path) # SPLIT WHERE CLAUSE BY DEPTH wheres = split_expression_by_depth(query.where, schema) for f, w in zip_longest(es_filters, wheres): script = ES52[AndOp(w)].partial_eval().to_esfilter(schema) set_default(f, script) if not wheres[1]: # INCLUDE DOCS WITH NO NESTED DOCS more_filter = { "bool": { "filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)], "must_not": { "nested": { "path": query_path, "query": { "match_all": {} } } } } } else: more_filter = None es_query.size = coalesce(query.limit, DEFAULT_LIMIT) # es_query.sort = jx_sort_to_es_sort(query.sort) map_to_es_columns = schema.map_to_es() # {c.name: c.es_column for c in schema.leaves(".")} query_for_es = query.map(map_to_es_columns) es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema) es_query.stored_fields = [] is_list = is_list_(query.select) selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)]) new_select = FlatList() put_index = 0 for select in selects: if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable): # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS leaves = schema.leaves(select.value.term.var) col_names = set() for c in leaves: if c.nested_path[0] == ".": if c.jx_type == NESTED: continue es_query.stored_fields += [c.es_column] c_name = untype_path(relative_field(c.name, query_path)) col_names.add(c_name) new_select.append({ "name": concat_field(select.name, c_name), "nested_path": c.nested_path[0], "put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."}, "pull": get_pull_function(c) }) put_index += 1 # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS for n in new_select: if n.name.startswith("..") and n.name.lstrip(".") not in col_names: n.put.name = n.name = n.name.lstrip(".") col_names.add(n.name) elif is_op(select.value, Variable): net_columns = schema.leaves(select.value.var) if not net_columns: new_select.append({ "name": select.name, "nested_path": ".", "put": {"name": select.name, "index": put_index, "child": "."}, "pull": NULL }) else: for n in net_columns: pull = get_pull_function(n) if n.nested_path[0] == ".": if n.jx_type == NESTED: continue es_query.stored_fields += [n.es_column] # WE MUST FIGURE OUT WHICH NAMESSPACE s.value.var IS USING SO WE CAN EXTRACT THE child for np in n.nested_path: c_name = untype_path(relative_field(n.name, np)) if startswith_field(c_name, select.value.var): child = relative_field(c_name, select.value.var) break else: continue # REMOVED BECAUSE SELECTING INNER PROPERTIES IS NOT ALLOWED # child = relative_field(untype_path(relative_field(n.name, n.nested_path[0])), s.value.var) new_select.append({ "name": select.name, "pull": pull, "nested_path": n.nested_path[0], "put": { "name": select.name, "index": put_index, "child": child } }) put_index += 1 else: expr = select.value for v in expr.vars(): for c in schema[v.var]: if c.nested_path[0] == ".": es_query.stored_fields += [c.es_column] # else: # Log.error("deep field not expected") pull_name = EXPRESSION_PREFIX + select.name map_to_local = MapToLocal(schema) pull = jx_expression_to_function(pull_name) post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local)) new_select.append({ "name": select.name if is_list else ".", "pull": pull, "value": expr.__data__(), "put": {"name": select.name, "index": put_index, "child": "."} }) put_index += 1 # <COMPLICATED> ES needs two calls to get all documents more = [] def get_more(please_stop): more.append(es_post( es, Data( query=more_filter, stored_fields=es_query.stored_fields ), query.limit )) if more_filter: need_more = Thread.run("get more", target=get_more) with Timer("call to ES") as call_timer: data = es_post(es, es_query, query.limit) # EACH A HIT IS RETURNED MULTIPLE TIMES FOR EACH INNER HIT, WITH INNER HIT INCLUDED def inners(): for t in data.hits.hits: for i in t.inner_hits[literal_field(query_path)].hits.hits: t._inner = i._source for k, e in post_expressions.items(): t[k] = e(t) yield t if more_filter: Thread.join(need_more) for t in more[0].hits.hits: yield t # </COMPLICATED> try: formatter, groupby_formatter, mime_type = format_dispatch[query.format] output = formatter(inners(), new_select, query) output.meta.timing.es = call_timer.duration output.meta.content_type = mime_type output.meta.es_query = es_query return output except Exception as e: Log.error("problem formatting", e)