def to_esfilter(self, schema):
    """
    Build the ES filter for a disjunction of `self.terms`.

    The OR is expressed through De Morgan's law: every term is negated,
    the negations are ANDed together, and the result is negated again.

    :param schema: passed through to each term's `to_esfilter()`
    :return: the result of `es_not(es_and([...]))`
    """
    # OR(x) == NOT(AND(NOT(xi) for xi in x))
    negated_filters = []
    for term in self.terms:
        inverse = NotOp("not", term).partial_eval()
        negated_filters.append(inverse.to_esfilter(schema))
    return es_not(es_and(negated_filters))
def to_es14_filter(self, schema):
    """
    Build the filter for a disjunction of `self.terms` using the
    `to_es14_filter()` method of each negated term.

    The OR is expressed through De Morgan's law: every term is negated,
    the negations are ANDed together, and the result is negated again.

    :param schema: passed through to each term's `to_es14_filter()`
    :return: the result of `es_not(es_and([...]))`
    """
    # OR(x) == NOT(AND(NOT(xi) for xi in x))
    negated_filters = []
    for term in self.terms:
        inverse = NotOp("not", term).partial_eval()
        negated_filters.append(inverse.to_es14_filter(schema))
    return es_not(es_and(negated_filters))
def to_esfilter(self, schema):
    """
    Build the ES filter that tests whether `self.expr` is missing.

    :param schema: used to look up the leaf columns of a variable, and
        passed through to the script conversion otherwise
    :return: for a `Variable`: match-all when the schema has no leaves for
        it, a single `es_missing` when there is one leaf, otherwise the
        `es_and` of `es_missing` over every leaf.  For any other
        expression: the filter of a `ScriptOp` wrapping the ES script.
    """
    # NOT A SIMPLE VARIABLE: FALL BACK TO A SCRIPT FILTER
    if not isinstance(self.expr, Variable):
        script = self.to_es_script(schema).script(schema)
        return ScriptOp("script", script).to_esfilter(schema)

    cols = schema.leaves(self.expr.var)
    if not cols:
        return {"match_all": {}}
    if len(cols) == 1:
        return es_missing(cols[0].es_column)
    return es_and([es_missing(col.es_column) for col in cols])
def to_esfilter(self, schema):
    """
    Build the ES filter that tests whether `self.expr` is missing.

    :param schema: used to look up the leaf columns of a variable, and
        passed through to the script conversion otherwise
    :return: for a `Variable`: match-all when the schema has no leaves for
        it, a single `es_missing` when there is one leaf, otherwise the
        `es_and` of `es_missing` over every leaf.  For any other
        expression: the filter of a `ScriptOp` wrapping the ES script.
    """
    if isinstance(self.expr, Variable):
        leaf_columns = schema.leaves(self.expr.var)
        if not leaf_columns:
            # NO KNOWN COLUMNS FOR THIS VARIABLE
            result = {"match_all": {}}
        elif len(leaf_columns) == 1:
            only_column = leaf_columns[0]
            result = es_missing(only_column.es_column)
        else:
            missing_checks = [
                es_missing(column.es_column) for column in leaf_columns
            ]
            result = es_and(missing_checks)
    else:
        # NOT A SIMPLE VARIABLE: FALL BACK TO A SCRIPT FILTER
        es_script_text = self.to_es_script(schema).script(schema)
        result = ScriptOp("script", es_script_text).to_esfilter(schema)
    return result
def to_esfilter(self, schema):
    """
    Build the ES filter for the conjunction of `self.terms`.

    :param schema: passed through to each term's `to_esfilter()`
    :return: `{"match_all": {}}` when there are no terms, otherwise the
        `es_and` of every term's filter
    """
    # AN EMPTY CONJUNCTION MATCHES EVERYTHING
    if len(self.terms) == 0:
        return {"match_all": {}}

    clauses = [term.to_esfilter(schema) for term in self.terms]
    return es_and(clauses)
def _normalize(esfilter):
    """
    Simplify an ES filter by collapsing cascading `and` / `bool.should`
    clauses and removing redundant or constant terms.

    TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS

    The filter is rewritten repeatedly until a pass makes no change.  The
    `isNormal` attribute is used as a marker: it is set to True on the
    filter that is returned, and reset to None on children that get
    absorbed into a parent.

    :param esfilter: filter to simplify; accessed with both item and
        attribute syntax (presumably a mo-dots `Data` - confirm with caller).
        The `and` list of the filter is modified in place.
    :return: `esfilter` itself when it is MATCH_ALL, MATCH_NONE or already
        marked `isNormal`; otherwise the simplified filter, which may be
        MATCH_ALL, MATCH_NONE, the input object, or a new object
    """
    if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + convert.value2json(esfilter))
    isDiff = True

    # KEEP REWRITING UNTIL A PASS MAKES NO CHANGE
    while isDiff:
        isDiff = False

        if esfilter['and']:
            terms = esfilter['and']
            # COMPARE EVERY ORDERED PAIR OF TERMS; REDUNDANT TERMS ARE
            # OVERWRITTEN WITH MATCH_ALL AND DROPPED IN THE LOOP BELOW
            for (i0, t0), (i1, t1) in itertools.product(enumerate(terms), enumerate(terms)):
                if i0 == i1:
                    continue  # SAME, IGNORE
                # TERM FILTER ALREADY ASSUMES EXISTENCE
                # NOTE(review): suppress_exception PRESUMABLY SWALLOWS ERRORS
                # RAISED WHEN A TERM DOES NOT HAVE THE EXPECTED SHAPE - CONFIRM
                with suppress_exception:
                    if t0.exists.field != None and t0.exists.field == t1.term.items()[0][0]:
                        terms[i0] = MATCH_ALL
                        continue

                # IDENTICAL CAN BE REMOVED
                with suppress_exception:
                    if t0 == t1:
                        terms[i0] = MATCH_ALL
                        continue

                # MERGE range FILTER WITH SAME FIELD
                if i0 > i1:
                    continue  # SAME, IGNORE
                with suppress_exception:
                    f0, tt0 = t0.range.items()[0]
                    f1, tt1 = t1.range.items()[0]
                    if f0 == f1:
                        # FOLD THE SECOND RANGE INTO THE FIRST, THEN RETIRE THE SECOND
                        set_default(terms[i0].range[literal_field(f1)], tt1)
                        terms[i1] = MATCH_ALL

            # NORMALIZE EACH TERM AND FLATTEN NESTED `and` CLAUSES
            output = []
            for a in terms:
                if isinstance(a, (list, set)):
                    from mo_logs import Log
                    Log.error("and clause is not allowed a list inside a list")
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_
                if a == MATCH_ALL:
                    # MATCH_ALL ADDS NOTHING TO A CONJUNCTION
                    isDiff = True
                    continue
                if a == MATCH_NONE:
                    # ONE MATCH_NONE MAKES THE WHOLE CONJUNCTION MATCH_NONE
                    return MATCH_NONE
                if a['and']:
                    # SPLICE THE CHILD'S TERMS INTO THIS LEVEL
                    isDiff = True
                    a.isNormal = None
                    output.extend(a['and'])
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_ALL
            elif len(output) == 1:
                # output[0].isNormal = True
                # A SINGLE REMAINING TERM REPLACES THE `and`; LEAVE THE LOOP
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = es_and(output)
            # RE-TEST THE LOOP CONDITION: ANOTHER PASS ONLY IF SOMETHING CHANGED
            continue

        if esfilter.bool.should:
            # NORMALIZE EACH ALTERNATIVE AND FLATTEN NESTED `should` CLAUSES
            output = []
            for a in esfilter.bool.should:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_

                if a.bool.should:
                    a.isNormal = None
                    isDiff = True
                    output.extend(a.bool.should)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_NONE
            elif len(output) == 1:
                # A SINGLE REMAINING ALTERNATIVE REPLACES THE `should`
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"bool": {"should": output}})
            # RE-TEST THE LOOP CONDITION: ANOTHER PASS ONLY IF SOMETHING CHANGED
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                # A `term` WITH NO KEYS IS TREATED AS MATCH_ALL
                return MATCH_ALL

        if esfilter.terms:
            # RETURNS ON THE FIRST FIELD THAT HAS A NON-EMPTY VALUE LIST
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        # A None AMONG THE VALUES BECOMES A MISSING-FIELD CHECK,
                        # COMBINED WITH `terms` ON THE REMAINING VALUES (IF ANY)
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            output = es_or([
                                es_missing(k),
                                {"terms": {k: rest}}
                            ])
                        else:
                            output = es_missing(k)
                        output.isNormal = True
                        return output
                    else:
                        esfilter.isNormal = True
                        return esfilter
            # NO FIELD HAD ANY VALUES TO MATCH
            return MATCH_NONE

        if esfilter['not']:
            _sub = esfilter['not']
            sub = _normalize(_sub)
            # NEGATING A CONSTANT GIVES THE OPPOSITE CONSTANT
            if sub == MATCH_NONE:
                return MATCH_ALL
            elif sub == MATCH_ALL:
                return MATCH_NONE
            elif sub is not _sub:
                # THE INNER FILTER WAS REWRITTEN: BUILD A NEW `not` AROUND IT
                sub.isNormal = None
                return wrap({"not": sub, "isNormal": True})
            else:
                sub.isNormal = None

    # REACHED VIA `break`, OR WHEN A PASS MADE NO CHANGE
    esfilter.isNormal = True
    return esfilter
def _normalize(esfilter):
    """
    Simplify an ES filter by collapsing cascading `and` / `bool.should`
    clauses and removing redundant or constant terms.

    TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS

    The filter is rewritten repeatedly until a pass makes no change.  The
    `isNormal` attribute is used as a marker: it is set to True on the
    filter that is returned, and reset to None on children that get
    absorbed into a parent.

    :param esfilter: filter to simplify; accessed with both item and
        attribute syntax (presumably a mo-dots `Data` - confirm with caller).
        The `and` list of the filter is modified in place.
    :return: `esfilter` itself when it is MATCH_ALL, MATCH_NONE or already
        marked `isNormal`; otherwise the simplified filter, which may be
        MATCH_ALL, MATCH_NONE, the input object, or a new object
    """
    if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + convert.value2json(esfilter))
    isDiff = True

    # KEEP REWRITING UNTIL A PASS MAKES NO CHANGE
    while isDiff:
        isDiff = False

        if esfilter['and']:
            terms = esfilter['and']
            # COMPARE EVERY ORDERED PAIR OF TERMS; REDUNDANT TERMS ARE
            # OVERWRITTEN WITH MATCH_ALL AND DROPPED IN THE LOOP BELOW
            for (i0, t0), (i1, t1) in itertools.product(enumerate(terms), enumerate(terms)):
                if i0 == i1:
                    continue  # SAME, IGNORE
                # TERM FILTER ALREADY ASSUMES EXISTENCE
                # NOTE(review): suppress_exception PRESUMABLY SWALLOWS ERRORS
                # RAISED WHEN A TERM DOES NOT HAVE THE EXPECTED SHAPE - CONFIRM
                with suppress_exception:
                    if t0.exists.field != None and t0.exists.field == t1.term.items()[0][0]:
                        terms[i0] = MATCH_ALL
                        continue

                # IDENTICAL CAN BE REMOVED
                with suppress_exception:
                    if t0 == t1:
                        terms[i0] = MATCH_ALL
                        continue

                # MERGE range FILTER WITH SAME FIELD
                if i0 > i1:
                    continue  # SAME, IGNORE
                with suppress_exception:
                    f0, tt0 = t0.range.items()[0]
                    f1, tt1 = t1.range.items()[0]
                    if f0 == f1:
                        # FOLD THE SECOND RANGE INTO THE FIRST, THEN RETIRE THE SECOND
                        set_default(terms[i0].range[literal_field(f1)], tt1)
                        terms[i1] = MATCH_ALL

            # NORMALIZE EACH TERM AND FLATTEN NESTED `and` CLAUSES
            output = []
            for a in terms:
                if isinstance(a, (list, set)):
                    from mo_logs import Log
                    Log.error("and clause is not allowed a list inside a list")
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_
                if a == MATCH_ALL:
                    # MATCH_ALL ADDS NOTHING TO A CONJUNCTION
                    isDiff = True
                    continue
                if a == MATCH_NONE:
                    # ONE MATCH_NONE MAKES THE WHOLE CONJUNCTION MATCH_NONE
                    return MATCH_NONE
                if a['and']:
                    # SPLICE THE CHILD'S TERMS INTO THIS LEVEL
                    isDiff = True
                    a.isNormal = None
                    output.extend(a['and'])
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_ALL
            elif len(output) == 1:
                # output[0].isNormal = True
                # A SINGLE REMAINING TERM REPLACES THE `and`; LEAVE THE LOOP
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = es_and(output)
            # RE-TEST THE LOOP CONDITION: ANOTHER PASS ONLY IF SOMETHING CHANGED
            continue

        if esfilter.bool.should:
            # NORMALIZE EACH ALTERNATIVE AND FLATTEN NESTED `should` CLAUSES
            output = []
            for a in esfilter.bool.should:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_

                if a.bool.should:
                    a.isNormal = None
                    isDiff = True
                    output.extend(a.bool.should)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_NONE
            elif len(output) == 1:
                # A SINGLE REMAINING ALTERNATIVE REPLACES THE `should`
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"bool": {"should": output}})
            # RE-TEST THE LOOP CONDITION: ANOTHER PASS ONLY IF SOMETHING CHANGED
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                # A `term` WITH NO KEYS IS TREATED AS MATCH_ALL
                return MATCH_ALL

        if esfilter.terms:
            # RETURNS ON THE FIRST FIELD THAT HAS A NON-EMPTY VALUE LIST
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        # A None AMONG THE VALUES BECOMES A MISSING-FIELD CHECK,
                        # COMBINED WITH `terms` ON THE REMAINING VALUES (IF ANY)
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            output = es_or([
                                es_missing(k),
                                {"terms": {k: rest}}
                            ])
                        else:
                            output = es_missing(k)
                        output.isNormal = True
                        return output
                    else:
                        esfilter.isNormal = True
                        return esfilter
            # NO FIELD HAD ANY VALUES TO MATCH
            return MATCH_NONE

        if esfilter['not']:
            _sub = esfilter['not']
            sub = _normalize(_sub)
            # NEGATING A CONSTANT GIVES THE OPPOSITE CONSTANT
            if sub == MATCH_NONE:
                return MATCH_ALL
            elif sub == MATCH_ALL:
                return MATCH_NONE
            elif sub is not _sub:
                # THE INNER FILTER WAS REWRITTEN: BUILD A NEW `not` AROUND IT
                sub.isNormal = None
                return wrap({"not": sub, "isNormal": True})
            else:
                sub.isNormal = None

    # REACHED VIA `break`, OR WHEN A PASS MADE NO CHANGE
    esfilter.isNormal = True
    return esfilter
def es_setop(es, query):
    """
    Translate a set-operation query into an ES request, send it, and format
    the returned hits.

    For every select clause this builds one or more entries in `new_select`,
    each describing where the value lands in the output (`put`) and how it is
    read from a hit (`pull`), while accumulating the ES `fields`,
    `script_fields` and nested filters needed to fetch it.

    :param es: passed to `es_post()` as the target of the request
    :param query: the query; this function reads its `frum.schema`, `where`,
        `limit`, `select`, `sort` and `format`
    :return: the output of the formatter registered in `format_dispatch` for
        `query.format`, with `meta.timing.es`, `meta.content_type` and
        `meta.es_query` filled in
    """
    schema = query.frum.schema

    es_query, filters = es_query_template(schema.query_path[0])
    nested_filter = None
    # MERGE THE QUERY'S where CLAUSE INTO THE TEMPLATE'S FIRST FILTER
    set_default(filters[0], query.where.partial_eval().to_es14_filter(schema))
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.fields = FlatList()

    # WORK ON COPIES OF THE SELECT CLAUSES
    selects = wrap([s.copy() for s in listwrap(query.select)])
    new_select = FlatList()
    schema = query.frum.schema
    # columns = schema.columns
    # nested_columns = set(c.name for c in columns if c.nested_path[0] != ".")

    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    # INDEX OF THE NEXT OUTPUT COLUMN
    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(select.value, LeavesOp) and isinstance(
                select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            # EACH SELECTED LEAF GETS ITS OWN OUTPUT COLUMN
            for c in leaves:
                full_name = concat_field(
                    select.name, relative_field(untype_path(c.name), term.var))
                if c.jx_type == NESTED:
                    # NESTED VALUES ARE READ FROM _source
                    es_query.fields = ["_source"]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": literal_field(full_name),
                            "index": put_index,
                            "child": "."
                        },
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                elif c.nested_path[0] != ".":
                    pass  # THE NESTED PARENT WILL CAPTURE THIS
                else:
                    # pull IS LEFT UNSET HERE; IT IS ASSIGNED AFTER THE SELECT LOOP
                    es_query.fields += [c.es_column]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": literal_field(full_name),
                            "index": put_index,
                            "child": "."
                        }
                    })
                    put_index += 1
        elif isinstance(select.value, Variable):
            s_column = select.value.var
            # LEAVES OF OBJECT
            leaves = schema.leaves(s_column)
            # ONE nested CLAUSE PER NESTED PATH, FOR THIS SELECT ONLY
            nested_selects = {}
            if leaves:
                if s_column == "." or any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    es_query.fields = ["_source"]
                    for c in leaves:
                        # ONLY LEAVES WHOSE nested_path HAS A SINGLE ENTRY
                        if len(c.nested_path) == 1:
                            jx_name = untype_path(c.name)
                            new_select.append({
                                "name": select.name,
                                "value": Variable(c.es_column),
                                "put": {
                                    "name": select.name,
                                    "index": put_index,
                                    "child": relative_field(jx_name, s_column)
                                },
                                "pull": get_pull_source(c.es_column)
                            })
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        if len(c.nested_path) == 1:
                            jx_name = untype_path(c.name)
                            if c.jx_type == NESTED:
                                es_query.fields = ["_source"]
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": relative_field(jx_name, s_column)
                                    },
                                    "pull": get_pull_source(c.es_column)
                                })
                            else:
                                # pull IS LEFT UNSET HERE; IT IS ASSIGNED AFTER THE SELECT LOOP
                                es_query.fields += [c.es_column]
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": relative_field(jx_name, s_column)
                                    }
                                })
                        else:
                            # LEAF WITH A LONGER nested_path: FETCH THROUGH A nested
                            # CLAUSE WITH inner_hits
                            if not nested_filter:
                                # FIRST TIME ONLY: MOVE THE CURRENT FILTER INTO `where`,
                                # CLEAR filters[0] IN PLACE, AND REFILL IT WITH
                                # where AND (OR OF nested_filter)
                                where = filters[0].copy()
                                nested_filter = [where]
                                for k in filters[0].keys():
                                    filters[0][k] = None
                                set_default(
                                    filters[0],
                                    es_and([where, es_or(nested_filter)]))

                            nested_path = c.nested_path[0]
                            if nested_path not in nested_selects:
                                # FIRST COLUMN SEEN FOR THIS PATH: CREATE ITS nested
                                # CLAUSE AND ADD IT TO nested_filter
                                where = nested_selects[nested_path] = Data()
                                nested_filter += [where]
                                where.nested.path = nested_path
                                where.nested.query.match_all = {}
                                where.nested.inner_hits._source = False
                                where.nested.inner_hits.fields += [c.es_column]

                                child = relative_field(
                                    untype_path(
                                        relative_field(c.name, schema.query_path[0])),
                                    s_column)
                                pull = accumulate_nested_doc(
                                    nested_path,
                                    Variable(
                                        relative_field(
                                            s_column, unnest_path(nested_path))))
                                new_select.append({
                                    "name": select.name,
                                    "value": select.value,
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": child
                                    },
                                    "pull": pull
                                })
                            else:
                                # PATH ALREADY HAS A CLAUSE: JUST REQUEST ONE MORE FIELD
                                nested_selects[
                                    nested_path].nested.inner_hits.fields += [
                                        c.es_column
                                    ]
            else:
                # NO LEAVES FOUND FOR THIS VARIABLE: SELECT A PLACEHOLDER VARIABLE
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {
                        "name": select.name,
                        "index": put_index,
                        "child": "."
                    }
                })
            # ALL LEAVES OF A PLAIN VARIABLE SHARE ONE OUTPUT COLUMN
            put_index += 1
        else:
            # ANY OTHER EXPRESSION IS SENT AS A SCRIPT FIELD
            painless = select.value.partial_eval().to_es14_script(schema)
            es_query.script_fields[literal_field(select.name)] = es_script(
                painless.script(schema))
            new_select.append({
                "name": select.name,
                "pull": jx_expression_to_function("fields." + literal_field(select.name)),
                "put": {
                    "name": select.name,
                    "index": put_index,
                    "child": "."
                }
            })
            put_index += 1

    # ASSIGN A pull FUNCTION TO EVERY SELECT THAT DOES NOT HAVE ONE YET
    for n in new_select:
        if n.pull:
            continue
        elif isinstance(n.value, Variable):
            if es_query.fields[0] == "_source":
                es_query.fields = ["_source"]
                n.pull = get_pull_source(n.value.var)
            # NOTE(review): THIS if/else ALWAYS ASSIGNS n.pull, SO THE _source
            # PULL SET JUST ABOVE LOOKS LIKE IT IS OVERWRITTEN - WAS `elif`
            # INTENDED? CONFIRM BEFORE CHANGING
            if n.value.var == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(
                    concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    # SEND THE REQUEST; THE TIMER'S DURATION IS REPORTED IN THE OUTPUT META
    with Timer("call to ES", silent=not DEBUG) as call_timer:
        data = es_post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        # ONLY THE FIRST AND THIRD ENTRIES OF THE DISPATCH TUPLE ARE USED HERE
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        # ANY FAILURE WHILE FORMATTING IS REPORTED THROUGH Log.error WITH THE CAUSE
        Log.error("problem formatting", e)