def to_esfilter(self, schema):
    """
    Build the ES filter that matches documents where `self.expr` is missing.

    :param schema: object providing `leaves(var)`, which returns the columns
        a variable name resolves to
    :return: when `self.expr` is not a variable, the filter produced from the
        Painless script for this operator; otherwise MATCH_ALL when the
        variable has no columns, a single missing-filter for one column, or
        an `and` of missing-filters over all columns
    """
    # NOT A PLAIN VARIABLE: DELEGATE TO THE SCRIPTED FORM
    if not is_op(self.expr, Variable_):
        script = PainlessMissingOp.to_es_script(self, schema)
        return script.to_esfilter(schema)

    leaves = schema.leaves(self.expr.var)
    if not leaves:
        # NO COLUMN FOR THIS VARIABLE
        return MATCH_ALL
    if len(leaves) == 1:
        return es_missing(first(leaves).es_column)
    # EVERY COLUMN MUST BE MISSING
    return es_and([es_missing(leaf.es_column) for leaf in leaves])
def to_esfilter(self, schema):
    """
    Build the ES filter that matches documents where `self.expr` is missing.

    :param schema: object providing `leaves(var)`, which returns the columns
        a variable name resolves to
    :return: when `self.expr` is not a `Variable`, the filter of a "script"
        ScriptOp built from this operator's ES script; otherwise a literal
        match_all filter when the variable has no columns, a single
        missing-filter for one column, or an `and` of missing-filters over
        all columns
    """
    # NOT A PLAIN VARIABLE: EXPRESS THE TEST AS A SCRIPT FILTER
    if not isinstance(self.expr, Variable):
        script_text = self.to_es_script(schema).script(schema)
        return ScriptOp("script", script_text).to_esfilter(schema)

    leaves = schema.leaves(self.expr.var)
    if not leaves:
        # NO COLUMN FOR THIS VARIABLE
        return {"match_all": {}}
    if len(leaves) == 1:
        return es_missing(leaves[0].es_column)
    # EVERY COLUMN MUST BE MISSING
    return es_and([es_missing(leaf.es_column) for leaf in leaves])
def append_query(self, es_query, start):
    """
    Nest `es_query` inside one aggregation level per entry of `self.fields`.

    Each level is a wrapped `{"aggs": {"_match": ..., "_missing": ...}}`
    where `_match` is a terms aggregation on the field (sized by
    `self.domain.limit`) and `_missing` is a filter aggregation using
    `es_missing(field)`. Both are combined with the query built so far
    through `set_default`.

    :param es_query: the query to nest; returned as-is when `self.fields`
        is empty
    :param start: stored on `self.start`
    :return: the outermost query after all fields have been applied
    """
    self.start = start
    for field in self.fields:
        # _match IS BUILT BEFORE _missing, SAME AS THE DICT LITERAL ORDER
        match_agg = set_default(
            {"terms": {"field": field, "size": self.domain.limit}},
            es_query,
        )
        missing_agg = set_default({"filter": es_missing(field)}, es_query)
        es_query = wrap({"aggs": {"_match": match_agg, "_missing": missing_agg}})
    return es_query
def append_query(self, es_query, start):
    """
    Wrap `es_query` in successive aggregation levels, one per field.

    For every entry of `self.fields` a new wrapped query is built with two
    sub-aggregations: `_match` (terms aggregation on the field, size taken
    from `self.domain.limit`) and `_missing` (filter aggregation from
    `es_missing(field)`). Each is passed through `set_default` together
    with the query produced by the previous step.

    :param es_query: innermost query; returned unchanged if there are no
        fields
    :param start: recorded as `self.start`
    :return: the query produced by the last field processed
    """
    self.start = start
    nested = es_query
    for field_name in self.fields:
        terms_part = {"terms": {"field": field_name, "size": self.domain.limit}}
        with_match = set_default(terms_part, nested)
        # es_missing IS CALLED ONLY AFTER THE _match PART HAS BEEN BUILT
        with_missing = set_default({"filter": es_missing(field_name)}, nested)
        nested = wrap({"aggs": {"_match": with_match, "_missing": with_missing}})
    return nested
def _normalize(esfilter):
    """
    Simplify an ES filter, mainly by flattening nested `bool.filter` (and)
    and nested `bool.should` (or) clauses.

    TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS

    The filter is rewritten in passes until a pass makes no change. The
    input is modified in place: entries of its `bool.filter` list may be
    overwritten, and `isNormal` is set or cleared on it and on its children.

    :param esfilter: filter to simplify; read through attribute access
        (`.bool.filter`, `.bool.should`, `.term`, `.terms`, `.isNormal`) -
        presumably a wrapped Data object, confirm against callers
    :return: MATCH_ALL, MATCH_NONE, the (possibly modified) input, one of
        its sub-clauses, or a newly built filter
    """
    # TRIVIAL FILTERS, AND FILTERS ALREADY FLAGGED isNormal, ARE RETURNED AS-IS
    if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + convert.value2json(esfilter))
    isDiff = True

    # REPEAT UNTIL A PASS MAKES NO CHANGE
    while isDiff:
        isDiff = False

        if esfilter.bool.filter:
            terms = esfilter.bool.filter
            # COMPARE EVERY ORDERED PAIR OF CLAUSES; A REDUNDANT CLAUSE IS
            # OVERWRITTEN WITH MATCH_ALL AND DROPPED BY THE LOOP FURTHER DOWN
            # NOTE(review): itertools.product READS BOTH enumerate(terms)
            # COMPLETELY BEFORE YIELDING, SO t0/t1 ARE THE CLAUSES AS THEY WERE
            # BEFORE ANY terms[...] ASSIGNMENT BELOW. TWO EQUAL CLAUSES
            # THEREFORE APPEAR TO BOTH BE REPLACED (ONCE AS (i0, i1), ONCE AS
            # (i1, i0)) - CONFIRM WHETHER THAT IS INTENDED
            for (i0, t0), (i1, t1) in itertools.product(enumerate(terms), enumerate(terms)):
                if i0 == i1:
                    continue  # SAME, IGNORE

                # TERM FILTER ALREADY ASSUMES EXISTENCE
                # (presumably suppress_exception SWALLOWS ERRORS RAISED WHEN A
                # CLAUSE DOES NOT HAVE THE PROBED SHAPE - TODO CONFIRM)
                with suppress_exception:
                    if (t0.exists.field != None and t0.exists.field == t1.term.items()[0][0]):
                        terms[i0] = MATCH_ALL
                        continue

                # IDENTICAL CAN BE REMOVED
                with suppress_exception:
                    if t0 == t1:
                        terms[i0] = MATCH_ALL
                        continue

                # MERGE range FILTER WITH SAME FIELD
                if i0 > i1:
                    continue  # ONLY THE (LOWER, HIGHER) ORDERING OF A PAIR IS MERGED
                with suppress_exception:
                    f0, tt0 = t0.range.items()[0]
                    f1, tt1 = t1.range.items()[0]
                    if f0 == f1:
                        # FOLD THE SECOND RANGE INTO THE FIRST, THEN RETIRE THE SECOND
                        set_default(terms[i0].range[literal_field(f1)], tt1)
                        terms[i1] = MATCH_ALL

            # REBUILD THE CLAUSE LIST: NORMALIZE EACH CLAUSE, DROP MATCH_ALL,
            # SHORT-CIRCUIT ON MATCH_NONE, AND INLINE NESTED bool.filter LISTS
            output = []
            for a in terms:
                if is_container(a):
                    from mo_logs import Log

                    Log.error("and clause is not allowed a list inside a list")
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_
                if a == MATCH_ALL:
                    isDiff = True
                    continue
                if a == MATCH_NONE:
                    # ONE IMPOSSIBLE CLAUSE MAKES THE WHOLE `and` IMPOSSIBLE
                    return MATCH_NONE
                if a.bool.filter:
                    isDiff = True
                    # presumably ONLY THE OUTERMOST FILTER SHOULD CARRY THE FLAG - TODO CONFIRM
                    a.isNormal = None
                    output.extend(a.bool.filter)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                # NOTHING LEFT TO RESTRICT ON
                return MATCH_ALL
            elif len(output) == 1:
                # output[0].isNormal = True
                # A SINGLE CLAUSE REPLACES THE `and`; IT IS FLAGGED AFTER THE LOOP
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = es_and(output)
            continue

        if esfilter.bool.should:
            # NORMALIZE EACH ALTERNATIVE AND INLINE NESTED bool.should LISTS
            output = []
            for a in esfilter.bool.should:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_

                if a.bool.should:
                    a.isNormal = None
                    isDiff = True
                    output.extend(a.bool.should)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_NONE
            elif len(output) == 1:
                # A SINGLE ALTERNATIVE REPLACES THE `or`
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap(es_or(output))
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                # A term FILTER WITH NO KEYS RESTRICTS NOTHING
                return MATCH_ALL

        if esfilter.terms:
            # THE RESULT IS DECIDED BY THE FIRST ENTRY WITH A NON-EMPTY VALUE LIST
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        # None AMONG THE VALUES BECOMES AN EXPLICIT MISSING-FILTER
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            output = es_or(
                                [es_missing(k), {
                                    "terms": {
                                        k: rest
                                    }
                                }])
                        else:
                            output = es_missing(k)
                        output.isNormal = True
                        return output
                    else:
                        esfilter.isNormal = True
                        return esfilter
            # NO ENTRY HAD ANY VALUE TO MATCH
            return MATCH_NONE

        if esfilter.bool.must_not:
            _sub = esfilter.bool.must_not
            sub = _normalize(_sub)
            # NEGATION OF THE TRIVIAL FILTERS IS THE OPPOSITE TRIVIAL FILTER
            if sub == MATCH_NONE:
                return MATCH_ALL
            elif sub == MATCH_ALL:
                return MATCH_NONE
            elif sub is not _sub:
                # THE CHILD WAS REWRITTEN, SO BUILD A NEW must_not AROUND IT
                sub.isNormal = None
                return wrap({"bool": {"must_not": sub, "isNormal": True}})
            else:
                sub.isNormal = None

    # REACHED WHEN NO FURTHER CHANGE IS POSSIBLE, OR VIA break ABOVE
    esfilter.isNormal = True
    return esfilter
def _normalize(esfilter):
    """
    Simplify an ES filter, mainly by flattening nested `bool.filter` (and)
    and nested `bool.should` (or) clauses.

    TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS

    The filter is rewritten in passes until a pass makes no change. The
    input is modified in place: entries of its `bool.filter` list may be
    overwritten, and `isNormal` is set or cleared on it and on its children.

    :param esfilter: filter to simplify; read through attribute access
        (`.bool.filter`, `.bool.should`, `.term`, `.terms`, `.isNormal`) -
        presumably a wrapped Data object, confirm against callers
    :return: MATCH_ALL, MATCH_NONE, the (possibly modified) input, one of
        its sub-clauses, or a newly built filter
    """
    # TRIVIAL FILTERS, AND FILTERS ALREADY FLAGGED isNormal, ARE RETURNED AS-IS
    if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + convert.value2json(esfilter))
    isDiff = True

    # REPEAT UNTIL A PASS MAKES NO CHANGE
    while isDiff:
        isDiff = False

        if esfilter.bool.filter:
            terms = esfilter.bool.filter
            # COMPARE EVERY ORDERED PAIR OF CLAUSES; A REDUNDANT CLAUSE IS
            # OVERWRITTEN WITH MATCH_ALL AND DROPPED BY THE LOOP FURTHER DOWN
            # NOTE(review): itertools.product READS BOTH enumerate(terms)
            # COMPLETELY BEFORE YIELDING, SO t0/t1 ARE THE CLAUSES AS THEY WERE
            # BEFORE ANY terms[...] ASSIGNMENT BELOW. TWO EQUAL CLAUSES
            # THEREFORE APPEAR TO BOTH BE REPLACED (ONCE AS (i0, i1), ONCE AS
            # (i1, i0)) - CONFIRM WHETHER THAT IS INTENDED
            for (i0, t0), (i1, t1) in itertools.product(enumerate(terms), enumerate(terms)):
                if i0 == i1:
                    continue  # SAME, IGNORE

                # TERM FILTER ALREADY ASSUMES EXISTENCE
                # (presumably suppress_exception SWALLOWS ERRORS RAISED WHEN A
                # CLAUSE DOES NOT HAVE THE PROBED SHAPE - TODO CONFIRM)
                with suppress_exception:
                    if t0.exists.field != None and t0.exists.field == t1.term.items()[0][0]:
                        terms[i0] = MATCH_ALL
                        continue

                # IDENTICAL CAN BE REMOVED
                with suppress_exception:
                    if t0 == t1:
                        terms[i0] = MATCH_ALL
                        continue

                # MERGE range FILTER WITH SAME FIELD
                if i0 > i1:
                    continue  # ONLY THE (LOWER, HIGHER) ORDERING OF A PAIR IS MERGED
                with suppress_exception:
                    f0, tt0 = t0.range.items()[0]
                    f1, tt1 = t1.range.items()[0]
                    if f0 == f1:
                        # FOLD THE SECOND RANGE INTO THE FIRST, THEN RETIRE THE SECOND
                        set_default(terms[i0].range[literal_field(f1)], tt1)
                        terms[i1] = MATCH_ALL

            # REBUILD THE CLAUSE LIST: NORMALIZE EACH CLAUSE, DROP MATCH_ALL,
            # SHORT-CIRCUIT ON MATCH_NONE, AND INLINE NESTED bool.filter LISTS
            output = []
            for a in terms:
                if isinstance(a, (list, set)):
                    from mo_logs import Log

                    Log.error("and clause is not allowed a list inside a list")
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_
                if a == MATCH_ALL:
                    isDiff = True
                    continue
                if a == MATCH_NONE:
                    # ONE IMPOSSIBLE CLAUSE MAKES THE WHOLE `and` IMPOSSIBLE
                    return MATCH_NONE
                if a.bool.filter:
                    isDiff = True
                    # presumably ONLY THE OUTERMOST FILTER SHOULD CARRY THE FLAG - TODO CONFIRM
                    a.isNormal = None
                    output.extend(a.bool.filter)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                # NOTHING LEFT TO RESTRICT ON
                return MATCH_ALL
            elif len(output) == 1:
                # output[0].isNormal = True
                # A SINGLE CLAUSE REPLACES THE `and`; IT IS FLAGGED AFTER THE LOOP
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = es_and(output)
            continue

        if esfilter.bool.should:
            # NORMALIZE EACH ALTERNATIVE AND INLINE NESTED bool.should LISTS
            output = []
            for a in esfilter.bool.should:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_

                if a.bool.should:
                    a.isNormal = None
                    isDiff = True
                    output.extend(a.bool.should)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_NONE
            elif len(output) == 1:
                # A SINGLE ALTERNATIVE REPLACES THE `or`
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"bool": {"should": output}})
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                # A term FILTER WITH NO KEYS RESTRICTS NOTHING
                return MATCH_ALL

        if esfilter.terms:
            # THE RESULT IS DECIDED BY THE FIRST ENTRY WITH A NON-EMPTY VALUE LIST
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        # None AMONG THE VALUES BECOMES AN EXPLICIT MISSING-FILTER
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            output = es_or([
                                es_missing(k),
                                {"terms": {k: rest}}
                            ])
                        else:
                            output = es_missing(k)
                        output.isNormal = True
                        return output
                    else:
                        esfilter.isNormal = True
                        return esfilter
            # NO ENTRY HAD ANY VALUE TO MATCH
            return MATCH_NONE

        if esfilter.bool.must_not:
            _sub = esfilter.bool.must_not
            sub = _normalize(_sub)
            # NEGATION OF THE TRIVIAL FILTERS IS THE OPPOSITE TRIVIAL FILTER
            if sub == MATCH_NONE:
                return MATCH_ALL
            elif sub == MATCH_ALL:
                return MATCH_NONE
            elif sub is not _sub:
                # THE CHILD WAS REWRITTEN, SO BUILD A NEW must_not AROUND IT
                sub.isNormal = None
                return wrap({"bool": {"must_not": sub, "isNormal": True}})
            else:
                sub.isNormal = None

    # REACHED WHEN NO FURTHER CHANGE IS POSSIBLE, OR VIA break ABOVE
    esfilter.isNormal = True
    return esfilter