def to_esfilter(self, schema):
    """
    Build an ES filter testing `self.value` against `self.superset`.

    When `self.value` is a variable, the first leaf column the schema reports
    for it is used to build a `term` (single value) or `terms` (sequence)
    filter.  Values for BOOLEAN columns go through `value2boolean` first.
    Any other kind of `self.value` is delegated to the Painless script
    translation.

    :param schema: object providing `leaves(name)` for column lookup
    :return: dict filter, or whatever the Painless fallback produces
    """
    if not is_op(self.value, Variable_):
        return Painless[self].to_es_script(schema).to_esfilter(schema)

    var = self.value.var
    cols = schema.leaves(var)
    if not cols:
        Log.error("expecting {{var}} to be a column", var=var)
    col = first(cols)
    var = col.es_column

    single = is_literal(self.superset) and not is_sequence(self.superset.value)
    if col.jx_type == BOOLEAN:
        if single:
            return {"term": {var: value2boolean(self.superset.value)}}
        # MATERIALIZE THE LIST: ON PYTHON 3 map() IS A LAZY ITERATOR, WHICH
        # IS NOT A USABLE VALUE INSIDE THE RETURNED FILTER
        return {"terms": {var: list(map(value2boolean, self.superset.value))}}

    if single:
        return {"term": {var: self.superset.value}}
    return {"terms": {var: self.superset.value}}
def to_esfilter(self, schema):
    """
    Build an ES filter testing `self.value` against `self.superset`.

    When `self.value` is a variable, the first leaf column the schema reports
    for it is used to build a `term` (single value) or `terms` (many values)
    filter; `MATCH_NONE` is returned when the schema has no such column.
    Values for BOOLEAN columns go through `value2boolean` first.  Any other
    kind of `self.value` is delegated to the Painless script translation.

    :param schema: object providing `leaves(name)` for column lookup
    :return: dict filter, `MATCH_NONE`, or the Painless fallback result
    """
    if not is_op(self.value, Variable_):
        return Painless[self].to_es_script(schema).to_esfilter(schema)

    cols = schema.leaves(self.value.var)
    if not cols:
        return MATCH_NONE
    col = first(cols)
    var = col.es_column

    single = is_literal(self.superset) and not is_many(self.superset.value)
    if col.jx_type == BOOLEAN:
        if single:
            return {"term": {var: value2boolean(self.superset.value)}}
        # MATERIALIZE THE LIST: ON PYTHON 3 map() IS A LAZY ITERATOR, WHICH
        # IS NOT A USABLE VALUE INSIDE THE RETURNED FILTER
        return {"terms": {var: list(map(value2boolean, self.superset.value))}}

    if single:
        return {"term": {var: self.superset.value}}
    return {"terms": {var: self.superset.value}}
def partial_eval(self):
    """
    Simplify this equality after simplifying both operands.

    :return: TRUE/FALSE when the result can be decided here, otherwise a new
             `EqOp` (or a `NestedOp` wrapped by `self.lang`) over the
             simplified operands
    """
    left = ES52[self.lhs].partial_eval()
    right = ES52[self.rhs].partial_eval()

    if is_literal(left):
        if is_literal(right):
            # BOTH SIDES KNOWN: DECIDE NOW (value_compare IS FALSY ON A MATCH)
            return FALSE if value_compare(left.value, right.value) else TRUE
        # FLIP SO WE CAN USE TERMS FILTER
        left, right = right, left

    if is_literal(right) and same_json_type(left.type, BOOLEAN):
        # SPECIAL CASE true == "T"
        as_boolean = string2boolean(right.value)
        if as_boolean is None:
            return FALSE
        return EqOp([left, Literal(as_boolean)])

    # OBJECT MEANS WE REALLY DO NOT KNOW THE TYPE
    types_are_known = left.type != OBJECT and right.type != OBJECT
    if types_are_known and not same_json_type(left.type, right.type):
        return FALSE

    if is_op(left, NestedOp):
        # PUSH THE COMPARISON INTO THE NESTED WHERE CLAUSE
        inner = AndOp([left.where, EqOp([left.select, right])])
        return self.lang[NestedOp(path=left.frum, where=inner)]

    return EqOp([left, right])
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Translate this equality into a boolean SQL expression.

    Both operands are simplified and converted to SQL, then compared column
    by column for each of the type codes "b", "s", "n" and "j".  The
    resulting clauses are joined with `SQL_OR`.

    :param schema: passed through to the operands' `to_sql`
    :param not_null: accepted for interface compatibility; not read here
    :param boolean: accepted for interface compatibility; not read here
    :return: wrapped list with one `{"name": ".", "sql": {"b": ...}}` entry,
             or `FALSE.to_sql(schema)` when no clause was produced
    """
    left = SQLang[self.lhs].partial_eval()
    right = SQLang[self.rhs].partial_eval()
    left_sql = left.to_sql(schema, not_null=True)
    right_sql = right.to_sql(schema, not_null=True)

    # A "T"/"F" LITERAL FACING A BOOLEAN EXPRESSION IS RE-READ AS A BOOLEAN
    # NOTE(review): `!= None` / `== None` ARE KEPT AS WRITTEN; PRESUMABLY
    # THEY ALSO MATCH A NULL-LIKE OBJECT - CONFIRM BEFORE CHANGING TO `is`
    if is_literal(right) and left_sql[0].sql.b != None and right.value in ("T", "F"):
        right_sql = BooleanOp(right).to_sql(schema)
    if is_literal(left) and right_sql[0].sql.b != None and left.value in ("T", "F"):
        left_sql = BooleanOp(left).to_sql(schema)

    if len(left_sql) != len(right_sql):
        Log.error("lhs and rhs have different dimensionality!?")

    clauses = []
    for l_col, r_col in zip(left_sql, right_sql):
        for type_code in "bsnj":
            l_part = l_col.sql[type_code]
            r_part = r_col.sql[type_code]
            if r_part == None:
                if l_part == None:
                    # NEITHER SIDE HAS THIS TYPE
                    continue
                clauses.append(ConcatSQL(l_part, SQL_IS_NULL))
            elif l_part == None:
                clauses.append(ConcatSQL(r_part, SQL_IS_NULL))
            else:
                clauses.append(
                    ConcatSQL(sql_iso(l_part), SQL_EQ, sql_iso(r_part))
                )

    if clauses:
        return wrap([{"name": ".", "sql": {"b": JoinSQL(SQL_OR, clauses)}}])
    return FALSE.to_sql(schema)
def partial_eval(self):
    """
    Simplify this equality after simplifying both operands.

    :return: TRUE/FALSE when both operands are literals, otherwise an `EqOp`
             with any literal operand placed on the right
    """
    left = ES52[self.lhs].partial_eval()
    right = ES52[self.rhs].partial_eval()

    if not is_literal(left):
        return EqOp([left, right])
    if not is_literal(right):
        # FLIP SO WE CAN USE TERMS FILTER
        return EqOp([right, left])
    # BOTH SIDES KNOWN: DECIDE NOW (value_compare IS FALSY ON A MATCH)
    return FALSE if value_compare(left.value, right.value) else TRUE
def to_es_script(self, schema, not_null=False, boolean=False, many=True):
    """
    Produce a boolean script that tests whether `self.expr` is missing.

    :param schema: object providing `leaves(name)` for column lookup
    :param not_null: accepted for interface compatibility; not read here
    :param boolean: accepted for interface compatibility; not read here
    :param many: accepted for interface compatibility; not read here
    :return: the script for the missing-test
    """
    target = self.expr

    if not is_op(target, Variable_):
        if is_literal(target):
            return target.missing().to_es_script(schema)
        return target.missing().partial_eval().to_es_script(schema)

    if target.var == "_id":
        # THE "_id" VARIABLE IS TREATED AS NEVER MISSING
        return EsScript(type=BOOLEAN, expr="false", frum=self, schema=schema)

    # THE VARIABLE IS MISSING ONLY WHEN EVERY LEAF COLUMN IS EMPTY
    empty_checks = [
        EsScript(
            type=BOOLEAN,
            expr="doc[" + quote(c.es_column) + "].empty",
            frum=self,
            schema=schema,
        )
        for c in schema.leaves(target.var)
    ]
    return AndOp(empty_checks).partial_eval().to_es_script(schema)
def to_esfilter(self, schema):
    """
    Build an ES filter for a find-substring operation.

    A `regexp` filter is used when `self.value` is a variable with exactly
    one STRING leaf column, `self.find` is a literal, `self.default` is NULL
    and `self.start` is the literal 0.  Otherwise the expression is converted
    through Painless and back.

    Side effect: the fallback path sets `self.simplified = False`.

    :param schema: object providing `leaves(name)` for column lookup
    :return: dict filter, or the result of the Painless round trip
    """
    searches_whole_value = (
        is_op(self.value, Variable_)
        and is_literal(self.find)
        and self.default is NULL
        and is_literal(self.start)
        and self.start.value == 0
    )
    if searches_whole_value:
        string_columns = [
            c for c in schema.leaves(self.value.var) if c.jx_type == STRING
        ]
        if len(string_columns) == 1:
            pattern = ".*" + re.escape(self.find.value) + ".*"
            return {"regexp": {string_columns[0].es_column: pattern}}

    # CONVERT TO SCRIPT, SIMPLIFY, AND THEN BACK TO FILTER
    self.simplified = False
    return ES52[Painless[self].partial_eval()].to_esfilter(schema)
def to_es(self, schema):
    """
    Build an ES filter testing `self.value` against `self.superset`.

    Strategies, tried in order:
    1. variable vs. literal superset -> `term` / `terms` on the first leaf
       column (`MATCH_NONE` when the schema has no such column)
    2. variable vs. `TupleOp` superset -> OR of equalities
    3. `NestedOp` selecting a variable vs. literal superset -> the test is
       moved into the nested `where` clause
    4. anything else -> Painless script

    :param schema: object providing `leaves(name)` for column lookup
    :return: the ES filter
    """
    value = self.value
    superset = self.superset

    if is_op(value, Variable):
        cols = schema.leaves(value.var)
        if not cols:
            return MATCH_NONE
        col = first(cols)
        es_name = col.es_column

        if is_literal(superset):
            candidates = superset.value
            single = not is_many(candidates)
            if col.jx_type == BOOLEAN:
                if single:
                    return {"term": {es_name: value2boolean(candidates)}}
                return {"terms": {es_name: list(map(value2boolean, candidates))}}
            if single:
                return {"term": {es_name: candidates}}
            return {"terms": {es_name: candidates}}
        elif is_op(superset, TupleOp):
            any_equal = OrOp([EqOp([value, s]) for s in superset.terms])
            return any_equal.partial_eval().to_es(schema)

    if (
        is_op(value, NestedOp)
        and is_literal(superset)
        and is_op(value.select, Variable)
    ):
        nested = NestedOp(
            path=value.path,
            select=NULL,
            where=AndOp([value.where, InOp([value.select, superset])]),
        )
        return ES52[nested].exists().partial_eval().to_es(schema)

    # THE HARD WAY
    return Painless[self].to_es_script(schema).to_es(schema)
def to_es(self, schema):
    """
    Build an ES filter matching values that end with `self.suffix`.

    :param schema: object providing `leaves(name)` for column lookup
    :return: `MATCH_ALL` when `self.suffix` is falsy, a `regexp` filter for a
             variable with a literal suffix, otherwise the Painless result
    """
    if not self.suffix:
        return MATCH_ALL

    if not (is_op(self.expr, Variable_) and is_literal(self.suffix)):
        return PainlessSuffixOp.to_es_script(self, schema).to_es(schema)

    # ONLY THE FIRST LEAF COLUMN IS CONSIDERED
    es_name = first(schema.leaves(self.expr.var)).es_column
    return {"regexp": {es_name: ".*" + string2regexp(self.suffix.value)}}
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Translate a concatenation of `self.terms`, joined by `self.separator`,
    into an SQL string expression.

    Each term is rendered as text with the separator in front of it, and a
    term that is missing contributes the empty string.  When the separator
    is not known to be empty, the leading separator is removed with `SUBSTR`.

    :param schema: passed through to every nested `to_sql` call
    :param not_null: accepted for interface compatibility; not read here
    :param boolean: accepted for interface compatibility; not read here
    :return: `self.default` as SQL when there are no terms, otherwise an
             `SQLScript` of type STRING
    """
    default = self.default.to_sql(schema)
    if len(self.terms) == 0:
        return default

    separator_length = LengthOp(self.separator).partial_eval()
    no_sep = is_literal(separator_length) and separator_length.value == 0
    sep = SQLang[self.separator].to_sql(schema)[0].sql.s

    pieces = []
    for raw_term in self.terms:
        term_expr = SQLang[raw_term]
        missing = term_expr.missing().partial_eval()
        term = term_expr.to_sql(schema, not_null=True)[0].sql

        # RENDER THE TERM AS TEXT, WHATEVER ITS NATIVE SQL TYPE
        if term.s:
            term_sql = term.s
        elif term.n:
            term_sql = "cast(" + term.n + " as text)"
        else:
            term_sql = (
                SQL_CASE + SQL_WHEN + term.b + SQL_THEN + quote_value("true")
                + SQL_ELSE + quote_value("false") + SQL_END
            )

        sep_term = (
            term_sql if no_sep else sql_iso(sql_concat_text([sep, term_sql]))
        )

        if isinstance(missing, TrueOp):
            # ALWAYS MISSING: CONTRIBUTES NOTHING
            pieces.append(SQL_EMPTY_STRING)
        elif missing:
            # MAYBE MISSING: DECIDE AT QUERY TIME
            missing_sql = missing.to_sql(schema, boolean=True)[0].sql.b
            pieces.append(
                SQL_CASE + SQL_WHEN + sql_iso(missing_sql) + SQL_THEN
                + SQL_EMPTY_STRING + SQL_ELSE + sep_term + SQL_END
            )
        else:
            pieces.append(sep_term)

    if no_sep:
        expr_ = sql_concat_text(pieces)
    else:
        # SKIP THE SEPARATOR THAT PRECEDES THE FIRST TERM
        first_char = ConcatSQL(
            LengthOp(self.separator).to_sql(schema)[0].sql.n, SQL_PLUS, SQL_ONE
        )
        expr_ = sql_call("SUBSTR", sql_concat_text(pieces), first_char)

    return SQLScript(
        expr=expr_,
        data_type=STRING,
        frum=self,
        miss=self.missing(),
        many=False,
        schema=schema,
    )
def to_esfilter(self, schema):
    """
    Build an ES filter for a find-substring operation.

    :param schema: only used by the script fallback
    :return: a `regexp` filter keyed directly on the variable name when
             `self.value` is a variable and `self.find` is a literal,
             otherwise the filter derived from this expression's script
    """
    if not (is_op(self.value, Variable_) and is_literal(self.find)):
        return self.to_es_script(schema).script(schema).to_esfilter(schema)

    pattern = ".*" + string2regexp(self.find.value) + ".*"
    return {"regexp": {self.value.var: pattern}}
def to_esfilter(self, schema):
    """
    Build an ES filter matching values that start with `self.prefix`.

    :param schema: object providing `leaves(name)` for column lookup
    :return: `MATCH_ALL` for an empty literal prefix or a falsy expression,
             `es_not(MATCH_ALL)` when the expression is NULL, a `prefix`
             filter for a variable with a literal prefix, otherwise the
             Painless result
    """
    if is_literal(self.prefix) and not self.prefix.value:
        return MATCH_ALL

    target = self.expr
    if target is NULL:
        return es_not(MATCH_ALL)
    if not target:
        return MATCH_ALL

    if is_op(target, StringOp_):
        # LOOK THROUGH THE STRING CONVERSION TO THE WRAPPED TERM
        target = target.term

    if not (is_op(target, Variable_) and is_literal(self.prefix)):
        return PainlessPrefixOp.to_es_script(self, schema).to_esfilter(schema)

    # ONLY THE FIRST LEAF COLUMN IS CONSIDERED
    es_name = first(schema.leaves(target.var)).es_column
    return {"prefix": {es_name: self.prefix.value}}
def to_esfilter(self, schema):
    """
    Build an ES filter matching values that start with `self.prefix`.

    :param schema: object providing `leaves(name)` for column lookup
    :return: `MATCH_ALL` when `self.value` is falsy, a `prefix` filter for a
             variable with a literal prefix, `MATCH_NONE` when the Painless
             translation is `false_script`, otherwise the Painless
             translation itself
    """
    if not self.value:
        return MATCH_ALL

    if is_op(self.value, Variable_) and is_literal(self.prefix):
        # ONLY THE FIRST LEAF COLUMN IS CONSIDERED
        var = first(schema.leaves(self.value.var)).es_column
        return {"prefix": {var: self.prefix.value}}

    # CALL THE PAINLESS IMPLEMENTATION DIRECTLY ON THIS INSTANCE; THE
    # PREVIOUS `PainlessBasicStartsWithOp.self.to_es_script` LOOKED UP A
    # `self` ATTRIBUTE ON THE CLASS
    output = PainlessBasicStartsWithOp.to_es_script(self, schema)
    if output is false_script:
        return MATCH_NONE
    # NOTE(review): THIS RETURNS THE SCRIPT OBJECT UNCHANGED, WHILE SIMILAR
    # METHODS CALL `.to_esfilter(schema)` ON IT - CONFIRM WHICH IS INTENDED
    return output
def to_esfilter(self, schema):
    """
    Build an ES `regexp` filter from a variable and a literal pattern.

    :param schema: object providing `leaves(name)` for column lookup
    :return: `MATCH_NONE` when the variable has no leaf column, a `regexp`
             filter when it has exactly one; every other case is reported
             through `Log.error`
    """
    if is_literal(self.pattern) and is_op(self.var, Variable_):
        matches = schema.leaves(self.var.var)
        match_count = len(matches)
        if match_count == 0:
            return MATCH_NONE
        if match_count == 1:
            return {"regexp": {first(matches).es_column: self.pattern.value}}
        # MORE THAN ONE LEAF COLUMN
        Log.error("regex on not supported ")
    else:
        Log.error("regex only accepts a variable and literal pattern")
def to_bq(self, schema, not_null=False, boolean=False):
    """
    Translate `self.term`, read as a boolean, via `to_bq`.

    :param schema: passed through to the nested `to_bq` calls
    :param not_null: accepted for interface compatibility; not read here
    :param boolean: accepted for interface compatibility; not read here
    :return: the term itself when it is already boolean, TRUE/FALSE for the
             literals "T"/"F", otherwise the term's existence test
    """
    term = BQLang[self.term].partial_eval()

    if term.type == "boolean":
        return term.to_bq(schema)

    if is_literal(term) and term.value in ("T", "F"):
        constant = TRUE if term.value == "T" else FALSE
        return constant.to_bq(schema)

    # ANY OTHER TERM COUNTS AS TRUE WHEN IT EXISTS
    return term.exists().partial_eval().to_bq(schema)
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Translate the length of `self.term` into a numeric SQL expression.

    Literal text and literal numbers are measured immediately (numbers by
    the length of their JSON form).  Non-literals become `LENGTH(...)` over
    the term's string SQL.

    :param schema: passed through to the term's `to_sql`
    :param not_null: forwarded to the term's `to_sql`
    :param boolean: accepted for interface compatibility; not read here
    :return: wrapped list with one `{"name": ".", "sql": {"n": ...}}` entry,
             or `Null` for a literal that is neither text nor a number
    """
    term = SQLang[self.term].partial_eval()

    if not is_literal(term):
        text_sql = term.to_sql(schema, not_null=not_null)[0].sql.s
        # NOTE(review): ConcatSQL RECEIVES ONE TUPLE HERE - CONFIRM THIS
        # MATCHES THE ConcatSQL SIGNATURE IN USE
        length_sql = ConcatSQL((SQL("LENGTH"), sql_iso(text_sql)))
    else:
        literal = term.value
        if isinstance(literal, text):
            length_sql = quote_value(len(literal))
        elif isinstance(literal, (float, int)):
            length_sql = quote_value(len(convert.value2json(literal)))
        else:
            return Null

    return wrap([{"name": ".", "sql": {"n": length_sql}}])
def to_esfilter(self, schema):
    """
    Build an ES filter testing `self.value` against `self.superset`.

    Strategies, tried in order:
    1. variable vs. literal superset -> `term` / `terms` on the first leaf
       column (`MATCH_NONE` when the schema has no such column)
    2. variable vs. `TupleOp` superset -> OR of equalities
    3. anything else -> Painless script

    :param schema: object providing `leaves(name)` for column lookup
    :return: the ES filter
    """
    superset = self.superset

    if is_op(self.value, Variable_):
        cols = schema.leaves(self.value.var)
        if not cols:
            return MATCH_NONE
        col = first(cols)
        es_name = col.es_column

        if is_literal(superset):
            candidates = superset.value
            single = not is_many(candidates)
            if col.jx_type == BOOLEAN:
                if single:
                    return {"term": {es_name: value2boolean(candidates)}}
                return {"terms": {es_name: list(map(value2boolean, candidates))}}
            if single:
                return {"term": {es_name: candidates}}
            return {"terms": {es_name: candidates}}
        elif is_op(superset, TupleOp):
            any_equal = OrOp([EqOp([self.value, s]) for s in superset.terms])
            return any_equal.partial_eval().to_esfilter(schema)

    # THE HARD WAY
    return Painless[self].to_es_script(schema).to_esfilter(schema)
def to_es(self, schema):
    """
    Build an ES filter matching values that start with `self.prefix`.

    For a variable with a literal prefix, one `prefix` filter is produced per
    column returned by `schema.values(...)`.  A column that is not of STRING
    type is reported through `Log.error`.

    :param schema: object providing `values(name, exclude_type=...)`
    :return: `MATCH_ALL`, `MATCH_NONE`, a single `prefix` filter, an
             `es_or` of several, or the Painless result
    """
    if is_literal(self.prefix) and not self.prefix.value:
        return MATCH_ALL

    target = self.expr
    if target is NULL:
        return MATCH_NONE
    if not target:
        return MATCH_ALL

    if is_op(target, StringOp_):
        # LOOK THROUGH THE STRING CONVERSION TO THE WRAPPED TERM
        target = target.term

    if not (is_op(target, Variable_) and is_literal(self.prefix)):
        return Painless[self].to_es_script(schema).to_es(schema)

    cols = schema.values(target.var, exclude_type=INTERNAL)
    if not cols:
        return MATCH_NONE

    filters = []
    for col in cols:
        if col.jx_type == STRING:
            filters.append({"prefix": {col.es_column: self.prefix.value}})
        else:
            Log.error(
                'do not know how to {"prefix":{{column|quote}}} of type {{type}}',
                column=col.name,
                type=col.jx_type,
            )

    found = len(filters)
    if found == 0:
        return MATCH_NONE
    if found == 1:
        return filters[0]
    return es_or(filters)
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Translate "value starts with prefix" into a boolean SQL expression.

    A literal prefix becomes a `LIKE 'prefix%'` test.  When the prefix
    contains the LIKE wildcards `%` or `_`, the backslash and both wildcards
    are backslash-escaped and an ESCAPE clause is appended.  A non-literal
    prefix is expressed as `SqlInstrOp(value, prefix) == SQL_ONE`.

    :param schema: passed through to the nested `to_sql` calls
    :param not_null: accepted for interface compatibility; not read here
    :param boolean: accepted for interface compatibility; not read here
    :return: wrapped list with one `{"name": ".", "sql": {"b": ...}}` entry
             for a literal prefix, otherwise the SQL of the fallback test
    """
    prefix = SQLang[self.prefix].partial_eval()

    if not is_literal(prefix):
        # FIX: `to_sql` REQUIRES THE SCHEMA; IT WAS PREVIOUSLY OMITTED
        return (
            SqlEqOp([SqlInstrOp([self.value, prefix]), SQL_ONE])
            .partial_eval()
            .to_sql(schema)
        )

    value = SQLang[self.value].partial_eval().to_sql(schema)[0].sql.s
    prefix = prefix.value
    if "%" in prefix or "_" in prefix:
        # ESCAPE THE BACKSLASH FIRST SO THE ESCAPES ADDED FOR _ AND % ARE NOT
        # DOUBLED.  FIX: PYTHON STRINGS HAVE `replace`, NOT `replaceAll`
        for r in "\\_%":
            prefix = prefix.replace(r, "\\" + r)
        sql = ConcatSQL(
            value, SQL_LIKE, quote_value(prefix + "%"), SQL_ESCAPE, SQL("\\")
        )
    else:
        sql = ConcatSQL(value, SQL_LIKE, quote_value(prefix + "%"))
    return wrap([{"name": ".", "sql": {"b": sql}}])
def to_esfilter(self, schema):
    """
    Build an ES filter for an equality test.

    :param schema: object providing `leaves(name)` for column lookup
    :return: a `term` / `terms` filter for a variable compared to a literal,
             otherwise the Painless result
    """
    if not (is_op(self.lhs, Variable_) and is_literal(self.rhs)):
        return Painless[self].to_es_script(schema).to_esfilter(schema)

    # FALL BACK TO THE RAW VARIABLE NAME WHEN THE SCHEMA HAS NO LEAF COLUMN
    field = self.lhs.var
    cols = schema.leaves(field)
    if cols:
        field = first(cols).es_column

    expected = self.rhs.value
    if not is_list(expected):
        return {"term": {field: expected}}
    if len(expected) == 1:
        return {"term": {field: expected[0]}}
    return {"terms": {field: expected}}
def _inequality_to_esfilter(self, schema):
    """
    Build an ES filter for an inequality, using `self.op` as the operator.

    :param schema: object providing `leaves(name)` for column lookup
    :return: a `range` filter for a variable compared to a literal,
             otherwise a `script` filter from the Painless translation
    """
    if not (is_op(self.lhs, Variable_) and is_literal(self.rhs)):
        script = Painless[self].to_es_script(schema)
        if script.miss is not FALSE:
            # THE SCRIPT MUST NEVER EVALUATE TO MISSING
            Log.error("inequality must be decisive")
        return {"script": es_script(script.expr)}

    cols = schema.leaves(self.lhs.var)
    if not cols:
        # HAPPENS DURING DEBUGGING, AND MAYBE IN REAL LIFE TOO
        lhs = self.lhs.var
    elif len(cols) == 1:
        lhs = first(cols).es_column
    else:
        Log.error("operator {{op|quote}} does not work on objects", op=self.op)
    return {"range": {lhs: {self.op: self.rhs.value}}}
def to_esfilter(self, schema):
    """
    Build an ES filter for an equality test.

    For a variable compared to a literal, the literal (or each member of a
    literal container) is matched by JSON type against the variable's leaf
    columns, treating all `NUMBER_TYPES` as compatible with each other.  A
    missing column is logged as a warning.  Any other operand shape becomes
    a `CaseOp` that also handles missing operands.

    :param schema: object providing `leaves(name)` for column lookup
    :return: a `term` / `terms` filter, the filter for FALSE when no column
             has a compatible type, or the filter of a derived expression
    """
    if not (is_op(self.lhs, Variable_) and is_literal(self.rhs)):
        null_aware = CaseOp([
            WhenOp(self.lhs.missing(), **{"then": self.rhs.missing()}),
            WhenOp(self.rhs.missing(), **{"then": FALSE}),
            BasicEqOp([self.lhs, self.rhs]),
        ])
        return ES52[null_aware.partial_eval()].to_esfilter(schema)

    rhs = self.rhs.value
    lhs = self.lhs.var
    cols = schema.leaves(lhs)
    if not cols:
        Log.warning(
            "{{col}} does not exist while processing {{expr}}",
            col=lhs,
            expr=self.__data__(),
        )

    def compatible(json_type, column):
        # SAME TYPE, OR BOTH NUMERIC
        return json_type == column.jx_type or (
            json_type in NUMBER_TYPES and column.jx_type in NUMBER_TYPES
        )

    if is_container(rhs):
        if len(rhs) == 1:
            rhs = rhs[0]
        else:
            # MAP JSON TYPE TO LIST OF LITERALS
            types = Data()
            for r in rhs:
                types[python_type_to_json_type[r.__class__]] += [r]

            if len(types) != 1:
                # MIXED TYPES: ONE EQUALITY PER TYPE GROUP
                per_type = OrOp([
                    EqOp([self.lhs, values]) for _, values in types.items()
                ])
                return per_type.partial_eval().to_esfilter(schema)

            jx_type, values = first(types.items())
            for c in cols:
                if compatible(jx_type, c):
                    return {"terms": {c.es_column: values}}
            return FALSE.to_esfilter(schema)

    for c in cols:
        if c.jx_type == BOOLEAN:
            # THE CONVERTED VALUE IS KEPT FOR THE REMAINING COLUMNS TOO
            rhs = pull_functions[c.jx_type](rhs)
        rhs_type = python_type_to_json_type[rhs.__class__]
        if compatible(rhs_type, c):
            return {"term": {c.es_column: rhs}}
    return FALSE.to_esfilter(schema)
def to_esfilter(self, schema):
    """
    Build an ES filter for an equality test.

    For a variable compared to a literal, the literal (or each member of a
    literal list) is matched by JSON type against the variable's leaf
    columns.  Any other operand shape becomes a `CaseOp` that also handles
    missing operands.

    :param schema: object providing `leaves(name)` for column lookup
    :return: a `term` / `terms` filter, the filter for FALSE when no column
             has a matching type, or the filter of a derived expression
    """
    if is_op(self.lhs, Variable_) and is_literal(self.rhs):
        rhs = self.rhs.value
        lhs = self.lhs.var
        cols = schema.leaves(lhs)

        if is_list(rhs):
            if len(rhs) == 1:
                rhs = rhs[0]
            else:
                types = Data()  # MAP JSON TYPE TO LIST OF LITERALS
                for r in rhs:
                    # FIX: GROUP BY THE TYPE OF EACH ELEMENT `r`, NOT BY THE
                    # TYPE OF THE ENCLOSING LIST `rhs`
                    types[python_type_to_json_type[r.__class__]] += [r]
                if len(types) == 1:
                    jx_type, values = first(types.items())
                    for c in cols:
                        if jx_type == c.jx_type:
                            return {"terms": {c.es_column: values}}
                    return FALSE.to_esfilter(schema)
                else:
                    # MIXED TYPES: ONE EQUALITY PER TYPE GROUP
                    return (
                        OrOp([
                            EqOp([self.lhs, values])
                            for t, values in types.items()
                        ])
                        .partial_eval()
                        .to_esfilter(schema)
                    )

        for c in cols:
            if c.jx_type == BOOLEAN:
                # THE CONVERTED VALUE IS KEPT FOR THE REMAINING COLUMNS TOO
                rhs = pull_functions[c.jx_type](rhs)
            if python_type_to_json_type[rhs.__class__] == c.jx_type:
                return {"term": {c.es_column: rhs}}
        return FALSE.to_esfilter(schema)
    else:
        return (
            ES52[CaseOp([
                WhenOp(self.lhs.missing(), **{"then": self.rhs.missing()}),
                WhenOp(self.rhs.missing(), **{"then": FALSE}),
                BasicEqOp([self.lhs, self.rhs]),
            ])]
            .partial_eval()
            .to_esfilter(schema)
        )
def to_esfilter(self, schema):
    """
    Build an ES filter for a not-equal test.

    A variable compared to a literal becomes the negation of a `term`
    filter.  Other operands are translated to scripts; an equality-style
    script is built according to whether each side is multi-valued, and the
    result is negated with `es_not`.

    :param schema: object providing `values(name)` for column lookup
    :return: `MATCH_ALL` when the variable has no column, otherwise a
             negated filter; a variable split over several columns is
             reported through `Log.error`
    """
    if is_op(self.lhs, Variable_) and is_literal(self.rhs):
        columns = schema.values(self.lhs.var)
        if len(columns) == 0:
            return MATCH_ALL
        elif len(columns) == 1:
            return es_not({"term": {first(columns).es_column: self.rhs.value}})
        else:
            Log.error("column split to multiple, not handled")
    else:
        lhs = self.lhs.partial_eval().to_es_script(schema)
        rhs = self.rhs.partial_eval().to_es_script(schema)

        # BUILD THE *EQUALITY* SCRIPT FOR THIS OPERAND SHAPE; IT IS NEGATED
        # EXACTLY ONCE BELOW
        if lhs.many:
            if rhs.many:
                equality = (
                    "(" + lhs.expr + ").size()==(" + rhs.expr + ").size() && "
                    + "(" + rhs.expr + ").containsAll(" + lhs.expr + ")"
                )
            else:
                equality = "(" + lhs.expr + ").contains(" + rhs.expr + ")"
        else:
            if rhs.many:
                equality = "(" + rhs.expr + ").contains(" + lhs.expr + ")"
            else:
                # FIX: THIS BRANCH USED `!=` INSIDE es_not, A DOUBLE NEGATION
                # THAT MATCHED EQUAL VALUES
                equality = "(" + lhs.expr + ") == (" + rhs.expr + ")"

        return es_not(ScriptOp(equality).to_esfilter(schema))
def to_es_script(self, schema, not_null=False, boolean=False, many=True):
    """
    Translate this equality into a boolean Painless script.

    The script shape depends on whether each side is multi-valued:
    * many vs. many     -> same size and `containsAll`
    * single vs. many   -> `contains`
    * boolean lhs       -> `lhs`, `!lhs`, or a plain comparison
    * many vs. single   -> `contains`
    * single vs. single -> `(lhs)==(rhs)`

    :param schema: passed through to the nested `to_es_script` calls
    :param not_null: accepted for interface compatibility; not read here
    :param boolean: accepted for interface compatibility; not read here
    :param many: accepted for interface compatibility; not read here
    :return: an `EsScript` of type BOOLEAN (or the script of an `AndOp`)
    """
    simple_rhs = Painless[self.rhs].partial_eval()
    lhs = Painless[self.lhs].partial_eval().to_es_script(schema)
    rhs = simple_rhs.to_es_script(schema)

    def script(expr):
        # EVERY RESULT SHARES THE SAME TYPE, ORIGIN AND SCHEMA
        return EsScript(type=BOOLEAN, expr=expr, frum=self, schema=schema)

    if rhs.many:
        if lhs.many:
            return AndOp([
                script("(" + lhs.expr + ").size()==(" + rhs.expr + ").size()"),
                script("(" + rhs.expr + ").containsAll(" + lhs.expr + ")"),
            ]).to_es_script(schema)
        return script("(" + rhs.expr + ").contains(" + lhs.expr + ")")

    # FROM HERE ON rhs IS SINGLE-VALUED
    if lhs.type == BOOLEAN:
        if is_literal(simple_rhs) and simple_rhs.value in ("F", False):
            return script("!" + lhs.expr)
        if is_literal(simple_rhs) and simple_rhs.value in ("T", True):
            return script(lhs.expr)
        return script("(" + lhs.expr + ")==(" + rhs.expr + ")")

    if lhs.many:
        return script("(" + lhs.expr + ").contains(" + rhs.expr + ")")

    # FIX: PARENTHESIZE EACH OPERAND (WAS "(a==b)") SO A COMPOUND OPERAND
    # CANNOT BE REGROUPED BY OPERATOR PRECEDENCE
    return script("(" + lhs.expr + ")==(" + rhs.expr + ")")
def to_esfilter(self, schema):
    """
    Build an ES filter for a not-equal test.

    :param schema: passed through to the nested translation calls
    :return: the negation of a `term` filter keyed on the variable name when
             comparing a variable to a literal, otherwise the filter derived
             from this expression's script
    """
    if is_op(self.lhs, Variable_) and is_literal(self.rhs):
        expected = self.rhs.to_esfilter(schema)
        return es_not({"term": {self.lhs.var: expected}})
    return self.to_es_script(schema).to_esfilter(schema)
def to_es_script(self, schema, not_null=False, boolean=False, many=True):
    """
    Translate this equality into a boolean Painless script.

    The script shape depends on whether each side is multi-valued, on
    whether the left side is BOOLEAN, and (for many vs. single) on whether
    the operand types agree.  An unexpected type mismatch is reported
    through `Log.error`.

    :param schema: passed through to the nested `to_es_script` calls
    :param not_null: accepted for interface compatibility; not read here
    :param boolean: accepted for interface compatibility; not read here
    :param many: accepted for interface compatibility; not read here
    :return: an `EsScript` of type BOOLEAN, or the script of an
             `AndOp` / TRUE / FALSE
    """
    simple_rhs = Painless[self.rhs].partial_eval()
    lhs = Painless[self.lhs].partial_eval().to_es_script(schema)
    rhs = simple_rhs.to_es_script(schema)

    def script(expr):
        # EVERY RESULT SHARES THE SAME TYPE, ORIGIN AND SCHEMA
        return EsScript(type=BOOLEAN, expr=expr, frum=self, schema=schema)

    def boolean_compare():
        # A LITERAL "F"/"T" RHS COLLAPSES TO A NEGATION / THE LHS ITSELF
        if is_literal(simple_rhs) and simple_rhs.value in ("F", False):
            return script("!" + lhs.expr)
        if is_literal(simple_rhs) and simple_rhs.value in ("T", True):
            return script(lhs.expr)
        return script("(" + lhs.expr + ")==(" + rhs.expr + ")")

    if lhs.many:
        if rhs.many:
            return AndOp([
                script("(" + lhs.expr + ").size()==(" + rhs.expr + ").size()"),
                script("(" + rhs.expr + ").containsAll(" + lhs.expr + ")"),
            ]).to_es_script(schema)
        if lhs.type == BOOLEAN:
            return boolean_compare()
        if lhs.type == rhs.type:
            return script("(" + lhs.expr + ").contains(" + rhs.expr + ")")
        if lhs.type == NUMBER and rhs.type == INTEGER:
            # CAST THE INTEGER TO double BEFORE THE contains() LOOKUP
            return script(
                "(" + lhs.expr + ").contains((double)" + rhs.expr + ")"
            )
        Log.error("type mismatch not expected while converting to painless")
        return None

    if rhs.many:
        return script("(" + rhs.expr + ").contains(" + lhs.expr + ")")

    if lhs is null_script:
        # NULL EQUALS ONLY NULL
        if rhs is null_script:
            return TRUE.to_es_script(schema)
        return FALSE.to_es_script(schema)
    if lhs.type == BOOLEAN:
        return boolean_compare()
    return script("(" + lhs.expr + ")==(" + rhs.expr + ")")
def to_es_script(self, schema, not_null=False, boolean=False, many=True):
    """
    Translate this equality into a boolean Painless script.

    The script shape depends on whether each side is multi-valued:
    * many vs. many     -> same size and `containsAll`
    * single vs. many   -> `contains`
    * boolean lhs       -> `lhs`, `!lhs`, or a plain comparison
    * many vs. single   -> `contains`
    * single vs. single -> `(lhs)==(rhs)`

    :param schema: passed through to the nested `to_es_script` calls
    :param not_null: accepted for interface compatibility; not read here
    :param boolean: accepted for interface compatibility; not read here
    :param many: accepted for interface compatibility; not read here
    :return: an `EsScript` of type BOOLEAN (or the script of an `AndOp`)
    """
    simple_rhs = Painless[self.rhs].partial_eval()
    lhs = Painless[self.lhs].partial_eval().to_es_script(schema)
    rhs = simple_rhs.to_es_script(schema)

    def script(expr):
        # EVERY RESULT SHARES THE SAME TYPE, ORIGIN AND SCHEMA
        return EsScript(type=BOOLEAN, expr=expr, frum=self, schema=schema)

    if lhs.many and rhs.many:
        return AndOp([
            script("(" + lhs.expr + ").size()==(" + rhs.expr + ").size()"),
            script("(" + rhs.expr + ").containsAll(" + lhs.expr + ")"),
        ]).to_es_script(schema)

    if rhs.many:
        # lhs IS SINGLE-VALUED HERE
        return script("(" + rhs.expr + ").contains(" + lhs.expr + ")")

    if lhs.type == BOOLEAN:
        # A LITERAL "F"/"T" RHS COLLAPSES TO A NEGATION / THE LHS ITSELF
        if is_literal(simple_rhs) and simple_rhs.value in ("F", False):
            return script("!" + lhs.expr)
        if is_literal(simple_rhs) and simple_rhs.value in ("T", True):
            return script(lhs.expr)
        return script("(" + lhs.expr + ")==(" + rhs.expr + ")")

    if lhs.many:
        return script("(" + lhs.expr + ").contains(" + rhs.expr + ")")

    # FIX: PARENTHESIZE EACH OPERAND (WAS "(a==b)") SO A COMPOUND OPERAND
    # CANNOT BE REGROUPED BY OPERATOR PRECEDENCE
    return script("(" + lhs.expr + ")==(" + rhs.expr + ")")