def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
    """
    if sort == None:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if is_text(s):
            output.append({"value": jx_expression(s), "sort": 1})
        elif is_expression(s):
            output.append({"value": s, "sort": 1})
        elif mo_math.is_integer(s):
            output.append({"value": jx_expression({"offset": s}), "sort": 1})
        elif not s.sort and not s.value and all(d in sort_direction for d in s.values()):
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": sort_direction[d]})
        elif not s.sort and not s.value:
            Log.error("`sort` clause must have a `value` property")
        else:
            output.append({
                "value": jx_expression(coalesce(s.value, s.field)),
                "sort": sort_direction[s.sort]
            })
    return output
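# A minimal usage sketch (not from the source): the `sort` shorthands below are assumed
# to normalize into {"value": <expression>, "sort": +1/-1} pairs, as the branches above
# suggest, and `sort_direction` is assumed to accept aliases such as "desc" and -1.
for clause in ["build.date", {"build.date": "desc"}, {"value": "run.duration", "sort": -1}]:
    for n in _normalize_sort(clause):
        print(n["value"].__data__(), n["sort"])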
def _normalize_range(range):
    if range == None:
        return None

    return Data(
        min=None if range.min == None else jx_expression(range.min),
        max=None if range.max == None else jx_expression(range.max),
        mode=range.mode
    )
def _normalize_select(select, frum, schema=None):
    """
    :param select: ONE SELECT COLUMN
    :param frum: TABLE TO get_columns()
    :param schema: SCHEMA TO LOOKUP NAMES FOR DEFINITIONS
    :return: AN ARRAY OF SELECT COLUMNS
    """
    if not _Column:
        _late_import()

    if is_text(select):
        canonical = select = Data(value=select)
    else:
        select = wrap(select)
        canonical = select.copy()

    canonical.aggregate = coalesce(canonical_aggregates[select.aggregate].name, select.aggregate, "none")
    canonical.default = coalesce(select.default, canonical_aggregates[canonical.aggregate].default)

    if hasattr(unwrap(frum), "_normalize_select"):
        return frum._normalize_select(canonical)

    output = []
    if not select.value or select.value == ".":
        output.extend([
            set_default(
                {"name": c.name, "value": jx_expression(c.name, schema=schema)},
                canonical
            )
            for c in frum.get_leaves()
        ])
    elif is_text(select.value):
        if select.value.endswith(".*"):
            canonical.name = coalesce(select.name, ".")
            value = jx_expression(select.value[:-2], schema=schema)  # STRIP THE TRAILING ".*"
            if not is_op(value, Variable):
                Log.error("`*` over general expression not supported yet")
                output.append([
                    set_default(
                        {
                            "value": LeavesOp(value, prefix=select.prefix),
                            "format": "dict"  # MARKUP FOR DECODING
                        },
                        canonical
                    )
                    for c in frum.get_columns()
                    if c.jx_type not in STRUCT
                ])
            else:
                Log.error("do not know what to do")
        else:
            canonical.name = coalesce(select.name, select.value, select.aggregate)
            canonical.value = jx_expression(select.value, schema=schema)
            output.append(canonical)

    output = wrap(output)
    if any(n == None for n in output.name):
        Log.error("expecting select to have a name: {{select}}", select=select)
    return output
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
    """
    if sort == None:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if isinstance(s, text_type):
            output.append({"value": jx_expression(s), "sort": 1})
        elif isinstance(s, Expression):
            output.append({"value": s, "sort": 1})
        elif Math.is_integer(s):
            output.append({"value": OffsetOp("offset", s), "sort": 1})
        elif all(d in sort_direction for d in s.values()) and not s.sort and not s.value:
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": sort_direction[d]})
        else:
            output.append({
                "value": jx_expression(coalesce(s.value, s.field)),
                "sort": coalesce(sort_direction[s.sort], 1)
            })
    return output
def _normalize_group(edge, dim_index, schema=None):
    """
    :param edge: Not normalized groupby
    :param dim_index: Dimensions are ordered; this is this groupby's index into that order
    :param schema: for context
    :return: a normalized groupby
    """
    if isinstance(edge, basestring):
        if edge.endswith(".*"):
            prefix = edge[:-1]
            if schema:
                output = wrap([
                    {
                        "name": literal_field(k),
                        "value": jx_expression(k),
                        "allowNulls": True,
                        "domain": {"type": "default"}
                    }
                    for k, cs in schema.items()
                    if k.startswith(prefix)
                    for c in cs
                    if c.type not in STRUCT
                ])
                return output
            else:
                return wrap([{
                    "name": edge[:-2],
                    "value": jx_expression(edge[:-2]),
                    "allowNulls": True,
                    "dim": dim_index,
                    "domain": {"type": "default"}
                }])
        return wrap([{
            "name": edge,
            "value": jx_expression(edge),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {"type": "default"}
        }])
    else:
        edge = wrap(edge)
        if (edge.domain and edge.domain.type != "default") or edge.allowNulls != None:
            Log.error("groupby does not accept complicated domains")
        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        return wrap([{
            "name": coalesce(edge.name, edge.value),
            "value": jx_expression(edge.value),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {"type": "default"}
        }])
def test_concat_serialization(self):
    expecting = {"concat": ["a", "b", "c"], "separator": {"literal": ", "}}
    op1 = jx_expression(expecting)
    output = op1.__data__()
    self.assertAlmostEqual(output, expecting)

    expecting = {"concat": {"a": "b"}}
    op1 = jx_expression(expecting)
    output = op1.__data__()
    self.assertAlmostEqual(output, expecting)
def _normalize_select_no_context(select, schema=None):
    """
    SAME NORMALIZE, BUT NO SOURCE OF COLUMNS
    """
    if not _Column:
        _late_import()

    if is_text(select):
        select = Data(value=select)
    else:
        select = wrap(select)

    output = select.copy()
    if not select.value:
        output.name = coalesce(select.name, select.aggregate)
        if output.name:
            output.value = jx_expression(".", schema=schema)
        elif len(select):
            Log.error(BAD_SELECT, select=select)
        else:
            return Null
    elif is_text(select.value):
        if select.value.endswith(".*"):
            name = select.value[:-2].lstrip(".")
            output.name = coalesce(select.name, name)
            output.value = LeavesOp(Variable(name), prefix=coalesce(select.prefix, name))
        else:
            if select.value == ".":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = jx_expression(select.value, schema=schema)
            elif select.value == "*":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = LeavesOp(Variable("."))
            else:
                output.name = coalesce(select.name, select.value.lstrip("."), select.aggregate)
                output.value = jx_expression(select.value, schema=schema)
    elif is_number(output.value):
        if not output.name:
            output.name = text(output.value)
        output.value = jx_expression(select.value, schema=schema)
    else:
        output.value = jx_expression(select.value, schema=schema)

    if not output.name:
        Log.error("expecting select to have a name: {{select}}", select=select)
    if output.name.endswith(".*"):
        Log.error("{{name|quote}} is invalid select", name=output.name)

    output.aggregate = coalesce(canonical_aggregates[select.aggregate].name, select.aggregate, "none")
    output.default = coalesce(select.default, canonical_aggregates[output.aggregate].default)
    return output
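# A hedged sketch (not from the source) of the select shorthands handled above; the
# printed attributes are assumptions based on what the normalizer assigns.
for clause in ["run.duration", {"value": "result.*"}, {"value": ".", "aggregate": "count"}]:
    norm = _normalize_select_no_context(clause)
    print(norm.name, norm.aggregate, norm.value)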
def _normalize_group(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized groupby
    :param dim_index: Dimensions are ordered; this is this groupby's index into that order
    :param schema: for context
    :return: a normalized groupby
    """
    if is_text(edge):
        if edge.endswith(".*"):
            prefix = edge[:-2]
            if schema:
                output = wrap([
                    {  # BECAUSE THIS IS A GROUPBY, EARLY SPLIT INTO LEAVES WORKS JUST FINE
                        "name": concat_field(prefix, literal_field(relative_field(untype_path(c.name), prefix))),
                        "put": {"name": literal_field(untype_path(c.name))},
                        "value": jx_expression(c.es_column, schema=schema),
                        "allowNulls": True,
                        "domain": {"type": "default"}
                    }
                    for c in schema.leaves(prefix)
                ])
                return output
            else:
                return wrap([{
                    "name": untype_path(prefix),
                    "put": {"name": literal_field(untype_path(prefix))},
                    "value": LeavesOp(Variable(prefix)),
                    "allowNulls": True,
                    "dim": dim_index,
                    "domain": {"type": "default"}
                }])
        return wrap([{
            "name": edge,
            "value": jx_expression(edge, schema=schema),
            "allowNulls": True,
            "dim": dim_index,
            "domain": Domain(type="default", limit=limit)
        }])
    else:
        edge = wrap(edge)
        if (edge.domain and edge.domain.type != "default") or edge.allowNulls != None:
            Log.error("groupby does not accept complicated domains")
        if not edge.name and not is_text(edge.value):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        return wrap([{
            "name": coalesce(edge.name, edge.value),
            "value": jx_expression(edge.value, schema=schema),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {"type": "default"}
        }])
def _normalize_group(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized groupby
    :param dim_index: Dimensions are ordered; this is this groupby's index into that order
    :param schema: for context
    :return: a normalized groupby
    """
    if isinstance(edge, text_type):
        if edge.endswith(".*"):
            prefix = edge[:-2]
            if schema:
                output = wrap([
                    {
                        "name": concat_field(prefix, literal_field(relative_field(untype_path(c.names["."]), prefix))),
                        "put": {"name": literal_field(untype_path(c.names["."]))},
                        "value": jx_expression(c.es_column, schema=schema),
                        "allowNulls": True,
                        "domain": {"type": "default"}
                    }
                    for c in schema.leaves(prefix)
                ])
                return output
            else:
                return wrap([{
                    "name": untype_path(prefix),
                    "put": {"name": literal_field(untype_path(prefix))},
                    "value": jx_expression(prefix, schema=schema),
                    "allowNulls": True,
                    "dim": dim_index,
                    "domain": {"type": "default"}
                }])
        return wrap([{
            "name": edge,
            "value": jx_expression(edge, schema=schema),
            "allowNulls": True,
            "dim": dim_index,
            "domain": Domain(type="default", limit=limit)
        }])
    else:
        edge = wrap(edge)
        if (edge.domain and edge.domain.type != "default") or edge.allowNulls != None:
            Log.error("groupby does not accept complicated domains")
        if not edge.name and not isinstance(edge.value, text_type):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        return wrap([{
            "name": coalesce(edge.name, edge.value),
            "value": jx_expression(edge.value, schema=schema),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {"type": "default"}
        }])
def _normalize_select_no_context(select, schema=None):
    """
    SAME NORMALIZE, BUT NO SOURCE OF COLUMNS
    """
    if not _Column:
        _late_import()

    if isinstance(select, basestring):
        select = Data(value=select)
    else:
        select = wrap(select)

    output = select.copy()
    if not select.value:
        output.name = coalesce(select.name, select.aggregate)
        if output.name:
            output.value = jx_expression(".")
        else:
            return output
    elif isinstance(select.value, basestring):
        if select.value.endswith(".*"):
            output.name = coalesce(select.name, select.value[:-2], select.aggregate)
            output.value = LeavesOp("leaves", Variable(select.value[:-2]))
        else:
            if select.value == ".":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = jx_expression(select.value)
            elif select.value == "*":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = LeavesOp("leaves", Variable("."))
            else:
                output.name = coalesce(select.name, select.value, select.aggregate)
                output.value = jx_expression(select.value)
    elif isinstance(select.value, (int, float)):
        if not output.name:
            output.name = text_type(select.value)
        output.value = jx_expression(select.value)
    else:
        output.value = jx_expression(select.value)

    if not output.name:
        Log.error("expecting select to have a name: {{select}}", select=select)
    if output.name.endswith(".*"):
        Log.error("{{name|quote}} is invalid select", name=output.name)

    output.aggregate = coalesce(canonical_aggregates[select.aggregate].name, select.aggregate, "none")
    output.default = coalesce(select.default, canonical_aggregates[output.aggregate].default)
    return output
def _normalize_select_no_context(select, schema=None):
    """
    SAME NORMALIZE, BUT NO SOURCE OF COLUMNS
    """
    if not _Column:
        _late_import()

    if isinstance(select, text_type):
        select = Data(value=select)
    else:
        select = wrap(select)

    output = select.copy()
    if not select.value:
        output.name = coalesce(select.name, select.aggregate)
        if output.name:
            output.value = jx_expression(".", schema=schema)
        else:
            return Null
    elif isinstance(select.value, text_type):
        if select.value.endswith(".*"):
            name = select.value[:-2]
            output.name = coalesce(select.name, name)
            output.value = LeavesOp("leaves", Variable(name), prefix=coalesce(select.prefix, name))
        else:
            if select.value == ".":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = jx_expression(select.value, schema=schema)
            elif select.value == "*":
                output.name = coalesce(select.name, select.aggregate, ".")
                output.value = LeavesOp("leaves", Variable("."))
            else:
                output.name = coalesce(select.name, select.value, select.aggregate)
                output.value = jx_expression(select.value, schema=schema)
    elif isinstance(select.value, (int, float)):
        if not output.name:
            output.name = text_type(select.value)
        output.value = jx_expression(select.value, schema=schema)
    else:
        output.value = jx_expression(select.value, schema=schema)

    if not output.name:
        Log.error("expecting select to have a name: {{select}}", select=select)
    if output.name.endswith(".*"):
        Log.error("{{name|quote}} is invalid select", name=output.name)

    output.aggregate = coalesce(canonical_aggregates[select.aggregate].name, select.aggregate, "none")
    output.default = coalesce(select.default, canonical_aggregates[output.aggregate].default)
    return output
def where(self, filter):
    """
    WILL NOT PULL WHOLE OBJECT, JUST TOP-LEVEL PROPERTIES
    :param filter: jx_expression filter
    :return: list of objects that match
    """
    select = []
    column_names = []
    for cname, cs in self.columns.items():
        cs = [c for c in cs if c.type not in STRUCT and len(c.nested_path) == 1]
        if len(cs) == 0:
            continue
        column_names.append(cname)
        if len(cs) == 1:
            select.append(quote_column(cs[0].es_column) + " " + quote_column(cs[0].name))
        else:
            select.append(
                "coalesce(" +
                sql_list(quote_column(c.es_column) for c in cs) +
                ") " + quote_column(cname)
            )

    result = self.db.query(
        SQL_SELECT + SQL("\n,").join(select) +
        SQL_FROM + quote_column(self.sf.fact) +
        SQL_WHERE + jx_expression(filter).to_sql()
    )
    return wrap([{c: v for c, v in zip(column_names, r)} for r in result.data])
def delete(self, where):
    filter = SQLang[jx_expression(where)].to_sql(self.schema)
    with self.db.transaction() as t:
        t.execute(ConcatSQL(
            SQL_DELETE,
            SQL_FROM,
            quote_column(self.snowflake.fact_name),
            SQL_WHERE,
            filter
        ))
def groupby(data, keys=None, contiguous=False):
    """
    :param data: list of data to group
    :param keys: (list of) property path name
    :param contiguous: MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    :return: list of (keys, values) PAIRS, WHERE
             keys IS IN LEAF FORM (FOR USE WITH {"eq": terms} OPERATOR)
             values IS GENERATOR OF ALL VALUES THAT MATCH keys
    """
    if isinstance(data, Container):
        return data.groupby(keys)

    try:
        if not data:
            return Null

        keys = listwrap(keys)
        if not contiguous:
            from jx_python import jx
            data = jx.sort(data, keys)

        if len(keys) == 0 or len(keys) == 1 and keys[0] == '.':
            return _groupby_value(data)

        if any(is_expression(k) for k in keys):
            raise Log.error("can not handle expressions")

        # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__
        accessor = jx_expression_to_function(jx_expression({"tuple": keys}))
        return _groupby_keys(data, keys, accessor)
    except Exception as e:
        Log.error("Problem grouping", cause=e)
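# A small usage sketch (not from the source). Per the docstring above, groupby() is
# assumed to return (group, values) pairs, with the group keyed in leaf form.
rows = [
    {"build": {"branch": "try"}, "ok": True},
    {"build": {"branch": "try"}, "ok": False},
    {"build": {"branch": "mozilla-central"}, "ok": True},
]
for group, members in groupby(rows, keys="build.branch"):
    print(group, len(list(members)))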
def test_date_literal(self):
    expr = {"date": {"literal": "today-month"}}
    from jx_python.expression_compiler import compile_expression
    result = compile_expression(Python[jx_expression(expr).partial_eval()].to_python())(None)
    expected = (Date.today() - MONTH).unix
    self.assertEqual(result, expected)
def wrap(query, schema=None):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON
    """
    if isinstance(query, QueryOp) or query == None:
        return query

    query = wrap(query)
    output = QueryOp("from", None)
    output.format = query.format

    from jx_python import wrap_from
    output.frum = wrap_from(query["from"], schema=schema)

    if not schema and isinstance(output.frum, Schema):
        schema = output.frum
    if not schema and hasattr(output.frum, "schema"):
        schema = output.frum.schema

    if query.select or isinstance(query.select, (Mapping, list)):
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    else:
        if query.edges or query.groupby:
            output.select = Data(name="count", value=jx_expression("."), aggregate="count", default=0)
        else:
            output.select = _normalize_selects(".", query.frum)

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = _normalize_edges(query.edges, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where(query.where, schema=schema)
    output.window = [_normalize_window(w) for w in listwrap(query.window)]
    output.having = None
    output.sort = _normalize_sort(query.sort)
    output.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
    if not Math.is_integer(output.limit) or output.limit < 0:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean
    return output
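# A hedged sketch (not from the source): the normalizer above is assumed to be exposed
# as QueryOp.wrap(), returning a QueryOp with select, edges/groupby, where, sort and
# limit all normalized. The table name and fields here are illustrative only.
normalized = QueryOp.wrap({
    "from": "unittest",
    "groupby": "build.platform",
    "where": {"eq": {"result.ok": False}},
    "limit": 100
})
print(normalized.limit, normalized.where.__data__())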
def es_fieldop(es, query):
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)
    FromES.query = {
        "bool": {
            "query": {"match_all": {}},
            "filter": jx_expression(query.where).to_esfilter()
        }
    }
    FromES.size = coalesce(query.limit, 200000)
    FromES.fields = FlatList()
    for s in select.value:
        if s == "*":
            FromES.fields = None
        elif isinstance(s, list):
            FromES.fields.extend(s)
        elif isinstance(s, Mapping):
            FromES.fields.extend(s.values())
        else:
            FromES.fields.append(s)
    FromES.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    data = es_post(es, FromES, query.limit)

    T = data.hits.hits
    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([t._source for t in T])
        elif isinstance(s.value, Mapping):
            # for k, v in s.value.items():
            #     matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
            matricies[s.name] = Matrix.wrap([
                {k: unwrap(t.fields).get(v, None) for k, v in s.value.items()}
                for t in T
            ])
        elif isinstance(s.value, list):
            matricies[s.name] = Matrix.wrap([
                tuple(unwrap(t.fields).get(ss, None) for ss in s.value)
                for t in T
            ])
        elif not s.value:
            matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
            except Exception as e:
                Log.error("", e)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube
def update(self, command):
    """
    EXPECTING command == {"set":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER
    """
    command = wrap(command)
    schema = self._es.get_properties()

    # GET IDS OF DOCUMENTS
    results = self._es.search({
        "stored_fields": listwrap(schema._routing.path),
        "query": {"bool": {
            "filter": jx_expression(command.where).to_esfilter(Null)
        }},
        "size": 10000
    })

    # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
    scripts = FlatList()
    for k, v in command.set.items():
        if not is_variable_name(k):
            Log.error("Only support simple paths for now")
        if isinstance(v, Mapping) and v.doc:
            scripts.append({"doc": v.doc})
        else:
            v = scrub(v)
            scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_painless(schema).script(schema)})

    if results.hits.hits:
        updates = []
        for h in results.hits.hits:
            for s in scripts:
                updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                updates.append(s)
        content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode('utf-8')
        response = self._es.cluster.post(
            self._es.path + "/_bulk",
            data=content,
            headers={"Content-Type": "application/json"},
            timeout=self.settings.timeout,
            params={"wait_for_active_shards": self.settings.wait_for_active_shards}
        )
        if response.errors:
            Log.error(
                "could not update: {{error}}",
                error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)]
            )
def update(self, command):
    """
    EXPECTING command == {"set":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER
    """
    command = wrap(command)
    schema = self.es.get_properties()

    # GET IDS OF DOCUMENTS
    results = self.es.search({
        "fields": listwrap(schema._routing.path),
        "query": {"filtered": {
            "filter": jx_expression(command.where).to_esfilter(Null)
        }},
        "size": 10000
    })

    # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
    scripts = FlatList()
    for k, v in command.set.items():
        if not is_variable_name(k):
            Log.error("Only support simple paths for now")
        if isinstance(v, Mapping) and v.doc:
            scripts.append({"doc": v.doc})
        else:
            v = scrub(v)
            scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_es_script(schema).script(schema)})

    if results.hits.hits:
        updates = []
        for h in results.hits.hits:
            for s in scripts:
                updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                updates.append(s)
        content = "\n".join(value2json(c) for c in updates) + "\n"
        response = self.es.cluster.post(
            self.es.path + "/_bulk",
            data=content,
            headers={"Content-Type": "application/json"},
            timeout=self.settings.timeout,
            params={"wait_for_active_shards": self.settings.wait_for_active_shards}
        )
        if response.errors:
            Log.error(
                "could not update: {{error}}",
                error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)]
            )
def where(self, where):
    if isinstance(where, Mapping):
        temp = compile_expression(jx_expression(where).to_python())
    elif isinstance(where, Expression):
        temp = compile_expression(where.to_python())
    else:
        temp = where

    return ListContainer("from " + self.name, filter(temp, self.data), self.schema)
def test_value_not_a_variable(self):
    result = jx_expression({"eq": {"result.test": "/XMLHttpRequest/send-entity-body-document.htm"}}).vars()
    expected = {"result.test"}
    self.assertEqual(result, expected, "expecting the one and only variable")
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    :param data: list of data to group
    :param keys: (list of) property path name
    :param size:
    :param min_size:
    :param max_size:
    :param contiguous: MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    :return: list of (keys, values) PAIRS, WHERE
             keys IS IN LEAF FORM (FOR USE WITH {"eq": terms} OPERATOR)
             values IS GENERATOR OF ALL VALUES THAT MATCH keys
    """
    if isinstance(data, Container):
        return data.groupby(keys)

    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    try:
        keys = listwrap(keys)
        if not contiguous:
            from jx_python import jx
            data = jx.sort(data, keys)

        if not data:
            return Null

        if any(isinstance(k, Expression) for k in keys):
            Log.error("can not handle expressions")
        else:
            # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__
            accessor = jx_expression_to_function(jx_expression({"tuple": keys}))

            def _output():
                start = 0
                prev = accessor(data[0])
                for i, d in enumerate(data):
                    curr = accessor(d)
                    if curr != prev:
                        group = {}
                        for k, gg in zip(keys, prev):
                            group[k] = gg
                        yield Data(group), data[start:i:]
                        start = i
                        prev = curr
                group = {}
                for k, gg in zip(keys, prev):
                    group[k] = gg
                yield Data(group), data[start::]

            return _output()
    except Exception as e:
        Log.error("Problem grouping", cause=e)
def test_null_startswith_null(self):
    filter = jx_expression({"prefix": [{"null": {}}, {"literal": ""}]}).partial_eval()
    expected = TRUE
    self.assertEqual(filter, expected)
    self.assertEqual(expected, filter)
def test_null_startswith(self):
    filter = jx_expression({"prefix": [{"null": {}}, {"literal": "something"}]}).partial_eval(JX)
    expected = FALSE
    self.assertEqual(filter, expected)
    self.assertEqual(expected, filter)
def update(self, command):
    """
    EXPECTING command == {"set":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER
    """
    command = wrap(command)
    table = self.get_table(command['update'])

    es_index = self.es.cluster.get_index(read_only=False, alias=None, kwargs=self.es.settings)

    schema = table.schema
    es_filter = jx_expression(command.where).to_esfilter(schema)

    # GET IDS OF DOCUMENTS
    query = {
        "from": command['update'],
        "select": ["_id"] + [{"name": k, "value": v} for k, v in command.set.items()],
        "where": command.where,
        "format": "list",
        "limit": 10000
    }
    results = self.query(query)

    if results.data:
        content = "".join(
            t
            for r in results.data
            for _id, row in [(r._id, r)]
            for _ in [row.__setitem__('_id', None)]  # WARNING! DESTRUCTIVE TO row
            for update in map(value2json, ({"update": {"_id": _id}}, {"doc": row}))
            for t in (update, "\n")
        )
        response = self.es.cluster.post(
            es_index.path + "/" + "_bulk",
            data=content,
            headers={"Content-Type": "application/json"},
            timeout=self.settings.timeout,
            params={"wait_for_active_shards": self.settings.wait_for_active_shards}
        )
        if response.errors:
            Log.error(
                "could not update: {{error}}",
                error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)]
            )

    # DELETE BY QUERY, IF NEEDED
    if '.' in listwrap(command.clear):
        self.es.delete_record(es_filter)
        return

    es_index.flush()
def jx_expression_to_function(expr):
    """
    RETURN FUNCTION THAT REQUIRES PARAMETERS (row, rownum=None, rows=None):
    """
    if isinstance(expr, Expression):
        if isinstance(expr, ScriptOp) and not isinstance(expr.script, text_type):
            return expr.script
        else:
            return compile_expression(expr.to_python())
    if expr != None and not isinstance(expr, (Mapping, list)) and hasattr(expr, "__call__"):
        return expr
    return compile_expression(jx_expression(expr).to_python())
def jx_expression_to_function(expr):
    """
    RETURN FUNCTION THAT REQUIRES PARAMETERS (row, rownum=None, rows=None):
    """
    if is_expression(expr):
        if is_op(expr, ScriptOp) and not is_text(expr.script):
            return expr.script
        else:
            return compile_expression(Python[expr].to_python())
    if expr != None and not is_data(expr) and not is_list(expr) and hasattr(expr, "__call__"):
        return expr
    return compile_expression(Python[jx_expression(expr)].to_python())
def test_eq3(self):
    where = {"eq": {"a": 1, "b": [2, 3]}}
    result = simplify_esfilter(ES52[jx_expression(where)].partial_eval().to_esfilter(identity_schema))
    self.assertEqual(
        result,
        es_and([
            {"term": {"a": 1}},
            {"terms": {"b": [2, 3]}}
        ])
    )
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
    """
    if sort == None:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if isinstance(s, text_type):
            output.append({"value": jx_expression(s), "sort": 1})
        elif isinstance(s, Expression):
            output.append({"value": s, "sort": 1})
        elif Math.is_integer(s):
            output.append({"value": OffsetOp("offset", s), "sort": 1})
        elif not s.sort and not s.value and all(d in sort_direction for d in s.values()):
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": sort_direction[d]})
        elif not s.sort and not s.value:
            Log.error("`sort` clause must have a `value` property")
        else:
            output.append({
                "value": jx_expression(coalesce(s.value, s.field)),
                "sort": coalesce(sort_direction[s.sort], 1)
            })
    return output
def _normalize_window(window, schema=None):
    v = window.value
    try:
        expr = jx_expression(v, schema=schema)
    except Exception:
        expr = ScriptOp("script", v)

    return Data(
        name=coalesce(window.name, window.value),
        value=expr,
        edges=[n for e in listwrap(window.edges) for n in _normalize_edge(e, schema)],
        sort=_normalize_sort(window.sort),
        aggregate=window.aggregate,
        range=_normalize_range(window.range),
        where=_normalize_where(window.where, schema=schema)
    )
def es_deepop(es, mvel, query):
    FromES = es09.util.build_es_query(query)

    select = query.edges

    temp_query = query.copy()
    temp_query.select = select
    temp_query.edges = FlatList()
    FromES.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter": jx_expression(query.where).to_esfilter()
    }

    data = es_post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)
    terms = transpose(*rows)

    # NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
    edges = query.edges
    for f, e in enumerate(edges):
        for r in terms[f]:
            e.domain.getPartByKey(r)

        e.index = f
        for p, part in enumerate(e.domain.partitions):
            part.dataIndex = p
        e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [len(e.domain.partitions) for e in query.edges]
    output = Matrix(*dims)

    # FILL CUBE
    for r in rows:
        term_coord = [e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)]
        output[term_coord] = SUM(output[term_coord], r[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube
def _normalize_window(window, schema=None):
    v = window.value
    try:
        expr = jx_expression(v, schema=schema)
    except Exception:
        if hasattr(v, "__call__"):
            expr = v
        else:
            expr = ScriptOp(v)

    return Data(
        name=coalesce(window.name, window.value),
        value=expr,
        edges=[n for i, e in enumerate(listwrap(window.edges)) for n in _normalize_edge(e, i, limit=None, schema=schema)],
        sort=_normalize_sort(window.sort),
        aggregate=window.aggregate,
        range=_normalize_range(window.range),
        where=_normalize_where(window.where, schema=schema)
    )
def jx_expression_to_function(expr):
    """
    RETURN FUNCTION THAT REQUIRES PARAMETERS (row, rownum=None, rows=None):
    """
    if expr == None:
        return Null

    if is_expression(expr):
        # ALREADY AN EXPRESSION OBJECT
        if is_op(expr, ScriptOp) and not is_text(expr.script):
            return expr.script
        else:
            func = compile_expression(expr.to_python())
            return JXExpression(func, expr.__data__())

    if not is_data(expr) and not is_list(expr) and hasattr(expr, "__call__"):
        # THIS APPEARS TO BE A FUNCTION ALREADY
        return expr

    expr = jx_expression(expr)
    func = compile_expression(expr.to_python())
    return JXExpression(func, expr)
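# A hedged usage sketch (not from the source): per the docstring above, the returned
# callable is assumed to accept (row, rownum=None, rows=None) and evaluate the JSON
# expression against each row.
is_failed = jx_expression_to_function({"eq": {"result.ok": False}})
rows = [{"result": {"ok": False}}, {"result": {"ok": True}}]
print([is_failed(row) for row in rows])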
def where(self, filter):
    """
    WILL NOT PULL WHOLE OBJECT, JUST TOP-LEVEL PROPERTIES
    :param filter: jx_expression filter
    :return: list of objects that match
    """
    select = []
    column_names = []
    for c in self.schema.columns:
        if c.jx_type in STRUCT:
            continue
        if len(c.nested_path) != 1:
            continue
        column_names.append(c.name)
        select.append(sql_alias(quote_column(c.es_column), c.name))

    where_sql = SQLang[jx_expression(filter)].to_sql(self.schema)[0].sql.b
    result = self.db.query(ConcatSQL(
        SQL_SELECT,
        JoinSQL(SQL_COMMA, select),
        SQL_FROM,
        quote_column(self.snowflake.fact_name),
        SQL_WHERE,
        where_sql
    ))

    return wrap([{c: v for c, v in zip(column_names, r)} for r in result.data])
def _normalize_edge(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized edge
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param schema: for context
    :return: a normalized edge
    """
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, text_type):
        if schema:
            leaves = unwraplist(list(schema.leaves(edge)))
            if not leaves or isinstance(leaves, (list, set)):
                return [Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index
                )]
            elif isinstance(leaves, _Column):
                return [Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=_normalize_domain(domain=leaves, limit=limit, schema=schema)
                )]
            elif isinstance(leaves.fields, list) and len(leaves.fields) == 1:
                return [Data(
                    name=leaves.name,
                    value=jx_expression(leaves.fields[0], schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )]
            else:
                return [Data(
                    name=leaves.name,
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )]
        else:
            return [Data(
                name=edge,
                value=jx_expression(edge, schema=schema),
                allowNulls=True,
                dim=dim_index
            )]
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, text_type):
            Log.error("You must name compound and complex edges: {{edge}}", edge=edge)

        if isinstance(edge.value, (list, set)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)

            return [Data(
                name=edge.name,
                value=jx_expression(edge.value, schema=schema),
                allowNulls=bool(coalesce(edge.allowNulls, True)),
                dim=dim_index,
                domain=domain
            )]

        domain = _normalize_domain(edge.domain, schema=schema)
        return [Data(
            name=coalesce(edge.name, edge.value),
            value=jx_expression(edge.value, schema=schema),
            range=_normalize_range(edge.range),
            allowNulls=bool(coalesce(edge.allowNulls, True)),
            dim=dim_index,
            domain=domain
        )]
def __init__(self, **desc):
    Domain.__init__(self, **desc)
    desc = wrap(desc)

    self.type = "set"
    self.order = {}
    self.NULL = Null
    self.partitions = FlatList()
    self.primitive = True  # True IF DOMAIN IS A PRIMITIVE VALUE SET

    if isinstance(self.key, set):
        Log.error("problem")

    if not desc.key and (len(desc.partitions) == 0 or isinstance(desc.partitions[0], (text_type, Number, tuple))):
        # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
        self.key = "value"
        self.map = {}
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            part = {"name": p, "value": p, "dataIndex": i}
            self.partitions.append(part)
            self.map[p] = part
            self.order[p] = i
            if isinstance(p, (int, float)):
                text_part = text_type(float(p))  # ES CAN NOT HANDLE NUMERIC PARTS
                self.map[text_part] = part
                self.order[text_part] = i
        self.label = coalesce(self.label, "name")
        self.primitive = True
        return

    if desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.dimension.fields)
    elif desc.partitions and is_container(desc.key):
        # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
    elif desc.partitions and is_data(desc.partitions[0][desc.key]):
        # LOOKS LIKE OBJECTS
        # sorted = desc.partitions[desc.key]
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
        self.order = {p[self.key]: p.dataIndex for p in desc.partitions}
        self.partitions = desc.partitions
    elif len(desc.partitions) == 0:
        # CREATE AN EMPTY DOMAIN
        self.key = "value"
        self.map = {}
        self.order[None] = 0
        self.label = coalesce(self.label, "name")
        return
    elif desc.key == None:
        if desc.partitions and all(desc.partitions.where) or all(desc.partitions.esfilter):
            if not all(desc.partitions.name):
                Log.error("Expecting all partitions to have a name")
            self.key = "name"
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.partitions.append({
                    "where": jx_expression(coalesce(p.where, p.esfilter)),
                    "name": p.name,
                    "dataIndex": i
                })
                self.map[p.name] = p
                self.order[p.name] = i
            return
        elif desc.partitions and len(set(desc.partitions.value) - {None}) == len(desc.partitions):
            # TRY A COMMON KEY CALLED "value".  IT APPEARS UNIQUE
            self.key = "value"
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
            self.primitive = False
        else:
            Log.error("Domains must have keys, or partitions")
    elif self.key:
        self.key = desc.key
        self.map = dict()
        self.map[None] = self.NULL
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            self.map[p[self.key]] = p
            self.order[p[self.key]] = i
        self.primitive = False
    else:
        Log.error("Can not handle")

    self.label = coalesce(self.label, "name")

    if hasattr(desc.partitions, "__iter__"):
        self.partitions = wrap(list(desc.partitions))
    else:
        Log.error("expecting a list of partitions")
def _normalize_where(where, schema=None):
    if where == None:
        return TRUE
    return jx_expression(where, schema=schema)
def DataClass(name, columns, constraint=None):
    """
    Use the DataClass to define a class, but with some extra features:
    1. restrict the datatype of property
    2. restrict if `required`, or if `nulls` are allowed
    3. generic constraints on object properties

    It is expected that this class become a real class (or be removed) in the
    long term because it is expensive to use and should only be good for
    verifying program correctness, not user input.

    :param name: Name of the class we are creating
    :param columns: Each columns[i] has properties {
            "name",     - (required) name of the property
            "required", - False if it must be defined (even if None)
            "nulls",    - True if property can be None, or missing
            "default",  - A default value, if none is provided
            "type"      - a Python datatype
        }
    :param constraint: a JSON query Expression for extra constraints (return true if all constraints are met)
    :return: The class that has been created
    """
    columns = wrap([
        {"name": c, "required": True, "nulls": False, "type": object} if is_text(c) else c
        for c in columns
    ])
    slots = columns.name
    required = wrap(filter(lambda c: c.required and not c.nulls and not c.default, columns)).name
    nulls = wrap(filter(lambda c: c.nulls, columns)).name
    defaults = {c.name: coalesce(c.default, None) for c in columns}
    types = {c.name: coalesce(c.jx_type, object) for c in columns}

    code = expand_template(
        """
from __future__ import unicode_literals
from mo_future import is_text, is_binary
from collections import Mapping

meta = None
types_ = {{types}}
defaults_ = {{defaults}}

class {{class_name}}(Mapping):
    __slots__ = {{slots}}

    def _constraint(row, rownum, rows):
        try:
            return {{constraint_expr}}
        except Exception as e:
            return False

    def __init__(self, **kwargs):
        if not kwargs:
            return

        for s in {{slots}}:
            object.__setattr__(self, s, kwargs.get(s, {{defaults}}.get(s, None)))

        missed = {{required}}-set(kwargs.keys())
        if missed:
            Log.error("Expecting properties {"+"{missed}}", missed=missed)

        illegal = set(kwargs.keys())-set({{slots}})
        if illegal:
            Log.error("{"+"{names}} are not a valid properties", names=illegal)

        if not self._constraint(0, [self]):
            Log.error("constraint not satisfied {"+"{expect}}\\n{"+"{value|indent}}", expect={{constraint}}, value=self)

    def __getitem__(self, item):
        return getattr(self, item)

    def __setitem__(self, item, value):
        setattr(self, item, value)
        return self

    def __setattr__(self, item, value):
        if item not in {{slots}}:
            Log.error("{"+"{item|quote}} not valid attribute", item=item)
        object.__setattr__(self, item, value)
        if not self._constraint(0, [self]):
            Log.error("constraint not satisfied {"+"{expect}}\\n{"+"{value|indent}}", expect={{constraint}}, value=self)

    def __getattr__(self, item):
        Log.error("{"+"{item|quote}} not valid attribute", item=item)

    def __hash__(self):
        return object.__hash__(self)

    def __eq__(self, other):
        if isinstance(other, {{class_name}}) and dict(self)==dict(other) and self is not other:
            Log.error("expecting to be same object")
        return self is other

    def __dict__(self):
        return {k: getattr(self, k) for k in {{slots}}}

    def items(self):
        return ((k, getattr(self, k)) for k in {{slots}})

    def __copy__(self):
        _set = object.__setattr__
        output = object.__new__({{class_name}})
        {{assign}}
        return output

    def __iter__(self):
        return {{slots}}.__iter__()

    def __len__(self):
        return {{len_slots}}

    def __str__(self):
        return str({{dict}})
""",
        {
            "class_name": name,
            "slots": "(" + (", ".join(quote(s) for s in slots)) + ")",
            "required": "{" + (", ".join(quote(s) for s in required)) + "}",
            "nulls": "{" + (", ".join(quote(s) for s in nulls)) + "}",
            "defaults": Literal(defaults).to_python(),
            "len_slots": len(slots),
            "dict": "{" + (", ".join(quote(s) + ": self." + s for s in slots)) + "}",
            "assign": "; ".join("_set(output, " + quote(s) + ", self." + s + ")" for s in slots),
            "types": "{" + (",".join(quote(k) + ": " + v.__name__ for k, v in types.items())) + "}",
            "constraint_expr": Python[jx_expression(constraint)].to_python(),
            "constraint": value2json(constraint),
        },
    )

    output = _exec(code, name)
    register_data(output)
    return output
from jx_base.dimensions import Dimension
from jx_base.domains import Domain, SetDomain, DefaultDomain
from jx_base.expressions import jx_expression, Expression, Variable, LeavesOp, ScriptOp, OffsetOp, TRUE, FALSE
from jx_base.queries import is_variable_name
from mo_dots import Data, relative_field, concat_field
from mo_dots import coalesce, Null, set_default, unwraplist, literal_field
from mo_dots import wrap, unwrap, listwrap
from mo_dots.lists import FlatList
from mo_future import text_type
from mo_json.typed_encoder import untype_path, STRUCT
from mo_logs import Log
from mo_math import AND, UNION, Math

DEFAULT_LIMIT = 10
MAX_LIMIT = 10000

DEFAULT_SELECT = Data(name="count", value=jx_expression("."), aggregate="count", default=0)

_jx = None
_Column = None


def _late_import():
    global _jx
    global _Column

    from jx_python.meta import Column as _Column
    from jx_python import jx as _jx

    _ = _jx
    _ = _Column
def _normalize_select(select, frum, schema=None):
    """
    :param select: ONE SELECT COLUMN
    :param frum: TABLE TO get_columns()
    :param schema: SCHEMA TO LOOKUP NAMES FOR DEFINITIONS
    :return: AN ARRAY OF SELECT COLUMNS
    """
    if not _Column:
        _late_import()

    if isinstance(select, text_type):
        canonical = select = Data(value=select)
    else:
        select = wrap(select)
        canonical = select.copy()

    canonical.aggregate = coalesce(canonical_aggregates[select.aggregate].name, select.aggregate, "none")
    canonical.default = coalesce(select.default, canonical_aggregates[canonical.aggregate].default)

    if hasattr(unwrap(frum), "_normalize_select"):
        return frum._normalize_select(canonical)

    output = []
    if not select.value or select.value == ".":
        output.extend([
            set_default(
                {"name": c.name, "value": jx_expression(c.name, schema=schema)},
                canonical
            )
            for c in frum.get_leaves()
        ])
    elif isinstance(select.value, text_type):
        if select.value.endswith(".*"):
            canonical.name = coalesce(select.name, ".")
            value = jx_expression(select.value[:-2], schema=schema)  # STRIP THE TRAILING ".*"
            if not isinstance(value, Variable):
                Log.error("`*` over general expression not supported yet")
                output.append([
                    set_default(
                        {
                            "value": LeavesOp("leaves", value, prefix=select.prefix),
                            "format": "dict"  # MARKUP FOR DECODING
                        },
                        canonical
                    )
                    for c in frum.get_columns()
                    if c.jx_type not in STRUCT
                ])
            else:
                Log.error("do not know what to do")
        else:
            canonical.name = coalesce(select.name, select.value, select.aggregate)
            canonical.value = jx_expression(select.value, schema=schema)
            output.append(canonical)

    output = wrap(output)
    if any(n == None for n in output.name):
        Log.error("expecting select to have a name: {{select}}", select=select)
    return output