def _normalize_sort(sort=None):
    """
    REWRITE A `sort` CLAUSE AS A LIST OF {"value": <expression>, "sort": <direction>}

    :param sort: one sort term, or many; each term may be text, an expression,
                 an integer, or a data object
    :return: Null when `sort` equals None, otherwise a FlatList with the
             normalized terms in the order given
    """
    # NOTE(review): `== None` IS KEPT AS WRITTEN (NOT `is None`); PRESUMABLY THE
    # PROJECT'S NULL OBJECT COMPARES EQUAL TO None - CONFIRM BEFORE CHANGING
    if sort == None:
        return Null

    normalized = FlatList()
    for term in listwrap(sort):
        if is_text(term):
            # TEXT IS PARSED AS AN EXPRESSION, DIRECTION 1
            normalized.append({"value": jx_expression(term), "sort": 1})
            continue
        if is_expression(term):
            # ALREADY AN EXPRESSION, USE IT AS-IS
            normalized.append({"value": term, "sort": 1})
            continue
        if mo_math.is_integer(term):
            # AN INTEGER IS WRAPPED IN AN `offset` EXPRESSION
            normalized.append({
                "value": jx_expression({"offset": term}),
                "sort": 1
            })
            continue

        # FROM HERE ON `term` IS TREATED AS A DATA OBJECT
        lacks_sort_and_value = not (term.sort or term.value)
        if lacks_sort_and_value and all(
            d in sort_direction for d in term.values()
        ):
            # SHORT FORM {name: direction, ...}: ONE ENTRY PER PAIR
            for name, direction in term.items():
                normalized.append({
                    "value": jx_expression(name),
                    "sort": sort_direction[direction]
                })
        elif lacks_sort_and_value:
            Log.error("`sort` clause must have a `value` property")
        else:
            # LONG FORM; `field` IS USED WHEN `value` IS MISSING
            normalized.append({
                "value": jx_expression(coalesce(term.value, term.field)),
                "sort": sort_direction[term.sort]
            })
    return normalized
def _normalize_range(range): if range == None: return None return Data(min=None if range.min == None else jx_expression(range.min), max=None if range.max == None else jx_expression(range.max), mode=range.mode)
def define(cls, expr):
    """
    BUILD A PrefixOp FROM THE JSON FORM {"prefix": ...}

    :param expr: mapping that may hold a `prefix` entry
    :return: PrefixOp(NULL, NULL) when the entry is missing or falsy;
             PrefixOp(Variable, Literal) for the {name: constant} form;
             otherwise a PrefixOp of the two parsed parameters
    """
    param = expr.get('prefix')
    if not param:
        return PrefixOp(NULL, NULL)

    if is_data(param):
        # SIMPLE FORM: ONLY THE FIRST {name: constant} PAIR IS USED
        name, constant = first(param.items())
        return PrefixOp(Variable(name), Literal(constant))

    # GENERAL FORM: A PAIR OF EXPRESSIONS
    subject, prefix = param
    return PrefixOp(jx_expression(subject), jx_expression(prefix))
def define(cls, expr):
    """
    GENERAL SUPPORT FOR BUILDING EXPRESSIONS FROM JSON EXPRESSIONS
    OVERRIDE THIS IF AN OPERATOR EXPECTS COMPLICATED PARAMETERS

    The first key of `expr` found in `operators` selects the operator class
    (looked up in `cls.lang.ops`); every other key is parsed and passed to
    that class as a keyword clause.

    :param expr: Data representing a JSON Expression
    :return: parse tree (NULL when `expr` has no items)
    """
    try:
        lang = cls.lang
        pairs = items_(expr)
        for op, term in pairs:
            known = operators.get(op)
            if not known:
                continue
            class_ = lang.ops[known.get_id()]
            # ALL REMAINING PROPERTIES ARE CLAUSES OF THE OPERATOR
            clauses = {
                name: jx_expression(value)
                for name, value in expr.items()
                if name != op
            }
            break
        else:
            # NO KEY MATCHED A KNOWN OPERATOR
            if not pairs:
                return NULL
            raise Log.error(
                "{{operator|quote}} is not a known operator", operator=expr
            )

        if term == None:
            return class_([], **clauses)

        if is_container(term):
            return class_([jx_expression(t) for t in term], **clauses)

        if is_data(term):
            pairs = items_(term)
            if not class_.has_simple_form:
                return class_(_jx_expression(term, lang), **clauses)
            if len(pairs) == 1:
                # SIMPLE FORM {variable: literal}
                name, value = pairs[0]
                return class_([Variable(name), Literal(value)], **clauses)
            return class_(
                {name: Literal(value) for name, value in pairs}, **clauses
            )

        if op in ("literal", "date", "offset"):
            # THESE OPERATORS RECEIVE THEIR PARAMETER UNPARSED
            return class_(term, **clauses)
        return class_(_jx_expression(term, lang), **clauses)
    except Exception as e:
        Log.error(
            "programmer error expr = {{value|quote}}", value=expr, cause=e
        )
def _normalize_select_no_context(select, schema=None):
    """
    SAME NORMALIZE, BUT NO SOURCE OF COLUMNS

    :param select: text (taken as the `value`), or a data object with any of
                   `name`, `value`, `aggregate`, `default`, `prefix`
    :param schema: passed through to jx_expression()
    :return: a copy of `select` with `name`, `value`, `aggregate` and `default`
             filled in; Null when `select` is empty
    """
    select = Data(value=select) if is_text(select) else to_data(select)
    output = select.copy()
    value = select.value

    if not value:
        # NO VALUE: ONLY ACCEPTABLE WHEN A NAME OR AGGREGATE WAS GIVEN
        output.name = coalesce(select.name, select.aggregate)
        if output.name:
            output.value = jx_expression(".", schema=schema)
        elif len(select):
            Log.error(BAD_SELECT, select=select)
        else:
            return Null
    elif is_text(value):
        if value.endswith(".*"):
            # "a.b.*" SELECTS THE LEAVES UNDER "a.b"
            name = value[:-2].lstrip(".")
            output.name = coalesce(select.name, name)
            output.value = LeavesOp(
                Variable(name), prefix=coalesce(select.prefix, name)
            )
        elif value == ".":
            output.name = coalesce(select.name, select.aggregate, ".")
            output.value = jx_expression(value, schema=schema)
        elif value == "*":
            output.name = coalesce(select.name, select.aggregate, ".")
            output.value = LeavesOp(Variable("."))
        else:
            output.name = coalesce(
                select.name, value.lstrip("."), select.aggregate
            )
            output.value = jx_expression(value, schema=schema)
    elif is_number(output.value):
        # A NUMBER WITHOUT A NAME IS NAMED BY ITS TEXT FORM
        if not output.name:
            output.name = text(output.value)
        output.value = jx_expression(value, schema=schema)
    else:
        output.value = jx_expression(value, schema=schema)

    if not output.name:
        Log.error("expecting select to have a name: {{select}}", select=select)
    if output.name.endswith(".*"):
        Log.error("{{name|quote}} is invalid select", name=output.name)

    output.aggregate = coalesce(
        canonical_aggregates[select.aggregate].name, select.aggregate, "none"
    )
    output.default = coalesce(
        select.default, canonical_aggregates[output.aggregate].default
    )
    return output
def define(cls, expr):
    """
    BUILD A ConcatOp FROM THE JSON FORM {"concat": ..., "default": ..., "separator": ...}

    :param expr: mapping with a `concat` entry; `default` and `separator`
                 are optional
    :return: ConcatOp over the parsed terms
    """
    operands = expr["concat"]
    if is_data(operands):
        # SIMPLE FORM: ONLY THE FIRST {name: constant} PAIR IS USED
        name, constant = first(operands.items())
        operands = [Variable(name), Literal(constant)]
    else:
        operands = [jx_expression(t) for t in operands]

    options = {}
    for key, value in expr.items():
        if key not in ("default", "separator"):
            continue
        if is_text(value) and not is_variable_name(value):
            # TEXT THAT IS NOT A VARIABLE NAME IS TAKEN LITERALLY
            options[key] = Literal(value)
        else:
            options[key] = jx_expression(value)

    return ConcatOp(operands, **options)
def define(cls, expr):
    """
    BUILD A BetweenOp FROM THE JSON FORM {"between": ..., "default": ..., "start": ...}

    Two parameter forms are accepted:
      * a sequence [value, prefix, suffix], each parsed as an expression
      * a mapping {var: [prefix, suffix]}, where `var` is a variable name and
        prefix/suffix are literals (only the first pair is used)

    :param expr: data object with `between`, and optional `default`, `start`
    :return: the BetweenOp, looked up through `cls.lang[...]`
    :raises: via Log.error when the parameters match neither form (this now
             includes an empty mapping)
    """
    term = expr.between
    if is_sequence(term):
        return cls.lang[BetweenOp(
            value=jx_expression(term[0]),
            prefix=jx_expression(term[1]),
            suffix=jx_expression(term[2]),
            default=jx_expression(expr.default),
            start=jx_expression(expr.start),
        )]

    if is_data(term):
        # TAKE THE FIRST PAIR THROUGH AN ITERATOR: THE RESULT OF items() IS
        # NOT INDEXABLE FOR A PLAIN dict IN PYTHON 3
        pair = next(iter(term.items()), None)
        if pair is not None:
            var, vals = pair
            if is_sequence(vals) and len(vals) == 2:
                return cls.lang[BetweenOp(
                    value=Variable(var),
                    prefix=Literal(vals[0]),
                    suffix=Literal(vals[1]),
                    default=jx_expression(expr.default),
                    start=jx_expression(expr.start),
                )]

    # EVERY UNSUPPORTED SHAPE ENDS UP HERE
    Log.error(
        "`between` parameters are expected to be in {var: [prefix, suffix]} form"
    )
def _normalize_window(window, schema=None):
    """
    NORMALIZE ONE WINDOW CLAUSE

    :param window: data object with `name`, `value`, `edges`, `sort`,
                   `aggregate`, `range` and `where`
    :param schema: passed to the expression, edge and where normalizers
    :return: Data with the same properties, each in normal form
    """
    raw = window.value
    try:
        value = jx_expression(raw, schema=schema)
    except Exception:
        # NOT PARSEABLE AS AN EXPRESSION: KEEP IT WHEN IT HAS `__call__`,
        # OTHERWISE WRAP IT AS A SCRIPT
        value = raw if hasattr(raw, "__call__") else ScriptOp(raw)

    name = coalesce(window.name, window.value)

    # EACH EDGE NORMALIZES TO A LIST; COLLECT THEM INTO ONE FLAT LIST
    edges = []
    for index, e in enumerate(listwrap(window.edges)):
        edges.extend(_normalize_edge(e, index, limit=None, schema=schema))

    return Data(
        name=name,
        value=value,
        edges=edges,
        sort=_normalize_sort(window.sort),
        aggregate=window.aggregate,
        range=_normalize_range(window.range),
        where=_normalize_where(window.where, schema=schema)
    )
def _normalize_where(where, schema=None):
    """
    CONVERT A `where` CLAUSE TO AN EXPRESSION

    :param where: the clause, handed to jx_expression() unchanged
    :param schema: passed through to jx_expression()
    :return: whatever jx_expression() returns for the clause
    """
    expression = jx_expression(where, schema=schema)
    return expression
def _normalize_group(edge, dim_index, limit, schema=None):
    """
    NORMALIZE ONE GROUPBY CLAUSE

    :param edge: Not normalized groupby (text, or a data object)
    :param dim_index: Dimensions are ordered; this is this groupby's index into that order
    :param limit: used as the domain limit for a plain text groupby
    :param schema: for context
    :return: a normalized groupby (a list of one, or more when "prefix.*" is
             expanded with a schema)
    """
    if not is_text(edge):
        edge = to_data(edge)
        if edge.domain and edge.domain.type != "default":
            Log.error("groupby does not accept complicated domains")

        if not (edge.name or is_text(edge.value)):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        return list_to_data([{
            "name": coalesce(edge.name, edge.value),
            "value": jx_expression(edge.value, schema=schema),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {"type": "default"}
        }])

    if not edge.endswith(".*"):
        # PLAIN TEXT: THE TEXT IS BOTH THE NAME AND THE EXPRESSION
        return list_to_data([{
            "name": edge,
            "value": jx_expression(edge, schema=schema),
            "allowNulls": True,
            "dim": dim_index,
            "domain": Domain(type="default", limit=limit)
        }])

    prefix = edge[:-2]
    if not schema:
        # WITHOUT A SCHEMA THE LEAVES ARE LEFT TO A SINGLE LeavesOp
        return list_to_data([{
            "name": untype_path(prefix),
            "put": {"name": literal_field(untype_path(prefix))},
            "value": LeavesOp(Variable(prefix)),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {"type": "default"}
        }])

    # BECAUSE THIS IS A GROUPBY, EARLY SPLIT INTO LEAVES WORKS JUST FINE
    groups = []
    for c in schema.leaves(prefix):
        path = untype_path(c.name)
        groups.append({
            "name": concat_field(
                prefix, literal_field(relative_field(path, prefix))
            ),
            "put": {"name": literal_field(path)},
            "value": jx_expression(c.es_column, schema=schema),
            "allowNulls": True,
            "domain": {"type": "default"}
        })
    return list_to_data(groups)
def _normalize_edge(edge, dim_index, limit, schema=None):
    """
    NORMALIZE ONE EDGE CLAUSE

    :param edge: Not normalized edge (text, or a data object)
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param limit: passed to _normalize_domain() for text edges with a schema
    :param schema: for context
    :return: a normalized edge, as a list holding one Data
    """
    if not edge:
        Log.error("Edge has no value, or expression is empty")
    elif is_text(edge):
        if not schema:
            return [
                Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=DefaultDomain()
                )
            ]

        # ZERO, ONE, OR MANY MATCHES IN THE SCHEMA DECIDE THE DOMAIN
        leaves = unwraplist(list(schema.leaves(edge)))
        if not leaves or is_container(leaves):
            return [
                Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=_normalize_domain(None, limit)
                )
            ]
        if isinstance(leaves, Column):
            return [
                Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=_normalize_domain(
                        domain=leaves, limit=limit, schema=schema
                    )
                )
            ]
        if is_list(leaves.fields) and len(leaves.fields) == 1:
            # SINGLE-FIELD MATCH: ITS ONLY FIELD IS THE VALUE
            return [
                Data(
                    name=leaves.name,
                    value=jx_expression(leaves.fields[0], schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )
            ]
        return [
            Data(
                name=leaves.name,
                allowNulls=True,
                dim=dim_index,
                domain=leaves.getDomain()
            )
        ]
    else:
        edge = to_data(edge)
        if not (edge.name or is_text(edge.value)):
            Log.error(
                "You must name compound and complex edges: {{edge}}", edge=edge
            )

        allow_nulls = bool(coalesce(edge.allowNulls, True))

        if is_container(edge.value) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)
            return [
                Data(
                    name=edge.name,
                    value=jx_expression(edge.value, schema=schema),
                    allowNulls=allow_nulls,
                    dim=dim_index,
                    domain=domain
                )
            ]

        domain = _normalize_domain(edge.domain, schema=schema)
        return [
            Data(
                name=coalesce(edge.name, edge.value),
                value=jx_expression(edge.value, schema=schema),
                range=_normalize_range(edge.range),
                allowNulls=allow_nulls,
                dim=dim_index,
                domain=domain
            )
        ]
def _normalize_select(select, frum, schema=None):
    """
    EXPAND ONE SELECT CLAUSE INTO A LIST OF NORMALIZED SELECT COLUMNS

    :param select: ONE SELECT COLUMN (text, or a data object)
    :param frum: TABLE TO get_columns(); when it has its own
                 `_normalize_select`, the work is delegated to it
    :param schema: SCHEMA TO LOOKUP NAMES FOR DEFINITIONS
    :return: AN ARRAY OF SELECT COLUMNS
    """
    if is_text(select):
        # FOR TEXT, `canonical` AND `select` ARE THE SAME OBJECT
        canonical = select = Data(value=select)
    else:
        select = to_data(select)
        canonical = select.copy()

    canonical.aggregate = coalesce(
        canonical_aggregates[select.aggregate].name, select.aggregate, "none"
    )
    canonical.default = coalesce(
        select.default, canonical_aggregates[canonical.aggregate].default
    )

    if hasattr(unwrap(frum), "_normalize_select"):
        return frum._normalize_select(canonical)

    output = []

    if len(select) and not select.value:
        Log.error(BAD_SELECT, select=select)
    elif not select.value or select.value == ".":
        output.extend([
            set_default(
                {
                    "name": c.name,
                    "value": jx_expression(c.name, schema=schema)
                },
                canonical
            )
            for c in schema.leaves('.')
            # TOP LEVEL COLUMNS ONLY
            if len(c.nested_path) == 1
        ])
    elif is_text(select.value):
        if select.value.endswith(".*"):
            canonical.name = coalesce(select.name, ".")
            # STRIP THE TRAILING ".*" FROM THE TEXT IN `select.value`;
            # SLICING `select` ITSELF (A DATA OBJECT) DOES NOT GIVE THE NAME
            value = jx_expression(select.value[:-2], schema=schema)
            if not is_op(value, Variable):
                Log.error("`*` over general expression not supported yet")
                # NOTE(review): UNREACHABLE IF Log.error ALWAYS RAISES - CONFIRM;
                # LEFT AS WRITTEN
                output.append([
                    set_default(
                        {
                            "value": LeavesOp(value, prefix=select.prefix),
                            "format": "dict"  # MARKUP FOR DECODING
                        },
                        canonical
                    )
                    for c in frum.get_columns()
                    if c.jx_type not in INTERNAL
                ])
            else:
                Log.error("do not know what to do")
        else:
            # PLAIN NAME/EXPRESSION: THE TEXT ALSO SERVES AS DEFAULT NAME
            canonical.name = coalesce(
                select.name, select.value, select.aggregate
            )
            canonical.value = jx_expression(select.value, schema=schema)
            output.append(canonical)

    output = to_data(output)
    if any(n == None for n in output.name):
        Log.error("expecting select to have a name: {{select}}", select=select)
    return output
from mo_dots import Data, FlatList, Null, coalesce, concat_field, is_container, is_data, is_list, listwrap, \ literal_field, relative_field, set_default, unwrap, unwraplist, is_many, dict_to_data, to_data, list_to_data from mo_future import is_text, text from mo_imports import expect from mo_json import INTERNAL from mo_json.typed_encoder import untype_path from mo_logs import Log from mo_math import AND, UNION, is_number Column = expect("Column") BAD_SELECT = "Expecting `value` or `aggregate` in select clause not {{select}}" DEFAULT_LIMIT = 10 MAX_LIMIT = 10000 DEFAULT_SELECT = Data(name="count", value=jx_expression("."), aggregate="count", default=0) class QueryOp(Expression): __slots__ = [ "frum", "select", "edges", "groupby", "where", "window", "sort", "limit", "format", "chunk_size", "destination" ] def __init__(self, frum, select=None, edges=None, groupby=None,
def _normalize_where(where, schema=None):
    """
    CONVERT A `where` CLAUSE TO AN EXPRESSION

    :param where: one filter, many filters, or nothing
    :param schema: passed through to jx_expression()
    :return: the parsed expression; many filters are combined under "and",
             and a falsy clause is replaced by TRUE
    """
    if is_many(where):
        clause = {"and": where}
    elif where:
        clause = where
    else:
        # NO FILTER GIVEN
        clause = TRUE
    return jx_expression(clause, schema=schema)