def __init__(self, args):
    """
    Set up the instance and sanity-check the shape of ``args``.

    :param args: operand(s) for this expression.  Accepted shapes, checked
                 in this order: a sequence whose members are each an
                 expression or equal to None; a data mapping of
                 ``{<variable>: <literal>}``; a value equal to None; a
                 single expression.
    Anything else is reported through ``Log.error``.
    """
    self.simplified = False

    # Basic verification only: nothing is stored from ``args`` here.
    if is_sequence(args):
        # NOTE(review): equality (not identity) against None is kept as
        # written - presumably so null-like wrappers are skipped too; confirm.
        rejected = [term for term in args if term != None and not is_expression(term)]
        if rejected:
            Log.error("Expecting an expression, not {{bad}}", bad=rejected)
        return

    if is_data(args):
        well_formed = all(
            is_op(name, Variable) and is_literal(value)
            for name, value in args.items()
        )
        if not well_formed:
            Log.error("Expecting an {<variable>: <literal>}")
        return

    if args == None:
        return

    if not is_expression(args):
        Log.error("Expecting an expression")
def groupby(data, keys=None, contiguous=False):
    """
    Split ``data`` into groups that share the same key values.

    :param data: list of data to group
    :param keys: (list of) property path name
    :param contiguous: when falsy, ``data`` is sorted by ``keys`` first;
                       when truthy the given order is maintained, so a new
                       group starts whenever the selector changes
    :return: list of (keys, values) pairs, where keys is in leaf form (for
             use with the {"eq": terms} operator) and values is a generator
             of all values that match keys.  ``Null`` when ``data`` is empty.
    """
    # Containers do their own grouping
    if isinstance(data, Container):
        return data.groupby(keys)

    try:
        if not data:
            return Null

        keys = listwrap(keys)
        if not contiguous:
            from jx_python import jx

            data = jx.sort(data, keys)

        # No keys at all, or the single key ".", is delegated to _groupby_value
        key_count = len(keys)
        if key_count == 0 or (key_count == 1 and keys[0] == '.'):
            return _groupby_value(data)

        for key in keys:
            if is_expression(key):
                raise Log.error("can not handle expressions")

        # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__
        tuple_expr = jx_expression({"tuple": keys})
        accessor = jx_expression_to_function(tuple_expr)
        return _groupby_keys(data, keys, accessor)
    except Exception as cause:
        # Every failure above (including the one raised for expression keys)
        # is re-reported with the original as its cause
        Log.error("Problem grouping", cause=cause)
def _normalize_sort(sort=None):
    """
    Convert sort parameters to a normal form so they are easier to use.

    :param sort: one sort clause or a list of them.  Each clause may be text,
                 an expression, an integer (used as ``{"offset": n}``), a
                 mapping of ``{<value>: <direction>}`` pairs, or an object
                 with ``value`` (or ``field``) and ``sort`` properties.
    :return: ``FlatList`` of ``{"value": <expression>, "sort": <direction>}``
             entries; ``FlatList.EMPTY`` when ``sort`` equals None
    """
    if sort == None:
        return FlatList.EMPTY

    normalized = FlatList()
    for clause in listwrap(sort):
        if is_text(clause):
            normalized.append({"value": jx_expression(clause), "sort": 1})
            continue

        if is_expression(clause):
            normalized.append({"value": clause, "sort": 1})
            continue

        if mo_math.is_integer(clause):
            normalized.append({"value": jx_expression({"offset": clause}), "sort": 1})
            continue

        if not clause.sort and not clause.value:
            # Neither property present: only valid when every value of the
            # clause is a known direction, i.e. {<value>: <direction>, ...}
            if all(d in sort_direction for d in clause.values()):
                for term, direction in clause.items():
                    normalized.append({
                        "value": jx_expression(term),
                        "sort": sort_direction[direction]
                    })
            else:
                Log.error("`sort` clause must have a `value` property")
            continue

        normalized.append({
            "value": jx_expression(coalesce(clause.value, clause.field)),
            "sort": sort_direction[clause.sort]
        })
    return normalized
def where(self, where):
    """
    Build a new container holding only the rows accepted by ``where``.

    :param where: a data (JSON) expression or an expression object, which is
                  compiled with ``jx_expression_to_function``; anything else
                  is passed to ``filter`` unchanged as the predicate
    :return: ``ListContainer`` named ``"from " + self.name`` over the
             filtered ``self.data``, using ``self.schema``
    """
    needs_compiling = is_data(where) or is_expression(where)
    predicate = jx_expression_to_function(where) if needs_compiling else where

    label = "from " + self.name
    return ListContainer(label, filter(predicate, self.data), self.schema)
def where(self, where):
    """
    Return a filtered copy of this container.

    :param where: predicate for the rows to keep.  Data (JSON) expressions
                  and expression objects are first turned into a function by
                  ``jx_expression_to_function``; any other value is used as
                  the ``filter`` predicate directly.
    :return: a ``ListContainer`` called ``"from " + self.name`` wrapping
             ``filter(predicate, self.data)`` with this container's schema
    """
    if is_data(where) or is_expression(where):
        # Both declarative forms go through the same compiler
        row_filter = jx_expression_to_function(where)
    else:
        row_filter = where

    derived_name = "from " + self.name
    return ListContainer(derived_name, filter(row_filter, self.data), self.schema)
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    Group ``data`` either by key values or into size-bounded chunks.

    :param data: rows to group; a ``Container`` is asked to group itself
    :param keys: (list of) property path name to group by
    :param size: when given, used as ``max_size``
    :param min_size: passed to ``groupby_min_max_size``
    :param max_size: passed to ``groupby_min_max_size``
    :param contiguous: maintain the order of the data, starting a new group
                       when the selector changes; when falsy the data is
                       sorted by ``keys`` first
    :return: list of (keys, values) pairs, where keys is in leaf form (for
             use with the {"eq": terms} operator) and values is a slice of
             all rows that match keys.  ``Null`` when there is no data.
    """
    if isinstance(data, Container):
        return data.groupby(keys)

    # Any size argument switches to chunking by size instead of by key
    size_requested = size != None or min_size != None or max_size != None
    if size_requested:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    try:
        keys = listwrap(keys)
        if not contiguous:
            from jx_python import jx

            data = jx.sort(data, keys)

        if not data:
            return Null

        if any(is_expression(k) for k in keys):
            Log.error("can not handle expressions")
        else:
            # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__
            accessor = jx_expression_to_function(jx_expression({"tuple": keys}))

        def _output():
            # Walk the rows once, emitting a group each time the key changes
            first_row = 0
            last_key = accessor(data[0])
            for row_num, row in enumerate(data):
                this_key = accessor(row)
                if this_key != last_key:
                    yield Data(dict(zip(keys, last_key))), data[first_row:row_num]
                    first_row = row_num
                    last_key = this_key
            # Whatever remains after the last change is the final group
            yield Data(dict(zip(keys, last_key))), data[first_row:]

        # The generator is lazy: only errors raised up to this point are
        # re-reported by the handler below
        return _output()
    except Exception as cause:
        Log.error("Problem grouping", cause=cause)
def jx_expression_to_function(expr):
    """
    Return a function that requires parameters (row, rownum=None, rows=None).

    :param expr: an expression object, something that already looks like a
                 function, or a JSON expression to be parsed first
    :return: the compiled function (or ``expr`` / ``expr.script`` unchanged,
             see the branches below)
    """
    if is_expression(expr):
        if is_op(expr, ScriptOp) and not is_text(expr.script):
            # A script op whose script is not text is returned as it stands
            return expr.script
        return compile_expression(Python[expr].to_python())

    looks_like_function = (
        expr != None
        and not is_data(expr)
        and not is_list(expr)
        and hasattr(expr, "__call__")
    )
    if looks_like_function:
        return expr

    # Anything else is parsed as a JSON expression, then compiled
    python_expr = Python[jx_expression(expr)]
    return compile_expression(python_expr.to_python())
def _jx_expression(expr, lang):
    """
    Wrap a JSON expression with its object representation.

    :param expr: an expression object, or its JSON form: text (a variable
                 name), a boolean/number/None-like literal, a ``Date``, a
                 sequence of expressions, or an ``{operator: term}`` mapping
    :param lang: target language; it is indexed with expressions, and its
                 ``ops`` is indexed by operator id
    :return: the expression object; ``TRUE`` when ``expr is None`` and
             ``NULL`` when the mapping has no items
    """
    if is_expression(expr):
        # CONVERT TO lang
        converted = lang[expr]
        if converted:
            # return converted(expr.args)  # THIS CAN BE DONE, BUT IT NEEDS MORE CODING, AND I WOULD EXPECT IT TO BE SLOW
            return expr
        # CAN NOT BE FOUND, TRY SOME PARTIAL EVAL
        return language[expr.get_id()].partial_eval()

    if expr is None:
        return TRUE
    if is_text(expr):
        return Variable(expr)
    if expr in (True, False, None) or expr == None or is_number(expr):
        return Literal(expr)
    if expr.__class__ is Date:
        return Literal(expr.unix)
    if is_sequence(expr):
        members = [_jx_expression(member, lang) for member in expr]
        return lang[TupleOp(members)]

    # expr = to_data(expr)
    try:
        items = items_(expr)

        for op, _ in items:
            # ONE OF THESE IS THE OPERATOR
            full_op = operators.get(op)
            if not full_op:
                continue

            class_ = lang.ops[full_op.get_id()]
            if class_:
                return class_.define(expr)

            # THIS LANGUAGE DOES NOT SUPPORT THIS OPERATOR, GOTO BASE LANGUAGE AND GET THE MACRO
            # NOTE(review): ``op`` is the key taken from items_(expr), while the
            # lookup above uses ``full_op.get_id()`` - confirm ``op.get_id()``
            # is what is intended here.
            class_ = language[op.get_id()]
            output = class_.define(expr).partial_eval()
            return _jx_expression(output, lang)

        # Reached only when no key was a known operator
        if not items:
            return NULL
        raise Log.error("{{instruction|json}} is not known", instruction=expr)
    except Exception as cause:
        Log.error("programmer error expr = {{value|quote}}", value=expr, cause=cause)
def jx_expression_to_function(expr):
    """
    Return a function that requires parameters (row, rownum=None, rows=None).

    Three inputs are recognised, in this order: an expression object, an
    existing callable, and (as the fallback) a JSON expression that is
    parsed with ``jx_expression`` before being compiled.
    """
    if is_expression(expr):
        script_is_ready = is_op(expr, ScriptOp) and not is_text(expr.script)
        if script_is_ready:
            return expr.script
        source = Python[expr].to_python()
        return compile_expression(source)

    # Data and lists are excluded explicitly before the callable check
    if expr != None and not is_data(expr) and not is_list(expr):
        if hasattr(expr, "__call__"):
            return expr

    source = Python[jx_expression(expr)].to_python()
    return compile_expression(source)
def edges_get_all_vars(e):
    """
    Collect every variable referenced by edge ``e``.

    Sources, in the order they are read: ``e.value`` (text is added as-is,
    an expression contributes its ``vars()``), ``e.domain.key``,
    ``e.domain.where``, ``e.range.min`` / ``e.range.max``, and the ``where``
    of each entry in ``e.domain.partitions``.

    :param e: the edge to inspect
    :return: set of the collected variables
    """
    found = set()

    value = e.value
    if is_text(value):
        found.add(value)
    if is_expression(value):
        found |= value.vars()

    domain = e.domain
    if domain.key:
        found.add(domain.key)
    if domain.where:
        found |= domain.where.vars()

    if e.range:
        found |= e.range.min.vars()
        found |= e.range.max.vars()

    if e.domain.partitions:
        for part in e.domain.partitions:
            if p_where := None:  # placeholder never taken; see loop body below
                pass
            if part.where:
                found |= part.where.vars()
    return found
def edges_get_all_vars(e):
    """
    Return the set of variables that edge ``e`` refers to.

    :param e: edge with ``value``, ``domain`` (``key``, ``where``,
              ``partitions``) and ``range`` (``min``, ``max``) properties
    :return: set built from the text value or the value expression's
             ``vars()``, the domain key, and the ``vars()`` of the domain
             filter, the range bounds and each partition filter
    """
    names = set()

    # The edge value: either a plain name or an expression
    if is_text(e.value):
        names.add(e.value)
    if is_expression(e.value):
        names |= e.value.vars()

    # The domain's key and filter
    if e.domain.key:
        names.add(e.domain.key)
    if e.domain.where:
        names |= e.domain.where.vars()

    # Both range bounds are read whenever a range is present
    if e.range:
        names |= e.range.min.vars()
        names |= e.range.max.vars()

    # Per-partition filters
    if e.domain.partitions:
        for partition in e.domain.partitions:
            if not partition.where:
                continue
            names |= partition.where.vars()

    return names
def jx_expression_to_function(expr):
    """
    Return a function that requires parameters (row, rownum=None, rows=None).

    :param expr: None-like value, expression object, existing callable, or a
                 JSON expression
    :return: ``Null`` when ``expr`` equals None; ``expr.script`` for a script
             op whose script is not text; ``expr`` itself when it already
             appears to be a function; otherwise a ``JXExpression`` wrapping
             the compiled function
    """
    if expr == None:
        return Null

    if is_expression(expr):
        # ALREADY AN EXPRESSION OBJECT
        if is_op(expr, ScriptOp) and not is_text(expr.script):
            return expr.script
        compiled = compile_expression(expr.to_python())
        return JXExpression(compiled, expr.__data__())

    already_callable = (
        not is_data(expr)
        and not is_list(expr)
        and hasattr(expr, "__call__")
    )
    if already_callable:
        # THIS APPEARS TO BE A FUNCTION ALREADY
        return expr

    parsed = jx_expression(expr)
    compiled = compile_expression(parsed.to_python())
    # NOTE(review): this passes the expression object, whereas the branch
    # above passes ``expr.__data__()`` - confirm the difference is intended.
    return JXExpression(compiled, parsed)
def _normalize_sort(sort=None):
    """
    Convert sort parameters to a normal form so they are easier to use.

    Every accepted clause ends up as ``{"value": <expression>, "sort": <n>}``:
    text, expressions and integers (taken as ``{"offset": n}``) get
    direction ``1``; a mapping whose values are all known directions yields
    one entry per item; otherwise ``value`` (falling back to ``field``) and
    ``sort`` are read from the clause.

    :param sort: a single clause or a list of clauses
    :return: ``FlatList`` of normalized entries, or ``FlatList.EMPTY`` when
             ``sort`` equals None
    """
    if sort == None:
        return FlatList.EMPTY

    result = FlatList()
    for item in listwrap(sort):
        if is_text(item):
            entry = {"value": jx_expression(item), "sort": 1}
            result.append(entry)
        elif is_expression(item):
            entry = {"value": item, "sort": 1}
            result.append(entry)
        elif mo_math.is_integer(item):
            entry = {"value": jx_expression({"offset": item}), "sort": 1}
            result.append(entry)
        elif not item.sort and not item.value:
            # Without `sort`/`value` the clause must be {<value>: <direction>}
            if not all(d in sort_direction for d in item.values()):
                Log.error("`sort` clause must have a `value` property")
            else:
                for selector, direction in item.items():
                    entry = {"value": jx_expression(selector), "sort": sort_direction[direction]}
                    result.append(entry)
        else:
            selector = coalesce(item.value, item.field)
            entry = {"value": jx_expression(selector), "sort": sort_direction[item.sort]}
            result.append(entry)
    return result
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    Group rows by key values, or chunk them by size.

    :param data: rows to group (a ``Container`` delegates to its own
                 ``groupby``)
    :param keys: (list of) property path name
    :param size: shorthand for ``max_size``
    :param min_size: forwarded to ``groupby_min_max_size``
    :param max_size: forwarded to ``groupby_min_max_size``
    :param contiguous: maintain the order of the data, starting a new group
                       when the selector changes (no sorting is done)
    :return: list of (keys, values) pairs, where keys is in leaf form (for
             use with the {"eq": terms} operator) and values is a slice of
             all rows that match keys; ``Null`` for empty data
    """
    if isinstance(data, Container):
        return data.groupby(keys)

    if size != None or min_size != None or max_size != None:
        # Size-based chunking ignores ``keys`` entirely
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    try:
        keys = listwrap(keys)
        if not contiguous:
            from jx_python import jx

            data = jx.sort(data, keys)

        if not data:
            return Null

        if any(is_expression(k) for k in keys):
            Log.error("can not handle expressions")
        else:
            tuple_expr = jx_expression({"tuple": keys})
            # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__
            accessor = jx_expression_to_function(tuple_expr)

        def _as_group(values):
            # Pair each key name with its value for this group
            return Data(dict(zip(keys, values)))

        def _output():
            begin = 0
            previous = accessor(data[0])
            for position, row in enumerate(data):
                current = accessor(row)
                if current == previous:
                    pass
                if current != previous:
                    yield _as_group(previous), data[begin:position]
                    begin = position
                    previous = current
            yield _as_group(previous), data[begin:]

        return _output()
    except Exception as cause:
        Log.error("Problem grouping", cause=cause)