def find_container(frum, after):
    """
    Resolve a "from" clause into a queryable container.

    :param frum: table name (text), data description, or container
    :param after: passed to namespace.get_columns to force a metadata reload
    :return: a container for querying (or frum itself when nothing else applies)
    """
    global namespace
    if not namespace:
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )
        namespace = ElasticsearchMetadata(container.config.default.settings)
    if not frum:
        Log.error("expecting json query expression with from clause")

    # FORCE A RELOAD
    namespace.get_columns(frum, after=after)

    if is_text(frum):
        if frum in container_cache:
            return container_cache[frum]

        path = split_field(frum)
        if path[0] == "meta":
            if path[1] == "columns":
                return namespace.meta.columns.denormalized()
            elif path[1] == "tables":
                return namespace.meta.tables
            else:
                fact_table_name = join_field(path[:2])
        else:
            fact_table_name = path[0]

        type_ = container.config.default.type
        settings = set_default(
            {
                "alias": fact_table_name,
                "name": frum,
                "exists": True,
            },
            container.config.default.settings,
        )
        settings.type = None
        output = container.type2container[type_](settings)
        container_cache[frum] = output
        return output
    elif is_data(frum) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        # NOTE(review): the original re-tested `not frum.type` here, but the
        # elif guard above already guarantees frum.type is truthy, so that
        # error branch was unreachable and has been removed
        return container.type2container[frum.type](frum.settings)
    elif is_data(frum) and (frum["from"] or is_container(frum["from"])):
        from jx_base.query import QueryOp

        return QueryOp.wrap(frum)
    elif is_container(frum):
        return ListContainer("test_list", frum)
    else:
        return frum
def _map_term_using_schema(master, path, term, schema_edges):
    """
    IF THE WHERE CLAUSE REFERS TO FIELDS IN THE SCHEMA, THEN EXPAND THEM

    :param master: top-level where clause (carried through recursive calls)
    :param path: property path walked so far
    :param term: {field: value} pairs to translate
    :param schema_edges: map of field name -> Dimension (or nested map)
    :return: ES-style {"and": [...]} filter
    """
    output = FlatList()
    for k, v in term.items():
        dimension = schema_edges[k]
        if isinstance(dimension, Dimension):
            domain = dimension.getDomain()
            if dimension.fields:
                if is_data(dimension.fields):
                    # EXPECTING A TUPLE
                    for local_field, es_field in dimension.fields.items():
                        local_value = v[local_field]
                        if local_value == None:
                            output.append({"missing": {"field": es_field}})
                        else:
                            output.append({"term": {es_field: local_value}})
                    continue

                if len(dimension.fields) == 1 and is_variable_name(dimension.fields[0]):
                    # SIMPLE SINGLE-VALUED FIELD
                    if domain.getPartByKey(v) is domain.NULL:
                        output.append({"missing": {"field": dimension.fields[0]}})
                    else:
                        output.append({"term": {dimension.fields[0]: v}})
                    continue

                if AND(is_variable_name(f) for f in dimension.fields):
                    # EXPECTING A TUPLE
                    if not isinstance(v, tuple):
                        Log.error("expecting {{name}}={{value}} to be a tuple", name=k, value=v)
                    for i, f in enumerate(dimension.fields):
                        vv = v[i]
                        if vv == None:
                            output.append({"missing": {"field": f}})
                        else:
                            output.append({"term": {f: vv}})
                    continue
                # NOTE(review): the original repeated the single-field check
                # verbatim at this point; it was unreachable (the first copy
                # always `continue`s) and has been removed. Also fixed the
                # "expecing" typo in the error message above.

            if domain.partitions:
                part = domain.getPartByKey(v)
                if part is domain.NULL or not part.esfilter:
                    Log.error("not expected to get NULL")
                output.append(part.esfilter)
                continue
            else:
                Log.error("not expected")
        elif is_data(v):
            # nested structure: recurse into the sub-schema
            sub = _map_term_using_schema(master, path + [k], v, schema_edges[k])
            output.append(sub)
            continue

        output.append({"term": {k: v}})
    return {"and": output}
def w_bound_method(*given_args, **given_kwargs):
    """
    Bound-method wrapper: normalize the three ways callers may supply
    arguments (explicit kwargs dict as second positional, a packed kwargs
    keyword, or plain args) into one (args, kwargs) pair for func.
    """
    if (len(given_args) == 2 and len(given_kwargs) == 0 and is_data(given_args[1])):
        # ASSUME SECOND UNNAMED PARAM IS kwargs
        a, k = params_pack(
            params,
            defaults,
            given_args[1],
            {params[0]: given_args[0]},
            given_kwargs,
        )
    elif kwargs in given_kwargs and is_data(given_kwargs[kwargs]):
        # PUT args INTO given_kwargs
        a, k = params_pack(
            params,
            defaults,
            given_kwargs[kwargs],
            dict(zip(params, given_args)),
            given_kwargs,
        )
    else:
        a, k = params_pack(params, defaults, dict(zip(params, given_args)), given_kwargs)
    try:
        return func(*a, **k)
    except TypeError as e:
        # NOTE(review): the original built a stack trace here (via
        # _parse_traceback/get_traceback) and then discarded it before
        # calling raise_error; the dead computation has been removed
        raise_error(e, a, k)
def typed_encode(self, record):
    """
    Convert one record into typed-JSON for indexing.

    :param record: expecting "id" and "value" (or "json") properties
    :return: (given_id, version, json) tuple
    """
    try:
        value = record.get('value')
        if "json" in record:
            # an explicit json string takes precedence over the value property
            value = json2value(record["json"])
        elif is_data(value) or value != None:
            pass
        else:
            from mo_logs import Log

            # NOTE(review): Log.error presumably raises itself (it is used as
            # a terminal statement elsewhere); the `raise` is belt-and-braces
            raise Log.error(
                "Expecting every record given to have \"value\" or \"json\" property"
            )

        _buffer = UnicodeBuilder(1024)
        net_new_properties = []
        path = []

        if is_data(value):
            given_id = self.get_id(value)
            if given_id != None and not isinstance(given_id, text):
                given_id = value2json(given_id)
            # the id is carried separately; blank it out of the body
            value['_id'] = None
            version = self.get_version(value)
        else:
            given_id = None
            version = None

        if given_id:
            record_id = record.get('id')
            if record_id and record_id != given_id:
                # record carries an id that disagrees with the one embedded in value
                from mo_logs import Log

                raise Log.error(
                    "expecting {{property}} of record ({{record_id|quote}}) to match one given ({{given|quote}})",
                    property=self.id_info,
                    record_id=record_id,
                    given=given_id
                )
        else:
            record_id = record.get('id')
            if record_id:
                given_id = record_id
            else:
                given_id = random_id()

        typed_encode(value, self.schema, path, net_new_properties, _buffer)
        json = _buffer.build()

        return given_id, version, json
    except Exception as e:
        # THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS
        from mo_logs import Log

        Log.error("Serialization of JSON problems", cause=e)
def scrub_literal(candidate):
    """
    IF ALL MEMBERS OF A LIST ARE LITERALS, THEN MAKE THE LIST LITERAL
    """
    if all(isinstance(member, number_types) for member in candidate):
        # pure numbers are already literal enough
        return candidate

    def literal_like(member):
        return is_data(member) and "literal" in member.keys()

    if all(isinstance(m, number_types) or literal_like(m) for m in candidate):
        extracted = [m["literal"] if is_data(m) else m for m in candidate]
        return {"literal": extracted}

    return candidate
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)

    :param where: data describing term/terms/or/and/not clauses
    :param schema: provides .edges for dimension lookup
    :return: ES-style filter data (or `where` unchanged when not data)
    """
    if is_data(where):
        if where.term:
            # MAP TERM
            try:
                output = _map_term_using_schema(master, [], where.term, schema.edges)
                return output
            except Exception as e:
                Log.error("programmer problem?", e)
        elif where.terms:
            # MAP TERM
            output = FlatList()
            for k, v in where.terms.items():
                if not is_container(v):
                    Log.error("terms filter expects list of values")
                edge = schema.edges[k]
                if not edge:
                    # unknown field: pass through as a plain terms filter
                    output.append({"terms": {k: v}})
                else:
                    if is_text(edge):
                        # DIRECT FIELD REFERENCE
                        # NOTE(review): this returns immediately, discarding
                        # any clauses already accumulated — confirm intended
                        return {"terms": {edge: v}}
                    try:
                        domain = edge.getDomain()
                    except Exception as e:
                        Log.error("programmer error", e)
                    fields = domain.dimension.fields
                    if is_data(fields):
                        # multi-field dimension: each value must match all fields
                        or_agg = []
                        for vv in v:
                            and_agg = []
                            for local_field, es_field in fields.items():
                                vvv = vv[local_field]
                                if vvv != None:
                                    and_agg.append({"term": {es_field: vvv}})
                            or_agg.append({"and": and_agg})
                        output.append({"or": or_agg})
                    elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]):
                        output.append({"terms": {fields[0]: v}})
                    elif domain.partitions:
                        output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
            return {"and": output}
        elif where["or"]:
            return {"or": [unwrap(_where_terms(master, vv, schema)) for vv in where["or"]]}
        elif where["and"]:
            return {"and": [unwrap(_where_terms(master, vv, schema)) for vv in where["and"]]}
        elif where["not"]:
            return {"not": unwrap(_where_terms(master, where["not"], schema))}
    return where
def w_bound_method(*args, **kwargs):
    """
    Bound-method wrapper: merge positional args, an optional packed-kwargs
    dict, and defaults, then delegate to func.
    """
    if len(args) == 2 and not kwargs and is_data(args[1]):
        # ASSUME SECOND UNNAMED PARAM IS kwargs
        packed = params_pack(params, defaults, args[1], {params[0]: args[0]}, kwargs)
    elif KWARGS in kwargs and is_data(kwargs[KWARGS]):
        # PUT args INTO kwargs
        packed = params_pack(
            params, defaults, kwargs[KWARGS], dict_zip(params, args), kwargs
        )
    else:
        packed = params_pack(params, defaults, dict_zip(params, args), kwargs)

    try:
        return func(**packed)
    except TypeError as cause:
        raise_error(cause, packed)
def w_bound_method(*args, **kwargs):
    """
    Bound-method wrapper: pack parameters (skipping `self`, which is args[0])
    and call func with the instance plus the packed keyword set.
    """
    if len(args) == 2 and not kwargs and is_data(args[1]):
        # ASSUME SECOND UNNAMED PARAM IS kwargs
        packed = params_pack(params, args[1], defaults)
    elif "kwargs" in kwargs and is_data(kwargs["kwargs"]):
        # PUT args INTO kwargs
        packed = params_pack(
            params,
            kwargs,
            dict_zip(params[1:], args[1:]),
            kwargs["kwargs"],
            defaults,
        )
    else:
        packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), defaults)

    try:
        return func(args[0], **packed)
    except TypeError as cause:
        raise_error(cause, packed)
def value2key(keys, val):
    """
    Project val onto keys, always returning a tuple suitable for hashing.
    """
    if len(keys) != 1:
        # multi-key: build a tuple of the named values
        if is_data(val):
            return tuple(val[k] for k in keys)
        if is_sequence(val):
            return tuple(val)
        Log.error("do not know what to do here")
    else:
        # single key: still wrap the one value in a tuple
        if is_data(val):
            return (get_attr(val, keys[0]),)
        if is_sequence(val):
            return (val[0],)
        return (val,)
def w_kwargs(*args, **kwargs):
    """
    Wrapper: accept a single kwargs dict, a packed `kwargs` keyword, or
    plain arguments; normalize all three into one packed dict for func.
    """
    if len(args) == 1 and not kwargs and is_data(args[0]):
        # ASSUME SINGLE PARAMETER IS kwargs
        packed = params_pack(params, defaults, args[0])
    elif KWARGS in kwargs and is_data(kwargs[KWARGS]):
        # PUT args INTO kwargs
        packed = params_pack(
            params, defaults, kwargs[KWARGS], dict_zip(params, args), kwargs
        )
    else:
        # PULL kwargs OUT INTO PARAMS
        packed = params_pack(params, defaults, dict_zip(params, args), kwargs)

    try:
        return func(**packed)
    except TypeError as cause:
        raise_error(cause, packed)
def w_kwargs(*args, **kwargs):
    """
    Wrapper: normalize the caller's argument style (single dict, packed
    `kwargs` keyword, or plain args) into one packed dict, then call func.
    """
    if len(args) == 1 and not kwargs and is_data(args[0]):
        # ASSUME SINGLE PARAMETER IS kwargs
        packed = params_pack(params, args[0], defaults)
    elif "kwargs" in kwargs and is_data(kwargs["kwargs"]):
        # PUT args INTO kwargs
        packed = params_pack(
            params, kwargs, dict_zip(params, args), kwargs["kwargs"], defaults
        )
    else:
        # PULL kwargs OUT INTO PARAMS
        packed = params_pack(params, kwargs, dict_zip(params, args), defaults)

    try:
        return func(**packed)
    except TypeError as cause:
        raise_error(cause, packed)
def value2key(keys, val):
    """
    Project val onto keys: a bare value for a single key, a wrapped
    mapping for multiple keys.
    """
    if len(keys) == 1:
        only = keys[0]
        if is_data(val):
            return val[only]
        if is_sequence(val):
            return val[0]
        return val

    if is_data(val):
        return datawrap({k: val[k] for k in keys})
    if is_sequence(val):
        return datawrap(dict(zip(keys, val)))
    Log.error("do not know what to do here")
def convert_list(operator, operand):
    """
    Apply operator to operand: None passes through, data is applied
    directly, anything else is mapped element-wise into a list.
    """
    if operand == None:
        return None
    if is_data(operand):
        return operator(operand)
    return [operator(item) for item in operand]
def _update_meta(self):
    """
    Recompute count/cardinality/partitions/multi statistics for every meta
    column. No-op unless self.dirty is set; clears the flag when done.
    """
    if not self.dirty:
        return

    for column_list in self.data.get("meta.columns").values():
        for mc in column_list:
            total = 0
            distinct = set()
            opaque = 0
            widest = 1
            for column in self._all_columns():
                cell = column[mc.name]
                if cell == None:
                    continue
                total += 1
                if is_list(cell):
                    widest = max(widest, len(cell))
                    try:
                        distinct |= set(cell)
                    except Exception:
                        # members not hashable; count them as opaque objects
                        opaque += len(cell)
                elif is_data(cell):
                    opaque += 1
                else:
                    distinct.add(cell)
            mc.count = total
            mc.cardinality = len(distinct) + opaque
            mc.partitions = jx.sort(distinct)
            mc.multi = widest
            mc.last_updated = Date.now()

    self.dirty = False
def _deep_json_to_string(value, depth):
    """
    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE
    """
    if is_data(value):
        if depth == 0:
            return strings.limit(value2json(value), LOG_STRING_LENGTH)
        return {k: _deep_json_to_string(v, depth - 1) for k, v in value.items()}
    if is_sequence(value):
        # sequences are always stringified, regardless of remaining depth
        return strings.limit(value2json(value), LOG_STRING_LENGTH)
    if isinstance(value, number_types):
        return value
    if is_text(value):
        return strings.limit(value, LOG_STRING_LENGTH)
    if is_binary(value):
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    if isinstance(value, (date, datetime)):
        return datetime2unix(value)
    return strings.limit(value2json(value), LOG_STRING_LENGTH)
def _scrub(result):
    """
    Normalize parse output into plain json-like structures: decode bytes,
    collapse single-item lists, drop None members, and fold all-literal
    lists into a single {"literal": [...]} form.
    """
    if is_text(result):
        return result
    if is_binary(result):
        return result.decode('utf8')
    if isinstance(result, number_types):
        return result
    if not result:
        # empty/falsy (checked before the list branch, preserving order)
        return {}
    if isinstance(result, (list, ParseResults)):
        if not result:
            return None
        if len(result) == 1:
            return _scrub(result[0])
        cleaned = [s for r in result for s in [_scrub(r)] if s != None]
        # IF ALL MEMBERS OF A LIST ARE LITERALS, THEN MAKE THE LIST LITERAL
        if all(is_data(r) and "literal" in r.keys() for r in cleaned):
            return {"literal": [r['literal'] for r in cleaned]}
        return cleaned
    if not list(result.items()):
        return {}
    return {k: s for k, v in result.items() for s in [_scrub(v)] if s != None}
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value

    :param expr: primitive, variable name, Date, QueryOp, data, or list
    :return: expression with known dimension names replaced
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_variable_name(expr):
        # known dimension name expands to its definition; otherwise unchanged
        return coalesce(self.dimensions[expr], expr)
    elif is_text(expr):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif is_op(expr, QueryOp):
        return self._convert_query(expr)
    elif is_data(expr):
        if expr["from"]:
            return self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return wrap({name: self.convert(value) for name, value in expr.leaves()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            return converter_map.get(k, self._convert_bop)(self, k, v)
    elif is_many(expr):
        return wrap([self.convert(value) for value in expr])
    else:
        return expr
def convert_list(operator, operand):
    """
    Apply operator to operand: None passes through, data is applied
    directly, anything else is mapped element-wise.

    :return: None, operator(operand), or a list of mapped values
    """
    if operand == None:
        return None
    elif is_data(operand):
        return operator(operand)
    else:
        # materialize: under Python 3 a bare map() is a lazy iterator,
        # inconsistent with the sibling convert_list that returns a list
        return list(map(operator, operand))
def wrap(cls, e, stack_depth=0):
    """
    ENSURE THE STACKTRACE AND CAUSAL CHAIN IS CAPTURED, PLUS ADD FEATURES OF Except

    :param e: AN EXCEPTION OF ANY TYPE
    :param stack_depth: HOW MANY CALLS TO TAKE OFF THE TOP OF THE STACK TRACE
    :return: A Except OBJECT OF THE SAME
    """
    if e == None:
        return Null
    elif isinstance(e, (list, Except)):
        # already wrapped (or a list of wrapped); pass through untouched
        return e
    elif is_data(e):
        # data description of an exception; wrap causes recursively
        e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
        return Except(**e)
    else:
        # a native exception: prefer its attached traceback when present
        tb = getattr(e, '__traceback__', None)
        if tb is not None:
            trace = _parse_traceback(tb)
        else:
            trace = get_traceback(0)

        cause = Except.wrap(getattr(e, '__cause__', None))
        if hasattr(e, "message") and e.message:
            output = Except(context=ERROR, template=text(e.message), trace=trace, cause=cause)
        else:
            output = Except(context=ERROR, template=text(e), trace=trace, cause=cause)

        trace = get_stacktrace(stack_depth + 2)  # +2 = to remove the caller, and it's call to this' Except.wrap()
        output.trace.extend(trace)
        return output
def typed_encode(value, flake):
    """
    RETURN (typed_value, flake_update, added_nested) TUPLES
    :param value: THE RECORD TO CONVERT TO STRICT TYPED FORM
    :param flake: LOOKUP SCHEMA, WILL BE UPDATED WITH CHANGES
    :return: (record, update, nested) TUPLE
    """
    _ = flake.columns  # ENSURE WE HAVE INTERNAL STRUCTURES FILLED
    output, update, nested = _typed_encode(value, flake.schema)
    if update:
        # REFRESH COLUMNS
        flake._columns = None
        _ = flake.columns

    worker = to_data(output)
    for path, field in flake._top_level_fields.items():
        # promote the value to its top-level field, then blank the original
        worker[field] = worker[path]
        worker[path] = None

        # DO NOT LEAVE ANY EMPTY OBJECT RESIDUE
        _path = split_field(path)
        for i, _ in jx.reverse(enumerate(_path)):
            sub_path = join_field(_path[:i])
            v = worker[sub_path]
            # remove now-empty parent objects, innermost first
            if is_data(v) and not worker[sub_path].keys():
                worker[sub_path] = None
            else:
                break

    return output, update, nested
def __init__(self, json, query_path, expected_vars=NO_VARS):
    """
    Prepare streaming-JSON parse state.

    :param json: byte stream, callable returning more bytes, or generator
    :param query_path: path to the records, or {"items": path} for item streaming
    :param expected_vars: names of the variables to extract per record
    """
    if hasattr(json, "read"):
        # ASSUME IT IS A STREAM
        temp = json

        def get_more():
            return temp.read(MIN_READ_SIZE)

        self.json = List_usingStream(get_more)
    elif hasattr(json, "__call__"):
        self.json = List_usingStream(json)
    elif isinstance(json, GeneratorType):
        self.json = List_usingStream(NEXT(json))
    else:
        Log.error(
            "Expecting json to be a stream, or a function that will return more bytes"
        )

    if is_data(query_path) and query_path.get("items"):
        self.path_list = split_field(query_path.get("items")) + [
            "$items"
        ]  # INSERT A MARKER SO THAT OBJECT IS STREAM DECODED
    else:
        self.path_list = split_field(query_path)

    self.expected_vars = expected_vars
    # one destination slot per expected variable
    self.destination = [None] * len(expected_vars)
    self.done = [self.path_list + [None]]
def tuple(data, field_name):
    """
    RETURN LIST OF TUPLES

    field_name may be a dotted path, a {"value": path} form, or a list of
    selects; data is any plain python iterable of records.
    """
    if isinstance(data, Cube):
        Log.error("not supported yet")
    if isinstance(data, FlatList):
        Log.error("not supported yet")

    if is_data(field_name) and "value" in field_name:
        # SIMPLIFY {"value":value} AS STRING
        field_name = field_name["value"]

    # SIMPLE PYTHON ITERABLE ASSUMED
    if is_text(field_name):
        path = split_field(field_name)
        if len(path) == 1:
            return [(record[field_name],) for record in data]
        collected = []
        flat_list._tuple1(data, path, 0, collected)
        return collected

    if is_list(field_name):
        selects = [_select_a_field(f) for f in field_name]
        collected = FlatList()
        _tuple((), unwrap(data), selects, 0, collected)
        return collected

    selects = [_select_a_field(field_name)]
    collected = FlatList()
    _tuple((), data, selects, 0, collected)
    return collected
def _expand(template, seq):
    """
    seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE
    """
    if is_text(template):
        return _simple_expand(template, seq)
    if is_data(template):
        # EXPAND LISTS OF ITEMS USING THIS FORM
        # {"from":from, "template":template, "separator":separator}
        template = wrap(template)
        assert template["from"], "Expecting template to have 'from' attribute"
        assert template.template, "Expecting template to have 'template' attribute"

        rows = seq[-1][template["from"]]
        rendered = [_expand(template.template, seq + (row,)) for row in rows]
        return coalesce(template.separator, "").join(rendered)
    if is_list(template):
        return "".join(_expand(t, seq) for t in template)

    if not _Log:
        _late_import()
    _Log.error("can not handle")
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value

    :param expr: primitive, variable name, Date, QueryOp, data, or list
    :return: expression with known dimension names replaced
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_variable_name(expr):
        # known dimension name expands to its definition; otherwise unchanged
        return coalesce(self.dimensions[expr], expr)
    elif is_text(expr):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif is_op(expr, QueryOp):
        return self._convert_query(expr)
    elif is_data(expr):
        if expr["from"]:
            return self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return dict_to_data({name: self.convert(value) for name, value in expr.leaves()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            return converter_map.get(k, self._convert_bop)(self, k, v)
    elif is_many(expr):
        return list_to_data([self.convert(value) for value in expr])
    else:
        return expr
def value2url_param(value):
    """
    :param value: value to encode (data, text, bytes, iterable, or scalar)
    :return: ascii URL fragment
    """
    if value == None:
        Log.error("Can not encode None into a URL")

    if is_data(value):
        from mo_json import value2json

        pairs = []
        for k, v in wrap(value).leaves():
            if is_text(v):
                encoded = value2url_param(v)
            else:
                encoded = value2url_param(value2json(v))
            pairs.append(value2url_param(k) + "=" + encoded)
        return "&".join(pairs)
    if is_text(value):
        return "".join(_map2url[c] for c in value.encode('utf8'))
    if is_binary(value):
        return "".join(_map2url[c] for c in value)
    if hasattr(value, "__iter__"):
        return ",".join(value2url_param(v) for v in value)
    return str(value)
def wrap(cls, e, stack_depth=0):
    """
    ENSURE THE STACKTRACE AND CAUSAL CHAIN IS CAPTURED, PLUS ADD FEATURES OF Except

    :param e: AN EXCEPTION OF ANY TYPE
    :param stack_depth: HOW MANY CALLS TO TAKE OFF THE TOP OF THE STACK TRACE
    :return: A Except OBJECT OF THE SAME
    """
    if e == None:
        return Null
    elif isinstance(e, (list, Except)):
        # already wrapped (or a list of wrapped); pass through untouched
        return e
    elif is_data(e):
        # data description of an exception; wrap causes recursively
        e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
        return Except(**e)
    else:
        # a native exception: prefer its attached traceback when present
        tb = getattr(e, '__traceback__', None)
        if tb is not None:
            trace = _parse_traceback(tb)
        else:
            trace = _extract_traceback(0)

        cause = Except.wrap(getattr(e, '__cause__', None))
        if hasattr(e, "message") and e.message:
            output = Except(context=ERROR, template=text_type(e.message), trace=trace, cause=cause)
        else:
            output = Except(context=ERROR, template=text_type(e), trace=trace, cause=cause)

        trace = extract_stack(stack_depth + 2)  # +2 = to remove the caller, and it's call to this' Except.wrap()
        output.trace.extend(trace)
        return output
def quote_value(value):
    """
    convert values to mysql code for the same
    mostly delegate directly to the mysql lib, but some exceptions exist
    """
    try:
        if value == None:
            return SQL_NULL
        if isinstance(value, SQL):
            return value
        if is_text(value):
            escaped = "".join(ESCAPE_DCT.get(c, c) for c in value)
            return SQL("'" + escaped + "'")
        if is_data(value):
            return quote_value(json_encode(value))
        if isinstance(value, datetime):
            return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')")
        if isinstance(value, Date):
            return SQL("str_to_date('" + value.format("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')")
        if is_number(value):
            return SQL(text(value))
        if hasattr(value, '__iter__'):
            return quote_value(json_encode(value))
        return quote_value(text(value))
    except Exception as e:
        Log.error("problem quoting SQL {{value}}", value=repr(value), cause=e)
def _expand(template, seq):
    """
    seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE
    """
    if is_text(template):
        return _simple_expand(template, seq)

    if is_data(template):
        # EXPAND LISTS OF ITEMS USING THIS FORM
        # {"from":from, "template":template, "separator":separator}
        template = to_data(template)
        assert template["from"], "Expecting template to have 'from' attribute"
        assert template.template, "Expecting template to have 'template' attribute"

        parts = []
        for row in seq[-1][template["from"]]:
            parts.append(_expand(template.template, seq + (row,)))
        return coalesce(template.separator, "").join(parts)

    if is_list(template):
        return "".join(_expand(t, seq) for t in template)

    if not _Log:
        _late_import()
    _Log.error("can not handle")
def tuple(data, field_name):
    """
    RETURN LIST OF TUPLES

    field_name may be a dotted path, a {"value": path} form, or a list of
    selects; data is any plain python iterable of records.
    """
    if isinstance(data, Cube):
        Log.error("not supported yet")
    if isinstance(data, FlatList):
        Log.error("not supported yet")

    if is_data(field_name) and "value" in field_name:
        # SIMPLIFY {"value":value} AS STRING
        field_name = field_name["value"]

    # SIMPLE PYTHON ITERABLE ASSUMED
    if is_text(field_name):
        path = split_field(field_name)
        if len(path) == 1:
            return [(row[field_name],) for row in data]
        acc = []
        flat_list._tuple1(data, path, 0, acc)
        return acc

    if is_list(field_name):
        selects = [_select_a_field(f) for f in field_name]
        acc = FlatList()
        _tuple((), unwrap(data), selects, 0, acc)
        return acc

    selects = [_select_a_field(field_name)]
    acc = FlatList()
    _tuple((), data, selects, 0, acc)
    return acc
def _update_meta(self):
    """
    Refresh the statistics (count, cardinality, partitions, multi) in
    META_COLUMNS_DESC. Runs only when self.dirty; clears the flag after.
    """
    if not self.dirty:
        return

    now = Date.now()
    for mc in META_COLUMNS_DESC.columns:
        total = 0
        distinct = set()
        opaque = 0
        widest = 1
        for column in self._all_columns():
            cell = column[mc.name]
            if cell == None:
                continue
            total += 1
            if is_list(cell):
                widest = max(widest, len(cell))
                try:
                    distinct |= set(cell)
                except Exception:
                    # members not hashable; count them as opaque objects
                    opaque += len(cell)
            elif is_data(cell):
                opaque += 1
            else:
                distinct.add(cell)
        mc.count = total
        mc.cardinality = len(distinct) + opaque
        mc.partitions = jx.sort(distinct)
        mc.multi = widest
        mc.last_updated = now

    META_COLUMNS_DESC.last_updated = now
    self.dirty = False
def quote_value(value):
    """
    convert values to mysql code for the same
    mostly delegate directly to the mysql lib, but some exceptions exist
    """
    try:
        if value == None:
            return SQL_NULL
        if isinstance(value, SQL):
            return quote_sql(value.template, value.param)
        if is_text(value):
            escaped = "".join(ESCAPE_DCT.get(c, c) for c in value)
            return SQL("'" + escaped + "'")
        if is_data(value):
            return quote_value(json_encode(value))
        if is_number(value):
            return SQL(text_type(value))
        if isinstance(value, datetime):
            return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')")
        if isinstance(value, Date):
            return SQL("str_to_date('" + value.format("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')")
        if hasattr(value, '__iter__'):
            return quote_value(json_encode(value))
        return quote_value(text_type(value))
    except Exception as e:
        Log.error("problem quoting SQL {{value}}", value=repr(value), cause=e)
def define(cls, expr):
    """
    Build a BetweenOp from its json form: either a [value, prefix, suffix]
    triple or a single-entry mapping {var: [prefix, suffix]}.
    """
    term = expr.between
    if is_sequence(term):
        return cls.lang[BetweenOp(
            value=jx_expression(term[0]),
            prefix=jx_expression(term[1]),
            suffix=jx_expression(term[2]),
            default=jx_expression(expr.default),
            start=jx_expression(expr.start),
        )]

    if is_data(term):
        var, vals = term.items()[0]
        if is_sequence(vals) and len(vals) == 2:
            return cls.lang[BetweenOp(
                value=Variable(var),
                prefix=Literal(vals[0]),
                suffix=Literal(vals[1]),
                default=jx_expression(expr.default),
                start=jx_expression(expr.start),
            )]

    # both malformed-mapping and wrong-type cases get the same complaint
    Log.error(
        "`between` parameters are expected to be in {var: [prefix, suffix]} form"
    )
def __getitem__(self, item):
    # TODO: SOLVE FUNDAMENTAL QUESTION OF IF SELECTING A PART OF AN
    # EDGE REMOVES THAT EDGE FROM THIS RESULT, OR ADDS THE PART
    # AS A select {"name":edge.name, "value":edge.domain.partitions[coord]}
    # PROBABLY NOT, THE value IS IDENTICAL OVER THE REMAINING
    if is_data(item):
        # item maps edge names to partition values; pin those coordinates
        coordinates = [None] * len(self.edges)

        # MAP DICT TO NUMERIC INDICES
        for name, v in item.items():
            ei, parts = first((i, e.domain.partitions) for i, e in enumerate(self.edges) if e.name == name)
            if not parts:
                Log.error(
                    "Can not find {{name}}=={{value|quote}} in list of edges, maybe this feature is not implemented yet",
                    name=name,
                    value=v)
            part = first(p for p in parts if p.value == v)
            if not part:
                # requested partition value does not exist
                return Null
            else:
                coordinates[ei] = part.dataIndex

        # edges not pinned by item remain in the result
        edges = [e for e, v in zip(self.edges, coordinates) if v is None]
        if not edges:
            # ZERO DIMENSIONAL VALUE
            return dict_to_data({
                k: v.__getitem__(coordinates)
                for k, v in self.data.items()
            })
        else:
            output = Cube(
                select=self.select,
                edges=list_to_data([
                    e for e, v in zip(self.edges, coordinates) if v is None
                ]),
                data={
                    k: Matrix(values=c.__getitem__(coordinates))
                    for k, c in self.data.items()
                })
            return output
    elif is_text(item):
        # RETURN A VALUE CUBE
        if self.is_value:
            if item != self.select.name:
                Log.error("{{name}} not found in cube", name=item)
            return self

        # NOTE(review): self.select is a list here; `.name` presumably
        # yields the collection of names — confirm against select's type
        if item not in self.select.name:
            Log.error("{{name}} not found in cube", name=item)

        output = Cube(
            select=first(s for s in self.select if s.name == item),
            edges=self.edges,
            data={item: self.data[item]})
        return output
    else:
        Log.error("not implemented yet")
def _replace_ref(node, url):
    """
    Recursively expand "$ref" properties in node, resolving each reference
    relative to url, and merging the fetched document over any siblings.

    :param node: data, list, or leaf value
    :param url: URL of the current document (for relative refs)
    :return: node with refs replaced
    """
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if is_data(node):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            # drill into the fetched document
            new_value = get_attr(new_value, ref.fragment)

        DEBUG and Log.note(
            "Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            output = new_value
        elif is_text(output):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            # sibling properties override the referenced document
            output = unwrap(set_default(output, new_value))

        DEBUG and Log.note("Return {{output}}", output=output)
        return output
    elif is_list(node):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
def edges2value(*values):
    """
    Convert per-edge values into a single key: a Data object keyed by edge
    name when the dimension has multiple fields, else a plain tuple.

    NOTE(review): `fields` and `edges` are not parameters — they are
    presumably closed over from an enclosing scope not visible here;
    confirm before refactoring or moving this function.
    """
    if is_data(fields):
        output = Data()
        for e, v in transpose(edges, values):
            output[e.name] = v
        return output
    else:
        return tuple(values)
def __init__(self, terms):
    """
    Accept either an (lhs, rhs) pair or a single-entry mapping of
    {rhs: lhs}; anything else is an error.
    """
    Expression.__init__(self, terms)
    if is_sequence(terms):
        left, right = terms
    elif is_data(terms):
        right, left = terms.items()[0]
    else:
        Log.error("logic error")
        return
    self.lhs = left
    self.rhs = right
def value_to_json_type(value):
    """
    Derive the json type descriptor for value: arrays get the union of
    their members' types, data gets a per-key JsonType, scalars map by class.
    """
    if is_many(value):
        member_type = union_type(*(value_to_json_type(v) for v in value))
        return _primitive(_A, member_type)
    if is_data(value):
        typed = {k: value_to_json_type(v) for k, v in value.items()}
        return JsonType(**typed)
    return _python_type_to_json_type[value.__class__]
def __init__(self, term):
    """
    Accept None (both parts empty), a single-entry mapping
    {expr: suffix}, or an (expr, suffix) pair.
    """
    Expression.__init__(self, term)
    if not term:
        self.expr = None
        self.suffix = None
    elif is_data(term):
        self.expr, self.suffix = term.items()[0]
    else:
        self.expr, self.suffix = term
def select(self, fields):
    """
    Select values out of this list.

    :param fields: a dotted path, a {"value": path} form, or a list of selects
    :return: list of values (single path) or FlatList of Data (multi-select)
    """
    if is_data(fields):
        fields=fields.value

    if is_text(fields):
        # RETURN LIST OF VALUES
        if len(split_field(fields)) == 1:
            if self.path[0] == fields:
                return [d[1] for d in self.data]
            else:
                return [d[0][fields] for d in self.data]
        else:
            keys = split_field(fields)
            depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
            short_key = keys[depth:]

            output = FlatList()
            _select1((wrap(d[depth]) for d in self.data), short_key, 0, output)
            return output

    if is_list(fields):
        output = FlatList()

        # build (name, accessor) pairs; callables are used as-is
        meta = []
        for f in fields:
            if hasattr(f.value, "__call__"):
                meta.append((f.name, f.value))
            else:
                meta.append((f.name, functools.partial(lambda v, d: d[v], f.value)))

        for row in self._values():
            agg = Data()
            for name, f in meta:
                agg[name] = f(row)
            output.append(agg)
        return output

    # meta = []
    # for f in fields:
    #     keys = split_field(f.value)
    #     depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
    #     short_key = join_field(keys[depth:])
    #
    #     meta.append((f.name, depth, short_key))
    #
    # for row in self._data:
    #     agg = Data()
    #     for name, depth, short_key in meta:
    #         if short_key:
    #             agg[name] = row[depth][short_key]
    #         else:
    #             agg[name] = row[depth]
    #     output.append(agg)
    # return output

    Log.error("multiselect over FlatList not supported")
def wrap(query, container, namespace):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    :param query: raw query data (or an already-normalized QueryOp)
    :param container: provides get_table for the "from" clause
    :param namespace: unused here; kept for interface compatibility
    :return: normalized QueryOp
    """
    if is_op(query, QueryOp) or query == None:
        return query

    # NOTE(review): this one-argument `wrap(query)` call cannot be this
    # three-argument function — it presumably resolves to a data-wrapping
    # helper imported at module level; confirm the import, since a
    # self-recursive call here would raise TypeError
    query = wrap(query)
    table = container.get_table(query['from'])
    schema = table.schema
    output = QueryOp(
        frum=table,
        format=query.format,
        chunk_size=query.chunk_size,
        destination=query.destination,
    )

    _import_temper_limit()
    output.limit = temper_limit(query.limit, query)

    if query.select or is_many(query.select) or is_data(query.select):
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    else:
        if query.edges or query.groupby:
            output.select = DEFAULT_SELECT
        else:
            output.select = _normalize_selects(".", query.frum)

    if query.groupby and query.edges:
        Log.error(
            "You can not use both the `groupby` and `edges` clauses in the same query!"
        )
    elif query.edges:
        output.edges = _normalize_edges(query.edges, limit=output.limit, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, limit=output.limit, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where({"and": listwrap(query.where)}, schema=schema)
    output.window = [_normalize_window(w) for w in listwrap(query.window)]
    output.sort = _normalize_sort(query.sort)

    if output.limit != None and (not mo_math.is_integer(output.limit) or output.limit < 0):
        Log.error("Expecting limit >= 0")

    return output
def where(self, where):
    """
    Return a new ListContainer holding only the rows matching `where`.
    `where` may be data, an expression, or a plain predicate callable.
    """
    if is_data(where) or is_expression(where):
        # both forms compile to a row predicate (original handled them in
        # two identical branches)
        predicate = jx_expression_to_function(where)
    else:
        predicate = where
    return ListContainer("from "+self.name, filter(predicate, self.data), self.schema)
def _replace_ref(node, url):
    """
    Recursively expand "$ref" properties in node, resolving each reference
    relative to url, and merging the fetched document over any siblings.

    :param node: data, list, or leaf value
    :param url: URL of the current document (for relative refs)
    :return: node with refs replaced
    """
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if is_data(node):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            # drill into the fetched document
            new_value = mo_dots.get_attr(new_value, ref.fragment)

        DEBUG and Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            output = new_value
        elif is_text(output):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            # sibling properties override the referenced document
            output = unwrap(set_default(output, new_value))

        DEBUG and Log.note("Return {{output}}", output=output)
        return output
    elif is_list(node):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
def __init__(self, dimensions, source):
    """
    Build a mapping dimension.

    Accepts either a full dimension definition or a plain mapping of
    {name: field}; the latter is converted into an anonymous "set"
    dimension with one edge per key/value pair.
    """
    spec = wrap(dimensions)
    if is_data(spec) and spec.name == None:
        # anonymous mapping: promote each key/value pair to an edge
        edge_list = [{"name": k, "field": v} for k, v in spec.items()]
        spec = {"name": ".", "type": "set", "edges": edge_list}

    self.dimensions = Dimension(spec, None, source)
def set_destination(expected_vars, value):
    """
    Fill the enclosing `destination` buffer: "." takes the whole value,
    a named variable takes that property of a data value, anything else
    becomes Null; None entries in expected_vars are skipped.
    """
    for slot, name in enumerate(expected_vars):
        if name is None:
            continue
        if name == ".":
            destination[slot] = value
        else:
            destination[slot] = value[name] if is_data(value) else Null
def select(data, field_name):
    """
    Return the values found under `field_name` for each row of `data`.

    Dispatches on the container type (Cube, PartFlatList, UniqueIndex,
    single data object) before falling back to plain-iterable handling.
    """
    # container-specific fast paths
    if isinstance(data, Cube):
        return data._select(_normalize_selects(field_name))
    if isinstance(data, PartFlatList):
        return data.select(field_name)
    if isinstance(data, UniqueIndex):
        # THE SELECT ROUTINE REQUIRES dicts, NOT Data WHILE ITERATING
        data = data._data.values()
    if is_data(data):
        return select_one(data, field_name)

    if is_data(field_name):
        field_name = wrap(field_name)
        if field_name.value in ["*", "."]:
            return data
        if field_name.value:
            # SIMPLIFY {"value":value} AS STRING
            field_name = field_name.value

    # SIMPLE PYTHON ITERABLE ASSUMED
    if is_text(field_name):
        path = split_field(field_name)
        if len(path) == 1:
            return FlatList([row[field_name] for row in data])
        deep_output = FlatList()
        flat_list._select1(data, path, 0, deep_output)
        return deep_output

    # list of selectors, or a single non-text selector; both funnel into _select
    if is_list(field_name):
        keys = [_select_a_field(wrap(f)) for f in field_name]
    else:
        keys = [_select_a_field(field_name)]
    return _select(Data(), unwrap(data), keys, 0)
def __init__(self, select, edges, data, frum=None):
    """
    data IS EXPECTED TO BE A dict TO MATRICES, BUT OTHER
    COLLECTIONS ARE ALLOWED, USING THE select AND edges TO
    DESCRIBE THE data

    :param select: a single select description, or a list of them
    :param edges: edge descriptions; when falsy, edges are inferred from data
    :param data: dict of Matrix (preferred), or dict/list/Matrix/scalar
        coerced into that form
    :param frum: unused here; kept for interface compatibility
    """
    self.is_value = False if is_list(select) else True
    self.select = select
    self.meta = Data(format="cube")  # PUT EXTRA MARKUP HERE
    self.is_none = False

    if not all(data.values()):
        # BUG FIX: original assigned a dead local `is_none`; the instance
        # flag was never set
        self.is_none = True

    # ENSURE frum IS PROPER FORM
    if is_list(select):
        if edges and OR(not isinstance(v, Matrix) for v in data.values()):
            Log.error("Expecting data to be a dict with Matrix values")

    if not edges:
        if not data:
            if is_list(select):
                Log.error("not expecting a list of records")

            data = {select.name: Matrix.ZERO}
            self.edges = FlatList.EMPTY
        elif is_data(data):
            # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
            length = MAX([len(v) for v in data.values()])
            if length >= 1:
                self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum"}}])
            else:
                self.edges = FlatList.EMPTY
        elif is_list(data):
            if is_list(select):
                Log.error("not expecting a list of records")

            data = {select.name: Matrix.wrap(data)}
            self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(data), "interval": 1}}])
        elif isinstance(data, Matrix):
            if is_list(select):
                Log.error("not expecting a list of records")

            data = {select.name: data}
            # NOTE(review): this branch never sets self.edges — looks like an
            # omission (all sibling branches set it); confirm before changing
        else:
            if is_list(select):
                Log.error("not expecting a list of records")

            data = {select.name: Matrix(value=data)}
            self.edges = FlatList.EMPTY
    else:
        self.edges = wrap(edges)

    self.data = data
def _convert_clause(self, clause):
    """
    Normalize one query clause: convert its `value` while keeping the
    rest of the column declaration intact.  A list of declarations is
    converted element-by-element.
    """
    wrapped = wrap(clause)

    if wrapped == None:
        return None
    if is_data(wrapped):
        return set_default({"value": self.convert(wrapped.value)}, wrapped)
    return [set_default({"value": self.convert(item.value)}, item) for item in wrapped]
def __init__(self, **desc):
    """
    Build a "set" domain from a description of its partitions.

    Partitions may be primitives (converted to part objects), full part
    objects keyed by `desc.key`, or parts carrying only an `esfilter`.

    :param desc: domain description; must include `partitions` and,
        for object parts, a `key`
    """
    Domain.__init__(self, **desc)
    desc = wrap(desc)

    self.type = "set"
    self.order = {}
    self.NULL = Null
    self.partitions = FlatList()

    # a set is not a valid key specification
    if isinstance(self.key, set):
        Log.error("problem")

    if isinstance(desc.partitions[0], (int, float, text_type)):
        # ASSUME PARTS ARE PRIMITIVES, CONVERT TO REAL PART OBJECTS
        # NOTE(review): self.map is used here without visible initialization —
        # presumably provided by Domain.__init__; confirm
        self.key = "value"
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            part = {"name": p, "value": p, "dataIndex": i}
            self.partitions.append(part)
            self.map[p] = part
            self.order[p] = i
    elif desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
        # compound key taken from the dimension's fields
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.dimension.fields)
    elif desc.partitions and is_container(desc.key):
        # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
    elif desc.partitions and is_data(desc.partitions[0][desc.key]):
        # parts hold data values under the key
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
    elif desc.key == None:
        Log.error("Domains must have keys")
    elif self.key:
        # simple keyed parts: plain dict lookup, NULL for missing
        self.key = desc.key
        self.map = dict()
        self.map[None] = self.NULL
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            self.map[p[self.key]] = p
            self.order[p[self.key]] = i
    elif all(p.esfilter for p in self.partitions):
        # EVERY PART HAS AN esfilter DEFINED, SO USE THEM
        for i, p in enumerate(self.partitions):
            p.dataIndex = i
    else:
        # BUG FIX: error message was misspelled ("hanldle")
        Log.error("Can not handle")

    self.label = coalesce(self.label, "name")
def __getitem__(self, item):
    """
    Select from this cube.

    A data item maps edge names to partition values: the matched edges are
    collapsed, returning either a zero-dimensional value or a smaller Cube
    over the remaining edges.  A text item selects one named value from the
    select clause.
    """
    # TODO: SOLVE FUNDAMENTAL QUESTION OF IF SELECTING A PART OF AN
    # EDGE REMOVES THAT EDGE FROM THIS RESULT, OR ADDS THE PART
    # AS A select {"name":edge.name, "value":edge.domain.partitions[coord]}
    # PROBABLY NOT, THE value IS IDENTICAL OVER THE REMAINING
    if is_data(item):
        coordinates = [None] * len(self.edges)

        # MAP DICT TO NUMERIC INDICES
        for name, v in item.items():
            # find the named edge and its partitions (Null when not found)
            ei, parts = wrap([(i, e.domain.partitions) for i, e in enumerate(self.edges) if e.name == name])[0]
            if not parts:
                Log.error("Can not find {{name}}=={{value|quote}} in list of edges, maybe this feature is not implemented yet", name= name, value= v)
            part = wrap([p for p in parts if p.value == v])[0]
            if not part:
                # value not in the edge's domain: empty selection
                return Null
            else:
                coordinates[ei] = part.dataIndex

        # edges NOT mentioned in `item` survive into the result
        edges = [e for e, v in zip(self.edges, coordinates) if v is None]
        if not edges:
            # ZERO DIMENSIONAL VALUE
            return wrap({k: v.__getitem__(coordinates) for k, v in self.data.items()})
        else:
            output = Cube(
                select=self.select,
                edges=wrap([e for e, v in zip(self.edges, coordinates) if v is None]),
                data={k: Matrix(values=c.__getitem__(coordinates)) for k, c in self.data.items()}
            )
            return output
    elif is_text(item):
        # RETURN A VALUE CUBE
        if self.is_value:
            if item != self.select.name:
                Log.error("{{name}} not found in cube", name= item)
            return self

        # NOTE(review): self.select is a list here; `self.select.name` relies on
        # mo_dots/FlatList broadcasting attribute access to a list of names —
        # confirm that semantic, otherwise this membership test is wrong
        if item not in self.select.name:
            Log.error("{{name}} not found in cube", name= item)

        output = Cube(
            select=[s for s in self.select if s.name == item][0],
            edges=self.edges,
            data={item: self.data[item]}
        )
        return output
    else:
        Log.error("not implemented yet")
def _untype_list(value):
    """
    Strip typing from the members of a list.

    Empty lists collapse to None, single-element lists collapse to their
    only element, everything else is returned as a list.
    """
    has_typed = any(is_data(item) for item in value)
    # only rebuild the list when there may be typed objects inside
    cleaned = [_untype_value(item) for item in value] if has_typed else value

    count = len(cleaned)
    if count == 0:
        return None
    if count == 1:
        return cleaned[0]
    return cleaned
def w_constructor(*args, **kwargs):
    """
    Wrapped constructor: pack the caller's positional/keyword arguments
    (plus any "kwargs" bundle and the decorator's defaults) into a single
    parameter dict before invoking the real constructor.

    NOTE(review): `params`, `defaults`, `func`, `params_pack`, `dict_zip`
    and `raise_error` are closure variables from the enclosing decorator —
    confirm their exact contracts there.
    """
    if "kwargs" in kwargs:
        # explicit kwargs bundle: merge positionals, keywords, the bundle,
        # then defaults (args[0]/params[0] is self, excluded from packing)
        packed = params_pack(params, dict_zip(params[1:], args[1:]), kwargs, kwargs["kwargs"], defaults)
    elif len(args) == 2 and len(kwargs) == 0 and is_data(args[1]):
        # ASSUME SECOND UNNAMED PARAM IS kwargs
        packed = params_pack(params, args[1], defaults)
    else:
        # DO NOT INCLUDE self IN kwargs
        packed = params_pack(params, dict_zip(params[1:], args[1:]), kwargs, defaults)

    try:
        return func(args[0], **packed)
    except TypeError as e:
        packed['self'] = args[0]  # DO NOT SAY IS MISSING
        raise_error(e, packed)
def tab(value):
    """
    convert single value to tab-delimited form, including a header

    :param value: a data object (header taken from its leaf names), or any
        other value
    :return: text; for data values a header line and a value line separated
        by CR, otherwise the text form of the value
    """
    if is_data(value):
        h, d = transpose(*wrap(value).leaves())
        return (
            "\t".join(map(value2json, h))
            + CR
            + "\t".join(map(value2json, d))
        )
    else:
        # BUG FIX: original computed text_type(value) but never returned it,
        # so non-data values produced None
        return text_type(value)
def _replace_locals(node, doc_path):
    """
    Resolve document-local "$ref" fragments (second pass, after remote refs).

    :param node: (sub)document to expand; data and lists recurse, other
        values are returned untouched
    :param doc_path: stack of ancestor documents, innermost first, used to
        resolve relative ("."-prefixed) and absolute fragments
    :return: deep copy of `node` with local refs expanded
    """
    if is_data(node):
        # RECURS, DEEP COPY
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = v
            elif v == None:
                # None-valued properties are dropped from the copy
                continue
            else:
                # NOTE(review): pushes the child value `v` (not `node`) onto
                # doc_path — confirm that is the intended ancestry stack
                output[k] = _replace_locals(v, [v] + doc_path)

        if not ref:
            return output

        # REFER TO SELF
        frag = ref.fragment
        if frag[0] == ".":
            # RELATIVE: each leading "." climbs one ancestor
            for i, p in enumerate(frag):
                if p != ".":
                    if i>len(doc_path):
                        Log.error("{{frag|quote}} reaches up past the root document", frag=frag)
                    new_value = mo_dots.get_attr(doc_path[i-1], frag[i::])
                    break
            else:
                # fragment was all dots: the ancestor itself
                new_value = doc_path[len(frag) - 1]
        else:
            # ABSOLUTE: resolve against the root document
            new_value = mo_dots.get_attr(doc_path[-1], frag)

        # the resolved value may itself contain local refs
        new_value = _replace_locals(new_value, [new_value] + doc_path)

        if not output:
            return new_value  # OPTIMIZATION FOR CASE WHEN node IS {}
        else:
            # sibling properties of the $ref override the resolved content
            return unwrap(set_default(output, new_value))

    elif is_list(node):
        candidate = [_replace_locals(n, [n] + doc_path) for n in node]
        # if all(p[0] is p[1] for p in zip(candidate, node)):
        #     return node
        return candidate

    # primitive value: nothing to resolve
    return node
def jx_expression_to_function(expr):
    """
    RETURN FUNCTION THAT REQUIRES PARAMETERS (row, rownum=None, rows=None):

    Expression objects are compiled directly (a ScriptOp whose script is
    already executable is returned as-is); bare callables pass through
    untouched; anything else is parsed as a json expression first.
    """
    if is_expression(expr):
        if is_op(expr, ScriptOp) and not is_text(expr.script):
            # the "script" is already an executable object
            return expr.script
        return compile_expression(Python[expr].to_python())

    is_plain_callable = (
        expr != None
        and not is_data(expr)
        and not is_list(expr)
        and hasattr(expr, "__call__")
    )
    if is_plain_callable:
        return expr

    return compile_expression(Python[jx_expression(expr)].to_python())
def quote_sql(value, param=None):
    """
    USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT

    SQL objects pass through unless params are supplied, in which case
    each param is quoted and spliced into the template; text becomes raw
    SQL; data is json-encoded and quoted; other iterables become a quoted
    list; everything else is stringified.
    """
    try:
        if isinstance(value, SQL):
            if not param:
                return value
            quoted_params = {name: quote_sql(item) for name, item in param.items()}
            return SQL(expand_template(value, quoted_params))
        if is_text(value):
            return SQL(value)
        if is_data(value):
            return quote_value(json_encode(value))
        if hasattr(value, '__iter__'):
            return quote_list(value)
        return text_type(value)
    except Exception as e:
        Log.error("problem quoting SQL", e)
def new_instance(type, frum, schema=None):
    """
    Factory!

    Build (or pass through) a Container for `frum`, which may be an
    existing container/cube/query, a collection, a table name, or a data
    description.
    """
    if not type2container:
        _delayed_imports()

    # already-usable objects pass straight through
    if isinstance(frum, Container):
        return frum
    if isinstance(frum, _Cube):
        return frum
    if isinstance(frum, _Query):
        return _run(frum)
    if is_many(frum):
        return _ListContainer(frum)

    if is_text(frum):
        # USE DEFAULT STORAGE TO FIND Container
        if not config.default.settings:
            Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")
        storage_settings = set_default(
            {
                "index": join_field(split_field(frum)[:1:]),
                "name": frum,
            },
            config.default.settings
        )
        # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
        storage_settings.type = None
        return type2container["elasticsearch"](storage_settings)

    if is_data(frum):
        frum = wrap(frum)
        if frum.type and type2container[frum.type]:
            return type2container[frum.type](frum.settings)
        if frum["from"]:
            frum = copy(frum)
            frum["from"] = Container(frum["from"])
            return _Query.wrap(frum)
        Log.error("Do not know how to handle {{frum|json}}", frum=frum)
    else:
        Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)