def find_container(frum, after):
    """
    Resolve a "from" clause into a queryable container.

    :param frum: text table name, Data with a "type"/"from" property, or an
        already-usable container/list
    :param after: passed to namespace.get_columns to force a metadata reload
    :return: a Container instance (or frum itself if nothing better is known)
    """
    global namespace
    if not namespace:
        # LAZY-BUILD THE GLOBAL NAMESPACE FROM THE DEFAULT ES SETTINGS
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )
        namespace = ElasticsearchMetadata(container.config.default.settings)
    if not frum:
        Log.error("expecting json query expression with from clause")

    # FORCE A RELOAD
    namespace.get_columns(frum, after=after)

    if is_text(frum):
        if frum in container_cache:
            return container_cache[frum]

        path = split_field(frum)
        if path[0] == "meta":
            # META TABLES ARE SERVED DIRECTLY FROM THE NAMESPACE
            if path[1] == "columns":
                return namespace.meta.columns.denormalized()
            elif path[1] == "tables":
                return namespace.meta.tables
            else:
                fact_table_name = join_field(path[:2])
        else:
            fact_table_name = path[0]

        type_ = container.config.default.type
        settings = set_default(
            {
                "alias": fact_table_name,
                "name": frum,
                "exists": True
            },
            container.config.default.settings,
        )
        settings.type = None
        output = container.type2container[type_](settings)
        container_cache[frum] = output
        return output
    elif is_data(frum) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        # (the elif guard already proved frum.type is truthy, so no re-check needed)
        return container.type2container[frum.type](frum.settings)
    elif is_data(frum) and (frum["from"] or is_container(frum["from"])):
        # A NESTED QUERY: WRAP IT AS A QueryOp
        from jx_base.query import QueryOp

        return QueryOp.wrap(frum)
    elif is_container(frum):
        return ListContainer("test_list", frum)
    else:
        return frum
def filter(data, where):
    """
    Return the records of data that match where.

    where - a function that accepts (record, rownum, rows) and returns boolean,
        or a filter expression understood by get()/drill_filter
    """
    # None/TRUE filter, or nothing to filter: pass through unchanged
    if len(data) == 0 or where == None or where == TRUE:
        return data

    if isinstance(data, Container):
        # CONTAINERS KNOW HOW TO FILTER THEMSELVES
        return data.filter(where)

    if is_container(data):
        # COMPILE where TO A CALLABLE, APPLY ROW-BY-ROW
        temp = get(where)
        dd = wrap(data)
        return wrap(
            [unwrap(d) for i, d in enumerate(data) if temp(wrap(d), i, dd)])
    else:
        # NOTE(review): Log.error presumably raises, which would make the
        # drill_filter fallback below unreachable — confirm intent
        Log.error("Do not know how to handle type {{type}}",
                  type=data.__class__.__name__)

    try:
        return drill_filter(where, data)
    except Exception as _:
        # WOW!  THIS IS INEFFICIENT!
        return wrap([
            unwrap(d)
            for d in drill_filter(where, [DataObject(d) for d in data])
        ])
def filter(data, where):
    """
    Return the records of data that match where.

    where - a function that accepts (record, rownum, rows) and returns boolean,
        or a JSON expression compiled via jx_expression_to_function
    """
    # None/TRUE filter, or nothing to filter: pass through unchanged
    if len(data) == 0 or where == None or where == TRUE:
        return data

    if isinstance(data, Container):
        # CONTAINERS KNOW HOW TO FILTER THEMSELVES
        return data.filter(where)

    if is_container(data):
        # COMPILE where TO A CALLABLE, APPLY ROW-BY-ROW
        temp = jx_expression_to_function(where)
        dd = wrap(data)
        return wrap([unwrap(d) for i, d in enumerate(data) if temp(wrap(d), i, dd)])
    else:
        # NOTE(review): Log.error presumably raises, which would make the
        # drill_filter fallback below unreachable — confirm intent
        Log.error(
            "Do not know how to handle type {{type}}", type=data.__class__.__name__
        )

    try:
        return drill_filter(where, data)
    except Exception as _:
        # WOW! THIS IS INEFFICIENT!
        return wrap(
            [unwrap(d) for d in drill_filter(where, [DataObject(d) for d in data])]
        )
def __init__(self, sep, concat):
    """
    Hold SQL fragments to be joined by a separator.

    :param sep: SQL fragment placed between elements
    :param concat: iterable of SQL fragments to join
    """
    SQL.__init__(self)
    if not is_container(concat):
        # materialize generators/iterators so the fragments can be re-scanned
        concat = list(concat)
    if DEBUG and not isinstance(sep, SQL):
        Log.error("Expecting SQL, not text")
    if DEBUG and any(not isinstance(fragment, SQL) for fragment in concat):
        Log.error("Can only join other SQL")
    self.sep = sep
    self.concat = concat
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)

    Returns an ES-style filter dict; non-data `where` values pass through.
    """
    if is_data(where):
        if where.term:
            # MAP TERM
            try:
                output = _map_term_using_schema(master, [], where.term, schema.edges)
                return output
            except Exception as e:
                Log.error("programmer problem?", e)
        elif where.terms:
            # MAP TERM
            output = FlatList()
            for k, v in where.terms.items():
                if not is_container(v):
                    Log.error("terms filter expects list of values")
                edge = schema.edges[k]
                if not edge:
                    # NOT A DIMENSION: PASS THE terms FILTER THROUGH UNCHANGED
                    output.append({"terms": {k: v}})
                else:
                    if is_text(edge):
                        # DIRECT FIELD REFERENCE
                        return {"terms": {edge: v}}
                    try:
                        domain = edge.getDomain()
                    except Exception as e:
                        Log.error("programmer error", e)
                    fields = domain.dimension.fields
                    if is_data(fields):
                        # COMPOUND KEY: EACH VALUE IS A RECORD; MATCH ALL ITS FIELDS
                        or_agg = []
                        for vv in v:
                            and_agg = []
                            for local_field, es_field in fields.items():
                                vvv = vv[local_field]
                                if vvv != None:
                                    and_agg.append({"term": {es_field: vvv}})
                            or_agg.append({"and": and_agg})
                        output.append({"or": or_agg})
                    elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]):
                        # SINGLE-FIELD DIMENSION: SIMPLE terms FILTER
                        output.append({"terms": {fields[0]: v}})
                    elif domain.partitions:
                        # MATCH BY PARTITION esfilter
                        output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
            return {"and": output}
        elif where["or"]:
            # RECURSE INTO EACH BRANCH OF THE BOOLEAN OPERATORS
            return {"or": [unwrap(_where_terms(master, vv, schema)) for vv in where["or"]]}
        elif where["and"]:
            return {"and": [unwrap(_where_terms(master, vv, schema)) for vv in where["and"]]}
        elif where["not"]:
            return {"not": unwrap(_where_terms(master, where["not"], schema))}
    return where
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)

    Returns an ES-style filter dict; non-data `where` values pass through.
    """
    if is_data(where):
        if where.term:
            # MAP TERM
            try:
                output = _map_term_using_schema(master, [], where.term, schema.edges)
                return output
            except Exception as e:
                Log.error("programmer problem?", e)
        elif where.terms:
            # MAP TERM
            output = FlatList()
            for k, v in where.terms.items():
                if not is_container(v):
                    Log.error("terms filter expects list of values")
                edge = schema.edges[k]
                if not edge:
                    # NOT A DIMENSION: PASS THE terms FILTER THROUGH UNCHANGED
                    output.append({"terms": {k: v}})
                else:
                    if is_text(edge):
                        # DIRECT FIELD REFERENCE
                        return {"terms": {edge: v}}
                    try:
                        domain = edge.getDomain()
                    except Exception as e:
                        Log.error("programmer error", e)
                    fields = domain.dimension.fields
                    if is_data(fields):
                        # COMPOUND KEY: EACH VALUE IS A RECORD; MATCH ALL ITS FIELDS
                        or_agg = []
                        for vv in v:
                            and_agg = []
                            for local_field, es_field in fields.items():
                                vvv = vv[local_field]
                                if vvv != None:
                                    and_agg.append({"term": {es_field: vvv}})
                            or_agg.append({"and": and_agg})
                        output.append({"or": or_agg})
                    elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]):
                        # SINGLE-FIELD DIMENSION: SIMPLE terms FILTER
                        output.append({"terms": {fields[0]: v}})
                    elif domain.partitions:
                        # MATCH BY PARTITION esfilter
                        output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
            return {"and": output}
        elif where["or"]:
            # RECURSE INTO EACH BRANCH OF THE BOOLEAN OPERATORS
            return {"or": [unwrap(_where_terms(master, vv, schema)) for vv in where["or"]]}
        elif where["and"]:
            return {"and": [unwrap(_where_terms(master, vv, schema)) for vv in where["and"]]}
        elif where["not"]:
            return {"not": unwrap(_where_terms(master, where["not"], schema))}
    return where
def __init__(self, **desc):
    """
    Build a "set" domain from a partition description.

    :param desc: domain description; desc.partitions drives which of the
        branches below picks key/map/order
    """
    Domain.__init__(self, **desc)
    desc = wrap(desc)
    self.type = "set"
    self.order = {}       # partition key -> ordinal
    self.NULL = Null
    self.partitions = FlatList()

    if isinstance(self.key, set):
        Log.error("problem")

    if isinstance(desc.partitions[0], (int, float, text_type)):
        # ASSMUE PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
        # NOTE(review): this branch writes self.map[p] but self.map is not
        # initialized here — presumably Domain.__init__ provides it; confirm
        self.key = "value"
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            part = {"name": p, "value": p, "dataIndex": i}
            self.partitions.append(part)
            self.map[p] = part
            self.order[p] = i
    elif desc.partitions and desc.dimension.fields and len(
            desc.dimension.fields) > 1:
        # COMPOUND KEY OVER MULTIPLE DIMENSION FIELDS
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.dimension.fields)
    elif desc.partitions and is_container(desc.key):
        # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
    elif desc.partitions and is_data(desc.partitions[0][desc.key]):
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
        # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
        # self.map = UniqueIndex(keys=self.key)
    elif desc.key == None:
        Log.error("Domains must have keys")
    elif self.key:
        # SIMPLE SCALAR KEY: DICT LOOKUP BY PARTITION KEY VALUE
        self.key = desc.key
        self.map = dict()
        self.map[None] = self.NULL
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            self.map[p[self.key]] = p
            self.order[p[self.key]] = i
    elif all(p.esfilter for p in self.partitions):
        # EVERY PART HAS AN esfilter DEFINED, SO USE THEM
        for i, p in enumerate(self.partitions):
            p.dataIndex = i
    else:
        Log.error("Can not hanldle")  # NOTE(review): typo "hanldle" in message
    self.label = coalesce(self.label, "name")
def __init__(self, **desc):
    """
    Build a "set" domain from a partition description.

    :param desc: domain description; desc.partitions drives which of the
        branches below picks key/map/order
    """
    Domain.__init__(self, **desc)
    desc = wrap(desc)
    self.type = "set"
    self.order = {}       # partition key -> ordinal
    self.NULL = Null
    self.partitions = FlatList()

    if isinstance(self.key, set):
        Log.error("problem")

    if isinstance(desc.partitions[0], (int, float, text_type)):
        # ASSMUE PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
        # NOTE(review): this branch writes self.map[p] but self.map is not
        # initialized here — presumably Domain.__init__ provides it; confirm
        self.key = "value"
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            part = {"name": p, "value": p, "dataIndex": i}
            self.partitions.append(part)
            self.map[p] = part
            self.order[p] = i
    elif desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
        # COMPOUND KEY OVER MULTIPLE DIMENSION FIELDS
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.dimension.fields)
    elif desc.partitions and is_container(desc.key):
        # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
    elif desc.partitions and is_data(desc.partitions[0][desc.key]):
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
        # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
        # self.map = UniqueIndex(keys=self.key)
    elif desc.key == None:
        Log.error("Domains must have keys")
    elif self.key:
        # SIMPLE SCALAR KEY: DICT LOOKUP BY PARTITION KEY VALUE
        self.key = desc.key
        self.map = dict()
        self.map[None] = self.NULL
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            self.map[p[self.key]] = p
            self.order[p[self.key]] = i
    elif all(p.esfilter for p in self.partitions):
        # EVERY PART HAS AN esfilter DEFINED, SO USE THEM
        for i, p in enumerate(self.partitions):
            p.dataIndex = i
    else:
        Log.error("Can not hanldle")  # NOTE(review): typo "hanldle" in message
    self.label = coalesce(self.label, "name")
def define(cls, expr):
    """
    GENERAL SUPPORT FOR BUILDING EXPRESSIONS FROM JSON EXPRESSIONS
    OVERRIDE THIS IF AN OPERATOR EXPECTS COMPLICATED PARAMETERS
    :param expr: Data representing a JSON Expression
    :return: parse tree
    """
    try:
        lang = cls.lang
        items = items_(expr)
        # FIND THE (FIRST) KNOWN OPERATOR AMONG THE EXPRESSION'S KEYS;
        # ALL OTHER KEYS ARE TREATED AS CLAUSES (EXTRA PARAMETERS)
        for item in items:
            op, term = item
            full_op = operators.get(op)
            if full_op:
                class_ = lang.ops[full_op.get_id()]
                clauses = {
                    k: jx_expression(v)
                    for k, v in expr.items() if k != op
                }
                break
        else:
            # NO KNOWN OPERATOR FOUND
            if not items:
                return NULL
            raise Log.error("{{operator|quote}} is not a known operator",
                            operator=expr)

        # BUILD THE OPERATOR FROM ITS term, WHOSE SHAPE DECIDES THE CONSTRUCTOR FORM
        if term == None:
            return class_([], **clauses)
        elif is_container(term):
            # LIST OF OPERANDS
            terms = [jx_expression(t) for t in term]
            return class_(terms, **clauses)
        elif is_data(term):
            items = items_(term)
            if class_.has_simple_form:
                if len(items) == 1:
                    # {variable: literal} SHORT FORM
                    k, v = items[0]
                    return class_([Variable(k), Literal(v)], **clauses)
                else:
                    return class_({k: Literal(v) for k, v in items}, **clauses)
            else:
                return class_(_jx_expression(term, lang), **clauses)
        else:
            if op in ["literal", "date", "offset"]:
                # THESE OPERATORS TAKE THE RAW VALUE, NOT A SUB-EXPRESSION
                return class_(term, **clauses)
            else:
                return class_(_jx_expression(term, lang), **clauses)
    except Exception as e:
        Log.error("programmer error expr = {{value|quote}}", value=expr, cause=e)
def UNION(values, *others):
    """
    Return the set union of all elements in `values`, skipping None.

    Elements that are themselves containers are flattened one level
    (their members are added); scalars are added directly.

    :param values: iterable of values and/or containers of values
    :param others: no longer supported; raises if provided
    :return: set of all (non-None) values
    """
    if len(others) > 0:
        from mo_logs import Log

        Log.error("no longer accepting args, use a single list")

    output = set()
    for v in values:
        # BUG FIX: was `if values == None` — compared the whole list, so
        # None elements were never skipped
        if v == None:
            continue
        if is_container(v):
            output.update(v)
        else:
            output.add(v)
    return output
def to_esfilter(self, schema):
    """
    Translate this equality op into an ES filter clause.
    Simple `variable == literal` cases become term/terms filters;
    everything else is expanded into a CaseOp honoring missing-value
    semantics, then translated.
    """
    if is_op(self.lhs, Variable_) and is_literal(self.rhs):
        rhs = self.rhs.value
        lhs = self.lhs.var
        cols = schema.leaves(lhs)
        if not cols:
            Log.warning(
                "{{col}} does not exist while processing {{expr}}",
                col=lhs,
                expr=self.__data__(),
            )

        if is_container(rhs):
            if len(rhs) == 1:
                # SINGLE-ELEMENT LIST IS TREATED AS A SCALAR
                rhs = rhs[0]
            else:
                types = Data()  # MAP JSON TYPE TO LIST OF LITERALS
                for r in rhs:
                    types[python_type_to_json_type[r.__class__]] += [r]
                if len(types) == 1:
                    # HOMOGENEOUS LIST: ONE terms FILTER ON A TYPE-COMPATIBLE COLUMN
                    jx_type, values = first(types.items())
                    for c in cols:
                        if jx_type == c.jx_type or (jx_type in NUMBER_TYPES and c.jx_type in NUMBER_TYPES):
                            return {"terms": {c.es_column: values}}
                    return FALSE.to_esfilter(schema)
                else:
                    # MIXED TYPES: ONE EqOp PER TYPE, OR'D TOGETHER
                    return (OrOp([
                        EqOp([self.lhs, values]) for t, values in types.items()
                    ]).partial_eval().to_esfilter(schema))

        for c in cols:
            if c.jx_type == BOOLEAN:
                # COERCE THE LITERAL TO THE COLUMN'S BOOLEAN REPRESENTATION
                rhs = pull_functions[c.jx_type](rhs)
            rhs_type = python_type_to_json_type[rhs.__class__]
            if rhs_type == c.jx_type or (rhs_type in NUMBER_TYPES and c.jx_type in NUMBER_TYPES):
                return {"term": {c.es_column: rhs}}
        return FALSE.to_esfilter(schema)
    else:
        # GENERAL CASE: eq IS TRUE WHEN BOTH MISSING, FALSE WHEN ONE MISSING
        return (ES52[CaseOp([
            WhenOp(self.lhs.missing(), **{"then": self.rhs.missing()}),
            WhenOp(self.rhs.missing(), **{"then": FALSE}),
            BasicEqOp([self.lhs, self.rhs]),
        ]).partial_eval()].to_esfilter(schema))
def to_esfilter(self, schema):
    """
    Translate this equality op into an ES filter clause.
    Simple `variable == literal` cases become term/terms filters;
    everything else is expanded into a CaseOp honoring missing-value
    semantics, then translated.
    """
    if is_op(self.lhs, Variable_) and is_literal(self.rhs):
        rhs = self.rhs.value
        lhs = self.lhs.var
        cols = schema.leaves(lhs)

        if is_container(rhs):
            if len(rhs) == 1:
                # SINGLE-ELEMENT LIST IS TREATED AS A SCALAR
                rhs = rhs[0]
            else:
                types = Data()  # MAP JSON TYPE TO LIST OF LITERALS
                for r in rhs:
                    # BUG FIX: was `rhs.__class__` — the type of the whole
                    # list, not of each element r being bucketed
                    types[python_type_to_json_type[r.__class__]] += [r]
                if len(types) == 1:
                    # HOMOGENEOUS LIST: ONE terms FILTER ON A MATCHING COLUMN
                    jx_type, values = first(types.items())
                    for c in cols:
                        if jx_type == c.jx_type:
                            return {"terms": {c.es_column: values}}
                    return FALSE.to_esfilter(schema)
                else:
                    # MIXED TYPES: ONE EqOp PER TYPE, OR'D TOGETHER
                    return (OrOp([
                        EqOp([self.lhs, values]) for t, values in types.items()
                    ]).partial_eval().to_esfilter(schema))

        for c in cols:
            if c.jx_type == BOOLEAN:
                # COERCE THE LITERAL TO THE COLUMN'S BOOLEAN REPRESENTATION
                rhs = pull_functions[c.jx_type](rhs)
            if python_type_to_json_type[rhs.__class__] == c.jx_type:
                return {"term": {c.es_column: rhs}}
        return FALSE.to_esfilter(schema)
    else:
        # GENERAL CASE: eq IS TRUE WHEN BOTH MISSING, FALSE WHEN ONE MISSING
        return (ES52[CaseOp([
            WhenOp(self.lhs.missing(), **{"then": self.rhs.missing()}),
            WhenOp(self.rhs.missing(), **{"then": FALSE}),
            BasicEqOp([self.lhs, self.rhs]),
        ])].partial_eval().to_esfilter(schema))
def define(cls, expr):
    """
    Parse the "select" clause of expr into a SelectOp.

    Each select item may be a plain variable name, or a Data with
    name/value properties; bare values must be simple dot-delimited
    variable names.

    :param expr: Data holding a `select` list
    :return: SelectOp wrapped in the class language
    """
    expr = to_data(expr)
    term = expr.select
    terms = []
    if not is_container(term):
        raise Log.error("Expecting a list")
    for t in term:
        if is_text(t):
            # BARE NAME: value AND name ARE THE SAME
            if not is_variable_name(t):
                Log.error(
                    "expecting {{value}} a simple dot-delimited path name",
                    value=t)
            terms.append({"name": t, "value": _jx_expression(t, cls.lang)})
        elif t.name == None:
            # NO EXPLICIT NAME: DERIVE IT FROM THE VALUE
            if t.value == None:
                Log.error(
                    "expecting select parameters to have name and value properties"
                )
            elif is_text(t.value):
                # BUG FIX: was `is_variable_name(t)` — validated the whole
                # select item instead of its text value (the error message
                # already reported t.value)
                if not is_variable_name(t.value):
                    Log.error(
                        "expecting {{value}} a simple dot-delimited path name",
                        value=t.value,
                    )
                else:
                    terms.append({
                        "name": t.value,
                        "value": _jx_expression(t.value, cls.lang),
                    })
            else:
                Log.error("expecting a name property")
        else:
            terms.append({"name": t.name, "value": jx_expression(t.value)})
    return cls.lang[SelectOp(terms)]
def assertAlmostEqual(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    Recursively compare test against expected, raising (via Log.error) on
    mismatch.  Dispatches on the shape of expected: text, mapping, set,
    callable predicate, iterable, or scalar (compared approximately using
    digits/places/delta).
    """
    show_detail = True
    test = unwrap(test)
    expected = unwrap(expected)
    try:
        if test is None and expected is None:
            return
        elif test is expected:
            return
        elif is_text(expected):
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
        elif isinstance(test, UniqueIndex):
            # SYMMETRIC DIFFERENCE MUST BE EMPTY
            if test ^ expected:
                Log.error("Sets do not match")
        elif is_data(expected) and is_data(test):
            # COMPARE ONLY THE KEYS expected DECLARES
            for k, v2 in unwrap(expected).items():
                v1 = test.get(k)
                assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta)
        elif is_data(expected):
            # test IS NOT A MAPPING: PULL VALUES BY ATTRIBUTE/INDEX
            for k, v2 in expected.items():
                if is_text(k):
                    v1 = mo_dots.get_attr(test, literal_field(k))
                else:
                    v1 = test[k]
                assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta)
        elif is_container(test) and isinstance(expected, set):
            test = set(wrap(t) for t in test)
            if len(test) != len(expected):
                # BUG FIX: template referenced non-existent parameter
                # {{expectedtest}}; the kwarg passed is `expected`
                Log.error(
                    "Sets do not match, element count different:\n{{test|json|indent}}\nexpecting{{expected|json|indent}}",
                    test=test,
                    expected=expected
                )

            # EVERY EXPECTED ELEMENT MUST APPROXIMATELY MATCH SOME TEST ELEMENT
            for e in expected:
                for t in test:
                    try:
                        assertAlmostEqual(t, e, msg=msg, digits=digits, places=places, delta=delta)
                        break
                    except Exception as _:
                        pass
                else:
                    Log.error("Sets do not match. \n{{value|json}} not found in {{test|json}}", value=e, test=test)
        elif isinstance(expected, types.FunctionType):
            # expected IS A PREDICATE
            return expected(test)
        elif hasattr(test, "__iter__") and hasattr(expected, "__iter__"):
            # NORMALIZE WELL-KNOWN ARRAY TYPES TO PLAIN LISTS
            if test.__class__.__name__ == "ndarray":  # numpy
                test = test.tolist()
            elif test.__class__.__name__ == "DataFrame":  # pandas
                test = test[test.columns[0]].values.tolist()
            elif test.__class__.__name__ == "Series":  # pandas
                test = test.values.tolist()

            if not expected and test == None:
                return
            if expected == None:
                expected = []  # REPRESENT NOTHING
            for a, b in zip_longest(test, expected):
                assertAlmostEqual(a, b, msg=msg, digits=digits, places=places, delta=delta)
        else:
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
    except Exception as e:
        Log.error(
            "{{test|json|limit(10000)}} does not match expected {{expected|json|limit(10000)}}",
            test=test if show_detail else "[can not show]",
            expected=expected if show_detail else "[can not show]",
            cause=e
        )
def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
    """
    Accumulate column definitions into `columns` by inspecting the records
    in `frum`, recursing into nested objects and arrays.

    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param parent: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return: None (columns is mutated in place)
    """
    for d in frum:
        row_type = python_type_to_json_type[d.__class__]
        if row_type != "object":
            # EXPECTING PRIMITIVE VALUE: RECORD IT UNDER THE PARENT PATH
            full_name = parent
            column = columns[full_name]
            if not column:
                column = Column(
                    name=concat_field(table_name, full_name),
                    es_column=full_name,
                    es_index=".",
                    es_type=d.__class__.__name__,
                    jx_type=None,  # WILL BE SET BELOW
                    last_updated=Date.now(),
                    nested_path=nested_path,
                )
                columns.add(column)
            # WIDEN THE COLUMN TYPE TO COVER THIS VALUE TOO
            column.es_type = _merge_python_type(column.es_type, d.__class__)
            column.jx_type = python_type_to_json_type[column.es_type]
        else:
            for name, value in d.items():
                full_name = concat_field(parent, name)
                column = columns[full_name]
                if not column:
                    column = Column(
                        name=concat_field(table_name, full_name),
                        es_column=full_name,
                        es_index=".",
                        es_type=value.__class__.__name__,
                        jx_type=None,  # WILL BE SET BELOW
                        last_updated=Date.now(),
                        nested_path=nested_path,
                    )
                    columns.add(column)
                if is_container(value):  # GET TYPE OF MULTIVALUE
                    v = list(value)
                    if len(v) == 0:
                        this_type = none_type.__name__
                    elif len(v) == 1:
                        this_type = v[0].__class__.__name__
                    else:
                        # MERGE THE TYPES OF ALL MEMBERS
                        this_type = reduce(
                            _merge_python_type, (vi.__class__.__name__ for vi in value)
                        )
                else:
                    this_type = value.__class__.__name__
                column.es_type = _merge_python_type(column.es_type, this_type)
                column.jx_type = python_type_to_json_type[column.es_type]

                if this_type in {"object", "dict", "Mapping", "Data"}:
                    # RECURSE INTO NESTED OBJECT, SAME NESTED PATH
                    _get_schema_from_list(
                        [value], table_name, full_name, nested_path, columns
                    )
                elif this_type in {"list", "FlatList"}:
                    # RECURSE INTO NESTED ARRAY WITH AN EXTENDED NESTED PATH
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(
                        value, table_name, full_name, newpath, columns
                    )
def _normalize_edge(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized edge
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param limit: domain size limit when no domain is given
    :param schema: for context
    :return: a normalized edge (always a list of Data)
    """
    if not _Column:
        _late_import()

    if not edge:
        Log.error("Edge has no value, or expression is empty")
    elif is_text(edge):
        if schema:
            leaves = unwraplist(list(schema.leaves(edge)))
            if not leaves or is_container(leaves):
                # UNKNOWN, OR AMBIGUOUS (MANY LEAVES): USE A DEFAULT DOMAIN
                return [
                    Data(
                        name=edge,
                        value=jx_expression(edge, schema=schema),
                        allowNulls=True,
                        dim=dim_index,
                        domain=_normalize_domain(None, limit)
                    )
                ]
            elif isinstance(leaves, _Column):
                # EXACTLY ONE COLUMN: DERIVE DOMAIN FROM IT
                return [Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=_normalize_domain(domain=leaves, limit=limit, schema=schema)
                )]
            elif is_list(leaves.fields) and len(leaves.fields) == 1:
                # SINGLE-FIELD DIMENSION
                return [Data(
                    name=leaves.name,
                    value=jx_expression(leaves.fields[0], schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )]
            else:
                # MULTI-FIELD DIMENSION: NO SINGLE value EXPRESSION
                return [Data(
                    name=leaves.name,
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )]
        else:
            # NO SCHEMA: DEFER DOMAIN DISCOVERY
            return [
                Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=DefaultDomain()
                )
            ]
    else:
        edge = wrap(edge)
        if not edge.name and not is_text(edge.value):
            Log.error("You must name compound and complex edges: {{edge}}", edge=edge)

        if is_container(edge.value) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)

            return [Data(
                name=edge.name,
                value=jx_expression(edge.value, schema=schema),
                allowNulls=bool(coalesce(edge.allowNulls, True)),
                dim=dim_index,
                domain=domain
            )]

        domain = _normalize_domain(edge.domain, schema=schema)

        return [Data(
            name=coalesce(edge.name, edge.value),
            value=jx_expression(edge.value, schema=schema),
            range=_normalize_range(edge.range),
            allowNulls=bool(coalesce(edge.allowNulls, True)),
            dim=dim_index,
            domain=domain
        )]
def _normalize_edge(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized edge
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param limit: domain size limit when no domain is given
    :param schema: for context
    :return: a normalized edge (always a list of Data)
    """
    if not _Column:
        _late_import()

    if not edge:
        Log.error("Edge has no value, or expression is empty")
    elif is_text(edge):
        if schema:
            leaves = unwraplist(list(schema.leaves(edge)))
            if not leaves or is_container(leaves):
                # UNKNOWN, OR AMBIGUOUS (MANY LEAVES): USE A DEFAULT DOMAIN
                return [
                    Data(name=edge,
                         value=jx_expression(edge, schema=schema),
                         allowNulls=True,
                         dim=dim_index,
                         domain=_normalize_domain(None, limit))
                ]
            elif isinstance(leaves, _Column):
                # EXACTLY ONE COLUMN: DERIVE DOMAIN FROM IT
                return [
                    Data(name=edge,
                         value=jx_expression(edge, schema=schema),
                         allowNulls=True,
                         dim=dim_index,
                         domain=_normalize_domain(domain=leaves,
                                                  limit=limit,
                                                  schema=schema))
                ]
            elif is_list(leaves.fields) and len(leaves.fields) == 1:
                # SINGLE-FIELD DIMENSION
                return [
                    Data(name=leaves.name,
                         value=jx_expression(leaves.fields[0], schema=schema),
                         allowNulls=True,
                         dim=dim_index,
                         domain=leaves.getDomain())
                ]
            else:
                # MULTI-FIELD DIMENSION: NO SINGLE value EXPRESSION
                return [
                    Data(name=leaves.name,
                         allowNulls=True,
                         dim=dim_index,
                         domain=leaves.getDomain())
                ]
        else:
            # NO SCHEMA: DEFER DOMAIN DISCOVERY
            return [
                Data(name=edge,
                     value=jx_expression(edge, schema=schema),
                     allowNulls=True,
                     dim=dim_index,
                     domain=DefaultDomain())
            ]
    else:
        edge = wrap(edge)
        if not edge.name and not is_text(edge.value):
            Log.error("You must name compound and complex edges: {{edge}}", edge=edge)

        if is_container(edge.value) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)

            return [
                Data(name=edge.name,
                     value=jx_expression(edge.value, schema=schema),
                     allowNulls=bool(coalesce(edge.allowNulls, True)),
                     dim=dim_index,
                     domain=domain)
            ]

        domain = _normalize_domain(edge.domain, schema=schema)

        return [
            Data(name=coalesce(edge.name, edge.value),
                 value=jx_expression(edge.value, schema=schema),
                 range=_normalize_range(edge.range),
                 allowNulls=bool(coalesce(edge.allowNulls, True)),
                 dim=dim_index,
                 domain=domain)
        ]
def __init__(self, **desc):
    """
    Build a "set" domain from a partition description.

    :param desc: domain description; the shape of desc.partitions and
        desc.key selects one of the construction branches below
    """
    Domain.__init__(self, **desc)
    desc = wrap(desc)
    self.type = "set"
    self.order = {}       # partition key -> ordinal
    self.NULL = Null
    self.partitions = FlatList()
    self.primitive = True  # True IF DOMAIN IS A PRIMITIVE VALUE SET

    if isinstance(self.key, set):
        Log.error("problem")

    if not desc.key and (len(desc.partitions)==0 or isinstance(desc.partitions[0], (text_type, Number, tuple))):
        # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
        self.key = "value"
        self.map = {}
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            part = {"name": p, "value": p, "dataIndex": i}
            self.partitions.append(part)
            self.map[p] = part
            self.order[p] = i
            if isinstance(p, (int, float)):
                text_part = text_type(float(p))  # ES CAN NOT HANDLE NUMERIC PARTS
                self.map[text_part] = part
                self.order[text_part] = i
        self.label = coalesce(self.label, "name")
        self.primitive = True
        return

    if desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
        # COMPOUND KEY OVER MULTIPLE DIMENSION FIELDS
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.dimension.fields)
    elif desc.partitions and is_container(desc.key):
        # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
    elif desc.partitions and is_data(desc.partitions[0][desc.key]):
        # LOOKS LIKE OBJECTS
        # sorted = desc.partitions[desc.key]
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
        self.order = {p[self.key]: p.dataIndex for p in desc.partitions}
        self.partitions = desc.partitions
    elif len(desc.partitions) == 0:
        # CREATE AN EMPTY DOMAIN
        self.key = "value"
        self.map = {}
        self.order[None] = 0
        self.label = coalesce(self.label, "name")
        return
    elif desc.key == None:
        # NOTE(review): `and` binds tighter than `or`, so this reads as
        # (partitions and all(where)) or all(esfilter) — confirm intended
        if desc.partitions and all(desc.partitions.where) or all(desc.partitions.esfilter):
            if not all(desc.partitions.name):
                Log.error("Expecting all partitions to have a name")
            self.key = "name"
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.partitions.append({
                    "where": jx_expression(coalesce(p.where, p.esfilter)),
                    "name": p.name,
                    "dataIndex": i
                })
                self.map[p.name] = p
                self.order[p.name] = i
            return
        elif desc.partitions and len(set(desc.partitions.value)-{None}) == len(desc.partitions):
            # TRY A COMMON KEY CALLED "value". IT APPEARS UNIQUE
            self.key = "value"
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
            self.primitive = False
        else:
            Log.error("Domains must have keys, or partitions")
    elif self.key:
        # SIMPLE SCALAR KEY: DICT LOOKUP BY PARTITION KEY VALUE
        self.key = desc.key
        self.map = dict()
        self.map[None] = self.NULL
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            self.map[p[self.key]] = p
            self.order[p[self.key]] = i
        self.primitive = False
    else:
        Log.error("Can not hanldle")  # NOTE(review): typo "hanldle" in message
    self.label = coalesce(self.label, "name")

    if hasattr(desc.partitions, "__iter__"):
        self.partitions = wrap(list(desc.partitions))
    else:
        Log.error("expecting a list of partitions")
def _normalize(esfilter):
    """
    TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS

    Simplify an ES bool filter to a fixed point: flatten nested
    filter/should lists, drop MATCH_ALL members, short-circuit on
    MATCH_NONE, and mark the result with isNormal so it is not
    re-processed.
    """
    if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + convert.value2json(esfilter))
    isDiff = True

    while isDiff:
        isDiff = False

        if esfilter.bool.filter:
            # PAIRWISE SIMPLIFICATION OF THE and-TERMS
            terms = esfilter.bool.filter
            for (i0, t0), (i1, t1) in itertools.product(enumerate(terms), enumerate(terms)):
                if i0 == i1:
                    continue  # SAME, IGNORE
                # TERM FILTER ALREADY ASSUMES EXISTENCE
                with suppress_exception:
                    if (t0.exists.field != None and t0.exists.field == t1.term.items()[0][0]):
                        terms[i0] = MATCH_ALL
                        continue
                # IDENTICAL CAN BE REMOVED
                with suppress_exception:
                    if t0 == t1:
                        terms[i0] = MATCH_ALL
                        continue
                # MERGE range FILTER WITH SAME FIELD
                if i0 > i1:
                    continue  # SAME, IGNORE
                with suppress_exception:
                    f0, tt0 = t0.range.items()[0]
                    f1, tt1 = t1.range.items()[0]
                    if f0 == f1:
                        set_default(terms[i0].range[literal_field(f1)], tt1)
                        terms[i1] = MATCH_ALL

            # RECURSE INTO EACH TERM; FLATTEN NESTED bool.filter LISTS
            output = []
            for a in terms:
                if is_container(a):
                    from mo_logs import Log
                    Log.error("and clause is not allowed a list inside a list")
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                    a = a_
                if a == MATCH_ALL:
                    isDiff = True
                    continue
                if a == MATCH_NONE:
                    return MATCH_NONE
                if a.bool.filter:
                    isDiff = True
                    a.isNormal = None
                    output.extend(a.bool.filter)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_ALL
            elif len(output) == 1:
                # output[0].isNormal = True
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = es_and(output)
            continue

        if esfilter.bool.should:
            # RECURSE INTO EACH ALTERNATIVE; FLATTEN NESTED bool.should LISTS
            output = []
            for a in esfilter.bool.should:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                    a = a_
                if a.bool.should:
                    a.isNormal = None
                    isDiff = True
                    output.extend(a.bool.should)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_NONE
            elif len(output) == 1:
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap(es_or(output))
            continue

        if esfilter.term != None:
            # EMPTY term MATCHES EVERYTHING
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                return MATCH_ALL

        if esfilter.terms:
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        # None IN THE VALUE LIST MEANS "missing" IS ALSO A MATCH
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            output = es_or(
                                [es_missing(k), {
                                    "terms": {
                                        k: rest
                                    }
                                }])
                        else:
                            output = es_missing(k)
                        output.isNormal = True
                        return output
                    else:
                        esfilter.isNormal = True
                        return esfilter
            # EMPTY VALUE LIST MATCHES NOTHING
            return MATCH_NONE

        if esfilter.bool.must_not:
            _sub = esfilter.bool.must_not
            sub = _normalize(_sub)
            if sub == MATCH_NONE:
                return MATCH_ALL
            elif sub == MATCH_ALL:
                return MATCH_NONE
            elif sub is not _sub:
                sub.isNormal = None
                return wrap({"bool": {"must_not": sub, "isNormal": True}})
            else:
                sub.isNormal = None

    esfilter.isNormal = True
    return esfilter
def _get_schema_from_list(
    frum,  # The list
    table_name,  # Name of the table this list holds records for
    parent,  # parent path
    nested_path,  # each nested array, in reverse order
    columns,  # map from full name to column definition
    native_type_to_json_type  # dict from storage type name to json type name
):
    """
    Accumulate column definitions into `columns` by inspecting the records
    in `frum`, recursing into nested objects and arrays.  Mutates `columns`
    in place; returns None.
    """
    for d in frum:
        row_type = python_type_to_json_type[d.__class__]
        if row_type != "object":
            # EXPECTING PRIMITIVE VALUE: RECORD IT UNDER THE PARENT PATH
            full_name = parent
            column = columns[full_name]
            if not column:
                column = Column(
                    name=concat_field(table_name, full_name),
                    es_column=full_name,
                    es_index=".",
                    es_type=d.__class__.__name__,
                    jx_type=None,  # WILL BE SET BELOW
                    last_updated=Date.now(),
                    nested_path=nested_path,
                )
                columns.add(column)
            # WIDEN THE COLUMN TYPE TO COVER THIS VALUE TOO
            column.es_type = _merge_python_type(column.es_type, d.__class__)
            column.jx_type = native_type_to_json_type[column.es_type]
        else:
            for name, value in d.items():
                full_name = concat_field(parent, name)
                column = columns[full_name]
                if not column:
                    column = Column(
                        name=concat_field(table_name, full_name),
                        es_column=full_name,
                        es_index=".",
                        es_type=value.__class__.__name__,
                        jx_type=None,  # WILL BE SET BELOW
                        last_updated=Date.now(),
                        nested_path=nested_path,
                    )
                    columns.add(column)
                if is_container(value):  # GET TYPE OF MULTIVALUE
                    v = list(value)
                    if len(v) == 0:
                        this_type = none_type.__name__
                    elif len(v) == 1:
                        this_type = v[0].__class__.__name__
                    else:
                        # MERGE THE TYPES OF ALL MEMBERS
                        this_type = reduce(_merge_python_type,
                                           (vi.__class__.__name__ for vi in value))
                else:
                    this_type = value.__class__.__name__

                column.es_type = _merge_python_type(column.es_type, this_type)
                # (removed a try/except that only re-raised the same exception)
                column.jx_type = native_type_to_json_type[column.es_type]

                if this_type in {"object", "dict", "Mapping", "Data"}:
                    # RECURSE INTO NESTED OBJECT, SAME NESTED PATH
                    _get_schema_from_list([value], table_name, full_name,
                                          nested_path, columns,
                                          native_type_to_json_type)
                elif this_type in {"list", "FlatList"}:
                    # RECURSE INTO NESTED ARRAY WITH AN EXTENDED NESTED PATH
                    np = listwrap(nested_path)
                    newpath = unwraplist(
                        [join_field(split_field(np[0]) + [name])] + np)
                    # BUG FIX: the recursive call was missing the
                    # native_type_to_json_type argument (TypeError on any
                    # nested list)
                    _get_schema_from_list(value, table_name, full_name, newpath,
                                          columns, native_type_to_json_type)
def __init__(self, **desc):
    """
    Build a "set" domain from a partition description.

    :param desc: domain description; the shape of desc.partitions and
        desc.key selects one of the construction branches below
    """
    Domain.__init__(self, **desc)
    desc = wrap(desc)
    self.type = "set"
    self.order = {}       # partition key -> ordinal
    self.NULL = Null
    self.partitions = FlatList()
    self.primitive = True  # True IF DOMAIN IS A PRIMITIVE VALUE SET

    if isinstance(self.key, set):
        Log.error("problem")

    if not desc.key and (len(desc.partitions)==0 or isinstance(desc.partitions[0], (text, Number, tuple))):
        # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
        self.key = "value"
        self.map = {}
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            part = {"name": p, "value": p, "dataIndex": i}
            self.partitions.append(part)
            self.map[p] = part
            self.order[p] = i
            if isinstance(p, (int, float)):
                text_part = text(float(p))  # ES CAN NOT HANDLE NUMERIC PARTS
                self.map[text_part] = part
                self.order[text_part] = i
        self.label = coalesce(self.label, "name")
        self.primitive = True
        return

    if desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
        # COMPOUND KEY OVER MULTIPLE DIMENSION FIELDS
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.dimension.fields)
    elif desc.partitions and is_container(desc.key):
        # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
    elif desc.partitions and is_data(desc.partitions[0][desc.key]):
        # LOOKS LIKE OBJECTS
        # sorted = desc.partitions[desc.key]
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
        self.order = {p[self.key]: p.dataIndex for p in desc.partitions}
        self.partitions = desc.partitions
    elif len(desc.partitions) == 0:
        # CREATE AN EMPTY DOMAIN
        self.key = "value"
        self.map = {}
        self.order[None] = 0
        self.label = coalesce(self.label, "name")
        return
    elif desc.key == None:
        # NOTE(review): `and` binds tighter than `or`, so this reads as
        # (partitions and all(where)) or all(esfilter) — confirm intended
        if desc.partitions and all(desc.partitions.where) or all(desc.partitions.esfilter):
            if not all(desc.partitions.name):
                Log.error("Expecting all partitions to have a name")
            self.key = "name"
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.partitions.append({
                    "where": jx_expression(coalesce(p.where, p.esfilter)),
                    "name": p.name,
                    "dataIndex": i
                })
                self.map[p.name] = p
                self.order[p.name] = i
            return
        elif desc.partitions and len(set(desc.partitions.value)-{None}) == len(desc.partitions):
            # TRY A COMMON KEY CALLED "value". IT APPEARS UNIQUE
            self.key = "value"
            self.map = dict()
            self.map[None] = self.NULL
            self.order[None] = len(desc.partitions)
            for i, p in enumerate(desc.partitions):
                self.map[p[self.key]] = p
                self.order[p[self.key]] = i
            self.primitive = False
        else:
            Log.error("Domains must have keys, or partitions")
    elif self.key:
        # SIMPLE SCALAR KEY: DICT LOOKUP BY PARTITION KEY VALUE
        self.key = desc.key
        self.map = dict()
        self.map[None] = self.NULL
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            self.map[p[self.key]] = p
            self.order[p[self.key]] = i
        self.primitive = False
    else:
        Log.error("Can not hanldle")  # NOTE(review): typo "hanldle" in message
    self.label = coalesce(self.label, "name")

    if hasattr(desc.partitions, "__iter__"):
        self.partitions = wrap(list(desc.partitions))
    else:
        Log.error("expecting a list of partitions")