def __init__( self, host, user, password, table, meta, # REDSHIFT COPY COMMAND REQUIRES A BUCKET TO HOLD PARAMETERS database=None, port=5439, settings=None ): self.settings = settings self.db = Redshift(settings) INDEX_CACHE[settings.table] = wrap({"name":settings.table}) # HACK TO GET parse_columns TO WORK columns = parse_columns(settings.table, settings.mapping.test_result.properties) nested = [c.name for c in columns if c.type == "nested"] self.columns = wrap([c for c in columns if c.type not in ["object"] and not any(c.name.startswith(n+".") for n in nested)]) try: self.db.execute(""" CREATE TABLE {{table_name}} ( "_id" character varying UNIQUE, {{columns}} )""", { "table_name": self.db.quote_column(settings.table), "columns": SQL(",\n".join(self.db.quote_column(c.name) + " " + self.db.es_type2pg_type(c.type) for c in self.columns)) }, retry=False) except Exception, e: if "already exists" in e: Log.alert("Table {{table}} exists in Redshift", table= settings.table) else: Log.error("Could not make table", e)
def convert(self, expr): """ ADD THE ".$value" SUFFIX TO ALL VARIABLES """ if expr is True or expr == None or expr is False: return expr elif Math.is_number(expr): return expr elif expr == ".": return "." elif is_keyword(expr): #TODO: LOOKUP SCHEMA AND ADD ALL COLUMNS WITH THIS PREFIX return expr + ".$value" elif isinstance(expr, basestring): Log.error("{{name|quote}} is not a valid variable name", name=expr) elif isinstance(expr, Date): return expr elif isinstance(expr, Query): return self._convert_query(expr) elif isinstance(expr, Mapping): if expr["from"]: return self._convert_query(expr) elif len(expr) >= 2: #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION return wrap({name: self.convert(value) for name, value in expr.items()}) else: # ASSUME SINGLE-CLAUSE EXPRESSION k, v = expr.items()[0] return self.converter_map.get(k, self._convert_bop)(k, v) elif isinstance(expr, (list, set, tuple)): return wrap([self.convert(value) for value in expr])
def __init__(self, value): self.scheme = None self.host = None self.port = None self.path = "" self.query = "" self.fragment = "" if value == None: return if not _convert: _late_import() if value.startswith("file://") or value.startswith("//"): # urlparse DOES NOT WORK IN THESE CASES scheme, suffix = value.split("//") self.scheme = scheme.rstrip(":") parse(self, suffix, 0, 1) self.query = wrap(_convert.url_param2value(self.query)) self.fragment = self.fragment else: output = urlparse(value) self.scheme = output.scheme self.port = output.port self.host = output.netloc.split(":")[0] self.path = output.path self.query = wrap(_convert.url_param2value(output.query)) self.fragment = output.fragment
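# HEDGED USAGE SKETCH (not part of the library): the standard-library urlparse
# yields the same pieces this class stores; parse_qs approximates what
# _convert.url_param2value is assumed to do with the query string.
from urlparse import urlparse as _urlparse, parse_qs as _parse_qs  # Python 2; urllib.parse in Python 3

_example = _urlparse("http://example.com:9200/index/_search?q=test&size=10#frag")
_pieces = {
    "scheme": _example.scheme,               # "http"
    "host": _example.netloc.split(":")[0],   # "example.com"
    "port": _example.port,                   # 9200
    "path": _example.path,                   # "/index/_search"
    "query": _parse_qs(_example.query),      # {"q": ["test"], "size": ["10"]}
    "fragment": _example.fragment,           # "frag"
}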
def append_query(self, es_query, start): self.start = start domain = self.domain field = self.edge.value if isinstance(field, Variable): key = domain.key if isinstance(key, (tuple, list)) and len(key)==1: key = key[0] include = [p[key] for p in domain.partitions] if self.edge.allowNulls: return wrap({"aggs": { "_match": set_default({"terms": { "field": field.var, "size": self.limit, "include": include }}, es_query), "_missing": set_default( {"filter": {"or": [ field.missing().to_esfilter(), {"not": {"terms": {field.var: include}}} ]}}, es_query ), }}) else: return wrap({"aggs": { "_match": set_default({"terms": { "field": field.var, "size": self.limit, "include": include }}, es_query) }}) else: include = [p[domain.key] for p in domain.partitions] if self.edge.allowNulls: return wrap({"aggs": { "_match": set_default({"terms": { "script_field": field.to_ruby(), "size": self.limit, "include": include }}, es_query), "_missing": set_default( {"filter": {"or": [ field.missing().to_esfilter(), NotOp("not", InOp("in", [field, Literal("literal", include)])).to_esfilter() ]}}, es_query ), }}) else: return wrap({"aggs": { "_match": set_default({"terms": { "script_field": field.to_ruby(), "size": self.limit, "include": include }}, es_query) }})
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False): """ return list of (keys, values) pairs where group by the set of keys values IS LIST OF ALL data that has those keys contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES """ if size != None or min_size != None or max_size != None: if size != None: max_size = size return groupby_min_max_size(data, min_size=min_size, max_size=max_size) if isinstance(data, Container): return data.groupby(keys) try: keys = listwrap(keys) get_key = jx_expression_to_function(keys) if not contiguous: data = sorted(data, key=get_key) return ((wrap({k: v for k, v in zip(keys, g)}), wrap(v)) for g, v in itertools.groupby(data, get_key)) except Exception, e: Log.error("Problem grouping", e)
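# ILLUSTRATION (stdlib only, not the library API): itertools.groupby only groups
# adjacent rows, which is why the data is sorted first unless contiguous=True.
import itertools as _itertools

_rows = [{"a": 1, "v": 10}, {"a": 2, "v": 20}, {"a": 1, "v": 30}]
_key = lambda r: r["a"]
_grouped = [
    (k, list(g))
    for k, g in _itertools.groupby(sorted(_rows, key=_key), key=_key)
]
# _grouped == [(1, [{"a": 1, "v": 10}, {"a": 1, "v": 30}]), (2, [{"a": 2, "v": 20}])]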
def es_query_template(path): """ RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE :param path: :return: """ sub_path = split_field(path)[1:] if sub_path: f0 = {} f1 = {} output = wrap( { "filter": { "and": [ f0, {"nested": {"path": join_field(sub_path), "filter": f1, "inner_hits": {"size": 100000}}}, ] }, "from": 0, "size": 0, "sort": [], } ) return output, wrap([f0, f1]) else: f0 = {} output = wrap({"query": {"filtered": {"filter": f0}}, "from": 0, "size": 0, "sort": []}) return output, wrap([f0])
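# SHAPE SKETCH (plain dicts, hedged; field names are made up): for a nested path
# the template returns two empty placeholder dicts; filling them in place later
# completes the query because both placeholders are shared references.
_f0, _f1 = {}, {}
_template = {
    "filter": {"and": [
        _f0,
        {"nested": {"path": "result.subtests", "filter": _f1, "inner_hits": {"size": 100000}}},
    ]},
    "from": 0,
    "size": 0,
    "sort": [],
}
_f0.update({"term": {"build.branch": "mozilla-central"}})  # outer (parent document) filter
_f1.update({"term": {"result.subtests.ok": False}})        # filter applied inside the nested documents
# _template now carries both filters without rebuilding the query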
def append_query(self, es_query, start): self.start = start if not isinstance(self.edge.value, Variable): script_field = self.edge.value.to_ruby() missing = self.edge.value.missing().to_esfilter() output = wrap( { "aggs": { "_match": set_default( {"terms": {"script_field": script_field, "size": self.domain.limit}}, es_query ), "_missing": set_default({"filter": missing}, es_query), } } ) return output output = wrap( { "aggs": { "_match": set_default( {"terms": {"field": self.edge.value.var, "size": self.domain.limit}}, es_query ), "_missing": set_default({"missing": {"field": self.edge.value}}, es_query), } } ) return output
def map(self, map_): def map_select(s, map_): return set_default({"value": s.value.map(map_)}, s) def map_edge(e, map_): partitions = unwraplist([set_default({"where": p.where.map(map_)}, p) for p in e.domain.partitions]) domain = copy(e.domain) domain.where = e.domain.where.map(map_) domain.partitions = partitions edge = copy(e) edge.value = e.value.map(map_) edge.domain = domain if e.range: edge.range.min = e.range.min.map(map_) edge.range.max = e.range.max.map(map_) return edge return QueryOp( "from", frum=self.frum.map(map_), select=wrap([map_select(s, map_) for s in listwrap(self.select)]), edges=wrap([map_edge(e, map_) for e in self.edges]), groupby=wrap([g.map(map_) for g in self.groupby]), window=wrap([w.map(map_) for w in self.window]), where=self.where.map(map_), sort=wrap([map_select(s, map_) for s in listwrap(self.sort)]), limit=self.limit, format=self.format, )
def _aggs_iterator(agg, d): agg = drill(agg) if d > 0: for b in agg.get("_match", EMPTY).get("buckets", EMPTY_LIST): parts[d] = wrap(b) for a in _aggs_iterator(b, d - 1): yield a parts[d] = Null for b in agg.get("_other", EMPTY).get("buckets", EMPTY_LIST): for a in _aggs_iterator(b, d - 1): yield a b = drill(agg.get("_missing", EMPTY)) if b.get("doc_count"): for a in _aggs_iterator(b, d - 1): yield a else: for b in agg.get("_match", EMPTY).get("buckets", EMPTY_LIST): parts[d] = wrap(b) b = drill(b) if b.get("doc_count"): yield b parts[d] = Null for b in agg.get("_other", EMPTY).get("buckets", EMPTY_LIST): b = drill(b) if b.get("doc_count"): yield b b = drill(agg.get("_missing", EMPTY)) if b.get("doc_count"): yield b
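# MINIMAL SKETCH (plain dicts, hedged): the bucket layout this iterator drills
# through; a trimmed version of the depth-0 else-branch above ("_other" buckets
# are omitted for brevity).
_agg_response = {
    "_match": {"buckets": [
        {"key": "linux", "doc_count": 3},
        {"key": "win", "doc_count": 2},
    ]},
    "_missing": {"doc_count": 1},
}

def _iter_depth0(agg):
    # yield every non-empty bucket at the deepest level
    for b in agg.get("_match", {}).get("buckets", []):
        if b.get("doc_count"):
            yield b
    missing = agg.get("_missing", {})
    if missing.get("doc_count"):
        yield missing

# list(_iter_depth0(_agg_response)) -> the two term buckets plus the missing bucket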
def append_query(self, es_query, start): self.start = start if not isinstance(self.edge.value, Variable): script_field = self.edge.value.to_ruby() missing = self.edge.value.missing() output = wrap( { "aggs": { "_match": set_default( {"terms": {"script_field": script_field, "size": self.domain.limit}}, es_query ), "_missing": set_default({"filter": missing.to_esfilter()}, es_query) if missing else None, } } ) return output output = wrap( { "aggs": { "_match": set_default( {"terms": {"field": self.edge.value.var, "size": self.domain.limit}}, es_query ), "_missing": set_default( {"missing": {"field": self.edge.value}}, es_query ), # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER } } ) return output
def __init__(self, host, index, alias=None, name=None, port=9200, settings=None): global _elasticsearch if hasattr(self, "settings"): return from pyLibrary.queries.containers.lists import ListContainer from pyLibrary.env import elasticsearch as _elasticsearch self.settings = settings self.default_name = coalesce(name, alias, index) self.default_es = _elasticsearch.Cluster(settings=settings) self.todo = Queue("refresh metadata", max=100000, unique=True) self.meta=Dict() table_columns = metadata_tables() column_columns = metadata_columns() self.meta.tables = ListContainer("meta.tables", [], wrap({c.name: c for c in table_columns})) self.meta.columns = ListContainer("meta.columns", [], wrap({c.name: c for c in column_columns})) self.meta.columns.insert(column_columns) self.meta.columns.insert(table_columns) # TODO: fix monitor so it does not bring down ES if ENABLE_META_SCAN: self.worker = Thread.run("refresh metadata", self.monitor) else: self.worker = Thread.run("refresh metadata", self.not_monitor) return
def get(url):
    """
    USE json.net CONVENTIONS TO LINK TO INLINE OTHER JSON
    """
    if not _Log:
        _late_import()

    if url.find("://") == -1:
        _Log.error("{{url}} must have a protocol (eg http://) declared", url=url)

    base = URL("")
    if url.startswith("file://") and url[7] != "/":
        if os.sep == "\\":
            base = URL("file:///" + os.getcwd().replace(os.sep, "/").rstrip("/") + "/.")
        else:
            base = URL("file://" + os.getcwd().rstrip("/") + "/.")
    elif url[url.find("://") + 3] != "/":
        _Log.error("{{url}} must be absolute", url=url)

    phase1 = _replace_ref(wrap({"$ref": url}), base)  # BLANK URL ONLY WORKS IF url IS ABSOLUTE
    try:
        phase2 = _replace_locals(phase1, [phase1])
        return wrap(phase2)
    except Exception, e:
        _Log.error("problem replacing locals in\n{{phase1}}", phase1=phase1)
def __getitem__(self, key): if key == None: return Null if key == ".": output = _get(self, "_dict") if isinstance(output, Mapping): return self else: return output if isinstance(key, str): key = key.decode("utf8") elif not isinstance(key, unicode): from pyLibrary.debugs.logs import Log Log.error("only string keys are supported") d = _get(self, "_dict") if key.find(".") >= 0: seq = _split_field(key) for n in seq: if isinstance(d, NullType): d = NullType(d, n) # OH DEAR, Null TREATS n AS PATH, NOT LITERAL else: d = _getdefault(d, n) # EVERYTHING ELSE TREATS n AS LITERAL return wrap(d) else: o = d.get(key) if o == None: return NullType(d, key) return wrap(o)
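# ILLUSTRATION (stdlib only, hedged): dot-separated keys walk nested mappings,
# which is the behaviour this __getitem__ provides for wrapped Dicts.
def _get_path(d, path):
    for step in path.split("."):
        if not isinstance(d, dict):
            return None      # stand-in for the library's Null
        d = d.get(step)
        if d is None:
            return None
    return d

_doc = {"build": {"platform": {"os": "linux"}}}
assert _get_path(_doc, "build.platform.os") == "linux"
assert _get_path(_doc, "build.missing.key") is None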
def __init__(self, value): if not _convert: _late_import() try: self.scheme = None self.host = None self.port = None self.path = "" self.query = "" self.fragment = "" if value == None: return if value.startswith("file://") or value.startswith("//"): # urlparse DOES NOT WORK IN THESE CASES scheme, suffix = value.split("//") self.scheme = scheme.rstrip(":") parse(self, suffix, 0, 1) self.query = wrap(_convert.url_param2value(self.query)) else: output = urlparse(value) self.scheme = output.scheme self.port = output.port self.host = output.netloc.split(":")[0] self.path = output.path self.query = wrap(_convert.url_param2value(output.query)) self.fragment = output.fragment except Exception, e: _Log.error("problem parsing {{value}} to URL", value=value, cause=e)
def metas(self, prefix=None, limit=None, delimiter=None): """ RETURN THE METADATA DESCRIPTORS FOR EACH KEY """ keys = self.bucket.list(prefix=prefix, delimiter=delimiter) if limit: output = [] for i, k in enumerate(keys): output.append({ "key": strip_extension(k.key), "etag": convert.quote2string(k.etag), "expiry_date": Date(k.expiry_date), "last_modified": Date(k.last_modified) }) if i >= limit: break return wrap(output) output = [ { "key": strip_extension(k.key), "etag": convert.quote2string(k.etag), "expiry_date": Date(k.expiry_date), "last_modified": Date(k.last_modified) } for k in keys ] return wrap(output)
def convert(self, expr): """ EXPAND INSTANCES OF name TO value """ if expr is True or expr == None or expr is False: return expr elif Math.is_number(expr): return expr elif expr == ".": return "." elif is_keyword(expr): return coalesce(self.dimensions[expr], expr) elif isinstance(expr, basestring): Log.error("{{name|quote}} is not a valid variable name", name=expr) elif isinstance(expr, Date): return expr elif isinstance(expr, Query): return self._convert_query(expr) elif isinstance(expr, Mapping): if expr["from"]: return self._convert_query(expr) elif len(expr) >= 2: #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION return wrap({name: self.convert(value) for name, value in expr.leaves()}) else: # ASSUME SINGLE-CLAUSE EXPRESSION k, v = expr.items()[0] return converter_map.get(k, self._convert_bop)(self, k, v) elif isinstance(expr, (list, set, tuple)): return wrap([self.convert(value) for value in expr]) else: return expr
def _select_a_field(field): if isinstance(field, basestring): return wrap({"name": field, "value": split_field(field)}) elif isinstance(wrap(field).value, basestring): field = wrap(field) return wrap({"name": field.name, "value": split_field(field.value)}) else: return wrap({"name": field.name, "value": field.value})
def search(self, query): query = wrap(query) f = jx.get(query.query.filtered.filter) filtered = wrap([{"_id": i, "_source": d} for i, d in self.data.items() if f(d)]) if query.fields: return wrap({"hits": {"total": len(filtered), "hits": [{"_id": d._id, "fields": unwrap(jx.select([unwrap(d._source)], query.fields)[0])} for d in filtered]}}) else: return wrap({"hits": {"total": len(filtered), "hits": filtered}})
def run(query, frum=None): """ THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER, BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer """ query = QueryOp.wrap(query, frum.schema) frum = coalesce(frum, query["from"]) if isinstance(frum, Container): return frum.query(query) elif isinstance(frum, (list, set, GeneratorType)): frum = wrap(list(frum)) elif isinstance(frum, Cube): if is_aggs(query): return cube_aggs(frum, query) elif isinstance(frum, QueryOp): frum = run(frum) else: Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__) if is_aggs(query): frum = list_aggs(frum, query) else: # SETOP # try: # if query.filter != None or query.esfilter != None: # Log.error("use 'where' clause") # except AttributeError: # pass if query.where is not TRUE_FILTER: frum = filter(frum, query.where) if query.sort: frum = sort(frum, query.sort, already_normalized=True) if query.select: frum = select(frum, query.select) if query.window: if isinstance(frum, Cube): frum = list(frum.values()) for param in query.window: window(frum, param) # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT if query.format == "cube": frum = convert.list2cube(frum) elif query.format == "table": frum = convert.list2table(frum) frum.meta.format = "table" else: frum = wrap({ "meta": {"format": "list"}, "data": frum }) return frum
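# PIPELINE SKETCH (plain Python, hedged): the list-container branch above is
# essentially filter -> sort -> select over a list of records.
_records = [{"name": "b", "score": 2}, {"name": "a", "score": 3}, {"name": "c", "score": 1}]
_where = lambda r: r["score"] >= 2                    # stand-in for query.where
_sorted = sorted(filter(_where, _records), key=lambda r: r["name"])
_selected = [{"name": r["name"]} for r in _sorted]    # stand-in for query.select
# _selected == [{"name": "a"}, {"name": "b"}]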
def iter(data, depth): if depth == 0: for v in data: yield wrap(v) return for v in data.values(): for v1 in iter(v, depth - 1): yield wrap(v1)
def __init__(self, select, edges, data, frum=None):
    """
    data IS EXPECTED TO BE A dict TO MATRICES, BUT OTHER COLLECTIONS ARE
    ALLOWED, USING THE select AND edges TO DESCRIBE THE data
    """
    self.is_value = False if isinstance(select, list) else True
    self.select = select
    self.meta = Dict(format="cube")  # PUT EXTRA MARKUP HERE
    self.is_none = False

    if not all(data.values()):
        self.is_none = True

    # ENSURE frum IS PROPER FORM
    if isinstance(select, list):
        if edges and OR(not isinstance(v, Matrix) for v in data.values()):
            Log.error("Expecting data to be a dict with Matrix values")

    if not edges:
        if not data:
            if isinstance(select, list):
                Log.error("not expecting a list of records")
            data = {select.name: Matrix.ZERO}
            self.edges = DictList.EMPTY
        elif isinstance(data, Mapping):
            # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
            length = MAX([len(v) for v in data.values()])
            if length >= 1:
                self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum"}}])
            else:
                self.edges = DictList.EMPTY
        elif isinstance(data, list):
            if isinstance(select, list):
                Log.error("not expecting a list of records")
            data = {select.name: Matrix.wrap(data)}
            self.edges = wrap(
                [{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(data), "interval": 1}}]
            )
        elif isinstance(data, Matrix):
            if isinstance(select, list):
                Log.error("not expecting a list of records")
            data = {select.name: data}
        else:
            if isinstance(select, list):
                Log.error("not expecting a list of records")
            data = {select.name: Matrix(value=data)}
            self.edges = DictList.EMPTY
    else:
        self.edges = wrap(edges)

    self.data = data
def get_metadata(self): if self.settings.explore_metadata: if not self.cluster_metadata: response = self.get("/_cluster/state") self.cluster_metadata = wrap(response.metadata) self.cluster_state = wrap(self.get("/")) self.version = self.cluster_state.version.number else: Log.error("Metadata exploration has been disabled") return self.cluster_metadata
def list2tab(rows): columns = set() for r in wrap(rows): columns |= set(k for k, v in r.leaves()) keys = list(columns) output = [] for r in wrap(rows): output.append("\t".join(value2json(r[k]) for k in keys)) return "\t".join(keys) + "\n" + "\n".join(output)
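# EQUIVALENT SKETCH (stdlib only, hedged): build the same header plus
# tab-separated rows, with json.dumps standing in for value2json and plain
# top-level keys standing in for the dot-flattened leaves() keys.
import json as _json

def _list2tab(rows):
    keys = sorted({k for r in rows for k in r.keys()})
    lines = ["\t".join(_json.dumps(r.get(k)) for k in keys) for r in rows]
    return "\t".join(keys) + "\n" + "\n".join(lines)

# _list2tab([{"a": 1, "b": "x"}, {"a": 2}]) ->
# 'a\tb\n1\t"x"\n2\tnull'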
def __getitem__(self, item): # TODO: SOLVE FUNDAMENTAL QUESTION OF IF SELECTING A PART OF AN # EDGE REMOVES THAT EDGE FROM THIS RESULT, OR ADDS THE PART # AS A select {"name":edge.name, "value":edge.domain.partitions[coord]} # PROBABLY NOT, THE value IS IDENTICAL OVER THE REMAINING if isinstance(item, Mapping): coordinates = [None] * len(self.edges) # MAP DICT TO NUMERIC INDICES for name, v in item.items(): ei, parts = wrap([(i, e.domain.partitions) for i, e in enumerate(self.edges) if e.name == name])[0] if not parts: Log.error( "Can not find {{name}}=={{value|quote}} in list of edges, maybe this feature is not implemented yet", name=name, value=v, ) part = wrap([p for p in parts if p.value == v])[0] if not part: return Null else: coordinates[ei] = part.dataIndex edges = [e for e, v in zip(self.edges, coordinates) if v is None] if not edges: # ZERO DIMENSIONAL VALUE return wrap({k: v.__getitem__(coordinates) for k, v in self.data.items()}) else: output = Cube( select=self.select, edges=wrap([e for e, v in zip(self.edges, coordinates) if v is None]), data={k: Matrix(values=c.__getitem__(coordinates)) for k, c in self.data.items()}, ) return output elif isinstance(item, basestring): # RETURN A VALUE CUBE if self.is_value: if item != self.select.name: Log.error("{{name}} not found in cube", name=item) return self if item not in self.select.name: Log.error("{{name}} not found in cube", name=item) output = Cube( select=[s for s in self.select if s.name == item][0], edges=self.edges, data={item: self.data[item]} ) return output else: Log.error("not implemented yet")
def __getitem__(self, key): try: _key = value2key(self._keys, key) if len(self._keys) == 1 or len(_key) == len(self._keys): d = self._data.get(_key) return wrap(d) else: output = wrap([ d for d in self._data.values() if all(wrap(d)[k] == v for k, v in _key.items()) ]) return output except Exception, e: Log.error("something went wrong", e)
def list2cube(rows, column_names=None): if column_names: keys = column_names else: columns = set() for r in rows: columns |= set(r.keys()) keys = list(columns) data = {k: [] for k in keys} output = wrap({ "meta": {"format": "cube"}, "edges": [ { "name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(rows), "interval": 1} } ], "data": data }) for r in rows: for k in keys: data[k].append(unwraplist(r[k])) return output
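# SHAPE SKETCH (plain dicts, hedged): rows become one column-list per key,
# indexed by an implicit "rownum" edge.
_rows = [{"a": 1, "b": "x"}, {"a": 2, "b": "y"}]
_keys = ["a", "b"]
_cube = {
    "meta": {"format": "cube"},
    "edges": [{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(_rows), "interval": 1}}],
    "data": {k: [r.get(k) for r in _rows] for k in _keys},
}
# _cube["data"] == {"a": [1, 2], "b": ["x", "y"]}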
def __init__(self, **desc): Domain.__init__(self, **desc) self.type = "range" self.NULL = Null if self.partitions: # IGNORE THE min, max, interval if not self.key: Log.error("Must have a key value") parts = listwrap(self.partitions) for i, p in enumerate(parts): self.min = Math.min(self.min, p.min) self.max = Math.max(self.max, p.max) if p.dataIndex != None and p.dataIndex != i: Log.error("Expecting `dataIndex` to agree with the order of the parts") if p[self.key] == None: Log.error("Expecting all parts to have {{key}} as a property", key=self.key) p.dataIndex = i # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE for p, q in itertools.product(parts, parts): if p.min <= q.min and q.min < p.max: Log.error("partitions overlap!") self.partitions = parts return elif any([self.min == None, self.max == None, self.interval == None]): Log.error("Can not handle missing parameter") self.key = "min" self.partitions = wrap([{"min": v, "max": v + self.interval, "dataIndex": i} for i, v in enumerate(frange(self.min, self.max, self.interval))])
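# ILLUSTRATION (plain Python, hedged): the min/max/interval branch above
# produces half-open [min, max) partitions, one per interval step.
def _range_parts(lo, hi, interval):
    v, i, parts = lo, 0, []
    while v < hi:
        parts.append({"min": v, "max": v + interval, "dataIndex": i})
        v += interval
        i += 1
    return parts

# _range_parts(0, 10, 2.5) ->
# [{"min": 0, "max": 2.5, "dataIndex": 0}, {"min": 2.5, "max": 5.0, "dataIndex": 1},
#  {"min": 5.0, "max": 7.5, "dataIndex": 2}, {"min": 7.5, "max": 10.0, "dataIndex": 3}]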
def __getattribute__(self, key): if key == b"__class__": return NullType key = key.decode('utf8') d = _get(self, "__dict__") o = wrap(d["_obj"]) k = d["__key__"] if o is None: return Null elif isinstance(o, NullType): return NullType(self, key) v = o.get(k) if v == None: return NullType(self, key) return wrap(v).get(key)
def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    add_to_trace = False
    cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
    trace = exceptions.extract_stack(stack_depth + 1)

    if add_to_trace:
        cause[0].trace.extend(trace[1:])

    e = Except(exceptions.ERROR, template, params, cause, trace)
    raise e
def json2value(json_string, params={}, flexible=False, leaves=False): """ :param json_string: THE JSON :param params: STANDARD JSON PARAMS :param flexible: REMOVE COMMENTS :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED :return: Python value """ if isinstance(json_string, str): Log.error("only unicode json accepted") try: if flexible: # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py# L58 json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE) json_string = "\n".join(remove_line_comment(l) for l in json_string.split("\n")) # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA json_string = re.sub(r",\s*\}", r"}", json_string) # ALLOW LISTS TO END WITH COMMA json_string = re.sub(r",\s*\]", r"]", json_string) if params: # LOOKUP REFERENCES json_string = expand_template(json_string, params) try: value = wrap(json_decoder(unicode(json_string))) except Exception, e: Log.error("can not decode\n{{content}}", content=json_string, cause=e) if leaves: value = wrap_leaves(value) return value
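# MINIMAL SKETCH (stdlib only, hedged): the "flexible" branch above strips
# comment styles and trailing commas so near-JSON survives json.loads; the
# crude //-comment handling here would also truncate "http://" inside strings,
# unlike the library's remove_line_comment.
import json as _json
import re as _re

def _loads_flexible(text):
    text = _re.sub(r"\"\"\".*?\"\"\"", "\n", text, flags=_re.DOTALL)       # """comments"""
    text = "\n".join(line.split("//")[0] for line in text.split("\n"))     # crude //comments
    text = _re.sub(r",\s*\}", "}", text)                                   # trailing comma in objects
    text = _re.sub(r",\s*\]", "]", text)                                   # trailing comma in lists
    return _json.loads(text)

_value = _loads_flexible('{"a": 1, // note\n "b": [1, 2,],}')
# _value == {"a": 1, "b": [1, 2]}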
def as_dict(self): output = wrap({s: getattr(self, s) for s in QueryOp.__slots__}) return output
def __init__(self, description, param=None, debug=True, silent=False): self.template = description self.param = wrap(coalesce(param, {})) self.debug = debug self.silent = silent self.interval = None
def iteritems(self): # LOW LEVEL ITERATION, NO WRAPPING d = _get(self, "_dict") return ((k, wrap(v)) for k, v in d.iteritems())
def get(self, key, default=None): return wrap(dict.get(self, key, default))
def iteritems(self): for k, v in dict.iteritems(self): yield k, wrap(v)
def _normalize_job_result(self, branch, revision, job, details, notes, stars): output = Dict() try: job = wrap(copy(job)) # ORGANIZE PROPERTIES output.build.architecture = _scrub(job, "build_architecture") output.build.os = _scrub(job, "build_os") output.build.platform = _scrub(job, "build_platform") output.build.type = _scrub(job, "platform_option") output.build_system_type = _scrub(job, "build_system_type") output.job.id = _scrub(job, "id") output.job.guid = _scrub(job, "job_guid") if job.job_group_symbol != "?": output.job.group.name = _scrub(job, "job_group_name") output.job.group.description = _scrub(job, "job_group_description") output.job.group.symbol = _scrub(job, "job_group_symbol") else: job.job_group_name = None job.job_group_description = None job.job_group_symbol = None output.job.type.description = _scrub(job, "job_type_description") output.job.type.name = _scrub(job, "job_type_name") output.job.type.symbol = _scrub(job, "job_type_symbol") output.ref_data_name = _scrub(job, "ref_data_name") output.machine.name = _scrub(job, "machine_name") if Math.is_integer(output.machine.name.split("-")[-1]): output.machine.pool = "-".join( output.machine.name.split("-")[:-1]) output.machine.platform = _scrub(job, "machine_platform_architecture") output.machine.os = _scrub(job, "machine_platform_os") output.job.reason = _scrub(job, "reason") output.job.state = _scrub(job, "state") output.job.tier = _scrub(job, "tier") output.job.who = _scrub(job, "who") output.job.result = _scrub(job, "result") fcid = _scrub(job, "failure_classification_id") if fcid not in [0, 1]: # 0 is unknown, and 1 is "not classified" output.job.failure_classification = self.failure_classification.get( fcid) if job.result_set: output.repo.push_date = job.result_set.push_timestamp output.repo.branch = self.repo[job.result_set.repository_id] output.repo.revision = job.result_set.revision else: output.repo.branch = branch output.repo.revision = revision output.repo.revision12 = revision[:12] output.job.timing.submit = Date(_scrub(job, "submit_timestamp")) output.job.timing.start = Date(_scrub(job, "start_timestamp")) output.job.timing.end = Date(_scrub(job, "end_timestamp")) output.job.timing.last_modified = Date(_scrub( job, "last_modified")) # IGNORED job.job_group_id = None job.job_type_id = None job.result_set = None job.build_platform_id = None job.job_coalesced_to_guid = None job.option_collection_hash = None job.platform = None job.result_set_id = None job.running_eta = None job.signature = None if job.keys(): Log.error("{{names|json}} are not used", names=job.keys()) # ATTACH DETAILS (AND SCRUB OUT REDUNDANT VALUES) output.details = details.get(output.job.guid, Null) for d in output.details: d.job_guid = None d.job_id = None output.task.id = coalesce( *map(_extract_task_id, output.details.url)) # ATTACH NOTES (RESOLVED BY BUG...) 
for n in notes.get(output.job.id, Null): note = coalesce(n.note.strip(), n.text.strip()) if note: # LOOK UP REVISION IN REPO fix = re.findall(r'[0-9A-Fa-f]{12}', note) if fix: rev = self.hg.get_revision( Dict(changeset={"id": fix[0]}, branch={"name": branch})) n.revision = rev.changeset.id n.bug_id = self.hg._extract_bug_id( rev.changeset.description) else: note = None output.notes += [{ "note": note, "status": coalesce(n.active_status, n.status), "revision": n.revision, "bug_id": n.bug_id, "who": n.who, "failure_classification": self.failure_classification[n.failure_classification_id], "timestamp": Date(coalesce(n.note_timestamp, n.timestamp, n.created)) }] # ATTACH STAR INFO for s in stars.get(output.job.id, Null): # LOOKUP BUG DETAILS output.stars += [{ "bug_id": s.bug_id, "who": s.who, "timestamp": s.submit_timestamp }] output.etl = {"timestamp": Date.now()} return output except Exception, e: Log.error("Problem with normalization of job {{job_id}}", job_id=coalesce(output.job.id, job.id), cause=e)
def DataClass(name, columns): """ Each column has {"name", "required", "nulls", "default", "type"} properties """ columns = wrap([{ "name": c, "required": True, "nulls": False, "type": object } if isinstance(c, basestring) else c for c in columns]) slots = columns.name required = wrap( filter(lambda c: c.required and not c.nulls and not c.default, columns)).name nulls = wrap(filter(lambda c: c.nulls, columns)).name types = {c.name: coalesce(c.type, object) for c in columns} code = expand_template( """ from __future__ import unicode_literals from collections import Mapping meta = None types_ = {{types}} class {{name}}(Mapping): __slots__ = {{slots}} def __init__(self, **kwargs): if not kwargs: return for s in {{slots}}: setattr(self, s, kwargs.get(s, kwargs.get('default', Null))) missed = {{required}}-set(kwargs.keys()) if missed: Log.error("Expecting properties {"+"{missed}}", missed=missed) illegal = set(kwargs.keys())-set({{slots}}) if illegal: Log.error("{"+"{names}} are not a valid properties", names=illegal) def __getitem__(self, item): return getattr(self, item) def __setitem__(self, item, value): setattr(self, item, value) return self def __setattr__(self, item, value): if item not in {{slots}}: Log.error("{"+"{item|quote}} not valid attribute", item=item) #if not isinstance(value, types_[item]): # Log.error("{"+"{item|quote}} not of type "+"{"+"{type}}", item=item, type=types_[item]) object.__setattr__(self, item, value) def __getattr__(self, item): Log.error("{"+"{item|quote}} not valid attribute", item=item) def __hash__(self): return object.__hash__(self) def __eq__(self, other): if isinstance(other, {{name}}) and dict(self)==dict(other) and self is not other: Log.error("expecting to be same object") return self is other def __dict__(self): return {k: getattr(self, k) for k in {{slots}}} def items(self): return ((k, getattr(self, k)) for k in {{slots}}) def __copy__(self): _set = object.__setattr__ output = object.__new__({{name}}) {{assign}} return output def __iter__(self): return {{slots}}.__iter__() def __len__(self): return {{len_slots}} def __str__(self): return str({{dict}}) temp = {{name}} """, { "name": name, "slots": "(" + (", ".join(convert.value2quote(s) for s in slots)) + ")", "required": "{" + (", ".join(convert.value2quote(s) for s in required)) + "}", "nulls": "{" + (", ".join(convert.value2quote(s) for s in nulls)) + "}", "len_slots": len(slots), "dict": "{" + (", ".join(convert.value2quote(s) + ": self." + s for s in slots)) + "}", "assign": "; ".join( "_set(output, " + convert.value2quote(s) + ", self." + s + ")" for s in slots), "types": "{" + (",".join( convert.string2quote(k) + ": " + v.__name__ for k, v in types.items())) + "}" }) return _exec(code, name)
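# REDUCED SKETCH (stdlib only, hedged): the same code-generation trick on a much
# smaller scale -- render the class source as text, exec it, and return the
# resulting slots-restricted record type.
_class_template = """
class {name}(object):
    __slots__ = {slots}
    def __init__(self, **kwargs):
        for s in {slots}:
            setattr(self, s, kwargs.get(s))
    def __repr__(self):
        return "{name}(" + ", ".join("%s=%r" % (s, getattr(self, s)) for s in {slots}) + ")"
"""

def _data_class(name, slots):
    namespace = {}
    exec(_class_template.format(name=name, slots=tuple(slots)), namespace)
    return namespace[name]

Point = _data_class("Point", ["x", "y"])
_p = Point(x=1, y=2)
# _p.x == 1; setting an attribute outside __slots__ raises AttributeError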
def pe_filter(filter, data, depth): """ PARTIAL EVALUATE THE filter BASED ON data GIVEN """ if filter is TRUE_FILTER: return True if filter is FALSE_FILTER: return False filter = wrap(filter) if filter["and"]: result = True output = DictList() for a in filter[u"and"]: f = pe_filter(a, data, depth) if f is False: result = False elif f is not True: output.append(f) if result and output: return {"and": output} else: return result elif filter["or"]: output = DictList() for o in filter[u"or"]: f = pe_filter(o, data, depth) if f is True: return True elif f is not False: output.append(f) if output: return {"or": output} else: return False elif filter["not"]: f = pe_filter(filter["not"], data, depth) if f is True: return False elif f is False: return True else: return {"not": f} elif filter.term or filter.eq: eq = coalesce(filter.term, filter.eq) result = True output = {} for col, val in eq.items(): first, rest = parse_field(col, data, depth) d = data[first] if not rest: if d != val: result = False else: output[rest] = val if result and output: return {"term": output} else: return result elif filter.equal: a, b = filter["equal"] first_a, rest_a = parse_field(a, data, depth) first_b, rest_b = parse_field(b, data, depth) val_a = data[first_a] val_b = data[first_b] if not rest_a: if not rest_b: if val_a != val_b: return False else: return True else: return {"term": {rest_b: val_a}} else: if not rest_b: return {"term": {rest_a: val_b}} else: return {"equal": [rest_a, rest_b]} elif filter.terms: result = True output = {} for col, vals in filter["terms"].items(): first, rest = parse_field(col, data, depth) d = data[first] if not rest: if d not in vals: result = False else: output[rest] = vals if result and output: return {"terms": output} else: return result elif filter.range: result = True output = {} for col, ranges in filter["range"].items(): first, rest = parse_field(col, data, depth) d = data[first] if not rest: for sign, val in ranges.items(): if sign in ("gt", ">") and d <= val: result = False if sign == "gte" and d < val: result = False if sign == "lte" and d > val: result = False if sign == "lt" and d >= val: result = False else: output[rest] = ranges if result and output: return {"range": output} else: return result elif filter.missing: if isinstance(filter.missing, basestring): field = filter["missing"] else: field = filter["missing"]["field"] first, rest = parse_field(field, data, depth) d = data[first] if not rest: if d == None: return True return False else: return {"missing": rest} elif filter.prefix: result = True output = {} for col, val in filter["prefix"].items(): first, rest = parse_field(col, data, depth) d = data[first] if not rest: if d == None or not d.startswith(val): result = False else: output[rest] = val if result and output: return {"prefix": output} else: return result elif filter.exists: if isinstance(filter["exists"], basestring): field = filter["exists"] else: field = filter["exists"]["field"] first, rest = parse_field(field, data, depth) d = data[first] if not rest: if d != None: return True return False else: return {"exists": rest} else: Log.error(u"Can not interpret esfilter: {{esfilter}}", {u"esfilter": filter})
def table2list( column_names, # tuple of columns names rows # list of tuples ): return wrap([dict(zip(column_names, r)) for r in rows])
def _normalize_edges(edges, schema=None): return wrap([_normalize_edge(e, schema=schema) for e in listwrap(edges)])
def _normalize_select(select, frum, schema=None): """ :param select: ONE SELECT COLUMN :param frum: TABLE TO get_columns() :param schema: SCHEMA TO LOOKUP NAMES FOR DEFINITIONS :return: AN ARRAY OF SELECT COLUMNS """ if not _Column: _late_import() if isinstance(select, basestring): canonical = select = Dict(value=select) else: select = wrap(select) canonical = select.copy() canonical.aggregate = coalesce(canonical_aggregates[select.aggregate].name, select.aggregate, "none") canonical.default = coalesce( select.default, canonical_aggregates[canonical.aggregate].default) if hasattr(frum, "_normalize_select"): return frum._normalize_select(canonical) output = [] if not select.value or select.value == ".": output.extend([ set_default({ "name": c.name, "value": jx_expression(c.name) }, canonical) for c in frum.get_leaves() ]) elif isinstance(select.value, basestring): if select.value.endswith(".*"): base_name = select.value[:-2] canonical.name = coalesce(select.name, base_name, select.aggregate) value = jx_expression(select[:-2]) if not isinstance(value, Variable): Log.error("`*` over general expression not supported yet") output.append([ set_default( { "name": base_name, "value": LeavesOp("leaves", value), "format": "dict" # MARKUP FOR DECODING }, canonical) for c in frum.get_columns() if c.type not in ["object", "nested"] ]) else: output.extend([ set_default( { "name": base_name + "." + literal_field(c.name[len(base_name) + 1:]), "value": jx_expression(c.name) }, canonical) for c in frum.get_leaves() if c.name.startswith(base_name + ".") ]) else: canonical.name = coalesce(select.name, select.value, select.aggregate) canonical.value = jx_expression(select.value) output.append(canonical) output = wrap(output) if any(n == None for n in output.name): Log.error("expecting select to have a name: {{select}}", select=select) return output
def find_holes(db, table_name, column_name, _range, filter=None): """ FIND HOLES IN A DENSE COLUMN OF INTEGERS RETURNS A LIST OF {"min"min, "max":max} OBJECTS """ if not filter: filter = {"match_all": {}} _range = wrap(_range) params = { "min": _range.min, "max": _range.max - 1, "column_name": db.quote_column(column_name), "table_name": db.quote_column(table_name), "filter": esfilter2sqlwhere(db, filter) } min_max = db.query( """ SELECT min({{column_name}}) `min`, max({{column_name}})+1 `max` FROM {{table_name}} a WHERE a.{{column_name}} BETWEEN {{min}} AND {{max}} AND {{filter}} """, params)[0] db.execute("SET @last={{min}}-1", {"min": _range.min}) ranges = db.query( """ SELECT prev_rev+1 `min`, curr_rev `max` FROM ( SELECT a.{{column_name}}-@last diff, @last prev_rev, @last:=a.{{column_name}} curr_rev FROM {{table_name}} a WHERE a.{{column_name}} BETWEEN {{min}} AND {{max}} AND {{filter}} ORDER BY a.{{column_name}} ) a WHERE diff>1 """, params) if ranges: ranges.append({"min": min_max.max, "max": _range.max}) else: if min_max.min: ranges.append({"min": _range.min, "max": min_max.min}) ranges.append({"min": min_max.max, "max": _range.max}) else: ranges.append(_range) return ranges
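# EQUIVALENT SKETCH (plain Python, hedged): what the SQL above computes, done
# client-side on a list of integers; each hole is {"min": first_missing,
# "max": next_present} with an exclusive max, matching the ranges above.
def _find_holes(values, lo, hi):
    holes, prev = [], lo - 1
    for v in sorted(values):
        if v - prev > 1:
            holes.append({"min": prev + 1, "max": v})
        prev = v
    if prev + 1 < hi:
        holes.append({"min": prev + 1, "max": hi})
    return holes

# _find_holes([0, 1, 2, 5, 6, 9], lo=0, hi=12) ->
# [{"min": 3, "max": 5}, {"min": 7, "max": 9}, {"min": 10, "max": 12}]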
def __deepcopy__(self, memo): return wrap(dict.__deepcopy__(self, memo))
def __iter__(self): return (wrap(v) for v in self._data.itervalues())
def items(self): return [(k, wrap(v)) for k, v in dict.items(self) if v != None or isinstance(v, Mapping)]
def _convert_query(self, query): # if not isinstance(query["from"], Container): # Log.error('Expecting from clause to be a Container') query = wrap(query) output = QueryOp("from", None) output["from"] = self._convert_from(query["from"]) output.format = query.format if query.select: output.select = convert_list(self._convert_select, query.select) else: if query.edges or query.groupby: output.select = { "name": "count", "value": ".", "aggregate": "count", "default": 0 } else: output.select = { "name": "__all__", "value": "*", "aggregate": "none" } if query.groupby and query.edges: Log.error( "You can not use both the `groupby` and `edges` clauses in the same query!" ) elif query.edges: output.edges = convert_list(self._convert_edge, query.edges) output.groupby = None elif query.groupby: output.edges = None output.groupby = convert_list(self._convert_group, query.groupby) else: output.edges = [] output.groupby = None output.where = self.convert(query.where) output.window = convert_list(self._convert_window, query.window) output.sort = self._convert_sort(query.sort) output.limit = coalesce(query.limit, DEFAULT_LIMIT) if not Math.is_integer(output.limit) or output.limit < 0: Log.error("Expecting limit >= 0") output.isLean = query.isLean # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN # THE from SOURCE IS. vars = get_all_vars( output, exclude_where=True) # WE WILL EXCLUDE where VARIABLES for c in query.columns: if c.name in vars and c.nested_path: Log.error("This query, with variable {{var_name}} is too deep", var_name=c.name) output.having = convert_list(self._convert_having, query.having) return output
def __deepcopy__(self, memo): d = _get(self, "_dict") return wrap(deepcopy(d, memo))
def add(self, message): message = wrap(message) m = Message() m.set_body(convert.value2json(message)) self.queue.write(m)
def items(self): d = _get(self, "_dict") return [(k, wrap(v)) for k, v in d.items() if v != None or isinstance(v, Mapping)]
def list_aggs(frum, query): frum = wrap(frum) select = listwrap(query.select) for e in query.edges: if isinstance(e.domain, DefaultDomain): accessor = jx_expression_to_function(e.value) unique_values = set(map(accessor, frum)) if None in unique_values: e.allowNulls = coalesce(e.allowNulls, True) unique_values -= {None} e.domain = SimpleSetDomain(partitions=list(sorted(unique_values))) else: pass s_accessors = [(ss.name, compile_expression(ss.value.to_python())) for ss in select] result = { s.name: Matrix( dims=[len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges], zeros=lambda: windows.name2accumulator.get(s.aggregate)(**s) ) for s in select } where = jx_expression_to_function(query.where) coord = [None]*len(query.edges) edge_accessor = [(i, make_accessor(e)) for i, e in enumerate(query.edges)] net_new_edge_names = set(wrap(query.edges).name) - UNION(e.value.vars() for e in query.edges) if net_new_edge_names & UNION(ss.value.vars() for ss in select): # s_accessor NEEDS THESE EDGES, SO WE PASS THEM ANYWAY for d in filter(where, frum): d = d.copy() for c, get_matches in edge_accessor: coord[c] = get_matches(d) for s_name, s_accessor in s_accessors: mat = result[s_name] for c in itertools.product(*coord): acc = mat[c] for e, cc in zip(query.edges, c): d[e.name] = e.domain.partitions[cc] val = s_accessor(d, c, frum) acc.add(val) else: # FASTER for d in filter(where, frum): for c, get_matches in edge_accessor: coord[c] = get_matches(d) for s_name, s_accessor in s_accessors: mat = result[s_name] for c in itertools.product(*coord): acc = mat[c] val = s_accessor(d, c, frum) acc.add(val) for s in select: # if s.aggregate == "count": # continue m = result[s.name] for c, var in m.items(): if var != None: m[c] = var.end() from pyLibrary.queries.containers.cube import Cube output = Cube(select, query.edges, result) return output
zip = ZIP_REQUEST if isinstance(url, unicode): # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE url = url.encode("ascii") _to_ascii_dict(kwargs) timeout = kwargs[b'timeout'] = coalesce(kwargs.get(b'timeout'), default_timeout) if retry == None: retry = Dict(times=1, sleep=0) elif isinstance(retry, Number): retry = Dict(times=retry, sleep=1) else: retry = wrap(retry) if isinstance(retry.sleep, Duration): retry.sleep = retry.sleep.seconds set_default(retry, {"times": 1, "sleep": 0}) if b'json' in kwargs: kwargs[b'data'] = convert.value2json(kwargs[b'json']).encode("utf8") del kwargs[b'json'] try: headers = kwargs[b"headers"] = unwrap( coalesce(wrap(kwargs)[b"headers"], {})) set_default(headers, {b"accept-encoding": b"compress, gzip"}) if zip and len(coalesce(kwargs.get(b"data"))) > 1000: compressed = convert.bytes2zip(kwargs[b"data"])
def assign(source, destination): destination[name] = field.value(wrap(source)) return 0, None
seq = list(sequence) seq.append(row) output.append(seq) return elif new_filter is False: return seq = list(sequence) seq.append(row) for d in primary_branch[depth]: main(seq, new_filter, d, depth + 1) # OUTPUT for i, d in enumerate(data): if isinstance(d, Mapping): main([], esfilter, wrap(d), 0) else: Log.error("filter is expecting a dict, not {{type}}", type=d.__class__) # AT THIS POINT THE primary_column[] IS DETERMINED # USE IT TO EXPAND output TO ALL NESTED OBJECTS max = 0 # EVEN THOUGH A ROW CAN HAVE MANY VALUES, WE ONLY NEED UP TO max for i, n in enumerate(primary_nested): if n: max = i + 1 # OUTPUT IS A LIST OF ROWS, # WHERE EACH ROW IS A LIST OF VALUES SEEN DURING A WALK DOWN A PATH IN THE HIERARCHY uniform_output = DictList()
def get_instance_metadata(): output = wrap({ k.replace("-", "_"): v for k, v in boto_utils.get_instance_metadata().items() }) return output
{"a": "x", "t": Date("today-2day").unix, "v": 3}, {"a": "x", "t": Date("today-3day").unix, "v": 5}, {"a": "x", "t": Date("today-4day").unix, "v": 7}, {"a": "x", "t": Date("today-5day").unix, "v": 11}, {"a": "x", "t": NULL, "v": 27}, {"a": "y", "t": Date("today-day").unix, "v": 13}, {"a": "y", "t": Date("today-2day").unix, "v": 17}, {"a": "y", "t": Date("today-4day").unix, "v": 19}, {"a": "y", "t": Date("today-5day").unix, "v": 23} ] expected_list_1 = wrap([ {"t": (TODAY - WEEK).unix, "v": NULL}, {"t": (TODAY - 6 * DAY).unix, "v": NULL}, {"t": (TODAY - 5 * DAY).unix, "v": 34}, {"t": (TODAY - 4 * DAY).unix, "v": 26}, {"t": (TODAY - 3 * DAY).unix, "v": 5}, {"t": (TODAY - 2 * DAY).unix, "v": 20}, {"t": (TODAY - 1 * DAY).unix, "v": 15}, {"v": 29} ]) expected2 = wrap([ {"a": "x", "t": (TODAY - WEEK).unix, "v": NULL}, {"a": "x", "t": (TODAY - 6 * DAY).unix, "v": NULL}, {"a": "x", "t": (TODAY - 5 * DAY).unix, "v": 11}, {"a": "x", "t": (TODAY - 4 * DAY).unix, "v": 7}, {"a": "x", "t": (TODAY - 3 * DAY).unix, "v": 5}, {"a": "x", "t": (TODAY - 2 * DAY).unix, "v": 3}, {"a": "x", "t": (TODAY - 1 * DAY).unix, "v": 2}, {"a": "x", "v": 29}, {"a": "y", "t": (TODAY - WEEK).unix, "v": NULL},
def as_dict(self): output = wrap({s: getattr(self, s) for s in QueryOp.__slots__}) return output canonical_aggregates = wrap({ "count": { "name": "count", "default": 0 }, "min": { "name": "minimum" }, "max": { "name": "maximum" }, "add": { "name": "sum" }, "avg": { "name": "average" }, "mean": { "name": "average" }, }) def _normalize_selects( selects, frum,
def __init__(self, **desc): Domain.__init__(self, **desc) desc = wrap(desc) self.type = "set" self.order = {} self.NULL = Null self.partitions = DictList() self.primitive = True # True IF DOMAIN IS A PRIMITIVE VALUE SET if isinstance(self.key, set): Log.error("problem") if not desc.key and isinstance(desc.partitions[0], (basestring, Number)): # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS self.key = "value" self.map = {} self.order[None] = len(desc.partitions) for i, p in enumerate(desc.partitions): part = {"name": p, "value": p, "dataIndex": i} self.partitions.append(part) self.map[p] = part self.order[p] = i self.label = coalesce(self.label, "name") self.primitive = True return if desc.partitions and desc.dimension.fields and len( desc.dimension.fields) > 1: self.key = desc.key self.map = UniqueIndex(keys=desc.dimension.fields) elif desc.partitions and isinstance(desc.key, (list, set)): # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE self.key = desc.key self.map = UniqueIndex(keys=desc.key) elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping): self.key = desc.key self.map = UniqueIndex(keys=desc.key) # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions) # self.map = UniqueIndex(keys=self.key) elif len(desc.partitions) == 0: # CREATE AN EMPTY DOMAIN self.key = "value" self.map = {} self.order[None] = 0 self.label = coalesce(self.label, "name") return elif desc.key == None: if desc.partitions and len(set(desc.partitions.value)) == len( desc.partitions): # TRY A COMMON KEY CALLED "value". IT APPEARS UNIQUE self.key = "value" self.map = dict() self.map[None] = self.NULL self.order[None] = len(desc.partitions) for i, p in enumerate(desc.partitions): self.map[p[self.key]] = p self.order[p[self.key]] = i self.primitive = False else: Log.error("Domains must have keys") elif self.key: self.key = desc.key self.map = dict() self.map[None] = self.NULL self.order[None] = len(desc.partitions) for i, p in enumerate(desc.partitions): self.map[p[self.key]] = p self.order[p[self.key]] = i self.primitive = False elif all(p.esfilter for p in self.partitions): # EVERY PART HAS AN esfilter DEFINED, SO USE THEM for i, p in enumerate(self.partitions): p.dataIndex = i else: Log.error("Can not hanldle") self.label = coalesce(self.label, "name") if hasattr(desc.partitions, "__iter__"): self.partitions = list(desc.partitions) else: Log.error("expecting a list of partitions")
def __init__(self, **desc): desc = wrap(desc) self._set_slots_to_none(self.__class__) set_default(self, desc) self.name = coalesce(desc.name, desc.type) self.isFacet = coalesce(desc.isFacet, False)
def _normalize_groupby(groupby, schema=None): if groupby == None: return None return wrap( [_normalize_group(e, schema=schema) for e in listwrap(groupby)])
def buildCondition(mvel, edge, partition): """ RETURN AN ES FILTER OBJECT """ output = {} if edge.domain.isFacet: # MUST USE THIS' esFacet condition = wrap(coalesce(partition.where, {"and": []})) if partition.min and partition.max and is_keyword(edge.value): condition["and"].append({ "range": { edge.value: { "gte": partition.min, "lt": partition.max } } }) # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT) return simplify_esfilter(condition) elif edge.range: # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN # USE MVEL CODE if edge.domain.type in domains.ALGEBRAIC: output = {"and": []} if edge.range.mode and edge.range.mode == "inclusive": # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE if is_keyword(edge.range.min): output["and"].append({ "range": { edge.range.min: { "lt": es09.expressions.value2value(partition.max) } } }) else: # WHOA!! SUPER SLOW!! output["and"].append({ "script": { "script": mvel.compile_expression( edge.range.min + " < " + es09.expressions.value2MVEL(partition.max)) } }) if is_keyword(edge.range.max): output["and"].append({ "or": [{ "missing": { "field": edge.range.max } }, { "range": { edge.range.max, { "gt": es09.expressions.value2value(partition.min) } } }] }) else: # WHOA!! SUPER SLOW!! output["and"].append({ "script": { "script": mvel.compile_expression( edge.range.max + " > " + es09.expressions.value2MVEL(partition.min)) } }) else: # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE if is_keyword(edge.range.min): output["and"].append({ "range": { edge.range.min: { "lte": es09.expressions.value2value(partition.min) } } }) else: # WHOA!! SUPER SLOW!! output["and"].append({ "script": { "script": mvel.compile_expression( edge.range.min + "<=" + es09.expressions.value2MVEL(partition.min)) } }) if is_keyword(edge.range.max): output["and"].append({ "or": [{ "missing": { "field": edge.range.max } }, { "range": { edge.range.max, { "gte": es09.expressions.value2value(partition.min) } } }] }) else: # WHOA!! SUPER SLOW!! 
output["and"].append({ "script": { "script": mvel.compile_expression( es09.expressions.value2MVEL(partition.min) + " <= " + edge.range.max) } }) return output else: Log.error( "Do not know how to handle range query on non-continuous domain" ) elif not edge.value: # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE) return partition.esfilter elif is_keyword(edge.value): # USE FAST ES SYNTAX if edge.domain.type in domains.ALGEBRAIC: output.range = {} output.range[edge.value] = { "gte": es09.expressions.value2query(partition.min), "lt": es09.expressions.value2query(partition.max) } elif edge.domain.type == "set": if partition.value: if partition.value != edge.domain.getKey(partition): Log.error( "please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former" ) # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS output.term = {edge.value: partition.value} else: output.term = {edge.value: edge.domain.getKey(partition)} elif edge.domain.type == "default": output.term = dict() output.term[edge.value] = partition.value else: Log.error("Edge \"" + edge.name + "\" is not supported") return output else: # USE MVEL CODE if edge.domain.type in domains.ALGEBRAIC: output.script = { "script": edge.value + ">=" + es09.expressions.value2MVEL(partition.min) + " and " + edge.value + "<" + es09.expressions.value2MVEL(partition.max) } else: output.script = { "script": "( " + edge.value + " ) ==" + es09.expressions.value2MVEL(partition.value) } code = es09.expressions.addFunctions(output.script.script) output.script.script = code.head + code.body return output