def _delayed_imports():
    global _ListContainer
    global _meta
    global _containers

    from pyLibrary.queries import meta as _meta
    from pyLibrary.queries.containers.list_usingPythonList import ListContainer as _ListContainer
    from pyLibrary.queries import containers as _containers

    _ = _ListContainer
    _ = _meta
    _ = _containers

    try:
        from pyLibrary.queries.jx_usingMySQL import MySQL
    except Exception:
        MySQL = None

    from pyLibrary.queries.jx_usingES import FromES
    from pyLibrary.queries.meta import FromESMetadata

    set_default(_containers.type2container, {
        "elasticsearch": FromES,
        "mysql": MySQL,
        "memory": None,
        "meta": FromESMetadata
    })

def _range_composer(edge, domain, es_query, to_float):
    # USE RANGES
    _min = coalesce(domain.min, MAX(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if isinstance(edge.value, Variable):
        calc = {"field": edge.value.var}
    else:
        calc = {"script_field": edge.value.to_ruby()}

    if edge.allowNulls:  # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        missing_filter = set_default(
            {"filter": {"or": [
                OrOp("or", [
                    InequalityOp("lt", [edge.value, Literal(None, to_float(_min))]),
                    InequalityOp("gte", [edge.value, Literal(None, to_float(_max))]),
                ]).to_esfilter(),
                edge.value.missing().to_esfilter()
            ]}},
            es_query
        )
    else:
        missing_filter = None

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": missing_filter
    }})

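# A sketch (not from the source) of the aggregation _range_composer emits for
# a two-partition numeric domain on field "age"; set_default() merges the
# "field" calc into the "ranges" spec, and "_missing" (when allowNulls is set)
# catches documents that are out of range or have no value:
#
#   {"aggs": {
#       "_match": {"range": {
#           "field": "age",
#           "ranges": [{"from": 0, "to": 18}, {"from": 18, "to": 65}]
#       }},
#       "_missing": {"filter": {"or": [...]}}
#   }}
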
def __init__(self, **desc):
    desc = wrap(desc)
    self._set_slots_to_null(self.__class__)
    set_default(self, desc)
    self.name = coalesce(desc.name, desc.type)
    self.isFacet = coalesce(desc.isFacet, False)
    self.dimension = Null

def append_query(self, es_query, start):
    self.start = start

    parts = self.edge.domain.partitions
    filters = []
    notty = []

    for p in parts:
        filters.append(AndOp("and", [p.where] + notty).to_esfilter())
        notty.append(NotOp("not", p.where))

    missing_filter = None
    if self.edge.allowNulls:  # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        missing_filter = set_default(
            {"filter": AndOp("and", notty).to_esfilter()},
            es_query
        )

    return wrap({"aggs": {
        "_match": set_default(
            {"filters": {"filters": filters}},
            es_query
        ),
        "_missing": missing_filter
    }})

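# Worked illustration (hypothetical partition predicates w0..w2) of the filter
# chain built above: each partition is ANDed with the negation of all earlier
# partitions, so the buckets stay mutually exclusive even if the predicates
# overlap:
#
#   partition 0:  w0
#   partition 1:  w1 AND NOT w0
#   partition 2:  w2 AND NOT w0 AND NOT w1
#   _missing:     NOT w0 AND NOT w1 AND NOT w2
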
def append_query(self, es_query, start):
    self.start = start

    if not isinstance(self.edge.value, Variable):
        script_field = self.edge.value.to_ruby()
        missing = self.edge.value.missing()

        output = wrap({"aggs": {
            "_match": set_default(
                {"terms": {
                    "script_field": script_field,
                    "size": self.domain.limit
                }},
                es_query
            ),
            "_missing": set_default({"filter": missing.to_esfilter()}, es_query) if missing else None
        }})
        return output

    output = wrap({"aggs": {
        "_match": set_default(
            {"terms": {
                "field": self.edge.value.var,
                "size": self.domain.limit
            }},
            es_query
        ),
        # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        # THE ES "missing" AGG EXPECTS THE FIELD NAME, NOT THE EXPRESSION OBJECT
        "_missing": set_default({"missing": {"field": self.edge.value.var}}, es_query)
    }})
    return output

def _delayed_imports():
    global type2container
    global _ListContainer
    global _Cube
    global _run
    global _Query

    try:
        from pyLibrary.queries.jx_usingMySQL import MySQL as _MySQL
    except Exception:
        _MySQL = None

    from pyLibrary.queries.jx_usingES import FromES as _FromES
    from pyLibrary.queries.containers.list_usingPythonList import ListContainer as _ListContainer
    from pyLibrary.queries.containers.cube import Cube as _Cube
    from pyLibrary.queries.jx import run as _run
    from pyLibrary.queries.query import QueryOp as _Query
    from pyLibrary.queries.containers.list_usingSQLite import Table_usingSQLite

    set_default(type2container, {
        "elasticsearch": _FromES,
        "mysql": _MySQL,
        "sqlite": Table_usingSQLite,
        "memory": None
    })

    _ = _run
    _ = _Query

def es_setop(es, query):
    es_query, filters = es14.util.es_query_template(query.frum.name)
    set_default(filters[0], simplify_esfilter(query.where.to_esfilter()))
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = FlatList()

    return extract_rows(es, es_query, query)

def _convert_clause(self, clause):
    """
    JSON QUERY EXPRESSIONS HAVE MANY CLAUSES WITH SIMILAR COLUMN DECLARATIONS
    """
    if clause == None:
        return None
    elif isinstance(clause, Mapping):
        return set_default({"value": self.convert(clause["value"])}, clause)
    else:
        return [set_default({"value": self.convert(c.value)}, c) for c in clause]

@classmethod
def warning(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    if "values" in more_params.keys():
        Log.error("Can not handle a logging parameter by name `values`")

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.WARNING, template, params, cause, trace)
    Log.note(
        "{{error|unicode}}",
        error=e,
        log_context=set_default({"context": exceptions.WARNING}, log_context),
        stack_depth=stack_depth + 1
    )

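# Usage sketch for the calling convention above; load_config() is a
# hypothetical callable, for illustration only:
#
#   try:
#       config = load_config("settings.json")
#   except Exception as e:
#       Log.warning("can not read {{filename}}", filename="settings.json", cause=e)
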
def send(self, topic, message):
    """Publishes a pulse message to the proper exchange."""
    if not message:
        Log.error("Expecting a message")

    message._prepare()

    if not self.connection:
        self.connect()

    producer = Producer(
        channel=self.connection,
        exchange=Exchange(self.settings.exchange, type='topic'),
        routing_key=topic
    )

    # The message is actually a simple envelope format with a payload and
    # some metadata.
    final_data = Data(
        payload=message.data,
        _meta=set_default({
            'exchange': self.settings.exchange,
            'routing_key': message.routing_key,
            'serializer': self.settings.serializer,
            'sent': time_to_string(datetime.datetime.now(timezone(self.settings.broker_timezone))),
            'count': self.count
        }, message.metadata)
    )

    producer.publish(jsons.scrub(final_data), serializer=self.settings.serializer)
    self.count += 1

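# For reference, a sketch (illustrative values, not from the source) of the
# envelope publish() sends; consumers find the message under "payload" and the
# delivery metadata under "_meta":
#
#   {
#       "payload": {...},                      # message.data
#       "_meta": {
#           "exchange": "exchange/builds",     # hypothetical exchange name
#           "routing_key": "build.success",
#           "serializer": "json",
#           "sent": "2016-01-01T00:00:00",
#           "count": 42
#       }
#   }
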
@classmethod
def alarm(
    cls,
    template,
    default_params={},
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    # FRAME THE TEMPLATE IN A BANNER OF ASTERISKS SO IT STANDS OUT IN THE LOG
    template = ("*" * 80) + "\n" + indent(template, prefix="** ").strip() + "\n" + ("*" * 80)
    Log.note(
        template,
        default_params=default_params,
        stack_depth=stack_depth + 1,
        log_context=set_default({"context": exceptions.ALARM}, log_context),
        **more_params
    )

@classmethod
def unexpected(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    if cause and not isinstance(cause, Except):
        cause = Except(exceptions.UNEXPECTED, unicode(cause), trace=exceptions._extract_traceback(0))

    trace = exceptions.extract_stack(stack_depth + 1)
    e = Except(exceptions.UNEXPECTED, template, params, cause, trace)
    Log.note(
        "{{error}}",
        error=e,
        log_context=set_default({"context": exceptions.UNEXPECTED}, log_context),
        stack_depth=stack_depth + 1
    )

def query(self, _query):
    return self.meta.columns.query(QueryOp(set_default(
        {
            "from": self.meta.columns,
            "sort": ["table", "name"]
        },
        _query.as_dict()
    )))

def _replace_ref(node, url):
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise _Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            new_value = pyDots.get_attr(new_value, ref.fragment)

        if DEBUG:
            _Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            output = new_value
        else:
            output = unwrap(set_default(output, new_value))

        if DEBUG:
            _Log.note("Return {{output}}", output=output)

        return output
    elif isinstance(node, list):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node

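# Illustration (not from the source) of the substitution performed above:
# sibling keys of "$ref" survive, and the referenced document fills in the
# rest, because set_default(output, new_value) keeps existing keys:
#
#   {"db": {"$ref": "file:///common.json", "port": 5433}}
#
# with common.json = {"host": "localhost", "port": 5432} resolves to
#
#   {"db": {"host": "localhost", "port": 5433}}
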
def __init__(self, type=ERROR, template=Null, params=Null, cause=Null, trace=Null, **kwargs):
    Exception.__init__(self)
    self.type = type
    self.template = template
    self.params = set_default(kwargs, params)
    self.cause = cause

    if not trace:
        self.trace = extract_stack(2)
    else:
        self.trace = trace

def append_query(self, es_query, start):
    # TODO: USE "reverse_nested" QUERY TO PULL THESE
    self.start = start
    for i, v in enumerate(self.fields):
        nest = wrap({"aggs": {
            "_match": set_default({"terms": {
                "field": v,
                "size": self.domain.limit
            }}, es_query)
        }})
        if self.edge.allowNulls:
            # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
            nest.aggs._missing = set_default({"missing": {"field": v}}, es_query)
        es_query = nest

    if self.domain.where:
        filter_ = simplify_esfilter(self.domain.where)
        es_query = {"aggs": {"_filter": set_default({"filter": filter_}, es_query)}}

    return es_query

def get_schema(self, retry=True):
    if self.settings.explore_metadata:
        indices = self.cluster.get_metadata().indices
        if not self.settings.alias or self.settings.alias == self.settings.index:
            # PARTIALLY DEFINED settings
            candidates = [(name, i) for name, i in indices.items() if self.settings.index in i.aliases]
            # TODO: MERGE THE mappings OF ALL candidates, DO NOT JUST PICK THE LAST ONE
            index = "dummy value"
            schema = wrap({"_routing": {}, "properties": {}})
            for _, ind in jx.sort(candidates, {"value": 0, "sort": -1}):
                mapping = ind.mappings[self.settings.type]
                set_default(schema._routing, mapping._routing)
                schema.properties = _merge_mapping(schema.properties, mapping.properties)
        else:
            # FULLY DEFINED settings
            index = indices[self.settings.index]
            schema = index.mappings[self.settings.type]

        if index == None and retry:
            # TRY AGAIN, JUST IN CASE
            self.cluster.cluster_state = None
            return self.get_schema(retry=False)

        # TODO: REMOVE THIS BUG CORRECTION
        if not schema and self.settings.type == "test_result":
            schema = index.mappings["test_results"]
        # DONE BUG CORRECTION

        if not schema:
            Log.error(
                "ElasticSearch index ({{index}}) does not have type ({{type}})",
                index=self.settings.index,
                type=self.settings.type
            )
        return schema
    else:
        mapping = self.cluster.get(self.path + "/_mapping")
        if not mapping[self.settings.type]:
            Log.error("{{index}} does not have type {{type}}", self.settings)
        return wrap({"mappings": mapping[self.settings.type]})

def append_query(self, es_query, start):
    self.start = start

    domain = self.domain
    field = self.edge.value

    if isinstance(field, Variable):
        key = domain.key
        if isinstance(key, (tuple, list)) and len(key) == 1:
            key = key[0]
        include = [p[key] for p in domain.partitions]
        if self.edge.allowNulls:
            return wrap({"aggs": {
                "_match": set_default({"terms": {
                    "field": field.var,
                    "size": self.limit,
                    "include": include
                }}, es_query),
                "_missing": set_default(
                    {"filter": {"or": [
                        field.missing().to_esfilter(),
                        {"not": {"terms": {field.var: include}}}
                    ]}},
                    es_query
                ),
            }})
        else:
            return wrap({"aggs": {
                "_match": set_default({"terms": {
                    "field": field.var,
                    "size": self.limit,
                    "include": include
                }}, es_query)
            }})
    else:
        include = [p[domain.key] for p in domain.partitions]
        if self.edge.allowNulls:
            return wrap({"aggs": {
                "_match": set_default({"terms": {
                    "script_field": field.to_ruby(),
                    "size": self.limit,
                    "include": include
                }}, es_query),
                "_missing": set_default(
                    {"filter": {"or": [
                        field.missing().to_esfilter(),
                        NotOp("not", InOp("in", [field, Literal("literal", include)])).to_esfilter()
                    ]}},
                    es_query
                ),
            }})
        else:
            return wrap({"aggs": {
                "_match": set_default({"terms": {
                    "script_field": field.to_ruby(),
                    "size": self.limit,
                    "include": include
                }}, es_query)
            }})

def wrap_from(frum, schema=None):
    """
    :param frum: name of a table, a query (Mapping), or a list of records
    :param schema: schema to apply (optional)
    :return: a Container
    """
    if not _containers:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not _containers.config.default.settings:
            Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

        type_ = None
        index = frum

        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns
            elif frum == "meta.tables":
                return _meta.singlton.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)
        else:
            type_ = _containers.config.default.type
            index = join_field(split_field(frum)[:1:])

        settings = set_default(
            {
                "index": index,
                "name": frum
            },
            _containers.config.default.settings
        )
        settings.type = None
        return _containers.type2container[type_](settings)
    elif isinstance(frum, Mapping) and frum.type and _containers.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return _containers.type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
        from pyLibrary.queries.query import QueryOp
        return QueryOp.wrap(frum, schema=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum

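# Usage sketch (assumed configuration): a dotted name is routed either to the
# metadata singleton or to default storage, using only the first segment as
# the index name:
#
#   table = wrap_from("unittest.run.result")   # container over index "unittest"
#   columns = wrap_from("meta.columns")        # metadata table
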
def _convert_edge(self, edge):
    dim = self.dimensions[edge.value]
    if not dim:
        return edge

    if len(listwrap(dim.fields)) == 1:
        # TODO: CHECK IF EDGE DOMAIN AND DIMENSION DOMAIN CONFLICT
        new_edge = set_default({"value": unwraplist(dim.fields)}, edge)
        return new_edge

    edge = copy(edge)
    edge.value = None
    edge.domain = dim.getDomain()
    return edge

def _replace_locals(node, doc_path):
    if isinstance(node, Mapping):
        # RECURSE, DEEP COPY
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = v
            elif v == None:
                continue
            else:
                output[k] = _replace_locals(v, [v] + doc_path)

        if not ref:
            return output

        # REFER TO SELF
        frag = ref.fragment
        if frag[0] == ".":
            # RELATIVE
            for i, p in enumerate(frag):
                if p != ".":
                    if i > len(doc_path):
                        _Log.error("{{frag|quote}} reaches up past the root document", frag=frag)
                    new_value = pyDots.get_attr(doc_path[i - 1], frag[i::])
                    break
            else:
                new_value = doc_path[len(frag) - 1]
        else:
            # ABSOLUTE
            new_value = pyDots.get_attr(doc_path[-1], frag)

        new_value = _replace_locals(new_value, [new_value] + doc_path)

        if not output:
            return new_value  # OPTIMIZATION FOR CASE WHEN node IS {}
        else:
            return unwrap(set_default(output, new_value))
    elif isinstance(node, list):
        candidate = [_replace_locals(n, [n] + doc_path) for n in node]
        # if all(p[0] is p[1] for p in zip(candidate, node)):
        #     return node
        return candidate

    return node

def index(data, keys=None):
    # return dict that uses keys to index data
    o = Index(keys)

    if isinstance(data, Cube):
        if data.edges[0].name == keys[0]:
            # QUICK PATH
            names = list(data.data.keys())
            for d in (
                set_default(pyDots.zip(names, r), {keys[0]: p})
                for r, p in zip(zip(*data.data.values()), data.edges[0].domain.partitions.value)
            ):
                o.add(d)
            return o
        else:
            Log.error("Can not handle indexing cubes at this time")

    for d in data:
        o.add(d)
    return o

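# Minimal sketch (hypothetical data) of the plain-list path: records are keyed
# by the given fields for fast lookup; the exact lookup API of Index is an
# assumption here:
#
#   people = [{"name": "Ann", "city": "Oslo"}, {"name": "Bo", "city": "Oslo"}]
#   by_city = index(people, keys=["city"])
#   by_city["Oslo"]   # -> both records
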
@classmethod
def note(
    cls,
    template,
    default_params={},
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    if len(template) > 10000:
        template = template[:10000]

    params = dict(unwrap(default_params), **more_params)

    log_params = set_default({
        "template": template,
        "params": params,
        "timestamp": datetime.utcnow(),
        "machine": machine_metadata
    }, log_context, {"context": exceptions.NOTE})

    if not template.startswith("\n") and template.find("\n") > -1:
        template = "\n" + template

    if cls.trace:
        log_template = "{{machine.name}} (pid{{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" ({{location.method}}) - " + template.replace("{{", "{{params.")
        f = sys._getframe(stack_depth + 1)
        log_params.location = {
            "line": f.f_lineno,
            "file": f.f_code.co_filename.split(os.sep)[-1],
            "method": f.f_code.co_name
        }
        thread = _Thread.current()
        log_params.thread = {"name": thread.name, "id": thread.id}
    else:
        log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

    cls.main_log.write(log_template, log_params)

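# Calling convention sketch: placeholders in the template name the keyword
# parameters directly; note() re-prefixes them to {{params.*}} before writing:
#
#   Log.note("processed {{num}} rows in {{duration}} seconds", num=100, duration=2.5)
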
@classmethod
def fatal(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    SEND TO STDERR

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.ERROR, template, params, cause, trace)
    str_e = unicode(e)

    error_mode = cls.error_mode
    with suppress_exception:
        if not error_mode:
            cls.error_mode = True
            Log.note(
                "{{error|unicode}}",
                error=e,
                log_context=set_default({"context": exceptions.FATAL}, log_context),
                stack_depth=stack_depth + 1
            )
    cls.error_mode = error_mode

    sys.stderr.write(str_e.encode('utf8'))

def append_query(self, es_query, start):
    self.start = start

    edge = self.edge
    range_ = edge.range
    domain = edge.domain

    aggs = {}
    for i, p in enumerate(domain.partitions):
        filter_ = AndOp("and", [
            InequalityOp("lte", [range_.min, Literal("literal", self.to_float(p.min))]),
            InequalityOp("gt", [range_.max, Literal("literal", self.to_float(p.min))])
        ])
        aggs["_join_" + unicode(i)] = set_default(
            {"filter": filter_.to_esfilter()},
            es_query
        )

    return wrap({"aggs": aggs})

def map_edge(e, map_):
    partitions = unwraplist([
        set_default(
            {"where": p.where.map(map_)},
            p
        )
        for p in e.domain.partitions
    ])

    domain = copy(e.domain)
    domain.where = e.domain.where.map(map_)
    domain.partitions = partitions

    edge = copy(e)
    edge.value = e.value.map(map_)
    edge.domain = domain
    if e.range:
        edge.range.min = e.range.min.map(map_)
        edge.range.max = e.range.max.map(map_)
    return edge

def new_instance(settings):
    """
    MAKE A PYTHON INSTANCE

    `settings` HAS ALL THE `kwargs`, PLUS `class` ATTRIBUTE TO INDICATE THE CLASS TO CREATE
    """
    settings = set_default({}, settings)
    if not settings["class"]:
        Log.error("Expecting 'class' attribute with fully qualified class name")

    # IMPORT MODULE FOR HANDLER
    path = settings["class"].split(".")
    class_name = path[-1]
    path = ".".join(path[:-1])
    constructor = None
    try:
        temp = __import__(path, globals(), locals(), [class_name], -1)
        constructor = object.__getattribute__(temp, class_name)
    except Exception as e:
        Log.error("Can not find class {{class}}", {"class": path}, cause=e)

    # ASSUMPTION (not in the original snippet): instantiate the class with
    # the remaining settings as keyword arguments
    params = dict(unwrap(settings))
    params.pop("class", None)
    return constructor(**params)

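# Usage sketch (hypothetical class path, for illustration only); everything
# except "class" is assumed to be passed to the constructor:
#
#   queue = new_instance({
#       "class": "myapp.queues.RedisQueue",
#       "host": "localhost",
#       "port": 6379
#   })
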
def new_instance(type, frum, schema=None):
    """
    Factory!
    """
    if not type2container:
        _delayed_imports()

    if isinstance(frum, Container):
        return frum
    elif isinstance(frum, _Cube):
        return frum
    elif isinstance(frum, _Query):
        return _run(frum)
    elif isinstance(frum, (list, set, GeneratorType)):
        return _ListContainer(frum)
    elif isinstance(frum, basestring):
        # USE DEFAULT STORAGE TO FIND Container
        if not config.default.settings:
            Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

        settings = set_default(
            {
                "index": join_field(split_field(frum)[:1:]),
                "name": frum,
            },
            config.default.settings
        )
        settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
        return type2container["elasticsearch"](settings)
    elif isinstance(frum, Mapping):
        frum = wrap(frum)
        if frum.type and type2container[frum.type]:
            return type2container[frum.type](frum.settings)
        elif frum["from"]:
            frum = copy(frum)
            frum["from"] = Container(frum["from"])
            return _Query.wrap(frum)
        else:
            Log.error("Do not know how to handle {{frum|json}}", frum=frum)
    else:
        Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)

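# Dispatch summary (sketch, not from the source); note the `type` parameter is
# accepted but unused in the body shown:
#
#   new_instance(None, [{"a": 1}, {"a": 2}])    # list -> ListContainer
#   new_instance(None, "unittest.run.result")   # string -> default elasticsearch storage
#   new_instance(None, existing_container)      # Container passes through unchanged
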
@classmethod
def alert(
    cls,
    template,
    default_params={},
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    return Log.alarm(
        template,
        default_params=default_params,
        stack_depth=stack_depth + 1,
        log_context=set_default({"context": exceptions.ALARM}, log_context),
        **more_params
    )

def __new__(cls, e=None, query=None, *args, **kwargs):
    e.allowNulls = coalesce(e.allowNulls, True)

    if e.value and e.domain.type == "default":
        if query.groupby:
            return object.__new__(DefaultDecoder, e)

        if isinstance(e.value, basestring):
            Log.error("Expecting Variable or Expression, not plain string")

        if isinstance(e.value, TupleOp):
            # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
            # JUST PULL THE FIELDS
            if not all(isinstance(t, Variable) for t in e.value.terms):
                Log.error("Can only handle variables in tuples")
            e.domain = Data(dimension={"fields": e.value.terms})
            return object.__new__(DimFieldListDecoder, e)
        elif isinstance(e.value, Variable):
            cols = query.frum.get_columns()
            col = cols.filter(lambda c: c.name == e.value.var)[0]
            if not col:
                return object.__new__(DefaultDecoder, e)

            limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)
            if col.partitions != None:
                e.domain = SimpleSetDomain(partitions=col.partitions[:limit:])
            else:
                e.domain = set_default(DefaultDomain(limit=limit), e.domain.as_dict())
                return object.__new__(DefaultDecoder, e)
        else:
            return object.__new__(DefaultDecoder, e)

    if e.value and e.domain.type in PARTITION:
        return object.__new__(SetDecoder, e)
    if isinstance(e.domain.dimension, Dimension):
        e.domain = e.domain.dimension.getDomain()
        return object.__new__(SetDecoder, e)
    if e.value and e.domain.type == "time":
        return object.__new__(TimeDecoder, e)
    if e.range:
        return object.__new__(GeneralRangeDecoder, e)
    if e.value and e.domain.type == "duration":
        return object.__new__(DurationDecoder, e)
    elif e.value and e.domain.type == "range":
        return object.__new__(RangeDecoder, e)
    elif not e.value and e.domain.dimension.fields:
        # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
        # JUST PULL THE FIELDS
        fields = e.domain.dimension.fields
        if isinstance(fields, Mapping):
            Log.error("No longer allowed: All objects are expressions")
        else:
            return object.__new__(DimFieldListDecoder, e)
    elif not e.value and all(e.domain.partitions.where):
        return object.__new__(GeneralSetDecoder, e)
    else:
        Log.error("domain type of {{type}} is not supported yet", type=e.domain.type)