def get_treeherder_job(self):
    try:
        with Timer("Process Request"):
            args = Dict(**flask.request.args)

            # IS THE branch/revision PENDING?
            result = self.get_markup(
                unwraplist(args.branch),
                unwraplist(args.revision),
                unwraplist(args.task_id),
                unwraplist(args.buildername),
                unwraplist(args.timestamp)
            )

            response_data = convert.unicode2utf8(convert.value2json(result))
            return Response(
                response_data,
                status=200,
                headers={
                    "access-control-allow-origin": "*",
                    "content-type": "text/plain"
                }
            )
    except Exception, e:
        e = Except.wrap(e)
        Log.warning("Could not process", cause=e)
        e = e.as_dict()
        return Response(
            convert.unicode2utf8(convert.value2json(e)),
            status=400,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )

def format_list(T, select, query=None):
    data = []
    if isinstance(query.select, list):
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)
    elif isinstance(query.select.value, LeavesOp):
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)
    else:
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)

    return Dict(
        meta={"format": "list"},
        data=data
    )

def warning(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,  # stack trace offset (==1 if you do not want to report self)
    **more_params
):
    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = extract_stack(stack_depth + 1)

    e = Except(WARNING, template, params, cause, trace)
    Log.note(
        unicode(e),
        {
            "warning": {  # REDUNDANT INFO
                "template": template,
                "params": params,
                "cause": cause,
                "trace": trace
            }
        },
        stack_depth=stack_depth + 1
    )

def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    add_to_trace = False
    cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
    trace = exceptions.extract_stack(stack_depth + 1)

    if add_to_trace:
        cause[0].trace.extend(trace[1:])

    e = Except(exceptions.ERROR, template, params, cause, trace)
    raise e

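# Hedged usage sketch (not part of the original source): how a caller chains
# a cause through Log.error. The settings-file scenario is hypothetical.
import json

def read_settings(filename):
    try:
        with open(filename) as f:
            return json.load(f)
    except Exception, e:
        # `cause=e` is wrapped into the Except chain; {{file}} is filled
        # from the keyword parameters (more_params)
        Log.error("can not read settings from {{file}}", file=filename, cause=e)
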
def _get_schema_from_list(frum, columns, prefix, nested_path):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    names = {}
    for d in frum:
        for name, value in d.items():
            agg_type = names.get(name, "undefined")
            this_type = _type_to_name[value.__class__]
            new_type = _merge_type[agg_type][this_type]
            names[name] = new_type

            if this_type == "object":
                _get_schema_from_list([value], columns, prefix + [name], nested_path)
            elif this_type == "nested":
                np = listwrap(nested_path)
                newpath = unwraplist([".".join((np[0], name))] + np)
                _get_schema_from_list(value, columns, prefix + [name], newpath)

    for n, t in names.items():
        full_name = ".".join(prefix + [n])
        column = Column(
            table=".",
            name=full_name,
            abs_name=full_name,
            type=t,
            nested_path=nested_path
        )
        columns[column.name] = column

def format_table(T, select, query=None):
    data = []
    num_columns = (Math.MAX(select.put.index) + 1)
    for row in T:
        r = [None] * num_columns
        for s in select:
            value = unwraplist(row[s.pull])

            if value == None:
                continue

            index, child = s.put.index, s.put.child
            if child == ".":
                r[index] = value
            else:
                if r[index] is None:
                    r[index] = Dict()
                r[index][child] = value

        data.append(r)

    header = [None] * num_columns
    for s in select:
        if header[s.put.index]:
            continue
        header[s.put.index] = s.name

    return Dict(
        meta={"format": "table"},
        header=header,
        data=data
    )

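# Hedged sketch (hypothetical data): the `select` entries this format_table
# variant expects, inferred from the attribute accesses above. `pull` names
# the field read from each row; `put.index` and `put.child` say where the
# value lands in the output row.
select = wrap([
    {"name": "a", "pull": "f0", "put": {"index": 0, "child": "."}},
    {"name": "b", "pull": "f1", "put": {"index": 1, "child": "."}}
])
T = [{"f0": 1, "f1": 2}]
# format_table(T, select) -> header=["a", "b"], data=[[1, 2]]
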
def format_list(T, select, source):
    data = []
    for row in T:
        r = Dict()
        for s in select:
            if s.value == ".":
                r[s.name] = row[source]
            else:
                if source == "_source":
                    r[s.name] = unwraplist(row[source][s.value])
                elif isinstance(s.value, basestring):  # fields
                    r[s.name] = unwraplist(row[source][literal_field(s.value)])
                else:
                    r[s.name] = unwraplist(row[source][literal_field(s.name)])
        data.append(r)

    return Dict(
        meta={"format": "list"},
        data=data
    )

def warning(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    if "values" in more_params.keys():
        Log.error("Can not handle a logging parameter by name `values`")

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.WARNING, template, params, cause, trace)
    Log.note(
        "{{error|unicode}}",
        error=e,
        log_context=set_default({"context": exceptions.WARNING}, log_context),
        stack_depth=stack_depth + 1
    )

def _get_schema_from_list(frum, columns, prefix, nested_path):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    names = {}
    for d in frum:
        for name, value in d.items():
            agg_type = names.get(name, "undefined")
            this_type = _type_to_name[value.__class__]
            new_type = _merge_type[agg_type][this_type]
            names[name] = new_type

            if this_type == "object":
                _get_schema_from_list([value], columns, prefix + [name], nested_path)
            elif this_type == "nested":
                np = listwrap(nested_path)
                newpath = unwraplist([".".join((np[0], name))] + np)
                _get_schema_from_list(value, columns, prefix + [name], newpath)

    for n, t in names.items():
        full_name = ".".join(prefix + [n])
        column = Column(
            name=full_name,
            table=".",
            es_column=full_name,
            es_index=".",
            type=t,
            nested_path=nested_path
        )
        columns[column.name] = column

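# Hedged sketch (hypothetical documents): scanning a list infers one column
# per dotted path, merging types across rows. Passing nested_path=["."] for
# a top-level table is an assumption, not taken from the original source.
columns = {}
_get_schema_from_list(
    [{"a": 1, "b": {"c": "x"}}, {"a": 2}],
    columns,
    prefix=[],
    nested_path=["."]
)
# columns is now keyed by "a", "b", and "b.c", with types merged per _merge_type
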
def _scrub(record, name):
    value = record[name]
    record[name] = None
    if value == "-" or value == "":
        return None
    else:
        return unwraplist(value)

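# Hedged sketch: _scrub nulls the field on the record and returns the cleaned
# value; "-" and "" are treated as missing.
record = {"size": "-", "name": "x"}
_scrub(record, "size")   # -> None, and record["size"] is now None
_scrub(record, "name")   # -> "x"
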
def _normalize_edge(edge, schema=None):
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, basestring):
        if schema:
            try:
                e = schema[edge]
            except Exception, e:
                e = None
            e = unwraplist(e)
            if e and not isinstance(e, (_Column, set, list)):
                if isinstance(e, _Column):
                    return Dict(
                        name=edge,
                        value=jx_expression(edge),
                        allowNulls=True,
                        domain=_normalize_domain(domain=e, schema=schema)
                    )
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return Dict(
                        name=e.name,
                        value=jx_expression(e.fields[0]),
                        allowNulls=True,
                        domain=e.getDomain()
                    )
                else:
                    return Dict(
                        name=e.name,
                        allowNulls=True,
                        domain=e.getDomain()
                    )
        return Dict(
            name=edge,
            value=jx_expression(edge),
            allowNulls=True,
            domain=_normalize_domain(schema=schema)
        )

def list2cube(rows, column_names=None):
    if column_names:
        keys = column_names
    else:
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    data = {k: [] for k in keys}
    output = wrap({
        "meta": {"format": "cube"},
        "edges": [
            {
                "name": "rownum",
                "domain": {"type": "rownum", "min": 0, "max": len(rows), "interval": 1}
            }
        ],
        "data": data
    })

    for r in rows:
        for k in keys:
            data[k].append(unwraplist(r[k]))

    return output

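# Hedged sketch (hypothetical rows): list2cube pivots row-oriented data into
# one value array per column, indexed by the synthetic "rownum" edge.
rows = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
cube = list2cube(rows, column_names=["a", "b"])
# cube.data -> {"a": [1, 3], "b": [2, 4]}
# cube.edges[0].domain -> {"type": "rownum", "min": 0, "max": 2, "interval": 1}
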
def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too
    """
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    add_to_trace = False
    cause = unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)])
    trace = extract_stack(stack_depth + 1)

    if add_to_trace:
        cause[0].trace.extend(trace[1:])

    e = Except(ERROR, template, params, cause, trace)
    raise e

def get_decoders_by_depth(query):
    """
    RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
    """
    schema = query.frum
    output = DictList()

    for e in wrap(coalesce(query.edges, query.groupby, [])):
        if e.value != None and not isinstance(e.value, NullOp):
            e = e.copy()
            vars_ = e.value.vars()
            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)
            e.value = e.value.map({schema[v].name: schema[v].es_column for v in vars_})
        elif e.range:
            e = e.copy()
            min_ = e.range.min
            max_ = e.range.max
            vars_ = min_.vars() | max_.vars()
            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)
            map_ = {schema[v].name: schema[v].es_column for v in vars_}
            e.range = {
                "min": min_.map(map_),
                "max": max_.map(map_)
            }
        elif e.domain.dimension:
            vars_ = e.domain.dimension.fields
            e.domain.dimension = e.domain.dimension.copy()
            e.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(e.domain.partitions.where):
            vars_ = set()
            for p in e.domain.partitions:
                vars_ |= p.where.vars()

        try:
            depths = set(len(schema[v].nested_path) - 1 for v in vars_)
            if -1 in depths:
                Log.error(
                    "Do not know of column {{column}}",
                    column=unwraplist([v for v in vars_ if schema[v] == None])
                )
            if len(depths) > 1:
                Log.error("expression {{expr}} spans tables, can not handle", expr=e.value)
            max_depth = Math.MAX(depths)
            while len(output) <= max_depth:
                output.append([])
        except Exception, e:
            # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
            max_depth = 0
            output.append([])

        limit = 0
        output[max_depth].append(AggsDecoder(e, query, limit))

    return output

def to_dict(self):
    return wrap({
        "meta": {"format": "list"},
        "data": [
            {k: unwraplist(v) for k, v in row.items()}
            for row in self.data
        ]
    })

def format_cube(T, select, source):
    matricies = {}
    for s in select:
        try:
            if s.value == ".":
                matricies[s.name] = Matrix.wrap(T.select(source))
            elif isinstance(s.value, list):
                matricies[s.name] = Matrix.wrap([
                    tuple(unwraplist(t[source][ss]) for ss in s.value)
                    for t in T
                ])
            else:
                if source == "_source":
                    matricies[s.name] = Matrix.wrap([unwraplist(t[source][s.value]) for t in T])
                elif isinstance(s.value, basestring):  # fields
                    matricies[s.name] = Matrix.wrap([unwraplist(t[source].get(s.value)) for t in T])
                else:
                    matricies[s.name] = Matrix.wrap([unwraplist(t[source].get(s.name)) for t in T])
        except Exception, e:
            Log.error("", e)

def format_list(T, select, query=None):
    data = []
    if isinstance(query.select, list) or (
        isinstance(query.select.value, basestring) and query.select.value.endswith("*")
    ):
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)
    else:
        for row in T:
            r = Dict()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)

    return Dict(
        meta={"format": "list"},
        data=data
    )

def _get_schema_from_list(frum, columns, prefix, nested_path, name_to_column):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            full_name = join_field(prefix)
            column = name_to_column.get(full_name)
            if not column:
                column = Column(
                    name=full_name,
                    table=".",
                    es_column=full_name,
                    es_index=".",
                    type="undefined",
                    nested_path=nested_path
                )
                columns[full_name] = column
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix + [name])
                column = name_to_column.get(full_name)
                if not column:
                    column = Column(
                        name=full_name,
                        table=".",
                        es_column=full_name,
                        es_index=".",
                        type="undefined",
                        nested_path=nested_path
                    )
                    columns[full_name] = column

                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]

                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    _get_schema_from_list([value], columns, prefix + [name], nested_path, name_to_column)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, columns, prefix + [name], newpath, name_to_column)

def format_table(T, select, source):
    header = [s.name for s in select]
    map = {s.name: i for i, s in enumerate(select)}  # MAP FROM name TO COLUMN INDEX
    data = []
    for row in T:
        r = [None] * len(header)
        for s in select:
            if s.value == ".":
                r[map[s.name]] = row[source]
            else:
                if source == "_source":
                    r[map[s.name]] = unwraplist(row[source][s.value])
                elif isinstance(s.value, basestring):  # fields
                    r[map[s.name]] = unwraplist(row[source][literal_field(s.value)])
                else:
                    r[map[s.name]] = unwraplist(row[source][literal_field(s.name)])
        data.append(r)

    return Dict(
        meta={"format": "table"},
        header=header,
        data=data
    )

def list2table(rows, column_names=None):
    if column_names:
        keys = list(set(column_names))
    else:
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    output = [[unwraplist(r[k]) for k in keys] for r in rows]

    return wrap({
        "meta": {"format": "table"},
        "header": keys,
        "data": output
    })

def update(self, command):
    """
    EXPECTING command == {"set":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER
    """
    command = wrap(command)
    schema = self._es.get_schema()

    # GET IDS OF DOCUMENTS
    results = self._es.search({
        "fields": listwrap(schema._routing.path),
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": jx_expression(command.where).to_esfilter()
            }
        },
        "size": 200000
    })

    # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
    scripts = DictList()
    for k, v in command.set.items():
        if not is_keyword(k):
            Log.error("Only support simple paths for now")
        if isinstance(v, Mapping) and v.doc:
            scripts.append({"doc": v.doc})
        else:
            scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()})

    if results.hits.hits:
        updates = []
        for h in results.hits.hits:
            for s in scripts:
                updates.append({
                    "update": {
                        "_id": h._id,
                        "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])
                    }
                })
                updates.append(s)
        content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode("utf-8")
        response = self._es.cluster.post(
            self._es.path + "/_bulk",
            data=content,
            headers={"Content-Type": "application/json"}
        )
        if response.errors:
            Log.error(
                "could not update: {{error}}",
                error=[
                    e.error
                    for i in response["items"]
                    for e in i.values()
                    if e.status not in (200, 201)
                ]
            )

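# Hedged sketch (hypothetical index and field names): the `command` shape
# update() expects, per its docstring: a `set` clause of simple-path
# assignments and a `where` filter expression.
command = {
    "set": {"status": "done"},          # constant assignment only
    "where": {"eq": {"build.id": 123}}  # expression passed to jx_expression()
}
# index.update(command)  # emits one bulk update action pair per matched doc
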
def map_edge(e, map_):
    partitions = unwraplist([
        set_default({"where": p.where.map(map_)}, p)
        for p in e.domain.partitions
    ])

    domain = copy(e.domain)
    domain.where = e.domain.where.map(map_)
    domain.partitions = partitions

    edge = copy(e)
    edge.value = e.value.map(map_)
    edge.domain = domain
    if e.range:
        edge.range.min = e.range.min.map(map_)
        edge.range.max = e.range.max.map(map_)

    return edge

def _convert_edge(self, edge):
    dim = self.dimensions[edge.value]
    if not dim:
        return edge

    if len(listwrap(dim.fields)) == 1:
        # TODO: CHECK IF EDGE DOMAIN AND DIMENSION DOMAIN CONFLICT
        new_edge = set_default({"value": unwraplist(dim.fields)}, edge)
        return new_edge

    edge = copy(edge)
    edge.value = None
    edge.domain = dim.getDomain()
    return edge

def list2table(rows, column_names=None):
    if column_names:
        keys = list(set(column_names))
    else:
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    output = [[unwraplist(r.get(k)) for k in keys] for r in rows]

    return wrap({
        "meta": {"format": "table"},
        "header": keys,
        "data": output
    })

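# Hedged sketch (hypothetical rows): list2table emits header + row arrays.
# This variant reads with r.get(k), so a missing key yields None on plain
# dicts. Note keys pass through set(), so header order is not guaranteed.
rows = [{"a": 1, "b": 2}, {"a": 3}]
table = list2table(rows, column_names=["a", "b"])
# table.header -> ["a", "b"] (possibly reordered)
# table.data   -> [[1, 2], [3, None]] (columns matching the header order)
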
def fatal(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    SEND TO STDERR

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.ERROR, template, params, cause, trace)
    str_e = unicode(e)

    error_mode = cls.error_mode
    try:
        if not error_mode:
            cls.error_mode = True
            Log.note(
                "{{error|unicode}}",
                error=e,
                log_context=set_default({"context": exceptions.FATAL}, log_context),
                stack_depth=stack_depth + 1
            )
    except Exception:
        pass
    cls.error_mode = error_mode

    sys.stderr.write(str_e.encode('utf8'))

def wrap(cls, e, stack_depth=0):
    if e == None:
        return Null
    elif isinstance(e, (list, Except)):
        return e
    elif isinstance(e, Mapping):
        e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
        return Except(**e)
    else:
        if hasattr(e, "message") and e.message:
            cause = Except(ERROR, unicode(e.message), trace=_extract_traceback(0))
        else:
            cause = Except(ERROR, unicode(e), trace=_extract_traceback(0))

        trace = extract_stack(stack_depth + 2)  # +2 = to remove the caller, and its call to this Except.wrap()
        cause.trace.extend(trace)
        return cause

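# Hedged sketch: normalizing an arbitrary exception into an Except, with the
# caller's stack appended so the chain reads as one trace.
try:
    {}["missing"]
except Exception, e:
    e = Except.wrap(e)
    # e is now an Except with type ERROR and a trace that includes this frame;
    # wrapping a Mapping would instead recursively wrap its `cause` list
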
def fatal(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    SEND TO STDERR

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any* more parameters (which will overwrite default_params)
    :return:
    """
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.ERROR, template, params, cause, trace)
    str_e = unicode(e)

    error_mode = cls.error_mode
    with suppress_exception:
        if not error_mode:
            cls.error_mode = True
            Log.note(
                "{{error|unicode}}",
                error=e,
                log_context=set_default({"context": exceptions.FATAL}, log_context),
                stack_depth=stack_depth + 1
            )
    cls.error_mode = error_mode

    sys.stderr.write(str_e.encode('utf8'))

def _get_schema_from_list(frum, columns, prefix, nested_path):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    names = {}
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            agg_type = names.get(".", "undefined")
            names["."] = _merge_type[agg_type][row_type]
        else:
            for name, value in d.items():
                agg_type = names.get(name, "undefined")

                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]

                new_type = _merge_type[agg_type][this_type]
                names[name] = new_type

                if this_type == "object":
                    _get_schema_from_list([value], columns, prefix + [name], nested_path)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, columns, prefix + [name], newpath)

    for n, t in names.items():
        full_name = ".".join(prefix + [n])
        column = Column(
            name=full_name,
            table=".",
            es_column=full_name,
            es_index=".",
            type=t,
            nested_path=nested_path
        )
        columns.append(column)

def fatal(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    **more_params
):
    """
    SEND TO STDERR
    """
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = extract_stack(stack_depth + 1)

    e = Except(ERROR, template, params, cause, trace)
    str_e = unicode(e)

    error_mode = cls.error_mode
    try:
        if not error_mode:
            cls.error_mode = True
            Log.note(
                "{{error}}",
                error=e,
                log_context={"context": WARNING},
                stack_depth=stack_depth + 1
            )
    except Exception:
        pass
    cls.error_mode = error_mode

    sys.stderr.write(str_e)

def warning(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,
    log_context=None,
    **more_params
):
    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = extract_stack(stack_depth + 1)

    e = Except(WARNING, template, params, cause, trace)
    Log.note(
        "{{error|unicode}}",
        error=e,
        log_context=set_default({"context": WARNING}, log_context),
        stack_depth=stack_depth + 1
    )

def _flatten(d, uid, path, nested_path):
    insertion = doc_collection["." if not nested_path else nested_path[0]]
    row = uid.copy()
    insertion.rows.append(row)

    if isinstance(d, Mapping):
        for k, v in d.items():
            cname = join_field(split_field(path) + [k])
            ctype = get_type(v)
            if ctype is None:
                continue

            c = unwraplist([c for c in self.columns.get(cname, Null) if c.type == ctype])
            if not c:
                c = Column(
                    name=cname,
                    table=self.name,
                    type=ctype,
                    es_column=typed_column(cname, ctype),
                    es_index=self.name,
                    nested_path=nested_path
                )
                self.add_column(c)
            insertion.active_columns.add(c)

            if ctype == "nested":
                row[cname] = "."
                deeper = [cname] + listwrap(nested_path)
                insertion = doc_collection.get(cname, None)
                if not insertion:
                    insertion = doc_collection[cname] = Dict(active_columns=set(), rows=[])
                for i, r in enumerate(v):
                    child_uid = set_default({UID_PREFIX + "id" + unicode(len(uid)): i}, uid)
                    _flatten(r, child_uid, cname, deeper)
            elif ctype == "object":
                row[cname] = "."
                # AN OBJECT FLATTENS INTO THE PARENT ROW, SO THE SAME uid APPLIES
                _flatten(v, uid, cname, nested_path)
            elif c.type:
                row[cname] = v
    else:
        k = "."
        v = d
        cname = join_field(split_field(path) + [k])
        ctype = get_type(v)
        if ctype is None:
            return

        c = unwraplist([c for c in self.columns.get(cname, Null) if c.type == ctype])
        if not c:
            c = Column(
                name=cname,
                table=self.name,
                type=ctype,
                es_column=typed_column(cname, ctype),
                es_index=self.name,
                nested_path=nested_path
            )
            self.add_column(c)
        insertion.active_columns.add(c)

        if ctype == "nested":
            row[cname] = "."
            deeper = [cname] + listwrap(nested_path)
            insertion = doc_collection.get(cname, None)
            if not insertion:
                insertion = doc_collection[cname] = Dict(active_columns=set(), rows=[])
            for i, r in enumerate(v):
                child_uid = set_default({UID_PREFIX + "id" + unicode(len(uid)): i}, uid)
                _flatten(r, child_uid, cname, deeper)
        elif ctype == "object":
            row[cname] = "."
            # AN OBJECT FLATTENS INTO THE PARENT ROW, SO THE SAME uid APPLIES
            _flatten(v, uid, cname, nested_path)
        elif c.type:
            row[cname] = v

def parse_properties(parent_index_name, parent_query_path, esProperties):
    """
    RETURN THE COLUMN DEFINITIONS IN THE GIVEN esProperties OBJECT
    """
    from pyLibrary.queries.meta import Column

    columns = DictList()
    for name, property in esProperties.items():
        if parent_query_path:
            index_name, query_path = parent_index_name, join_field(split_field(parent_query_path) + [name])
        else:
            index_name, query_path = parent_index_name, name

        if property.type == "nested" and property.properties:
            # NESTED TYPE IS A NEW TYPE DEFINITION
            # MARKUP CHILD COLUMNS WITH THE EXTRA DEPTH
            self_columns = parse_properties(index_name, query_path, property.properties)
            for c in self_columns:
                c.nested_path = unwraplist([query_path] + listwrap(c.nested_path))
            columns.extend(self_columns)
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="nested",
                nested_path=query_path
            ))
            continue

        if property.properties:
            child_columns = parse_properties(index_name, query_path, property.properties)
            columns.extend(child_columns)
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="source" if property.enabled == False else "object"
            ))

        if property.dynamic:
            continue
        if not property.type:
            continue

        if property.type == "multi_field":
            property.type = property.fields[name].type  # PULL DEFAULT TYPE
            for i, (n, p) in enumerate(property.fields.items()):
                if n == name:
                    # DEFAULT
                    columns.append(Column(
                        table=index_name,
                        es_index=index_name,
                        name=query_path,
                        es_column=query_path,
                        type=p.type
                    ))
                else:
                    columns.append(Column(
                        table=index_name,
                        es_index=index_name,
                        name=query_path + "\\." + n,
                        es_column=query_path + "\\." + n,
                        type=p.type
                    ))
            continue

        if property.type in ["string", "boolean", "integer", "date", "long", "double"]:
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type=property.type
            ))
            if property.index_name and name != property.index_name:
                columns.append(Column(
                    table=index_name,
                    es_index=index_name,
                    es_column=query_path,
                    name=query_path,
                    type=property.type
                ))
        elif property.enabled == None or property.enabled == False:
            columns.append(Column(
                table=index_name,
                es_index=index_name,
                name=query_path,
                es_column=query_path,
                type="source" if property.enabled == False else "object"
            ))
        else:
            Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=query_path)

    return columns

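# Hedged sketch (hypothetical mapping): a nested ES property yields a
# "nested" column plus child columns whose nested_path is extended with the
# parent path.
es_properties = wrap({
    "run": {
        "type": "nested",
        "properties": {"name": {"type": "string"}}
    }
})
cols = parse_properties("unittest", None, es_properties)
# -> a Column for "run.name" (type "string", nested_path "run") and a
#    Column for "run" itself (type "nested")
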
def es_aggsop(es, frum, query):
    select = wrap([s.copy() for s in listwrap(query.select)])
    es_column_map = {c.name: unwraplist(c.es_column) for c in frum.schema.all_columns}

    es_query = Dict()
    new_select = Dict()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            s.pull = "doc_count"
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                if frum.typed:
                    # STATISTICAL AGGS IMPLY $value, WHILE OTHERS CAN BE ANYTHING
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        # TODO: HANDLE BOTH $value AND $objects TO COUNT
                        Log.error("do not know how to handle")
                    else:
                        s.value.var = "$value"
                        new_select["$value"] += [s]
                else:
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        # TODO: WE SHOULD BE ABLE TO COUNT, BUT WE MUST *OR* ALL LEAF VALUES TO DO IT
                        Log.error("do not know how to handle")
                    else:
                        Log.error('Not expecting ES to have a value at "." which {{agg}} can be applied', agg=s.aggregate)
            elif s.aggregate == "count":
                s.value = s.value.map(es_column_map)
                new_select["count_" + literal_field(s.value.var)] += [s]
            else:
                s.value = s.value.map(es_column_map)
                new_select[literal_field(s.value.var)] += [s]
        else:
            formula.append(s)

    for canonical_name, many in new_select.items():
        representative = many[0]
        if representative.value.var == ".":
            Log.error("do not know how to handle")
        else:
            field_name = representative.value.var

        # canonical_name=literal_field(many[0].name)
        for s in many:
            if s.aggregate == "count":
                es_query.aggs[literal_field(canonical_name)].value_count.field = field_name
                s.pull = literal_field(canonical_name) + ".value"
            elif s.aggregate == "median":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = key + ".values.50\.0"
            elif s.aggregate == "percentile":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                if isinstance(s.percentile, basestring) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)
                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = key + ".values." + literal_field(unicode(percent))
            elif s.aggregate == "cardinality":
                # ES USES DIFFERENT METHOD FOR CARDINALITY
                key = literal_field(canonical_name + " cardinality")
                es_query.aggs[key].cardinality.field = field_name
                s.pull = key + ".value"
            elif s.aggregate == "stats":
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = field_name

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + " percentile")
                es_query.aggs[median_name].percentiles.field = field_name
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = {
                    "count": stats_name + ".count",
                    "sum": stats_name + ".sum",
                    "min": stats_name + ".min",
                    "max": stats_name + ".max",
                    "avg": stats_name + ".avg",
                    "sos": stats_name + ".sum_of_squares",
                    "std": stats_name + ".std_deviation",
                    "var": stats_name + ".variance",
                    "median": median_name + ".values.50\.0"
                }
            elif s.aggregate == "union":
                # USE TERMS AGGREGATE TO SIMULATE union
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].terms.field = field_name
                es_query.aggs[stats_name].terms.size = Math.min(s.limit, MAX_LIMIT)
                s.pull = stats_name + ".buckets.key"
            else:
                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = field_name
                s.pull = literal_field(canonical_name) + "." + aggregates1_4[s.aggregate]

    for i, s in enumerate(formula):
        canonical_name = literal_field(s.name)
        abs_value = s.value.map(es_column_map)

        if s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = abs_value.to_ruby()
            s.pull = literal_field(canonical_name) + ".value"
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = key + ".values.50\.0"
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)
            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = key + ".values." + literal_field(unicode(percent))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"
            es_query.aggs[key].cardinality.script = abs_value.to_ruby()
            s.pull = key + ".value"
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = abs_value.to_ruby()

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = abs_value.to_ruby()
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = {
                "count": stats_name + ".count",
                "sum": stats_name + ".sum",
                "min": stats_name + ".min",
                "max": stats_name + ".max",
                "avg": stats_name + ".avg",
                "sos": stats_name + ".sum_of_squares",
                "std": stats_name + ".std_deviation",
                "var": stats_name + ".variance",
                "median": median_name + ".values.50\.0"
            }
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = abs_value.to_ruby()
            s.pull = stats_name + ".buckets.key"
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
            es_query.aggs[canonical_name].extended_stats.script = abs_value.to_ruby()

    decoders = get_decoders_by_depth(query)
    start = 0

    vars_ = query.where.vars()

    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum, map_=es_column_map)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            # TODO: INCLUDE FILTERS ON EDGES
            filter_ = simplify_esfilter(AndOp("and", split_where[1]).to_esfilter())
            es_query = Dict(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {"nested": {"path": frum.query_path}},
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    for d in decoders[0]:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if split_where[0]:
        # TODO: INCLUDE FILTERS ON EDGES
        filter = simplify_esfilter(AndOp("and", split_where[0]).to_esfilter())
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", e)

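# Hedged sketch (hypothetical index and field names): the kind of query
# es_aggsop translates. `percentile` is a 0.0-1.0 float that the code scales
# to ES's 0-100 `percents`; this raw form assumes the query has been through
# the usual QueryOp normalization before reaching es_aggsop.
query = wrap({
    "from": "unittest",
    "select": [
        {"name": "n", "value": ".", "aggregate": "count"},
        {"name": "p90", "value": "run.duration", "aggregate": "percentile", "percentile": 0.9}
    ],
    "format": "cube"
})
# the count of "." pulls "doc_count"; the percentile select becomes a
# `percentiles` agg with percents=[90], read back out of its ".values"
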
def _load_all_in_push(self, revision, locale=None):
    # http://hg.mozilla.org/mozilla-central/json-pushes?full=1&changeset=57c461500a0c
    found_revision = copy(revision)
    if isinstance(found_revision.branch, basestring):
        lower_name = found_revision.branch.lower()
    else:
        lower_name = found_revision.branch.name.lower()

    if not lower_name:
        Log.error("Defective revision? {{rev|json}}", rev=found_revision.branch)

    b = found_revision.branch = self.branches[(lower_name, locale)]
    if not b:
        b = found_revision.branch = self.branches[(lower_name, DEFAULT_LOCALE)]
        if not b:
            Log.error("can not find branch ({{branch}}, {{locale}})", branch=lower_name, locale=locale)

    if Date.now() - Date(b.etl.timestamp) > _OLD_BRANCH:
        self.branches = _hg_branches.get_branches(use_cache=True, settings=self.settings)

    url = found_revision.branch.url.rstrip("/") + "/json-pushes?full=1&changeset=" + found_revision.changeset.id
    Log.note(
        "Reading pushlog for revision ({{branch}}, {{locale}}, {{changeset}}): {{url}}",
        branch=found_revision.branch.name,
        locale=locale,
        changeset=found_revision.changeset.id,
        url=url
    )

    try:
        data = self._get_and_retry(url, found_revision.branch)

        revs = []
        output = None
        for index, _push in data.items():
            push = Push(id=int(index), date=_push.date, user=_push.user)

            for _, ids in qb.groupby(_push.changesets.node, size=200):
                url_param = "&".join("node=" + c[0:12] for c in ids)
                url = found_revision.branch.url.rstrip("/") + "/json-info?" + url_param
                Log.note("Reading details from {{url}}", {"url": url})

                raw_revs = self._get_and_retry(url, found_revision.branch)
                for r in raw_revs.values():
                    rev = Revision(
                        branch=found_revision.branch,
                        index=r.rev,
                        changeset=Changeset(
                            id=r.node,
                            id12=r.node[0:12],
                            author=r.user,
                            description=r.description,
                            date=Date(r.date),
                            files=r.files
                        ),
                        parents=unwraplist(r.parents),
                        children=unwraplist(r.children),
                        push=push,
                        etl={"timestamp": Date.now().unix}
                    )
                    if r.node == found_revision.changeset.id:
                        output = rev
                    if r.node[0:12] == found_revision.changeset.id[0:12]:
                        output = rev
                    _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE)
                    revs.append({"id": _id, "value": rev})
        self.es.extend(revs)
        return output
    except Exception, e:
        Log.error("Problem pulling pushlog from {{url}}", url=url, cause=e)