def parse(*args):
    """
    Convert a variety of value types to a Date.

    Accepts a single argument (datetime/date, Date, number, or numeric/date
    text) or multiple arguments forwarded to datetime(*args).  Numeric values
    are treated as unix timestamps in seconds; values too large to be seconds
    are assumed to be milliseconds.

    :param args: value(s) describing a date
    :return: Date instance
    :raises: via Log.error when conversion fails
    """
    try:
        if len(args) == 1:
            a0 = args[0]
            if isinstance(a0, (datetime, date)):
                output = _unix2Date(datetime2unix(a0))
            elif isinstance(a0, Date):
                output = _unix2Date(a0.unix)
            elif isinstance(a0, (int, long, float, Decimal)):
                output = _seconds_or_millis2Date(float(a0))
            elif is_text(a0) and len(a0) in [9, 10, 12, 13] and mo_math.is_integer(a0):
                # TEXT THAT LOOKS LIKE A UNIX TIMESTAMP (SECONDS OR MILLIS)
                output = _seconds_or_millis2Date(float(a0))
            elif is_text(a0):
                output = unicode2Date(a0)
            else:
                output = _unix2Date(datetime2unix(datetime(*args)))
        else:
            if is_text(args[0]):
                # (format_string, value) STYLE PARSING
                output = unicode2Date(*args)
            else:
                output = _unix2Date(datetime2unix(datetime(*args)))

        return output
    except Exception as e:
        from mo_logs import Log

        Log.error("Can not convert {{args}} to Date", args=args, cause=e)


def _seconds_or_millis2Date(value):
    """
    Interpret value as unix seconds, or as milliseconds when it is too big.
    (Extracted: this logic was duplicated in two branches of parse().)
    """
    if value > 9999999999:  # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
        return _unix2Date(value / 1000)
    else:
        return _unix2Date(value)
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
    """
    if sort == None:
        return FlatList.EMPTY

    output = FlatList()
    for clause in listwrap(sort):
        if is_text(clause):
            # BARE NAME: ASCENDING SORT ON THAT EXPRESSION
            output.append({"value": jx_expression(clause), "sort": 1})
        elif is_expression(clause):
            output.append({"value": clause, "sort": 1})
        elif mo_math.is_integer(clause):
            # COLUMN ORDINAL
            output.append({"value": jx_expression({"offset": clause}), "sort": 1})
        elif not clause.sort and not clause.value:
            if all(d in sort_direction for d in clause.values()):
                # {name: direction} FORM
                for name, direction in clause.items():
                    output.append({
                        "value": jx_expression(name),
                        "sort": sort_direction[direction],
                    })
            else:
                Log.error("`sort` clause must have a `value` property")
        else:
            output.append({
                "value": jx_expression(coalesce(clause.value, clause.field)),
                "sort": sort_direction[clause.sort],
            })
    return output
def _convert_query(self, query):
    """
    Normalize a raw (dict-like) query into a QueryOp.
    Validates limit and column depth along the way.
    """
    query = to_data(query)

    output = QueryOp(None)
    output["from"] = self._convert_from(query["from"])
    output.format = query.format

    if query.select:
        output.select = convert_list(self._convert_select, query.select)
    elif query.edges or query.groupby:
        # AGGREGATE QUERIES DEFAULT TO A SIMPLE COUNT
        output.select = {"name": "count", "value": ".", "aggregate": "count", "default": 0}
    else:
        output.select = {"name": "__all__", "value": "*", "aggregate": "none"}

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = convert_list(self._convert_edge, query.edges)
        output.groupby = None
    elif query.groupby:
        output.edges = None
        output.groupby = convert_list(self._convert_group, query.groupby)
    else:
        output.edges = []
        output.groupby = None

    output.where = self.convert(query.where)
    output.window = convert_list(self._convert_window, query.window)
    output.sort = self._convert_sort(query.sort)

    output.limit = coalesce(query.limit, DEFAULT_LIMIT)
    if not mo_math.is_integer(output.limit) or output.limit < 0:
        Log.error("Expecting limit >= 0")

    # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
    # THE from SOURCE IS.
    vars = get_all_vars(output, exclude_where=True)  # WE WILL EXCLUDE where VARIABLES
    for c in query.columns:
        if c.name in vars and len(c.nested_path) != 1:
            Log.error("This query, with variable {{var_name}} is too deep", var_name=c.name)

    return output
def wrap(query, container, namespace):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    :param query: raw query (dict-like) or an already-normalized QueryOp
    :param container: data source; must provide get_table()
    :param namespace: schema namespace (not read in this body)
    :return: QueryOp
    :raises: via Log.error on conflicting clauses or bad limit
    """
    if is_op(query, QueryOp) or query == None:
        return query

    # BUG FIX: was `query = wrap(query)`, which re-invoked THIS 3-argument
    # function with a single argument (TypeError).  The intent is the
    # dict -> Data conversion, done with to_data() elsewhere in this file.
    query = to_data(query)
    table = container.get_table(query['from'])
    schema = table.schema
    output = QueryOp(
        frum=table,
        format=query.format,
        chunk_size=query.chunk_size,
        destination=query.destination,
    )

    _import_temper_limit()
    output.limit = temper_limit(query.limit, query)

    if query.select or is_many(query.select) or is_data(query.select):
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    elif query.edges or query.groupby:
        # AGGREGATE QUERIES GET THE DEFAULT (count) SELECT
        output.select = DEFAULT_SELECT
    else:
        output.select = _normalize_selects(".", query.frum)

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = _normalize_edges(query.edges, limit=output.limit, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, limit=output.limit, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where({"and": listwrap(query.where)}, schema=schema)
    output.window = [_normalize_window(w) for w in listwrap(query.window)]
    output.sort = _normalize_sort(query.sort)

    # limit MAY BE None HERE (temper_limit decides); only validate when set
    if output.limit != None and (not mo_math.is_integer(output.limit) or output.limit < 0):
        Log.error("Expecting limit >= 0")

    return output
def wrap(query, container, namespace):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    :param query: raw query (dict-like) or an already-normalized QueryOp
    :param container: data source; must provide get_table()
    :param namespace: schema namespace (not read in this body)
    :return: QueryOp
    :raises: via Log.error on conflicting clauses or bad limit
    """
    if is_op(query, QueryOp) or query == None:
        return query

    # BUG FIX: was `query = wrap(query)`, which re-invoked THIS 3-argument
    # function with a single argument (TypeError).  The intent is the
    # dict -> Data conversion, done with to_data() elsewhere in this file.
    query = to_data(query)
    table = container.get_table(query['from'])
    schema = table.schema
    output = QueryOp(
        frum=table,
        format=query.format,
        limit=mo_math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT)),
    )

    if query.select or isinstance(query.select, (Mapping, list)):
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    elif query.edges or query.groupby:
        # AGGREGATE QUERIES GET THE DEFAULT (count) SELECT
        output.select = DEFAULT_SELECT
    else:
        output.select = _normalize_selects(".", query.frum)

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = _normalize_edges(query.edges, limit=output.limit, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, limit=output.limit, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where({"and": listwrap(query.where)}, schema=schema)
    output.window = [_normalize_window(w) for w in listwrap(query.window)]
    output.having = None
    output.sort = _normalize_sort(query.sort)

    if not mo_math.is_integer(output.limit) or output.limit < 0:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean

    return output
def scrub_args(args):
    """
    Normalize request arguments: integer-looking values become ints,
    number-looking values become floats, everything else passes through.

    :param args: mapping of argument name to value (or list of values)
    :return: wrapped dict; single-element value lists are unwrapped
    """
    output = {}
    # FIX: the inner loop previously rebound `v` over listwrap(v) — the loop
    # variable shadowed its own source expression; renamed for clarity
    for name, value in list(args.items()):
        scrubbed = []
        for v in listwrap(value):
            if is_integer(v):
                scrubbed.append(int(v))
            elif is_number(v):
                scrubbed.append(float(v))
            else:
                scrubbed.append(v)
        output[name] = unwraplist(scrubbed)
    return wrap(output)
def _convert_query(self, query):
    """
    Normalize a raw (dict-like) query into a QueryOp (legacy form that also
    carries `isLean` and `having`).

    :param query: raw query to convert
    :return: QueryOp with from/select/edges/groupby/where/window/sort/limit/having set
    """
    # if not isinstance(query["from"], Container):
    #     Log.error('Expecting from clause to be a Container')
    # NOTE(review): `wrap` here is assumed to be the mo_dots dict->Data
    # conversion, not a local function of the same name — confirm imports
    query = wrap(query)

    output = QueryOp(None)
    output["from"] = self._convert_from(query["from"])
    output.format = query.format

    if query.select:
        output.select = convert_list(self._convert_select, query.select)
    else:
        # DEFAULT SELECT DEPENDS ON WHETHER THIS IS AN AGGREGATE QUERY
        if query.edges or query.groupby:
            output.select = {"name": "count", "value": ".", "aggregate": "count", "default": 0}
        else:
            output.select = {"name": "__all__", "value": "*", "aggregate": "none"}

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = convert_list(self._convert_edge, query.edges)
        output.groupby = None
    elif query.groupby:
        output.edges = None
        output.groupby = convert_list(self._convert_group, query.groupby)
    else:
        output.edges = []
        output.groupby = None

    output.where = self.convert(query.where)
    output.window = convert_list(self._convert_window, query.window)
    output.sort = self._convert_sort(query.sort)

    output.limit = coalesce(query.limit, DEFAULT_LIMIT)
    if not mo_math.is_integer(output.limit) or output.limit < 0:
        Log.error("Expecting limit >= 0")
    output.isLean = query.isLean

    # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
    # THE from SOURCE IS.
    vars = get_all_vars(output, exclude_where=True)  # WE WILL EXCLUDE where VARIABLES
    for c in query.columns:
        if c.name in vars and len(c.nested_path) != 1:
            Log.error("This query, with variable {{var_name}} is too deep", var_name=c.name)

    output.having = convert_list(self._convert_having, query.having)

    return output
def wrap(query, container, namespace):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    :param query: raw query (dict-like) or an already-normalized QueryOp
    :param container: data source; must provide get_table()
    :param namespace: schema namespace (not read in this body)
    :return: QueryOp
    :raises: via Log.error on conflicting clauses or bad limit
    """
    if is_op(query, QueryOp) or query == None:
        return query

    # BUG FIX: was `query = wrap(query)`, which re-invoked THIS 3-argument
    # function with a single argument (TypeError).  The intent is the
    # dict -> Data conversion, done with to_data() elsewhere in this file.
    query = to_data(query)
    table = container.get_table(query['from'])
    schema = table.schema
    output = QueryOp(
        frum=table,
        format=query.format,
        limit=mo_math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
    )

    if query.select or isinstance(query.select, (Mapping, list)):
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    elif query.edges or query.groupby:
        # AGGREGATE QUERIES GET THE DEFAULT (count) SELECT
        output.select = DEFAULT_SELECT
    else:
        output.select = _normalize_selects(".", query.frum)

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = _normalize_edges(query.edges, limit=output.limit, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, limit=output.limit, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where(query.where, schema=schema)
    output.window = [_normalize_window(w) for w in listwrap(query.window)]
    output.having = None
    output.sort = _normalize_sort(query.sort)

    if not mo_math.is_integer(output.limit) or output.limit < 0:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean

    return output
def normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
    """
    if not sort:
        return Null

    output = FlatList()
    for clause in listwrap(sort):
        if is_text(clause) or mo_math.is_integer(clause):
            # BARE NAME OR ORDINAL: ASCENDING
            output.append({"value": clause, "sort": 1})
        elif not clause.field and not clause.value and clause.sort == None:
            #ASSUME {name: sort} FORM
            for name, direction in clause.items():
                output.append({"value": name, "sort": sort_direction[direction]})
        else:
            direction = coalesce(sort_direction[clause.sort], 1)
            output.append({
                "value": coalesce(clause.field, clause.value),
                "sort": direction,
            })
    return wrap(output)
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    :param sort: text, expression, integer ordinal, {name: direction} map,
                 {value/field, sort} clause, or a list of any of these
    :return: FlatList of {"value": expression, "sort": direction} clauses
    """
    if sort==None:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if is_text(s):
            # BARE NAME: ASCENDING SORT ON THAT EXPRESSION
            output.append({"value": jx_expression(s), "sort": 1})
        elif is_expression(s):
            output.append({"value": s, "sort": 1})
        elif mo_math.is_integer(s):
            # COLUMN ORDINAL
            output.append({"value": jx_expression({"offset": s}), "sort": 1})
        elif not s.sort and not s.value and all(d in sort_direction for d in s.values()):
            # {name: direction} FORM
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": sort_direction[d]})
        elif not s.sort and not s.value:
            Log.error("`sort` clause must have a `value` property")
        else:
            output.append({"value": jx_expression(coalesce(s.value, s.field)), "sort": sort_direction[s.sort]})
    return output
def process(sig_id, show=False, show_limit=MAX_POINTS, show_old=True, show_distribution=None):
    """
    Segment the performance series for one signature and compare the new
    segmentation against the segmentation implied by existing Perfherder
    alerts; record a summary row in summary_table.

    :param sig_id: integer signature id (validated below)
    :param show: when truthy, plot the colored segmentations
    :param show_limit: NOTE(review): not read in this body — confirm intent
    :param show_old: when falsy, skip plotting the OLD segmentation
    :param show_distribution: when truthy, plot a histogram of the last segment
    """
    if not mo_math.is_integer(sig_id):
        Log.error("expecting integer id")
    sig = first(get_signature(config.database, sig_id))
    data = get_dataum(config.database, sig_id)

    # ONLY PUSHES FROM THE LAST THREE MONTHS
    min_date = (Date.today() - 3 * MONTH).unix
    pushes = jx.sort(
        [
            {
                "value": median(rows.value),
                "runs": rows,
                "push": {"time": unwrap(t)["push.time"]},
            }
            for t, rows in jx.groupby(data, "push.time")
            if t["push\\.time"] > min_date
        ],
        "push.time",
    )

    values = pushes.value
    title = "-".join(
        map(
            text,
            [
                sig.id,
                sig.framework,
                sig.suite,
                sig.test,
                sig.platform,
                sig.repository.name,
            ],
        ))
    Log.note("With {{title}}", title=title)

    with Timer("find segments"):
        new_segments, new_diffs = find_segments(values, sig.alert_change_type, sig.alert_threshold)

    # USE PERFHERDER ALERTS TO IDENTIFY OLD SEGMENTS
    # (segment boundaries are push indexes; 0 and len(pushes) bracket the series)
    old_segments = tuple(
        sorted(
            set([
                i for i, p in enumerate(pushes) if any(r.alert.id for r in p.runs)
            ] + [0, len(pushes)])))
    old_medians = [0] + [
        np.median(values[s:e]) for s, e in zip(old_segments[:-1], old_segments[1:])
    ]
    # RELATIVE STEP SIZE BETWEEN CONSECUTIVE SEGMENT MEDIANS
    old_diffs = np.array(
        [b / a - 1 for a, b in zip(old_medians[:-1], old_medians[1:])] + [0])

    if len(new_segments) == 1:
        # NO CHANGE POINTS FOUND; NOTHING TO MEASURE
        dev_status = None
        dev_score = None
        relative_noise = None
    else:
        # MEASURE DEVIANCE (USE THE LAST SEGMENT)
        s, e = new_segments[-2], new_segments[-1]
        last_segment = np.array(values[s:e])
        # TRIM THE IGNORE_TOP SMALLEST AND LARGEST VALUES BEFORE MEASURING
        trimmed_segment = last_segment[np.argsort(last_segment)[IGNORE_TOP:-IGNORE_TOP]]
        dev_status, dev_score = deviance(trimmed_segment)
        relative_noise = np.std(trimmed_segment) / np.mean(trimmed_segment)
        Log.note(
            "\n\tdeviance = {{deviance}}\n\tnoise={{std}}",
            title=title,
            deviance=(dev_status, dev_score),
            std=relative_noise,
        )

        if show_distribution:
            histogram(last_segment, title=dev_status + "=" + text(dev_score))

    max_extra_diff = None
    max_missing_diff = None
    _is_diff = is_diff(new_segments, old_segments)
    if _is_diff:
        # FOR MISSING POINTS, CALC BIGGEST DIFF
        # extra = new boundaries with no old boundary within TOLLERANCE
        max_extra_diff = mo_math.MAX(
            abs(d) for s, d in zip(new_segments, new_diffs)
            if all(not (s - TOLLERANCE <= o <= s + TOLLERANCE) for o in old_segments))
        # missing = old boundaries with no new boundary within TOLLERANCE
        max_missing_diff = mo_math.MAX(
            abs(d) for s, d in zip(old_segments, old_diffs)
            if all(not (s - TOLLERANCE <= n <= s + TOLLERANCE) for n in new_segments))

        Log.alert(
            "Disagree max_extra_diff={{max_extra_diff|round(places=3)}}, max_missing_diff={{max_missing_diff|round(places=3)}}",
            max_extra_diff=max_extra_diff,
            max_missing_diff=max_missing_diff,
        )
        Log.note("old={{old}}, new={{new}}", old=old_segments, new=new_segments)
        if show and len(pushes):
            # `show_old and f(...)` is a short-circuit call: plot OLD only when requested
            show_old and assign_colors(
                values, old_segments, title="OLD " + title)
            assign_colors(values, new_segments, title="NEW " + title)
    else:
        Log.note("Agree")
        if show and len(pushes):
            show_old and assign_colors(
                values, old_segments, title="OLD " + title)
            assign_colors(values, new_segments, title="NEW " + title)

    # RECORD THE COMPARISON SUMMARY FOR THIS SIGNATURE
    summary_table.upsert(
        where={"eq": {
            "id": sig.id
        }},
        doc=Data(
            id=sig.id,
            title=title,
            num_pushes=len(pushes),
            is_diff=_is_diff,
            max_extra_diff=max_extra_diff,
            max_missing_diff=max_missing_diff,
            num_new_segments=len(new_segments),
            num_old_segments=len(old_segments),
            relative_noise=relative_noise,
            dev_status=dev_status,
            dev_score=dev_score,
            last_updated=Date.now(),
        ),
    )
def __init__(self, var):
    """
    :param var: integer value stored on this expression
    """
    Expression.__init__(self, None)
    if not is_integer(var):
        # NOTE(review): assumes Log.error raises, so self.var is only
        # assigned for valid input — confirm against mo_logs behavior
        Log.error("Expecting an integer")
    self.var = var