def value2json(value):
    try:
        scrubbed = scrub(value, scrub_number=float)
        return text(_json_encoder(scrubbed))
    except Exception as e:
        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text(repr(value)), cause=e)
        raise e

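# A minimal usage sketch for value2json above; it assumes this module's
# scrub()/_json_encoder are in scope (as they are in mo_json), and that
# scrub() maps non-finite floats to None, which the tests below rely on.
def _demo_value2json():
    doc = {"ok": True, "n": float("nan")}  # NaN is not legal JSON
    print(value2json(doc))
    # e.g. '{"n": null, "ok": true}' (spacing/key order depend on encoder settings)
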
def encode(self, value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        scrubbed = scrub(value)
        return text_type(self.encoder(scrubbed))
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
        raise e

def ujson_encode(value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        scrubbed = scrub(value)
        return ujson_dumps(
            scrubbed,
            ensure_ascii=False,
            sort_keys=True,
            escape_forward_slashes=False
        ).decode('utf8')
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
        raise e

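# For reference, a rough stdlib approximation of ujson_encode above. This is
# an assumption-laden sketch, not part of the original source: stdlib json
# never escapes "/", so escape_forward_slashes has no direct analog, and
# json.dumps already returns text, so no decode step is needed.
import json

def stdlib_json_encode(value, pretty=False):
    if pretty:
        return pretty_json(value)
    scrubbed = scrub(value)  # same pre-pass as ujson_encode
    return json.dumps(scrubbed, ensure_ascii=False, sort_keys=True)
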
def encode(self, value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        scrubbed = scrub(value)
        return unicode(self.encoder.encode(scrubbed))
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=_repr(value), cause=e)
        raise e

def encode(self, value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        with Timer("scrub", too_long=0.1):
            scrubbed = scrub(value)
        with Timer("encode", too_long=0.1):
            return text_type(self.encoder(scrubbed))
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
        raise e

def update(self, command):
    """
    EXPECTING command == {"set":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER
    """
    command = wrap(command)
    schema = self._es.get_properties()

    # GET IDS OF DOCUMENTS
    results = self._es.search({
        "stored_fields": listwrap(schema._routing.path),
        "query": {"bool": {
            "filter": jx_expression(command.where).to_esfilter(Null)
        }},
        "size": 10000
    })

    # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
    scripts = FlatList()
    for k, v in command.set.items():
        if not is_variable_name(k):
            Log.error("Only support simple paths for now")
        if isinstance(v, Mapping) and v.doc:
            scripts.append({"doc": v.doc})
        else:
            v = scrub(v)
            scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_painless(schema).script(schema)})

    if results.hits.hits:
        updates = []
        for h in results.hits.hits:
            for s in scripts:
                updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                updates.append(s)
        content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode('utf-8')
        response = self._es.cluster.post(
            self._es.path + "/_bulk",
            data=content,
            headers={"Content-Type": "application/json"},
            timeout=self.settings.timeout,
            params={"wait_for_active_shards": self.settings.wait_for_active_shards}
        )
        if response.errors:
            Log.error(
                "could not update: {{error}}",
                error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)]
            )

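# Illustrative command for update() above (field names and values are
# hypothetical): the "set" clause maps property names to constants, and
# "where" is a JSON expression that to_esfilter() compiles to an ES filter.
example_command = {
    "set": {"status": "done"},           # becomes ctx._source.status = "done"
    "where": {"eq": {"build.id": 1234}}  # selects which documents to touch
}
# index.update(example_command)
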
def update(self, command):
    """
    EXPECTING command == {"set":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER
    """
    command = wrap(command)
    schema = self.es.get_properties()

    # GET IDS OF DOCUMENTS
    results = self.es.search({
        "fields": listwrap(schema._routing.path),
        "query": {"filtered": {
            "filter": jx_expression(command.where).to_esfilter(Null)
        }},
        "size": 10000
    })

    # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
    scripts = FlatList()
    for k, v in command.set.items():
        if not is_variable_name(k):
            Log.error("Only support simple paths for now")
        if isinstance(v, Mapping) and v.doc:
            scripts.append({"doc": v.doc})
        else:
            v = scrub(v)
            scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_es_script(schema).script(schema)})

    if results.hits.hits:
        updates = []
        for h in results.hits.hits:
            for s in scripts:
                updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                updates.append(s)
        content = ("\n".join(value2json(c) for c in updates) + "\n")
        response = self.es.cluster.post(
            self.es.path + "/_bulk",
            data=content,
            headers={"Content-Type": "application/json"},
            timeout=self.settings.timeout,
            params={"wait_for_active_shards": self.settings.wait_for_active_shards}
        )
        if response.errors:
            Log.error(
                "could not update: {{error}}",
                error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)]
            )

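# Shape of the _bulk body both update() variants emit, shown as newline-
# delimited JSON (the values here are hypothetical); each "update" action
# line is immediately followed by its script (or doc) line:
#
#   {"update": {"_id": "abc123", "_routing": "shard-7"}}
#   {"script": "ctx._source.status = \"done\""}
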
def test_json(results, description, method, n):
    output = []
    for case in cases:
        try:
            data, count = globals()[case]
            if "scrub" in description:
                # SCRUB BEFORE SENDING TO C ROUTINE (NOT FAIR, BUT WE GET TO SEE HOW FAST ENCODING GOES)
                data = unwrap(scrub(data))

            try:
                example = method(data)
                if case == "HUGE":
                    example = "<too big to show>"
            except Exception as e:
                Log.warning(u"json encoding failure", cause=e)
                example = "<CRASH>"

            t0 = time.time()
            try:
                for i in range(n):
                    for i in range(count):
                        output.append(method(data))
                duration = time.time() - t0
            except Exception:
                duration = time.time() - t0

            summary = {
                "description": description,
                "interpreter": platform.python_implementation(),
                "time": duration,
                "type": case,
                "num": n,
                "count": count,
                "length": len(output),
                "result": example
            }
            Log.note(
                u"using {{interpreter}}: {{description}} {{type}} x {{num}} x {{count}} = {{time}} result={{result}}",
                **summary
            )
            results.append(summary)
        except Exception as e:
            Log.warning(u"problem with encoding: {{message}}", {"message": e.message}, e)

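# Hedged driver sketch for test_json above. The fixtures are hypothetical:
# test_json looks each name in `cases` up in its module's globals() and
# expects a (sample_document, inner_repeat_count) pair.
import json

cases = ["SMALL"]
SMALL = ({"a": 1, "b": [1, 2, 3]}, 100)

if __name__ == "__main__":
    results = []
    test_json(results, "stdlib json", json.dumps, n=5)
    test_json(results, "stdlib json with scrub", json.dumps, n=5)  # "scrub" in description triggers the pre-pass
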
def encode(self, value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        with Timer("scrub", too_long=0.1):
            scrubbed = scrub(value)
        param = {"size": 0}
        with Timer("encode {{size}} characters", param=param, too_long=0.1):
            output = text_type(self.encoder(scrubbed))
            param["size"] = len(output)
            return output
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
        raise e

def query(self, query):
    """
    :param query: JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
    :return:
    """
    if not startswith_field(query['from'], self.name):
        Log.error("Expecting table, or some nested table")
    frum, query['from'] = query['from'], self
    query = QueryOp.wrap(query, self.columns)

    # TYPE CONFLICTS MUST NOW BE RESOLVED DURING
    # TYPE-SPECIFIC QUERY NORMALIZATION
    # vars_ = query.vars(exclude_select=True)
    # type_map = {
    #     v: c.es_column
    #     for v in vars_
    #     if v in self.columns and len([c for c in self.columns[v] if c.type != "nested"]) == 1
    #     for c in self.columns[v]
    #     if c.type != "nested"
    # }
    #
    # sql_query = query.map(type_map)
    query = query

    new_table = "temp_" + unique_name()
    if query.format == "container":
        create_table = "CREATE TABLE " + quote_table(new_table) + " AS "
    else:
        create_table = ""

    if query.groupby:
        op, index_to_columns = self._groupby_op(query, frum)
        command = create_table + op
    elif query.edges or any(a != "none" for a in listwrap(query.select).aggregate):
        op, index_to_columns = self._edges_op(query, frum)
        command = create_table + op
    else:
        op = self._set_op(query, frum)
        return op

    if query.sort:
        command += "\nORDER BY " + ",\n".join(
            "(" + sql[t] + ") IS NULL" + (" DESC" if s.sort == -1 else "") + ",\n" + sql[t] + (" DESC" if s.sort == -1 else "")
            for s, sql in [(s, s.value.to_sql(self)[0].sql) for s in query.sort]
            for t in "bns" if sql[t]
        )

    result = self.db.query(command)

    column_names = query.edges.name + query.groupby.name + listwrap(query.select).name
    if query.format == "container":
        output = QueryTable(new_table, db=self.db, uid=self.uid, exists=True)
    elif query.format == "cube" or (not query.format and query.edges):
        if len(query.edges) == 0 and len(query.groupby) == 0:
            data = {n: Data() for n in column_names}
            for s in index_to_columns.values():
                data[s.push_name][s.push_child] = unwrap(s.pull(result.data[0]))
            return Data(data=unwrap(data), meta={"format": "cube"})

        if not result.data:
            edges = []
            dims = []
            for i, e in enumerate(query.edges + query.groupby):
                allowNulls = coalesce(e.allowNulls, True)

                if e.domain.type == "set" and e.domain.partitions:
                    domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                elif e.domain.type == "range":
                    domain = e.domain
                elif isinstance(e.value, TupleOp):
                    pulls = jx.sort(
                        [c for c in index_to_columns.values() if c.push_name == e.name],
                        "push_child"
                    ).pull
                    parts = [tuple(p(d) for p in pulls) for d in result.data]
                    domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                else:
                    domain = SimpleSetDomain(partitions=[])

                dims.append(1 if allowNulls else 0)
                edges.append(Data(name=e.name, allowNulls=allowNulls, domain=domain))

            zeros = [
                0 if s.aggregate == "count" and index_to_columns[si].push_child == "." else Data
                for si, s in enumerate(listwrap(query.select))
            ]
            data = {
                s.name: Matrix(dims=dims, zeros=zeros[si])
                for si, s in enumerate(listwrap(query.select))
            }

            if isinstance(query.select, list):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(
                meta={"format": "cube"},
                edges=edges,
                select=select,
                data={k: v.cube for k, v in data.items()}
            )

        columns = None

        edges = []
        dims = []
        for g in query.groupby:
            g.is_groupby = True

        for i, e in enumerate(query.edges + query.groupby):
            allowNulls = coalesce(e.allowNulls, True)

            if e.domain.type == "set" and e.domain.partitions:
                domain = SimpleSetDomain(partitions=e.domain.partitions.name)
            elif e.domain.type == "range":
                domain = e.domain
            elif e.domain.type == "time":
                domain = wrap(mo_json.scrub(e.domain))
            elif e.domain.type == "duration":
                domain = wrap(mo_json.scrub(e.domain))
            elif isinstance(e.value, TupleOp):
                pulls = jx.sort(
                    [c for c in index_to_columns.values() if c.push_name == e.name],
                    "push_child"
                ).pull
                parts = [tuple(p(d) for p in pulls) for d in result.data]
                domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
            else:
                if not columns:
                    columns = zip(*result.data)
                parts = set(columns[i])
                if e.is_groupby and None in parts:
                    allowNulls = True
                parts -= {None}
                domain = SimpleSetDomain(partitions=jx.sort(parts))

            dims.append(len(domain.partitions) + (1 if allowNulls else 0))
            edges.append(Data(name=e.name, allowNulls=allowNulls, domain=domain))

        zeros = [
            0 if s.aggregate == "count" and index_to_columns[si].push_child == "." else Data
            for si, s in enumerate(listwrap(query.select))
        ]
        data_cubes = {
            s.name: Matrix(dims=dims, zeros=zeros[si])
            for si, s in enumerate(listwrap(query.select))
        }
        r2c = index_to_coordinate(dims)  # WORKS BECAUSE THE DATABASE SORTED THE EDGES TO CONFORM

        for rownum, row in enumerate(result.data):
            coord = r2c(rownum)
            for i, s in enumerate(index_to_columns.values()):
                if s.is_edge:
                    continue
                if s.push_child == ".":
                    data_cubes[s.push_name][coord] = s.pull(row)
                else:
                    data_cubes[s.push_name][coord][s.push_child] = s.pull(row)

        if isinstance(query.select, list):
            select = [{"name": s.name} for s in query.select]
        else:
            select = {"name": query.select.name}

        return Data(
            meta={"format": "cube"},
            edges=edges,
            select=select,
            data={k: v.cube for k, v in data_cubes.items()}
        )
    elif query.format == "table" or (not query.format and query.groupby):
        data = []
        for d in result.data:
            row = [None for _ in column_names]
            for s in index_to_columns.values():
                if s.push_child == ".":
                    row[s.push_column] = s.pull(d)
                elif s.num_push_columns:
                    tuple_value = row[s.push_column]
                    if tuple_value == None:
                        tuple_value = row[s.push_column] = [None] * s.num_push_columns
                    tuple_value[s.push_child] = s.pull(d)
                elif row[s.push_column] == None:
                    row[s.push_column] = Data()
                    row[s.push_column][s.push_child] = s.pull(d)
                else:
                    row[s.push_column][s.push_child] = s.pull(d)
            data.append(tuple(unwrap(r) for r in row))

        output = Data(meta={"format": "table"}, header=column_names, data=data)
    elif query.format == "list" or (not query.edges and not query.groupby):
        if not query.edges and not query.groupby and any(listwrap(query.select).aggregate):
            if isinstance(query.select, list):
                data = Data()
                for c in index_to_columns.values():
                    if c.push_child == ".":
                        data[c.push_name] = c.pull(result.data[0])
                    else:
                        data[c.push_name][c.push_child] = c.pull(result.data[0])
                output = Data(meta={"format": "value"}, data=data)
            else:
                data = Data()
                for s in index_to_columns.values():
                    data[s.push_child] = s.pull(result.data[0])
                output = Data(meta={"format": "value"}, data=unwrap(data))
        else:
            data = []
            for rownum in result.data:
                row = Data()
                for c in index_to_columns.values():
                    if c.push_child == ".":
                        row[c.push_name] = c.pull(rownum)
                    elif c.num_push_columns:
                        tuple_value = row[c.push_name]
                        if not tuple_value:
                            tuple_value = row[c.push_name] = [None] * c.num_push_columns
                        tuple_value[c.push_child] = c.pull(rownum)
                    else:
                        row[c.push_name][c.push_child] = c.pull(rownum)
                data.append(row)
            output = Data(meta={"format": "list"}, data=data)
    else:
        Log.error("unknown format {{format}}", format=query.format)

    return output

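# Illustrative JSON query expression for the query() method above (the table
# and field names are hypothetical); format="table" exercises the tabular
# branch, while format="container" would materialize a new temp table:
example_query = {
    "from": "task",
    "groupby": ["status"],
    "select": {"name": "n", "aggregate": "count"},
    "format": "table"
}
# table.query(example_query)
# -> Data(meta={"format": "table"}, header=[...], data=[(...), ...])
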
def pretty_json(value):
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif isinstance(value, Mapping):
            try:
                items = sort_using_key(list(value.items()), lambda r: r[0])
                values = [
                    encode_basestring(k) + PRETTY_COLON + indent(pretty_json(v)).strip()
                    for k, v in items
                    if v != None
                ]
                if not values:
                    return "{}"
                elif len(values) == 1:
                    return "{" + values[0] + "}"
                else:
                    return "{\n" + INDENT + (",\n" + INDENT).join(values) + "\n}"
            except Exception as e:
                from mo_logs import Log
                from mo_math import OR

                if OR(not isinstance(k, text_type) for k in value.keys()):
                    Log.error(
                        "JSON must have string keys: {{keys}}:",
                        keys=[k for k in value.keys()],
                        cause=e
                    )
                Log.error(
                    "problem making dict pretty: keys={{keys}}:",
                    keys=[k for k in value.keys()],
                    cause=e
                )
        elif value in (None, Null):
            return "null"
        elif isinstance(value, (text_type, binary_type)):
            if isinstance(value, binary_type):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception as e:
                from mo_logs import Log

                try:
                    Log.note("try explicit convert of string with length {{length}}", length=len(value))
                    acc = [QUOTE]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = text_type(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string. Ignored.", ord=ord(c), cause=g)
                    acc.append(QUOTE)
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}", length=len(output))
                    return output
                except BaseException as f:
                    Log.warning("can not even explicit convert {{type}}", type=f.__class__.__name__, cause=f)
                    return "null"
        elif isinstance(value, list):
            if not value:
                return "[]"

            if ARRAY_MAX_COLUMNS == 1:
                return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"

            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"

            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if max_len <= ARRAY_ITEM_MAX_LENGTH and max(*[j.find("\n") for j in js]) == -1:
                # ALL TINY VALUES
                num_columns = max(
                    1,
                    min(
                        ARRAY_MAX_COLUMNS,
                        int(floor((ARRAY_ROW_LENGTH + 2.0) / float(max_len + 2)))  # +2 TO COMPENSATE FOR COMMAS
                    )
                )
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + PRETTY_COMMA.join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"

                content = ",\n".join(
                    PRETTY_COMMA.join(j.rjust(max_len) for j in js[r:r + num_columns])
                    for r in xrange(0, len(js), num_columns)
                )
                return "[\n" + indent(content) + "\n]"

            pretty_list = js

            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log

                    Log.warning(
                        "problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p)
                    )
            output.append("\n]")

            try:
                return "".join(output)
            except Exception as e:
                from mo_logs import Log

                Log.error("not expected", cause=e)
        elif hasattr(value, '__data__'):
            d = value.__data__()
            return pretty_json(d)
        elif hasattr(value, '__json__'):
            j = value.__json__()
            if j == None:
                return " null "  # TODO: FIND OUT WHAT CAUSES THIS
            return pretty_json(json_decoder(j))
        elif scrub(value) is None:
            return "null"
        elif hasattr(value, '__iter__'):
            return pretty_json(list(value))
        elif hasattr(value, '__call__'):
            return "null"
        else:
            try:
                if int(value) == value:
                    return text_type(int(value))
            except Exception:
                pass

            try:
                if float(value) == value:
                    return text_type(float(value))
            except Exception:
                pass

            return pypy_json_encode(value)
    except Exception as e:
        problem_serializing(value, e)

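# A sketch of pretty_json's output conventions (the exact line-wrapping
# depends on the ARRAY_MAX_COLUMNS / ARRAY_ITEM_MAX_LENGTH / ARRAY_ROW_LENGTH
# tunables in this module):
#
#     pretty_json({"b": [1, 2, 3], "a": "x"})
#     # keys come out sorted, and short arrays stay on one line, e.g.:
#     # {
#     #     "a": "x",
#     #     "b": [1, 2, 3]
#     # }
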
def query(self, query):
    """
    :param query: JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
    :return:
    """
    if not startswith_field(query['from'], self.sf.fact):
        Log.error("Expecting table, or some nested table")
    frum, query['from'] = query['from'], self
    table = self.sf.tables[relative_field(frum, self.sf.fact)]
    schema = table.schema
    query = QueryOp.wrap(query, table=table, schema=schema)

    new_table = "temp_" + unique_name()
    if query.format == "container":
        create_table = "CREATE TABLE " + quote_column(new_table) + " AS "
    else:
        create_table = ""

    if query.groupby and query.format != "cube":
        op, index_to_columns = self._groupby_op(query, frum)
        command = create_table + op
    elif query.groupby:
        query.edges, query.groupby = query.groupby, query.edges
        op, index_to_columns = self._edges_op(query, frum)
        command = create_table + op
        query.edges, query.groupby = query.groupby, query.edges
    elif query.edges or any(a != "none" for a in listwrap(query.select).aggregate):
        op, index_to_columns = self._edges_op(query, frum)
        command = create_table + op
    else:
        op = self._set_op(query, frum)
        return op

    result = self.db.query(command)

    if query.format == "container":
        output = QueryTable(new_table, db=self.db, uid=self.uid, exists=True)
    elif query.format == "cube" or (not query.format and query.edges):
        column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
        for c in index_to_columns.values():
            column_names[c.push_column] = c.push_column_name

        if len(query.edges) == 0 and len(query.groupby) == 0:
            data = {n: Data() for n in column_names}
            for s in index_to_columns.values():
                data[s.push_name][s.push_child] = unwrap(s.pull(result.data[0]))
            if isinstance(query.select, list):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}
            return Data(
                data=unwrap(data),
                select=select,
                meta={"format": "cube"}
            )

        if not result.data:
            edges = []
            dims = []
            for i, e in enumerate(query.edges + query.groupby):
                allowNulls = coalesce(e.allowNulls, True)

                if e.domain.type == "set" and e.domain.partitions:
                    domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                elif e.domain.type == "range":
                    domain = e.domain
                elif isinstance(e.value, TupleOp):
                    pulls = jx.sort(
                        [c for c in index_to_columns.values() if c.push_name == e.name],
                        "push_child"
                    ).pull
                    parts = [tuple(p(d) for p in pulls) for d in result.data]
                    domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                else:
                    domain = SimpleSetDomain(partitions=[])

                dims.append(1 if allowNulls else 0)
                edges.append(Data(
                    name=e.name,
                    allowNulls=allowNulls,
                    domain=domain
                ))

            data = {}
            for si, s in enumerate(listwrap(query.select)):
                if s.aggregate == "count":
                    data[s.name] = Matrix(dims=dims, zeros=0)
                else:
                    data[s.name] = Matrix(dims=dims)

            if isinstance(query.select, list):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(
                meta={"format": "cube"},
                edges=edges,
                select=select,
                data={k: v.cube for k, v in data.items()}
            )

        columns = None

        edges = []
        dims = []
        for g in query.groupby:
            g.is_groupby = True

        for i, e in enumerate(query.edges + query.groupby):
            allowNulls = coalesce(e.allowNulls, True)

            if e.domain.type == "set" and e.domain.partitions:
                domain = SimpleSetDomain(partitions=e.domain.partitions.name)
            elif e.domain.type == "range":
                domain = e.domain
            elif e.domain.type == "time":
                domain = wrap(mo_json.scrub(e.domain))
            elif e.domain.type == "duration":
                domain = wrap(mo_json.scrub(e.domain))
            elif isinstance(e.value, TupleOp):
                pulls = jx.sort(
                    [c for c in index_to_columns.values() if c.push_name == e.name],
                    "push_child"
                ).pull
                parts = [tuple(p(d) for p in pulls) for d in result.data]
                domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
            else:
                if not columns:
                    columns = zip(*result.data)
                parts = set(columns[i])
                if e.is_groupby and None in parts:
                    allowNulls = True
                parts -= {None}

                if query.sort[i].sort == -1:
                    domain = SimpleSetDomain(partitions=wrap(sorted(parts, reverse=True)))
                else:
                    domain = SimpleSetDomain(partitions=jx.sort(parts))

            dims.append(len(domain.partitions) + (1 if allowNulls else 0))
            edges.append(Data(
                name=e.name,
                allowNulls=allowNulls,
                domain=domain
            ))

        data_cubes = {}
        for si, s in enumerate(listwrap(query.select)):
            if s.aggregate == "count":
                data_cubes[s.name] = Matrix(dims=dims, zeros=0)
            else:
                data_cubes[s.name] = Matrix(dims=dims)

        r2c = index_to_coordinate(dims)  # WORKS BECAUSE THE DATABASE SORTED THE EDGES TO CONFORM

        for rownum, row in enumerate(result.data):
            coord = r2c(rownum)
            for i, s in enumerate(index_to_columns.values()):
                if s.is_edge:
                    continue
                if s.push_child == ".":
                    data_cubes[s.push_name][coord] = s.pull(row)
                else:
                    data_cubes[s.push_name][coord][s.push_child] = s.pull(row)

        if query.select == None:
            select = Null
        elif isinstance(query.select, list):
            select = [{"name": s.name} for s in query.select]
        else:
            select = {"name": query.select.name}

        return Data(
            meta={"format": "cube"},
            edges=edges,
            select=select,
            data={k: v.cube for k, v in data_cubes.items()}
        )
    elif query.format == "table" or (not query.format and query.groupby):
        column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
        for c in index_to_columns.values():
            column_names[c.push_column] = c.push_column_name

        data = []
        for d in result.data:
            row = [None for _ in column_names]
            for s in index_to_columns.values():
                if s.push_child == ".":
                    row[s.push_column] = s.pull(d)
                elif s.num_push_columns:
                    tuple_value = row[s.push_column]
                    if tuple_value == None:
                        tuple_value = row[s.push_column] = [None] * s.num_push_columns
                    tuple_value[s.push_child] = s.pull(d)
                elif row[s.push_column] == None:
                    row[s.push_column] = Data()
                    row[s.push_column][s.push_child] = s.pull(d)
                else:
                    row[s.push_column][s.push_child] = s.pull(d)
            data.append(tuple(unwrap(r) for r in row))

        output = Data(
            meta={"format": "table"},
            header=column_names,
            data=data
        )
    elif query.format == "list" or (not query.edges and not query.groupby):
        if not query.edges and not query.groupby and any(listwrap(query.select).aggregate):
            if isinstance(query.select, list):
                data = Data()
                for c in index_to_columns.values():
                    if c.push_child == ".":
                        if data[c.push_name] == None:
                            data[c.push_name] = c.pull(result.data[0])
                        elif isinstance(data[c.push_name], list):
                            data[c.push_name].append(c.pull(result.data[0]))
                        else:
                            data[c.push_name] = [data[c.push_name], c.pull(result.data[0])]
                    else:
                        data[c.push_name][c.push_child] = c.pull(result.data[0])

                output = Data(
                    meta={"format": "value"},
                    data=data
                )
            else:
                data = Data()
                for s in index_to_columns.values():
                    if not data[s.push_child]:
                        data[s.push_child] = s.pull(result.data[0])
                    else:
                        data[s.push_child] += [s.pull(result.data[0])]
                output = Data(
                    meta={"format": "value"},
                    data=unwrap(data)
                )
        else:
            data = []
            for rownum in result.data:
                row = Data()
                for c in index_to_columns.values():
                    if c.push_child == ".":
                        row[c.push_name] = c.pull(rownum)
                    elif c.num_push_columns:
                        tuple_value = row[c.push_name]
                        if not tuple_value:
                            tuple_value = row[c.push_name] = [None] * c.num_push_columns
                        tuple_value[c.push_child] = c.pull(rownum)
                    else:
                        row[c.push_name][c.push_child] = c.pull(rownum)
                data.append(row)
            output = Data(
                meta={"format": "list"},
                data=data
            )
    else:
        Log.error("unknown format {{format}}", format=query.format)

    return output

def pretty_json(value):
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif is_data(value):
            try:
                items = sort_using_key(value.items(), lambda r: r[0])
                values = [encode_basestring(k) + PRETTY_COLON + pretty_json(v) for k, v in items if v != None]
                if not values:
                    return "{}"
                elif len(values) == 1:
                    return "{" + values[0] + "}"
                else:
                    return "{\n" + ",\n".join(indent(v) for v in values) + "\n}"
            except Exception as e:
                from mo_logs import Log
                from mo_math import OR

                if OR(not is_text(k) for k in value.keys()):
                    Log.error(
                        "JSON must have string keys: {{keys}}:",
                        keys=[k for k in value.keys()],
                        cause=e
                    )

                Log.error(
                    "problem making dict pretty: keys={{keys}}:",
                    keys=[k for k in value.keys()],
                    cause=e
                )
        elif value in (None, Null):
            return "null"
        elif value.__class__ in (binary_type, text_type):
            if is_binary(value):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception as e:
                from mo_logs import Log

                try:
                    Log.note("try explicit convert of string with length {{length}}", length=len(value))
                    acc = [QUOTE]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = text_type(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string. Ignored.", ord=ord(c), cause=g)
                    acc.append(QUOTE)
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}", length=len(output))
                    return output
                except BaseException as f:
                    Log.warning("can not convert {{type}} to json", type=f.__class__.__name__, cause=f)
                    return "null"
        elif is_list(value):
            if not value:
                return "[]"

            if ARRAY_MAX_COLUMNS == 1:
                return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"

            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"

            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if max_len <= ARRAY_ITEM_MAX_LENGTH and max(*[j.find("\n") for j in js]) == -1:
                # ALL TINY VALUES
                num_columns = max(
                    1,
                    min(
                        ARRAY_MAX_COLUMNS,
                        int(floor((ARRAY_ROW_LENGTH + 2.0) / float(max_len + 2)))  # +2 TO COMPENSATE FOR COMMAS
                    )
                )
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + PRETTY_COMMA.join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"

                content = ",\n".join(
                    PRETTY_COMMA.join(j.rjust(max_len) for j in js[r:r + num_columns])
                    for r in xrange(0, len(js), num_columns)
                )
                return "[\n" + indent(content) + "\n]"

            pretty_list = js

            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log

                    Log.warning(
                        "problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p)
                    )
            output.append("\n]")

            try:
                return "".join(output)
            except Exception as e:
                from mo_logs import Log

                Log.error("not expected", cause=e)
        elif hasattr(value, '__data__'):
            d = value.__data__()
            return pretty_json(d)
        elif hasattr(value, '__json__'):
            j = value.__json__()
            if j == None:
                return " null "  # TODO: FIND OUT WHAT CAUSES THIS
            return pretty_json(json_decoder(j))
        elif scrub(value) is None:
            return "null"
        elif hasattr(value, '__iter__'):
            return pretty_json(list(value))
        elif hasattr(value, '__call__'):
            return "null"
        else:
            try:
                if int(value) == value:
                    return text_type(int(value))
            except Exception:
                pass

            try:
                if float(value) == value:
                    return text_type(float(value))
            except Exception:
                pass

            return pypy_json_encode(value)
    except Exception as e:
        problem_serializing(value, e)

def process(
    sig_id,
    since,
    source,
    destination,
):
    """
    :param sig_id: The performance hash
    :param since: Only data after this date
    :param source: where to pull the signature details and raw data
    :param destination: where the summary record is written
    :return:
    """
    if not isinstance(sig_id, int):
        Log.error("expecting id")

    # GET SIGNATURE DETAILS
    sig = get_signature(source, sig_id)

    # GET RAW DATA
    pushes = get_dataum(source, sig_id, since, LIMIT)
    pushes = jx.sort(
        [
            {
                "value": median(rows.value),
                "runs": rows,
                "push": {"time": unwrap(t)["push.time"]},
            }
            for t, rows in jx.groupby(pushes, "push.time")
            if t["push\\.time"] > since
        ],
        "push.time",
    )

    values = list(pushes.value)
    title = "-".join(
        map(
            str,
            [
                sig.framework,
                sig.suite,
                sig.test,
                sig.platform,
                sig.repository,
            ],
        )
    )
    Log.note("With {{title}}", title=title)

    if len(values) > LIMIT:
        Log.alert(
            "Too many values for {{title}} (at least {{num}}), choosing last {{limit}}",
            title=title,
            num=len(values),
            limit=LIMIT,
        )
        values = values[-LIMIT:]

    with Timer("find segments"):
        new_segments, new_diffs = find_segments(values, sig.alert_change_type, sig.alert_threshold)

    if len(new_segments) == 1:
        overall_dev_status = None
        overall_dev_score = None
        last_mean = None
        last_std = None
        last_dev_status = None
        last_dev_score = None
        relative_noise = None
    else:
        # NOISE OF LAST SEGMENT
        s, e = new_segments[-2], new_segments[-1]
        last_segment = np.array(values[s:e])
        trimmed_segment = last_segment
        last_mean = np.mean(trimmed_segment)
        last_std = np.std(trimmed_segment)
        last_dev_status, last_dev_score = deviance(trimmed_segment)
        relative_noise = last_std / last_mean

        # FOR EACH SEGMENT, NORMALIZE MEAN AND VARIANCE
        normalized = []
        for s, e in jx.pairs(new_segments):
            data = np.array(values[s:e])
            norm = (data + last_mean - np.mean(data)) * last_std / np.std(data)
            normalized.extend(norm)

        overall_dev_status, overall_dev_score = deviance(normalized)

    Log.note(
        "\n\tdeviance = {{deviance}}\n\tnoise={{std}}\n\tpushes={{pushes}}\n\tsegments={{num_segments}}",
        title=title,
        deviance=(overall_dev_status, overall_dev_score),
        std=relative_noise,
        pushes=len(values),
        num_segments=len(new_segments) - 1,
    )

    destination.add(
        Data(
            id=sig_id,
            title=title,
            num_pushes=len(values),
            num_segments=len(new_segments) - 1,
            relative_noise=relative_noise,
            overall_dev_status=overall_dev_status,
            overall_dev_score=overall_dev_score,
            last_mean=last_mean,
            last_std=last_std,
            last_dev_status=last_dev_status,
            last_dev_score=last_dev_score,
            last_updated=Date.now(),
            values=values,
        )
        | scrub(sig)
    )

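# Worked sketch of the per-segment normalization inside process() above
# (the numbers are illustrative): each earlier segment is shifted toward the
# last segment's mean and rescaled by last_std / std(segment) before
# deviance() is applied to the concatenated series.
import numpy as np

data = np.array([10.0, 12.0, 11.0])  # one earlier segment
last_mean, last_std = 20.0, 1.0      # stats of the final segment
norm = (data + last_mean - np.mean(data)) * last_std / np.std(data)
# norm.std() now equals last_std; the shift re-centers the segment toward
# last_mean before the scale factor is applied
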
def json_encode(value):
    """
    FOR PUTTING JSON INTO DATABASE (sort_keys=True)
    dicts CAN BE USED AS KEYS
    """
    return text(utf8_json_encoder(mo_json.scrub(value)))

output.append("\n]") try: return "".join(output) except Exception as e: from mo_logs import Log Log.error("not expected", cause=e) elif hasattr(value, '__data__'): d = value.__data__() return pretty_json(d) elif hasattr(value, '__json__'): j = value.__json__() if j == None: return " null " # TODO: FIND OUT WHAT CAUSES THIS return pretty_json(json_decoder(j)) elif scrub(value) is None: return "null" elif hasattr(value, '__iter__'): return pretty_json(list(value)) elif hasattr(value, '__call__'): return "null" else: try: if int(value) == value: return str(int(value)) except Exception: pass try: if float(value) == value: return str(float(value))
def json_encode(value):
    """
    FOR PUTTING JSON INTO DATABASE (sort_keys=True)
    dicts CAN BE USED AS KEYS
    """
    return text_type(utf8_json_encoder(mo_json.scrub(value)))

def json_encode(value):
    """
    FOR PUTTING JSON INTO DATABASE (sort_keys=True)
    dicts CAN BE USED AS KEYS
    """
    return unicode(json_encoder.encode(mo_json.scrub(value)))

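# Why sort_keys matters for the three json_encode variants above: it makes
# the serialization deterministic, so the string can stand in for a dict as
# a key. A stdlib sketch of the same idea:
import json

def as_key(value):
    return json.dumps(value, sort_keys=True)

cache = {as_key({"b": 2, "a": 1}): "hit"}
assert cache[as_key({"a": 1, "b": 2})] == "hit"  # key order is irrelevant
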
def test_minus_inf(self):
    test = float("-inf")
    output = value2json(test)
    expecting = cPythonJSONEncoder().encode(mo_json.scrub(test))
    self.assertEqual(output, expecting, "expecting " + expecting)

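# Companion sketch to test_minus_inf, written on the assumption (which that
# test relies on) that scrub() maps every non-finite float to None, so the
# output is valid JSON "null" rather than the non-standard -Infinity/NaN
# tokens:
def test_nan(self):
    test = float("nan")
    output = value2json(test)
    expecting = cPythonJSONEncoder().encode(mo_json.scrub(test))
    self.assertEqual(output, expecting, "expecting " + expecting)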