def test_bad_long_json(self):
    """A long JSON document corrupted in the middle must be rejected by json2value."""
    encoded = value2json({"values": list(range(1000))})
    # Corrupt the document by splicing a stray "|" in at character 1000.
    test = encoded[:1000] + "|" + encoded[1000:]
    # Shape of the full message; only its prefix is asserted below.
    expected = "Can not decode JSON at:\n\t..., 216, 217, 218, 219|, 220, 221, 222, 22...\n\t ^\n"
    with self.assertRaises("Can not decode JSON"):
        json2value(test)
def test_aes_on_char(self):
    """Encrypting "kyle" with a fixed key and salt yields the known AES256 envelope."""
    key = convert.base642bytearray(u'nm5/wK20R45AUtetHJwHTdOigvGTxP7NcH/41YE8AZo=')
    salt = convert.base642bytearray("AehqWt1OdEgPJhCx6uylyg==")
    envelope = aes_crypto.encrypt("kyle", key, salt=salt)

    actual = json2value(envelope.decode('utf8'))
    wanted = json2value(u'{"type": "AES256", "length": 4, "salt": "AehqWt1OdEgPJhCx6uylyg==", "data": "FXUGxdb9E+4UCKwsIT9ugQ=="}')
    self.assertEqual(actual, wanted)
def test_aes_nothing(self):
    """Encrypting the empty text yields an envelope with length 0 and no data property."""
    key = convert.base642bytearray(u'nm5/wK20R45AUtetHJwHTdOigvGTxP7NcH/41YE8AZo=')
    salt = convert.base642bytearray("AehqWt1OdEgPJhCx6uylyg==")
    envelope = aes_crypto.encrypt("", key, salt=salt)

    actual = json2value(envelope.decode('utf8'))
    wanted = json2value(u'{"type": "AES256", "length": 0, "salt": "AehqWt1OdEgPJhCx6uylyg=="}')
    self.assertEqual(actual, wanted)
def create_index(
    self,
    index,
    alias=None,
    create_timestamp=None,
    schema=None,
    limit_replicas=None,
    read_only=False,
    tjson=False,
    kwargs=None
):
    """
    POST a new index to the cluster and block until it is listed in the cluster state.

    :param index: name of the index (replaced by a derived name when no alias is given)
    :param alias: alias for the index; when falsy, kwargs.index is used as the alias
    :param create_timestamp: passed to proto_name() when deriving the index name
    :param schema: JSON text, or a structure that is round-tripped through JSON
    :param limit_replicas: when truthy, cap the replica count at (number of nodes - 1)
    :param kwargs: all of the settings, handed on to Index()
    :return: an Index built from kwargs
    """
    if not alias:
        # THE REQUESTED NAME BECOMES THE ALIAS; THE REAL INDEX NAME IS DERIVED FROM IT
        alias = kwargs.alias = kwargs.index
        index = kwargs.index = proto_name(alias, create_timestamp)

    if kwargs.alias == index:
        Log.error("Expecting index name to conform to pattern")
    if kwargs.schema_file:
        Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead')

    if schema == None:
        Log.error("Expecting a schema")
    elif isinstance(schema, basestring):
        schema = mo_json.json2value(schema, leaves=True)
    else:
        schema = mo_json.json2value(convert.value2json(schema), leaves=True)

    if limit_replicas:
        # DO NOT ASK FOR TOO MANY REPLICAS
        health = self.get("/_cluster/health")
        if schema.settings.index.number_of_replicas >= health.number_of_nodes:
            Log.warning(
                "Reduced number of replicas: {{from}} requested, {{to}} realized",
                {"from": schema.settings.index.number_of_replicas},
                to=health.number_of_nodes - 1
            )
            schema.settings.index.number_of_replicas = health.number_of_nodes - 1

    self.post("/" + index, data=schema, headers={"Content-Type": "application/json"})

    # CONFIRM INDEX EXISTS
    while True:
        try:
            state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
            if index in state.metadata.indices:
                break
            Log.note("Waiting for index {{index}} to appear", index=index)
        except Exception as e:
            Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
        Till(seconds=1).wait()

    Log.alert("Made new index {{index|quote}}", index=index)
    return Index(kwargs=kwargs)
def create_index(
    self,
    index,
    alias=None,
    create_timestamp=None,
    schema=None,
    limit_replicas=None,
    read_only=False,
    tjson=False,
    kwargs=None
):
    """
    PUT a new index to the cluster and block until it is listed in the cluster state.

    :param index: name of the index (replaced by a derived name when no alias is given)
    :param alias: alias for the index; when falsy, kwargs.index is used as the alias
    :param create_timestamp: passed to proto_name() when deriving the index name
    :param schema: JSON text, or a structure that is round-tripped through JSON
    :param limit_replicas: when truthy, cap the replica count at (number of nodes - 1)
    :param kwargs: all of the settings, handed on to Index()
    :return: an Index built from kwargs
    """
    if not alias:
        # THE REQUESTED NAME BECOMES THE ALIAS; THE REAL INDEX NAME IS DERIVED FROM IT
        alias = kwargs.alias = kwargs.index
        index = kwargs.index = proto_name(alias, create_timestamp)

    if kwargs.alias == index:
        Log.error("Expecting index name to conform to pattern")
    if kwargs.schema_file:
        Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead')

    if schema == None:
        Log.error("Expecting a schema")
    elif isinstance(schema, basestring):
        schema = mo_json.json2value(schema, leaves=True)
    else:
        schema = mo_json.json2value(convert.value2json(schema), leaves=True)

    if limit_replicas:
        # DO NOT ASK FOR TOO MANY REPLICAS
        health = self.get("/_cluster/health")
        if schema.settings.index.number_of_replicas >= health.number_of_nodes:
            Log.warning(
                "Reduced number of replicas: {{from}} requested, {{to}} realized",
                {"from": schema.settings.index.number_of_replicas},
                to=health.number_of_nodes - 1
            )
            schema.settings.index.number_of_replicas = health.number_of_nodes - 1

    self.put("/" + index, data=schema, headers={"Content-Type": "application/json"})

    # CONFIRM INDEX EXISTS
    while True:
        try:
            state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
            if index in state.metadata.indices:
                break
            Log.note("Waiting for index {{index}} to appear", index=index)
        except Exception as e:
            Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
        Till(seconds=1).wait()

    Log.alert("Made new index {{index|quote}}", index=index)
    return Index(kwargs=kwargs)
def fix(rownum, line, source, sample_only_filter, sample_size):
    """
    Prepare one line of JSON for indexing.

    :param rownum: position of the line in the source; row 0 gets extra handling
    :param line: the JSON text of one record
    :param source: object with a `name`, used to pick source-specific fixes
    :param sample_only_filter: filter that, when matched by row 0, ends the load
    :param sample_size: sampling rate (defaults to 0.01 when missing)
    :return: (row, no_more_data) WHERE row HAS "id" AND EITHER "value" OR "json"
    """
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}')
    line = line.replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            suite = mo_json.json2value(suite_json)
            # REPLACE THE OBJECT WITH ITS NAME (AS A JSON STRING)
            suite = convert.value2json(coalesce(suite.fullname, suite.name))
            line = line.replace(suite_json, suite)

    if source.name.startswith("active-data-codecoverage"):
        d = convert.json2value(line)
        if d.source.file.total_covered > 0:
            return {"id": d._id, "json": line}, False
        return None, False

    too_long = len(line) > MAX_RECORD_LENGTH

    if rownum == 0:
        value = mo_json.json2value(line)
        if too_long:
            _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif too_long:
        value = mo_json.json2value(line)
        _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif '"resource_usage":' in line:
        value = mo_json.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST: AVOID DECODING JSON
        _id = strings.between(line, "\"_id\": \"", "\"")
        row = {"id": _id, "json": line}

    return row, False
def __data__(self):
    """
    Return the JSON-izable form of this concat expression.

    :return: {"concat": ...} with a "separator" property added when the
             separator is not the empty string
    """
    if isinstance(self.value, Variable) and isinstance(self.length, Literal):
        # NOTE(review): this short form reads terms[2] and tests self.value /
        # self.length; left as found - confirm against the class definition
        output = {"concat": {self.terms[0].var: json2value(self.terms[2].json)}}
    else:
        output = {"concat": [t.__data__() for t in self.terms]}

    if self.separator.json != '""':
        # EMIT THE SEPARATOR THAT WAS TESTED, NOT WHATEVER IS IN terms[2]
        output["separator"] = json2value(self.separator.json)
    return output
def device_register(self, path=None):
    """
    EXPECTING A SIGNED REGISTRATION REQUEST
    RETURN JSON WITH url FOR LOGIN

    Reads the request body, verifies its signature, creates a session, stores
    the (state, session_id) pair in the device table and answers with the
    login url plus a session cookie.

    :param path: not used by this method
    :return: Response with a JSON body (session_id, interval, expires, url)
    """
    now = Date.now()
    expires = now + parse(self.device.register.session['max-age'])
    request_body = request.get_data()
    signed = json2value(request_body.decode("utf8"))
    # THE SIGNED PAYLOAD IS BASE64 ENCODED JSON
    command = json2value(base642bytes(signed.data).decode("utf8"))
    # NOTE(review): the signature is verified with the public key supplied in the
    # request itself - presumably trust is established later at login; confirm
    session.public_key = command.public_key
    rsa_crypto.verify(signed, session.public_key)

    self.session_manager.create_session(session)
    session.expires = expires.unix
    # RANDOM state LINKS THE LOGIN url BACK TO THIS SESSION
    session.state = bytes2base64URL(crypto.bytes(32))

    with self.device.db.transaction() as t:
        t.execute(
            sql_insert(
                self.device.table,
                {
                    "state": session.state,
                    "session_id": session.session_id
                },
            ))
    body = value2json(
        Data(
            session_id=session.session_id,
            interval="5second",
            expires=session.expires,
            url=URL(
                self.device.home,
                path=self.device.endpoints.login,
                query={"state": session.state},
            ),
        ))

    response = Response(body, headers={"Content-Type": mimetype.JSON}, status=200)
    # NOTE(review): cookie name comes from register.session, the other cookie
    # attributes come from login.session - confirm this mix is intended
    response.set_cookie(self.device.register.session.name,
                        session.session_id,
                        path=self.device.login.session.path,
                        domain=self.device.login.session.domain,
                        expires=expires.format(RFC1123),
                        secure=self.device.login.session.secure,
                        httponly=self.device.login.session.httponly)
    return response
def fix(rownum, line, source, sample_only_filter, sample_size):
    """
    Prepare one line of JSON for indexing, minimising oversized repo details
    for a known set of record ids.

    :param rownum: position of the line in the source; row 0 gets extra handling
    :param line: the JSON text of one record
    :param source: passed on to _shorten()
    :param sample_only_filter: filter that, when matched by row 0, ends the load
    :param sample_size: sampling rate (defaults to 0.01 when missing)
    :return: (row, no_more_data) WHERE row IS {"value": <data structure>}
    """
    value = json2value(line)

    if value._id.startswith(("tc.97", "96", "bb.27")):
        # AUG 24, 25 2017 - included full diff with repo; too big to index
        try:
            data = json2value(line)
            repo = data.repo
            repo.etl = None
            repo.branch.last_used = None
            repo.branch.description = None
            repo.branch.etl = None
            repo.branch.parent_name = None
            repo.children = None
            repo.parents = None
            if repo.changeset.diff or data.build.repo.changeset.diff:
                Log.error("no diff allowed")
            else:
                assertAlmostEqual(minimize_repo(repo), repo)
        except Exception as e:
            # JSON DECODE PROBLEMS ARE FATAL; ANYTHING ELSE MEANS THE repo NEEDS MINIMISING
            if CAN_NOT_DECODE_JSON in e:
                raise e
            data.repo = minimize_repo(repo)
            data.build.repo = minimize_repo(data.build.repo)
            # NOTE(review): only `line` is rebuilt here; `value` (what gets returned)
            # still holds the un-minimised record - confirm this is intended
            line = value2json(data)
    else:
        pass

    if rownum == 0:
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        value = _fix(value)
        if sample_only_filter and Random.int(
                int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter(
                    [value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            row = {"value": value}
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        _shorten(value, source)
        value = _fix(value)
    elif line.find('"resource_usage":') != -1:
        value = _fix(value)

    row = {"value": value}
    return row, False
def row_to_column(header, row):
    """
    Build a Column from a row of cells.

    :param header: property names, in the same order as the cells
    :param row: cell values; non-None "nested_path" and "partitions" cells hold JSON text
    :return: Column with one keyword argument per header name
    """
    json_encoded = ("nested_path", "partitions")
    properties = {}
    for h, c in zip(header, row):
        if c is None or h not in json_encoded:
            properties[h] = c
        else:
            properties[h] = json2value(c)
    return Column(**properties)
def fix(source_key, rownum, line, source, sample_only_filter, sample_size):
    """
    Decode one line of JSON and prepare it for indexing.

    :param source_key: passed on to _shorten()
    :param rownum: position of the line in the source; row 0 gets extra handling
    :param line: the JSON text of one record
    :param source: passed on to _shorten()
    :param sample_only_filter: filter that, when matched by row 0, ends the load
    :param sample_size: sampling rate (defaults to 0.01 when missing)
    :return: (row, no_more_data) TUPLE WHERE row IS {"value":<data structure>}
    """
    value = json2value(line)
    too_long = len(line) > MAX_RECORD_LENGTH

    if rownum == 0:
        if too_long:
            _shorten(source_key, value, source)
        value = _fix(value)
        if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return {"value": value}, True
    elif too_long:
        _shorten(source_key, value, source)
        value = _fix(value)
    elif '"resource_usage":' in line:
        value = _fix(value)

    return {"value": value}, False
def __init__(
    self,
    host,
    index,
    port=9200,
    type="log",
    queue_size=1000,
    batch_size=100,
    kwargs=None,
):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX

    Normalizes timeout/retry settings, picks one host, gets (or creates) the
    index, then starts the thread that runs self._insert_loop.
    """
    timeout = Duration(coalesce(kwargs.timeout, "30second"))
    kwargs.timeout = timeout.seconds
    kwargs.retry.times = coalesce(kwargs.retry.times, 3)
    retry_sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE))
    kwargs.retry.sleep = retry_sleep.seconds
    # host MAY BE A LIST; PICK ONE
    kwargs.host = Random.sample(listwrap(host), 1)[0]

    schema = json2value(value2json(SCHEMA), leaves=True)
    schema.mappings[type].properties["~N~"].type = "nested"

    cluster = Cluster(kwargs)
    self.es = cluster.get_or_create_index(
        schema=schema,
        limit_replicas=True,
        typed=True,
        kwargs=kwargs,
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=queue_size, silent=True)
    self.worker = Thread.run("add debug logs to es", self._insert_loop)
def _convert(row):
    """
    Decode the JSON array held in row[column] into a set, with None removed.

    :param row: indexable by `column` (a name taken from the enclosing scope)
    :return: set of decoded values; the empty set when the cell is missing
    """
    text = row[column]
    if text == None:
        return set()
    decoded = json2value(row[column])
    return set(decoded) - {None}
def test_one_tuple(self):
    """A "tuple" request holding a single query returns a single result."""
    test = to_data({
        "data": [{"a": 1}, {"a": 2}],
        "query": {"from": TEST_TABLE},
        # "expecting_list": [{"a": 1}, {"a": 2}]
    })
    self.utils.fill_container(test)  # FILL AND MAKE DUMMY QUERY

    single_query = {
        "from": test.query['from'],
        "where": {"eq": {"a": 1}},
        "format": "list",
        "meta": {"testing": True}
    }
    query = value2json({"tuple": single_query}).encode('utf8')

    # SEND QUERY
    response = self.utils.try_till_response(self.utils.testing.query, data=query)
    if response.status_code != 200:
        error(response)

    result = json2value(response.all_content.decode('utf8'))
    self.assertEqual(result, {"data": [{"data": [{"a": 1}]}]})
def get_state(self):
    """
    Read the ETL state from the configured cache.

    :return: the decoded state, or None when it can not be read or decoded
    """
    try:
        cached = adr_configuration.config.cache.get(CACHY_STATE)
        state = json2value(cached)
        Log.note("Got ETL state:\n{{state|json|indent}}", state=state)
    except Exception:
        # BEST EFFORT: ANY PROBLEM MEANS "NO STATE"
        return None
    return state
def __init__(self, filename, host="fake", index="fake", kwargs=None):
    """
    Load the documents from the JSON file named by kwargs.filename.

    An unreadable or undecodable file results in an empty Data().
    """
    self.settings = kwargs
    self.filename = kwargs.filename
    try:
        content = File(self.filename).read()
        self.data = mo_json.json2value(content)
    except Exception:
        self.data = Data()
def fix(source_key, rownum, line, source, sample_only_filter, sample_size):
    """
    Decode one line of JSON and prepare it for indexing.

    :param source_key: passed on to _shorten()
    :param rownum: position of the line in the source; row 0 gets extra handling
    :param line: the JSON text of one record
    :param source: passed on to _shorten()
    :param sample_only_filter: filter that, when matched by row 0, ends the load
    :param sample_size: sampling rate (defaults to 0.01 when missing)
    :return: (row, no_more_data) TUPLE WHERE row IS {"value":<data structure>}
    """
    value = json2value(line)
    oversized = len(line) > MAX_RECORD_LENGTH

    if rownum == 0:
        if oversized:
            _shorten(source_key, value, source)
        value = _fix(value)
        if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return {"value": value}, True
    elif oversized:
        _shorten(source_key, value, source)
        value = _fix(value)
    elif '"resource_usage":' in line:
        value = _fix(value)

    return {"value": value}, False
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON

    :param url: where to send the GET
    :param kwargs: passed on to get()
    :return: the decoded response body
    """
    response = get(url, **kwargs)
    body = convert.utf82unicode(response.all_content)
    return mo_json.json2value(body)
def test_branch_count(self):
    """Count records per build.branch on a real service; skipped when there is none."""
    if self.not_real_service():
        return

    es_settings = {
        "host": ES_CLUSTER_LOCATION,
        "index": "unittest",
        "type": "test_result"
    }
    test = wrap({"query": {
        "from": {"type": "elasticsearch", "settings": es_settings},
        "select": [
            {"aggregate": "count"},
        ],
        "edges": ["build.branch"],
        "where": {"or": [
            {"missing": "build.id"}
            # {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
        ]},
        "format": "table"
    }})

    query = value2json(test.query).encode('utf8')

    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)

    result = json2value(response.all_content.decode('utf8'))
    Log.note("result\n{{result|indent}}", {"result": result})
def put(self, path, **kwargs):
    """
    PUT to the cluster and return the decoded response.

    :param path: path (with leading slash) appended to host:port
    :param kwargs: request parameters; a Mapping given as `data` is sent as utf8 JSON
    :return: the decoded JSON response
    """
    url = self.settings.host + ":" + unicode(self.settings.port) + path

    data = kwargs.get(b'data')
    if data == None:
        pass
    elif isinstance(data, Mapping):
        kwargs[b'data'] = data = convert.unicode2utf8(convert.value2json(data))
    elif not isinstance(kwargs["data"], str):
        Log.error("data must be utf8 encoded string")

    if self.debug:
        sample = kwargs.get(b'data', "")[:300]
        Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample)

    response = http.put(url, **kwargs)
    if response.status_code != 200:
        Log.error(response.reason + ": " + response.all_content)
    if self.debug:
        Log.note("response: {{response}}", response=utf82unicode(response.all_content)[:300])

    details = mo_json.json2value(utf82unicode(response.content))
    if details.error:
        Log.error(convert.quote2string(details.error))
    if details._shards.failed > 0:
        reasons = (r.replace(";", ";\n") for r in details._shards.failures.reason)
        Log.error("Shard failures {{failures|indent}}", failures="---\n".join(reasons))
    return details
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Return the SQL for this expression's JSON value.

    :return: wrapped list with one entry, named ".", whose "n" sql is the quoted value
    """
    value = json2value(self.json)
    sql = {"n": sql_quote(value)}
    return wrap([{"name": ".", "sql": sql}])
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON

    :param url: where to POST
    :param kwargs: must include `json` (preferred) or `data`; either is sent as utf8 JSON
    :return: the decoded response, when the status code is 200, 201 or 202
    """
    if 'json' in kwargs:
        kwargs['data'] = value2json(kwargs.pop('json')).encode('utf8')
    elif 'data' in kwargs:
        kwargs['data'] = value2json(kwargs['data']).encode('utf8')
    else:
        Log.error(u"Expecting `json` parameter")

    response = post(url, **kwargs)
    details = json2value(response.content.decode('utf8'))

    if response.status_code in [200, 201, 202]:
        return details

    if "template" in details:
        # THE RESPONSE LOOKS LIKE A SERIALIZED EXCEPTION; USE IT AS THE CAUSE
        Log.error(u"Bad response code {{code}}", code=response.status_code, cause=Except.wrap(details))
    else:
        Log.error(u"Bad response code {{code}}\n{{details}}", code=response.status_code, details=details)
def test_multiple_agg_on_same_field(self):
    """Two aggregates (max and count) over the same field, sent to a real service."""
    if self.not_real_service():
        return

    source = {
        "type": "elasticsearch",
        "settings": {
            "host": ES_CLUSTER_LOCATION,
            "index": "unittest",
            "type": "test_result"
        }
    }
    select = [
        {"name": "max_bytes", "value": "run.stats.bytes", "aggregate": "max"},
        {"name": "count", "value": "run.stats.bytes", "aggregate": "count"}
    ]
    test = wrap({"query": {"from": source, "select": select}})

    query = unicode2utf8(convert.value2json(test.query))

    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)

    result = json2value(utf82unicode(response.all_content))
    Log.note("result\n{{result|indent}}", {"result": result})
def delete_index(self, index_name):
    """
    Delete an index, after removing every alias that points at it.

    :param index_name: unicode name of the index to delete
    :return: the http response of the DELETE call
    """
    if not isinstance(index_name, unicode):
        Log.error("expecting an index name")

    if self.debug:
        Log.note("Deleting index {{index}}", index=index_name)

    # REMOVE ALL ALIASES TOO
    aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
    if aliases:
        self.post(
            path="/_aliases",
            data={"actions": [{"remove": a} for a in aliases]}
        )

    url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
    try:
        response = http.delete(url)
        if response.status_code != 200:
            Log.error("Expecting a 200, got {{code}}", code=response.status_code)
        # DECODED ONLY FOR THE DEBUG NOTE (AND TO CONFIRM THE BODY IS JSON)
        details = mo_json.json2value(utf82unicode(response.content))
        if self.debug:
            Log.note("delete response {{response}}", response=details)
        return response
    except Exception as e:  # `as` FORM PARSES ON PYTHON 2.6+ AND PYTHON 3
        Log.error("Problem with call to {{url}}", url=url, cause=e)
def __new__(cls, op, term, *args):
    """
    Expand a range expression into an AndOp with one comparison per entry.

    :param op: operator name given by the caller (not used for the comparisons)
    :param term: pair of (field, comparisons); comparisons IS A Literal()
    :return: AndOp over the individual comparison operators
    """
    Expression.__new__(cls, *args)
    field, comparisons = term

    clauses = []
    for name, value in json2value(comparisons.json).items():
        # EACH KEY OF THE LITERAL NAMES THE COMPARISON OPERATOR TO USE
        clauses.append(operators[name](name, [field, Literal(None, value)]))
    return AndOp("and", clauses)
def __init__(
    self,
    host,
    index,
    port=9200,
    type="log",
    queue_size=1000,
    batch_size=100,
    kwargs=None,
):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX

    Normalizes timeout/retry settings, gets (or creates) the index, then
    starts the thread that runs self._insert_loop.
    """
    timeout = Duration(coalesce(kwargs.timeout, "30second"))
    kwargs.timeout = timeout.seconds
    kwargs.retry.times = coalesce(kwargs.retry.times, 3)
    retry_sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE))
    kwargs.retry.sleep = retry_sleep.seconds

    cluster = Cluster(kwargs)
    schema = json2value(value2json(SCHEMA), leaves=True)
    self.es = cluster.get_or_create_index(
        schema=schema,
        limit_replicas=True,
        typed=True,
        kwargs=kwargs,
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=queue_size, silent=True)
    self.worker = Thread.run("add debug logs to es", self._insert_loop)
def delete_index(self, index_name):
    """
    Delete an index, after removing every alias that points at it.

    :param index_name: unicode name of the index to delete
    :return: the http response of the DELETE call
    """
    if not isinstance(index_name, unicode):
        Log.error("expecting an index name")
    if self.debug:
        Log.note("Deleting index {{index}}", index=index_name)

    # REMOVE ALL ALIASES TOO
    removals = [
        {"remove": a}
        for a in self.get_aliases()
        if a.index == index_name and a.alias != None
    ]
    if removals:
        self.post(path="/_aliases", data={"actions": removals})

    url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
    try:
        response = http.delete(url)
        if response.status_code != 200:
            Log.error("Expecting a 200, got {{code}}", code=response.status_code)
        details = mo_json.json2value(utf82unicode(response.content))
        if self.debug:
            Log.note("delete response {{response}}", response=details)
        return response
    except Exception as e:
        Log.error("Problem with call to {{url}}", url=url, cause=e)
def __init__(
    self,
    host,
    index,
    port=9200,
    type="log",
    queue_size=1000,
    batch_size=100,
    refresh_interval="1second",
    kwargs=None,
):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX

    Normalizes timeout/retry settings, picks one host, builds the schema and
    a RolloverIndex keyed on "timestamp", then starts the thread that runs
    self._insert_loop.
    """
    timeout = Duration(coalesce(kwargs.timeout, "30second"))
    kwargs.timeout = timeout.seconds
    kwargs.retry.times = coalesce(kwargs.retry.times, 3)
    retry_sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE))
    kwargs.retry.sleep = retry_sleep.seconds
    # host MAY BE A LIST; PICK ONE
    kwargs.host = randoms.sample(listwrap(host), 1)[0]

    # interval AND max DEFAULT TO EACH OTHER, THEN TO "year"
    rollover = kwargs.rollover
    rollover_interval = coalesce(rollover.interval, rollover.max, "year")
    rollover_max = coalesce(rollover.max, rollover.interval, "year")

    given_schema = kwargs.schema
    nested_marker = {"mappings": {kwargs.type: {"properties": {"~N~": {"type": "nested"}}}}}
    default_schema = json2value(value2json(SCHEMA), leaves=True)
    schema = set_default(given_schema, nested_marker, default_schema)

    self.es = RolloverIndex(
        rollover_field={"get": [{"first": "."}, {"literal": "timestamp"}]},
        rollover_interval=rollover_interval,
        rollover_max=rollover_max,
        schema=schema,
        limit_replicas=True,
        typed=True,
        read_only=False,
        kwargs=kwargs,
    )
    self.batch_size = batch_size
    self.queue = Queue("debug logs to es", max=queue_size, silent=True)
    self.worker = Thread.run("add debug logs to es", self._insert_loop)
def command_loop(local):
    """
    Read one JSON command per line from stdin, act on it, and write the outcome to STDOUT.

    Recognized commands: "import", "set", "get", "stop", "exec"; any other
    property name is treated as a callable to invoke.  Loops until please_stop.

    SECURITY: commands are run with exec(); whoever writes to stdin can run
    arbitrary code in this process.  Only attach this loop to a trusted parent.

    :param local: dict used as the local namespace (must hold 'config')
    """
    DEBUG and Log.note("mo-python process running with {{config|json}}", config=local['config'])
    while not please_stop:
        line = sys.stdin.readline()
        try:
            command = json2value(line.decode('utf8'))
            DEBUG and Log.note("got {{command}}", command=command)

            if "import" in command:
                # IMPORT NAMES INTO THE SHARED context
                dummy = {}
                if is_text(command['import']):
                    exec("from " + command['import'] + " import *", dummy, context)
                else:
                    exec(
                        "from " + command['import']['from'] + " import " +
                        ",".join(listwrap(command['import']['vars'])), dummy,
                        context)
                STDOUT.write(DONE)
            elif "set" in command:
                for k, v in command.set.items():
                    context[k] = v
                STDOUT.write(DONE)
            elif "get" in command:
                # local NAMES TAKE PRECEDENCE OVER context NAMES
                STDOUT.write(
                    value2json({
                        "out":
                        coalesce(local.get(command['get']),
                                 context.get(command['get']))
                    }))
                STDOUT.write('\n')
            elif "stop" in command:
                STDOUT.write(DONE)
                please_stop.go()
            elif "exec" in command:
                if not is_text(command['exec']):
                    Log.error("exec expects only text")
                exec(command['exec'], context, local)
                STDOUT.write(DONE)
            else:
                # EACH PROPERTY IS A CALL: A LIST IS POSITIONAL ARGUMENTS, OTHERWISE KEYWORD ARGUMENTS
                for k, v in command.items():
                    if is_list(v):
                        exec(
                            "_return = " + k + "(" +
                            ",".join(map(value2json, v)) + ")", context, local)
                    else:
                        exec(
                            "_return = " + k + "(" +
                            ",".join(kk + "=" + value2json(vv)
                                     for kk, vv in v.items()) + ")", context,
                            local)
                    STDOUT.write(value2json({"out": local['_return']}))
                    STDOUT.write('\n')
        except Exception as e:
            # REPORT THE PROBLEM TO THE CALLER; KEEP LOOPING
            STDOUT.write(value2json({"err": e}))
            STDOUT.write('\n')
        finally:
            STDOUT.flush()
def expand_json(rows):
    """
    CONVERT JSON TO VALUES (IN PLACE)

    Every text property that starts with "[" or "{" is replaced by its
    decoded value; properties that fail to decode are left untouched.

    :param rows: iterable of mutable records
    """
    for record in rows:
        for key, candidate in items(record):
            if not isinstance(candidate, text):
                continue
            if candidate[0:1] not in ("[", "{"):
                continue
            with suppress_exception:
                record[key] = mo_json.json2value(candidate)
def _get_url(url, branch, **kwargs):
    """
    GET the url and return the decoded JSON response.

    :param url: where to send the request
    :param branch: on success, its `url` is set to the trimmed url
    :param kwargs: passed on to http.get()
    :return: decoded response; an "unknown revision" text response is an error
    """
    with Explanation("get push from {{url}}", url=url, debug=DEBUG):
        response = http.get(url, **kwargs)
        data = json2value(response.content.decode("utf8"))

        is_message = isinstance(data, (text_type, str))
        if is_message and data.startswith("unknown revision"):
            revision = strings.between(data, "'", "'")
            Log.error(UNKNOWN_PUSH, revision=revision)

        # RECORD THIS SUCCESS IN THE BRANCH
        branch.url = _trim(url)
        return data
def expand_json(rows):
    """
    CONVERT JSON TO VALUES (IN PLACE)

    Every text property that starts with "[" or "{" is replaced by its
    decoded value; properties that fail to decode are left untouched.

    :param rows: iterable of mutable records
    """
    for record in rows:
        for key, candidate in items(record):
            looks_like_json = isinstance(candidate, text_type) and candidate[0:1] in ("[", "{")
            if looks_like_json:
                with suppress_exception:
                    record[key] = mo_json.json2value(candidate)
def get_env(ref, url):
    """
    GET ENVIRONMENT VARIABLES

    :param ref: reference whose `host` names the environment variable
    :param url: not used
    :return: the variable decoded as JSON when possible, the raw text when it
             is not JSON, and None when the variable is not set
    """
    name = ref.host
    raw = os.environ.get(name)
    if raw is None:
        # A MISSING VARIABLE IS NOT AN ERROR (SAME AS _get_env)
        return None
    try:
        return json2value(raw)
    except Exception:
        # NOT JSON, SO USE THE TEXT AS-IS
        return raw
def __init__(self, filename, host="fake", index="fake", kwargs=None):
    """
    Load the documents from the given JSON file.

    An unreadable or undecodable file results in an empty Data().
    """
    self.settings = kwargs
    self.file = File(filename)
    self.cluster = Null
    try:
        content = self.file.read()
        self.data = mo_json.json2value(content)
    except Exception:
        self.data = Data()
def _get_env(ref, url):
    """
    GET ENVIRONMENT VARIABLES

    :param ref: reference whose `host` names the environment variable
    :param url: not used
    :return: the variable decoded as JSON when possible, otherwise whatever
             os.environ.get() gives (None when the variable is not set)
    """
    name = ref.host
    try:
        return json2value(os.environ[name])
    except Exception:
        return os.environ.get(name)
def to_python(self, not_null=False, boolean=False, many=False):
    """
    Return Python source that matches the variable against the pattern.

    The decoded pattern has "$" appended before it is quoted.
    """
    anchored_pattern = json2value(self.pattern.json) + "$"
    pattern_source = quote(anchored_pattern)
    variable_source = Python[self.var].to_python()
    return "re.match(" + pattern_source + ", " + variable_source + ")"
def _get_url(url, branch, **kwargs):
    """
    GET the url and return the decoded JSON response.

    :param url: where to send the request
    :param branch: on success, its `url` is set to the trimmed url
    :param kwargs: passed on to http.get()
    :return: decoded response; an "unknown revision" text response is an error
    """
    with Explanation("get push from {{url}}", url=url, debug=DEBUG):
        response = http.get(url, **kwargs)
        data = json2value(response.content.decode("utf8"))

        is_message = isinstance(data, (text_type, str))
        if is_message and data.startswith("unknown revision"):
            revision = strings.between(data, "'", "'")
            Log.error("Unknown push {{revision}}", revision=revision)

        # RECORD THIS SUCCESS IN THE BRANCH
        branch.url = _trim(url)
        return data
def typed_encode(self, r):
    """
    Encode one record as typed JSON.

    :param r: record with a "value" or a "json" property (when "json" is
              present it is decoded and used), plus an optional "id"
    :return: dict with id and json properties
    """
    # IMPORTED HERE, NOT AT MODULE LEVEL, AS THE ORIGINAL DID
    from mo_logs import Log

    try:
        # .get() SO RECORDS WITH ONLY A "json" PROPERTY DO NOT FAIL WITH KeyError
        value = r.get('value')
        if "json" in r:
            value = json2value(r["json"])
        elif isinstance(value, Mapping) or value != None:
            pass
        else:
            raise Log.error(
                "Expecting every record given to have \"value\" or \"json\" property"
            )

        _buffer = UnicodeBuilder(1024)
        net_new_properties = []
        path = []
        if isinstance(value, Mapping):
            given_id = self.get_id(value)
            if self.remove_id:
                value['_id'] = None
        else:
            given_id = None

        record_id = r.get('id')
        if given_id:
            # THE id INSIDE THE VALUE MUST AGREE WITH THE id ON THE RECORD
            if record_id and record_id != given_id:
                raise Log.error(
                    "expecting {{property}} of record ({{record_id|quote}}) to match one given ({{given|quote}})",
                    property=self.id_column,
                    record_id=record_id,
                    given=given_id)
        elif record_id:
            given_id = record_id
        else:
            given_id = random_id()

        self._typed_encode(value, self.schema, path, net_new_properties, _buffer)
        json = _buffer.build()

        # net_new_properties IS FILLED BY _typed_encode() BUT NOT ACTED ON HERE
        return {"id": given_id, "json": json}
    except Exception as e:
        # THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS
        Log.error("Serialization of JSON problems", cause=e)
def row_to_column(header, row):
    """
    Build a Column from a row of cells.

    :param header: property names, in the same order as the cells
    :param row: cell values; non-None "nested_path" and "partitions" cells hold JSON text
    :return: Column with one keyword argument per header name
    """
    def _cell_value(name, cell):
        # ONLY THESE TWO PROPERTIES ARE STORED AS JSON
        if cell is None or name not in ("nested_path", "partitions"):
            return cell
        return json2value(cell)

    return Column(**{h: _cell_value(h, c) for h, c in zip(header, row)})
def to_python(self, not_null=False, boolean=False, many=False):
    """
    Return Python source that reads this variable from the row at the given offset.

    Every step of the path except the last defaults to EMPTY_DICT.
    """
    pieces = ["rows[rownum+" + self.offset.to_python() + "]"]
    path = split_field(json2value(self.var.json))
    if not path:
        return pieces[0]

    for step in path[:-1]:
        pieces.append(".get(" + convert.value2quote(step) + ", EMPTY_DICT)")
    pieces.append(".get(" + convert.value2quote(path[-1]) + ")")
    return "".join(pieces)
def pop(self, wait=SECOND, till=None):
    """
    Read one message from the queue and return its decoded body.

    The message itself is appended to self.pending.

    :param wait: how long to wait for a message (whole seconds are used)
    :param till: only validated to be a Signal (or None)
    :return: decoded message body, or None when nothing was read
    """
    if till is not None and not isinstance(till, Signal):
        Log.error("Expecting a signal")

    wait_seconds = mo_math.floor(wait.seconds)
    message = self.queue.read(wait_time_seconds=wait_seconds)
    if not message:
        return None

    self.pending.append(message)
    return mo_json.json2value(message.get_body())
def fix(rownum, line, source, sample_only_filter, sample_size):
    """
    Prepare one line of JSON for indexing.

    :param rownum: position of the line in the source; row 0 gets extra handling
    :param line: the JSON text of one record
    :param source: object with a `name`, used to pick source-specific fixes
    :param sample_only_filter: filter that, when matched by row 0, ends the load
    :param sample_size: sampling rate (defaults to 0.01 when missing)
    :return: (row, no_more_data) WHERE row HAS "id" AND EITHER "value" OR "json"
    """
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}')
    line = line.replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            suite = mo_json.json2value(suite_json)
            # REPLACE THE OBJECT WITH ITS NAME (AS A JSON STRING)
            suite = convert.value2json(coalesce(suite.fullname, suite.name))
            line = line.replace(suite_json, suite)

    too_long = len(line) > MAX_RECORD_LENGTH
    must_decode = rownum == 0 or too_long or '"resource_usage":' in line

    if not must_decode:
        # FAST: AVOID DECODING JSON
        _id = strings.between(line, "\"_id\": \"", "\"")
        return {"id": _id, "json": line}, False

    value = mo_json.json2value(line)
    if too_long:
        _shorten(value, source)
    _id, value = _fix(value)
    row = {"id": _id, "value": value}

    if rownum == 0 and sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
        # INDEX etl.id==0, BUT NO MORE
        if value.etl.id != 0:
            Log.error("Expecting etl.id==0")
        return row, True

    return row, False
def extend(self, records):
    """
    JUST SO WE MODEL A Queue

    :param records: each has an "id" and either a "value" or a "json" (text) property
    """
    documents = {}
    for v in records:
        if "value" in v:
            documents[v["id"]] = v["value"]
        else:
            documents[v["id"]] = mo_json.json2value(v['json'])

    unwrap(self.data).update(documents)
    self.refresh()
    Log.note("{{num}} documents added", num=len(documents))
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON

    :param url: where to send the GET
    :param kwargs: passed on to get()
    :return: the decoded response body
    """
    response = get(url, **kwargs)
    try:
        body = utf82unicode(response.all_content)
        return json2value(body)
    except Exception as e:
        # STATUS ROUNDED TO THE NEAREST HUNDRED TELLS 4xx/5xx FROM A BAD BODY
        status_class = mo_math.round(response.status_code, decimal=-2)
        if status_class in [400, 500]:
            Log.error(u"Bad GET response: {{code}}", code=response.status_code)
        else:
            Log.error(u"Good GET requests, but bad JSON", cause=e)
def delete(self, path, **kwargs):
    """
    Send a DELETE to the cluster and return the decoded response.

    :param path: path (with leading slash) appended to host:port
    :param kwargs: passed on to http.delete()
    :return: the wrapped, decoded JSON response
    """
    url = self.settings.host + ":" + unicode(self.settings.port) + path
    try:
        response = http.delete(url, **kwargs)
        if response.status_code != 200:
            Log.error(response.reason + ": " + response.all_content)

        if self.debug:
            Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))

        details = wrap(mo_json.json2value(utf82unicode(response.all_content)))
        if details.error:
            Log.error(details.error)
        return details
    except Exception as e:
        Log.error("Problem with call to {{url}}", url=url, cause=e)
def pop_message(self, wait=SECOND, till=None):
    """
    RETURN TUPLE (message, payload)
    CALLER IS RESPONSIBLE FOR CALLING message.delete() WHEN DONE

    :param wait: how long to wait for a message (whole seconds are used)
    :param till: only validated to be a Signal (or None)
    :return: (message, decoded body), or None when nothing was read
    """
    if till is not None and not isinstance(till, Signal):
        Log.error("Expecting a signal")

    wait_seconds = mo_math.floor(wait.seconds)
    message = self.queue.read(wait_time_seconds=wait_seconds)
    if not message:
        return None

    def _delete():
        return self.queue.delete_message(message)

    message.delete = _delete
    return message, mo_json.json2value(message.get_body())
def set_refresh_interval(self, seconds, **kwargs):
    """
    :param seconds: -1 FOR NO REFRESH
    :param kwargs: ANY OTHER REQUEST PARAMETERS
    :return: None
    """
    interval = -1 if seconds <= 0 else unicode(seconds) + "s"

    if self.cluster.version.startswith("0.90."):
        # 0.90 TAKES THE TEXT AS-IS AND CONFIRMS WITH "ok"
        encode_body = None
        confirmation = "ok"
    elif any(map(self.cluster.version.startswith, ["1.4.", "1.5.", "1.6.", "1.7."])):
        # 1.x TAKES utf8 BYTES AND CONFIRMS WITH "acknowledged"
        encode_body = convert.unicode2utf8
        confirmation = "acknowledged"
    else:
        Log.error("Do not know how to handle ES version {{version}}", version=self.cluster.version)
        return

    settings_path = "/" + self.settings.index + "/_settings"
    body = '{"index":{"refresh_interval":' + convert.value2json(interval) + '}}'
    if encode_body is not None:
        body = encode_body(body)

    response = self.cluster.put(settings_path, data=body, **kwargs)
    result = mo_json.json2value(utf82unicode(response.all_content))
    if not getattr(result, confirmation):
        Log.error("Can not set refresh interval ({{error}})", {
            "error": utf82unicode(response.all_content)
        })
def _get_queue(self, row):
    """
    Find (or make) the queue that feeds the index covering the row's timestamp.

    :param row: record with a "value" or a "json" property ("json" is decoded
                into "value" as a side effect)
    :return: the queue for the row's rollover interval; Null when the row has
             no timestamp; DATA_TOO_OLD when the timestamp is older than
             rollover_max
    """
    row = wrap(row)
    if row.json:
        row.value, row.json = json2value(row.json), None
    timestamp = Date(self.rollover_field(row.value))
    if timestamp == None:
        return Null
    elif timestamp < Date.today() - self.rollover_max:
        return DATA_TOO_OLD

    rounded_timestamp = timestamp.floor(self.rollover_interval)
    with self.locker:
        queue = self.known_queues.get(rounded_timestamp.unix)
    if queue == None:
        # FIND EXISTING INDEXES NAMED <index><date>_<time>, OLDEST FIRST
        # NOTE(review): "\d" in a non-raw string is a deprecated escape on Python 3
        candidates = jx.run({
            "from": ListContainer(".", self.cluster.get_aliases()),
            "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}},
            "sort": "index"
        })
        # best IS THE LAST CANDIDATE DATED BEFORE THE ROW'S TIMESTAMP
        best = None
        for c in candidates:
            c = wrap(c)
            c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
            if timestamp > c.date:
                best = c
        if not best or rounded_timestamp > best.date:
            if rounded_timestamp < wrap(candidates[-1]).date:
                es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)
            else:
                try:
                    es = self.cluster.create_index(create_timestamp=rounded_timestamp, kwargs=self.settings)
                    es.add_alias(self.settings.index)
                except Exception as e:
                    e = Except.wrap(e)
                    if "IndexAlreadyExistsException" not in e:
                        Log.error("Problem creating index", cause=e)
                    return self._get_queue(row)  # TRY AGAIN
        else:
            es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)

        # BEST EFFORT: FAILURE TO SET THE REFRESH INTERVAL IS IGNORED
        with suppress_exception:
            es.set_refresh_interval(seconds=60 * 5, timeout=5)

        self._delete_old_indexes(candidates)
        threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
        with self.locker:
            queue = self.known_queues[rounded_timestamp.unix] = threaded_queue
    return queue
def post(self, path, **kwargs):
    """
    POST to the cluster and return the decoded response.

    :param path: path (with leading slash) appended to host:port
    :param kwargs: request parameters; a Mapping given as `data` is sent as utf8 JSON
    :return: the decoded JSON response
    """
    url = self.settings.host + ":" + unicode(self.settings.port) + path

    try:
        wrap(kwargs).headers["Accept-Encoding"] = "gzip,deflate"

        # NOTE(review): b'data' and "data" are the same key only on Python 2
        data = kwargs.get(b'data')
        if data == None:
            pass
        elif isinstance(data, Mapping):
            kwargs[b'data'] = data =convert.unicode2utf8(convert.value2json(data))
        elif not isinstance(kwargs["data"], str):
            Log.error("data must be utf8 encoded string")

        if self.debug:
            sample = kwargs.get(b'data', "")[:300]
            Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample)

        if self.debug:
            Log.note("POST {{url}}", url=url)
        response = http.post(url, **kwargs)
        if response.status_code not in [200, 201]:
            Log.error(response.reason.decode("latin1") + ": " + strings.limit(response.content.decode("latin1"), 100 if self.debug else 10000))
        if self.debug:
            Log.note("response: {{response}}", response=utf82unicode(response.content)[:130])
        details = mo_json.json2value(utf82unicode(response.content))
        if details.error:
            Log.error(convert.quote2string(details.error))
        if details._shards.failed > 0:
            Log.error("Shard failures {{failures|indent}}",
                failures="---\n".join(r.replace(";", ";\n") for r in details._shards.failures.reason)
            )
        return details
    except Exception as e:
        # EVERY FAILURE ABOVE (INCLUDING THE Log.error CALLS) IS REPORTED WITH THE url
        if url[0:4] != "http":
            suggestion = " (did you forget \"http://\" prefix on the host name?)"
        else:
            suggestion = ""

        if kwargs.get("data"):
            Log.error(
                "Problem with call to {{url}}" + suggestion + "\n{{body|left(10000)}}",
                url=url,
                body=strings.limit(kwargs["data"], 100 if self.debug else 10000),
                cause=e
            )
        else:
            Log.error("Problem with call to {{url}}" + suggestion, url=url, cause=e)
def __init__(self, host, index, port=9200, type="log", max_size=1000, batch_size=100, kwargs=None):
    """
    SET UP AN ELASTICSEARCH INDEX (DESCRIBED BY THE SETTINGS) TO RECEIVE DEBUG
    LOGS, AND START THE THREAD THAT RUNS self._insert_loop

    :param host, index, port, type: NOT READ DIRECTLY IN THIS BODY
    :param max_size: max OF THE IN-MEMORY QUEUE
    :param batch_size: STORED ON self.batch_size
    :param kwargs: ALL SETTINGS; PASSED TO Cluster AND get_or_create_index
                   NOTE(review): DEFAULTS TO None BUT IS DEREFERENCED BELOW, SO
                   PRESUMABLY A DECORATOR FILLS IT IN - CONFIRM
    """
    cluster = Cluster(kwargs)
    schema = mo_json.json2value(value2json(SCHEMA), leaves=True)
    self.es = cluster.get_or_create_index(
        schema=schema,
        limit_replicas=True,
        tjson=True,
        kwargs=kwargs
    )
    self.batch_size = batch_size

    # ALIAS THE INDEX WITH THE CONFIGURED alias, FALLING BACK TO THE index NAME
    alias = coalesce(kwargs.alias, kwargs.index)
    self.es.add_alias(alias)

    self.queue = Queue("debug logs to es", max=max_size, silent=True)

    # RETRY DEFAULTS: 3 TIMES, SLEEPING A MINUTE, UNLESS ALREADY SET
    retry_times = coalesce(self.es.settings.retry.times, 3)
    self.es.settings.retry.times = retry_times
    retry_sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
    self.es.settings.retry.sleep = retry_sleep

    Thread.run("add debug logs to es", self._insert_loop)
def get_file(ref, url):
    """
    READ THE FILE THAT ref POINTS TO, DECODE IT, AND RETURN THE VALUE

    ref.path IS REWRITTEN IN PLACE TO AN ABSOLUTE PATH:
    * "~..." IS EXPANDED WITH THE USER'S HOME DIRECTORY
    * A PATH WITH n LEADING DOTS IS RESOLVED AGAINST url.path WITH ITS LAST n
      SEGMENTS REMOVED
    * ANY OTHER RELATIVE PATH IS RESOLVED AGAINST url.path WITH ITS LAST
      SEGMENT REMOVED

    THE CONTENT IS DECODED AS JSON (WITH ref.query AS params); IF THAT FAILS
    IT IS DECODED WITH ini2value; THE RESULT GOES THROUGH _replace_ref

    :param ref: REFERENCE TO THE FILE (HAS `path` AND `query`)
    :param url: LOCATION OF THE DOCUMENT HOLDING THE REFERENCE (HAS `path`)
    :return: THE DECODED VALUE, AFTER _replace_ref
    """
    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            # WINDOWS: USE FORWARD SLASHES, WITH A LEADING SLASH
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        # startswith (NOT path[0]) SO AN EMPTY PATH DOES NOT RAISE IndexError
        if ref.path.startswith("."):
            # COUNT THE LEADING DOTS; lstrip CAN NOT RUN PAST THE END OF A
            # PATH THAT IS ONLY DOTS, WHICH AN INDEXED SCAN WOULD
            num_dot = len(ref.path) - len(ref.path.lstrip("."))

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    # ON WINDOWS, DROP THE LEADING SLASH AND USE BACKSLASHES
    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        if DEBUG:
            Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception as e:
        content = None
        Log.error("Could not read file {{filename}}", filename=path, cause=e)

    try:
        new_value = json2value(content, params=ref.query, flexible=True, leaves=True)
    except Exception as e:
        e = Except.wrap(e)
        try:
            # NOT JSON, TRY INI FORMAT
            new_value = ini2value(content)
        except Exception:
            # REPORT THE JSON FAILURE (e) AS THE CAUSE
            raise Log.error("Can not read {{file}}", file=path, cause=e)
    new_value = _replace_ref(new_value, ref)
    return new_value
def _decode(v):
    """
    REPLACE EACH "%XX" IN v WITH hex2chr("XX"), THEN TRY TO READ THE RESULT AS JSON

    :param v: TEXT WITH PERCENT-ESCAPES
    :return: THE JSON-DECODED VALUE, OR THE UNESCAPED TEXT WHEN IT IS NOT JSON
    """
    pieces = []
    pos = 0
    end = len(v)
    while pos < end:
        char = v[pos]
        if char != "%":
            pieces.append(char)
            pos += 1
            continue
        # THE TWO CHARACTERS AFTER THE PERCENT SIGN ARE THE HEX CODE
        pieces.append(hex2chr(v[pos + 1:pos + 3]))
        pos += 3

    text = text_type("".join(pieces))
    try:
        return json2value(text)
    except Exception:
        # NOT JSON, THE TEXT ITSELF IS THE VALUE
        return text
def command_loop(local):
    """
    READ JSON COMMANDS FROM stdin (ONE PER LINE), EXECUTE EACH, AND REPLY ON STDOUT

    RECOGNIZED COMMANDS (FIRST MATCHING KEY WINS):
    * import - `from <module> import *`, OR `from <from> import <vars>`, INTO context
    * set    - COPY EACH KEY/VALUE INTO context
    * get    - REPLY WITH {"out": value}, LOOKED UP IN local, THEN context
    * stop   - REPLY DONE, THEN SIGNAL please_stop
    * exec   - EXECUTE THE GIVEN TEXT WITH context AS GLOBALS AND local AS LOCALS
    * OTHERWISE EACH KEY IS CALLED AS A FUNCTION, WITH A LIST AS POSITIONAL
      ARGUMENTS OR A MAPPING AS KEYWORD ARGUMENTS, AND {"out": _return} IS THE REPLY

    ANY EXCEPTION IS REPORTED AS {"err": exception}; STDOUT IS FLUSHED AFTER
    EVERY COMMAND. THE LOOP ENDS WHEN please_stop IS SET, OR stdin REACHES
    END-OF-FILE

    SECURITY: TEXT RECEIVED ON stdin IS PASSED TO exec(); THIS IS ONLY SAFE
    WHEN THE PROCESS WRITING TO stdin IS FULLY TRUSTED

    :param local: MAPPING USED AS THE LOCALS FOR exec; MUST HAVE A 'config' KEY WHEN DEBUG
    """
    DEBUG and Log.note("mo-python process running with {{config|json}}", config=local['config'])
    while not please_stop:
        line = sys.stdin.readline()
        if not line:
            # readline() RETURNS AN EMPTY STRING ONLY AT END-OF-FILE (A BLANK LINE
            # IS "\n"); NO MORE COMMANDS CAN ARRIVE, SO STOP INSTEAD OF LOOPING
            # FOREVER ON A DECODE ERROR
            please_stop.go()
            break
        try:
            command = json2value(line.decode('utf8'))
            DEBUG and Log.note("got {{command}}", command=command)

            if "import" in command:
                # THE IMPORT RUNS WITH THROW-AWAY GLOBALS; THE NAMES LAND IN context
                dummy = {}
                if is_text(command['import']):
                    exec("from " + command['import'] + " import *", dummy, context)
                else:
                    exec("from " + command['import']['from'] + " import " + ",".join(listwrap(command['import']['vars'])), dummy, context)
                STDOUT.write(DONE)
            elif "set" in command:
                for k, v in command.set.items():
                    context[k] = v
                STDOUT.write(DONE)
            elif "get" in command:
                STDOUT.write(value2json({"out": coalesce(local.get(command['get']), context.get(command['get']))}))
                STDOUT.write('\n')
            elif "stop" in command:
                STDOUT.write(DONE)
                please_stop.go()
            elif "exec" in command:
                if not is_text(command['exec']):
                    Log.error("exec expects only text")
                exec(command['exec'], context, local)
                STDOUT.write(DONE)
            else:
                # FUNCTION CALL: BUILD `_return = name(args)` AND EXECUTE IT
                for k, v in command.items():
                    if is_list(v):
                        exec("_return = " + k + "(" + ",".join(map(value2json, v)) + ")", context, local)
                    else:
                        exec("_return = " + k + "(" + ",".join(kk + "=" + value2json(vv) for kk, vv in v.items()) + ")", context, local)
                    STDOUT.write(value2json({"out": local['_return']}))
                    STDOUT.write('\n')
        except Exception as e:
            STDOUT.write(value2json({"err": e}))
            STDOUT.write('\n')
        finally:
            STDOUT.flush()
def _decode(v):
    """
    UNDO PERCENT-ESCAPES IN v, THEN TRY TO READ THE RESULT AS JSON

    EACH "%XX" IS REPLACED BY THE hex-DECODED VALUE OF "XX"; THE JOINED BYTES
    ARE DECODED AS latin1

    :param v: BYTE STRING WITH PERCENT-ESCAPES
    :return: THE JSON-DECODED VALUE, OR THE UNESCAPED TEXT WHEN IT IS NOT JSON
    """
    chunks = []
    index = 0
    length = len(v)
    while index < length:
        if v[index] == "%":
            # TWO HEX DIGITS FOLLOW THE PERCENT SIGN
            hex_pair = v[index + 1:index + 3]
            chunks.append(hex_pair.decode("hex"))
            index += 3
        else:
            chunks.append(v[index])
            index += 1

    joined = b"".join(chunks)
    text = joined.decode("latin1")
    try:
        return json2value(text)
    except Exception:
        # NOT JSON, RETURN THE TEXT AS-IS
        return text
def _daemon(self, please_stop):
    """
    READ LINES FROM self.process.stdout UNTIL please_stop, OR THREAD_STOP IS SEEN

    EACH LINE IS DECODED AS JSON AND HANDLED BY ITS FIRST MATCHING KEY:
    * log - WRITTEN TO Log.main_log
    * out - STORED IN self.current_response, THEN self.current_task IS SIGNALLED
    * err - STORED IN self.current_error, THEN self.current_task IS SIGNALLED
    ANY EXCEPTION WHILE HANDLING A LINE IS NOTED AS A "non-json line"

    :param please_stop: SIGNAL TO END THE LOOP
    """
    while not please_stop:
        line = self.process.stdout.pop(till=please_stop)
        if line == THREAD_STOP:
            break

        try:
            message = json2value(line.decode('utf8'))
            if "log" in message:
                Log.main_log.write(*message.log)
                continue
            if "out" in message:
                with self.lock:
                    self.current_response = message.out
                    self.current_task.go()
                continue
            if "err" in message:
                with self.lock:
                    self.current_error = message.err
                    self.current_task.go()
        except Exception:
            Log.note("non-json line: {{line}}", line=line)

    if DEBUG:
        Log.note("stdout reader is done")
def post_json(url, **kwargs):
    """
    POST A JSON-SERIALIZED BODY TO url; THE RESPONSE IS ASSUMED TO BE JSON

    :param url: WHERE TO POST
    :param kwargs: MUST HAVE `json` (PREFERRED, AND REMOVED BEFORE THE CALL) OR
                   `data`; THE VALUE IS SERIALIZED TO UTF8-ENCODED JSON AND SENT
                   AS `data`; EVERYTHING ELSE IS PASSED TO post()
    :return: THE JSON-DECODED RESPONSE, WHEN THE STATUS CODE IS 200, 201 OR 202;
             Log.error IS CALLED FOR ANY OTHER STATUS CODE
    """
    if 'json' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs.pop('json')))
    elif 'data' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['data']))
    else:
        Log.error(u"Expecting `json` parameter")

    response = post(url, **kwargs)
    details = json2value(utf82unicode(response.content))

    status = response.status_code
    if status in (200, 201, 202):
        return details

    if "template" in details:
        # THE DETAILS ARE WRAPPED AS THE CAUSE OF THE ERROR
        Log.error(u"Bad response code {{code}}", code=status, cause=Except.wrap(details))
    else:
        Log.error(u"Bad response code {{code}}\n{{details}}", code=status, details=details)
def post_json(url, **kwargs):
    """
    POST A JSON-SERIALIZED BODY TO url; THE RESPONSE IS ASSUMED TO BE JSON

    :param url: WHERE TO POST
    :param kwargs: MUST HAVE `json` (PREFERRED) OR `data`; THE VALUE IS
                   SERIALIZED TO UTF8-ENCODED JSON AND SENT AS `data`;
                   EVERYTHING IS PASSED TO post()
    :return: THE JSON-DECODED RESPONSE; Log.error IS CALLED WHEN THE CONTENT
             CAN NOT BE DECODED, OR THE STATUS CODE IS NOT 200 OR 201
    """
    # THE FIRST KEY FOUND SUPPLIES THE BODY
    for key in (b"json", b'data'):
        if key in kwargs:
            kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[key]))
            break
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    raw = response.content
    try:
        details = mo_json.json2value(convert.utf82unicode(raw))
    except Exception as e:
        Log.error("Unexpected return value {{content}}", content=raw, cause=e)

    accepted = response.status_code in [200, 201]
    if not accepted:
        # THE DETAILS ARE WRAPPED AS THE CAUSE OF THE ERROR
        Log.error("Bad response", cause=Except.wrap(details))

    return details