def set_refresh_interval(self, seconds):
    if seconds <= 0:
        interval = -1
    else:
        interval = unicode(seconds) + "s"

    if self.cluster.version.startswith("0.90."):
        response = self.cluster.put(
            "/" + self.settings.index + "/_settings",
            data='{"index":{"refresh_interval":' + convert.value2json(interval) + '}}'
        )

        result = convert.json2value(utf82unicode(response.all_content))
        if not result.ok:
            Log.error("Can not set refresh interval ({{error}})", {
                "error": utf82unicode(response.all_content)
            })
    elif any(map(self.cluster.version.startswith, ["1.4.", "1.5.", "1.6.", "1.7."])):
        response = self.cluster.put(
            "/" + self.settings.index + "/_settings",
            data=convert.unicode2utf8('{"index":{"refresh_interval":' + convert.value2json(interval) + '}}')
        )

        result = convert.json2value(utf82unicode(response.all_content))
        if not result.acknowledged:
            Log.error("Can not set refresh interval ({{error}})", {
                "error": utf82unicode(response.all_content)
            })
    else:
        Log.error("Do not know how to handle ES version {{version}}", version=self.cluster.version)

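# Minimal usage sketch (assumption: `es_index` stands for an instance of the class
# defining set_refresh_interval above; the name is illustrative, not from the source):
#
#     es_index.set_refresh_interval(seconds=0)    # seconds <= 0 becomes interval -1 (refresh effectively disabled)
#     es_index.set_refresh_interval(seconds=600)  # any positive value becomes the string "600s"
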
def process_unittest_in_s3(source_key, source, destination, resources, please_stop=None):
    lines = source.read_lines()

    etl_header = convert.json2value(lines[0])

    # FIX ETL IDS
    e = etl_header
    while e:
        if isinstance(e.id, basestring):
            e.id = int(e.id.split(":")[0])
        e = e.source

    bb_summary = transform_buildbot(convert.json2value(lines[1]), resources=resources)
    unittest_log = lines[2:]
    return process_unittest(source_key, etl_header, bb_summary, unittest_log, destination, please_stop=please_stop)

def create_index(
    self,
    index,
    alias=None,
    create_timestamp=None,
    schema=None,
    limit_replicas=None,
    read_only=False,
    tjson=False,
    settings=None
):
    if not alias:
        alias = settings.alias = settings.index
        index = settings.index = proto_name(alias, create_timestamp)

    if settings.alias == index:
        Log.error("Expecting index name to conform to pattern")

    if settings.schema_file:
        Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead')

    if schema == None:
        Log.error("Expecting a schema")
    elif isinstance(schema, basestring):
        schema = convert.json2value(schema, leaves=True)
    else:
        schema = convert.json2value(convert.value2json(schema), leaves=True)

    if limit_replicas:
        # DO NOT ASK FOR TOO MANY REPLICAS
        health = self.get("/_cluster/health")
        if schema.settings.index.number_of_replicas >= health.number_of_nodes:
            Log.warning(
                "Reduced number of replicas: {{from}} requested, {{to}} realized",
                {"from": schema.settings.index.number_of_replicas},
                to=health.number_of_nodes - 1
            )
            schema.settings.index.number_of_replicas = health.number_of_nodes - 1

    self.post(
        "/" + index,
        data=schema,
        headers={"Content-Type": "application/json"}
    )

    # CONFIRM INDEX EXISTS
    while True:
        try:
            state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
            if index in state.metadata.indices:
                break
            Log.note("Waiting for index {{index}} to appear", index=index)
        except Exception, e:
            Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
        Thread.sleep(seconds=1)

def create_index(
    self,
    index,
    alias=None,
    schema=None,
    limit_replicas=None,
    read_only=False,
    tjson=False,
    settings=None
):
    if not settings.alias:
        settings.alias = settings.index
        settings.index = proto_name(settings.alias)

    if settings.alias == settings.index:
        Log.error("Expecting index name to conform to pattern")

    if settings.schema_file:
        Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead')

    if schema == None:
        Log.error("Expecting a schema")
    elif isinstance(schema, basestring):
        schema = convert.json2value(schema, leaves=True)
    else:
        schema = convert.json2value(convert.value2json(schema), leaves=True)

    if limit_replicas:
        # DO NOT ASK FOR TOO MANY REPLICAS
        health = self.get("/_cluster/health")
        if schema.settings.index.number_of_replicas >= health.number_of_nodes:
            Log.warning(
                "Reduced number of replicas: {{from}} requested, {{to}} realized",
                {"from": schema.settings.index.number_of_replicas},
                to=health.number_of_nodes - 1
            )
            schema.settings.index.number_of_replicas = health.number_of_nodes - 1

    self.post(
        "/" + settings.index,
        data=schema,
        headers={"Content-Type": "application/json"}
    )
    while True:
        time.sleep(1)
        try:
            self.head("/" + settings.index)
            break
        except Exception:
            Log.note("{{index}} does not exist yet", index=settings.index)

    es = Index(settings=settings)
    return es

def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}').replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = convert.json2value(suite_json)
                suite = convert.value2json(suite.name)
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = convert.json2value(line)
        if len(line) > 100000:
            value.result.subtests = [s for s in value.result.subtests if s.ok is False]
            value.result.missing_subtests = True

        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > 100000:
        value = convert.json2value(line)
        value.result.subtests = [s for s in value.result.subtests if s.ok is False]
        value.result.missing_subtests = True

        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find("\"resource_usage\":") != -1:
        value = convert.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False

def test_request(self):
    # SIMPLEST POSSIBLE REQUEST (NOTHING IMPORTANT HAPPENING)
    data = {
        "meta": {
            "suite": "sessionrestore_no_auto_restore osx-10-10",
            "platform": "osx-10-10",
            "e10s": False,
            "och": "opt",
            "bucket": "startup",
            "statistic": "mean"
        },
        "header": ["rownum", "timestamp", "revision", "value"],
        "data": [
            [1, "2015-12-06 09:21:15", "18339318", 879],
            [2, "2015-12-06 16:50:36", "18340858", 976],
            [3, "2015-12-06 19:01:54", "18342319", 880],
            [4, "2015-12-06 21:08:56", "18343567", 1003],
            [5, "2015-12-06 23:33:27", "18345266", 1002],
            [6, "2015-12-07 02:16:22", "18347807", 977],
            [7, "2015-12-07 02:18:29", "18348057", 1035],
            [8, "2015-12-07 04:51:52", "18351263", 1032],
            [9, "2015-12-07 05:29:42", "18351078", 1035],
            [10, "2015-12-07 05:50:37", "18351749", 1010]
        ]
    }

    response = requests.post(settings.url, json=data)
    self.assertEqual(response.status_code, 200)

    data = convert.json2value(convert.utf82unicode(response.content))
    self.assertEqual(data, {})

def delete_index(self, index_name):
    if not isinstance(index_name, unicode):
        Log.error("expecting an index name")

    if self.debug:
        Log.note("Deleting index {{index}}", index=index_name)

    # REMOVE ALL ALIASES TOO
    aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
    if aliases:
        self.post(
            path="/_aliases",
            data={"actions": [{"remove": a} for a in aliases]}
        )

    url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
    try:
        response = http.delete(url)
        if response.status_code != 200:
            Log.error("Expecting a 200, got {{code}}", code=response.status_code)
        details = convert.json2value(utf82unicode(response.content))
        if self.debug:
            Log.note("delete response {{response}}", response=details)
        return response
    except Exception, e:
        Log.error("Problem with call to {{url}}", url=url, cause=e)

def process(source_key, source, destination, resources, please_stop=None):
    lines = source.read_lines()

    etl_header = convert.json2value(lines[0])
    if etl_header.etl:
        start = 0
    elif etl_header.locale or etl_header._meta:
        start = 0
    else:
        start = 1

    keys = []
    records = []
    stats = Dict()
    for i, line in enumerate(lines[start:]):
        pulse_record = Null
        try:
            pulse_record = scrub_pulse_record(source_key, i, line, stats)
            if not pulse_record:
                continue

            with Profiler("transform_buildbot"):
                record = transform_buildbot(pulse_record.payload, resources=resources)
                record.etl = {
                    "id": i,
                    "source": pulse_record.etl,
                    "type": "join",
                    "revision": get_git_revision()
                }
            key = etl2key(record.etl)
            keys.append(key)
            records.append({"id": key, "value": record})
        except Exception, e:
            Log.warning("Problem with pulse payload {{pulse|json}}", pulse=pulse_record.payload, cause=e)

def process_test_result(source_key, source, destination, please_stop=None):
    path = key2path(source_key)
    destination.delete({"and": [
        {"term": {"etl.source.id": path[1]}},
        {"term": {"etl.source.source.id": path[0]}}
    ]})

    lines = source.read_lines()

    keys = []
    data = []
    for l in lines:
        record = convert.json2value(l)
        if record._id == None:
            continue
        record.result.crash_result = None  # TODO: Remove me after May 2015
        keys.append(record._id)
        data.append({
            "id": record._id,
            "value": record
        })
        record._id = None
    if data:
        try:
            destination.extend(data)
        except Exception, e:
            if "Can not decide on index by build.date" in e:
                if source.bucket.name == "ekyle-test-result":
                    # KNOWN CORRUPTION
                    # TODO: REMOVE LATER (today = Mar2015)
                    delete_list = source.bucket.keys(prefix=key_prefix(source_key))
                    for d in delete_list:
                        source.bucket.delete_key(d)
            Log.error("Can not add to sink", e)

def __init__(self, host, index, type="query", max_size=10, batch_size=10, kwargs=None): """ settings ARE FOR THE ELASTICSEARCH INDEX """ es = Cluster(kwargs).get_or_create_index(schema=convert.json2value( convert.value2json(SCHEMA), leaves=True), limit_replicas=True, kwargs=kwargs) #ENSURE THE TYPE EXISTS FOR PROBING try: es.add({ "id": "dummy", "value": { "hash": "dummy", "create_time": Date.now(), "last_used": Date.now(), "query": {} } }) except Exception, e: Log.warning("Problem saving query", cause=e)
def __init__(self, settings):
    self.settings = wrap({"host": "fake", "index": "fake"})
    self.filename = settings.filename
    try:
        self.data = convert.json2value(File(self.filename).read())
    except IOError:
        self.data = Dict()

def decrypt(data, _key):
    """
    ACCEPT JSON OF ENCRYPTED DATA {"salt":s, "length":l, "data":d}
    """
    from pyLibrary.queries import jx

    # Key and iv have not been generated or provided, bail out
    if _key is None:
        Log.error("Expecting a key")

    _input = convert.json2value(data)

    # Initialize encryption using key and iv
    key_expander_256 = key_expander.KeyExpander(256)
    expanded_key = key_expander_256.expand(_key)
    aes_cipher_256 = aes_cipher.AESCipher(expanded_key)
    aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16)
    aes_cbc_256.set_iv(convert.base642bytearray(_input.salt))

    raw = convert.base642bytearray(_input.data)
    out_data = bytearray()
    for _, e in jx.groupby(raw, size=16):
        out_data.extend(aes_cbc_256.decrypt_block(e))

    return str(out_data[:_input.length:]).decode("utf8")

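# Illustrative sketch of the input decrypt() expects, per the docstring above
# (the values are fabricated placeholders, not real ciphertext; the key must be the
# same 256-bit key that produced the data):
#
#     from pyLibrary import convert
#
#     example_input = convert.value2json({
#         "salt": "<base64 iv>",          # used as the CBC initialization vector
#         "length": 11,                   # plaintext bytes to keep after decryption
#         "data": "<base64 ciphertext>"   # AES-256-CBC encrypted payload
#     })
#     plaintext = decrypt(example_input, _key)
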
def test_request(self):
    # MAKE SOME DATA
    data = {
        "constant": "this is a test",
        "random-data": convert.bytes2base64(Random.bytes(100))
    }

    client = Client(settings.url, unwrap(settings.hawk))  # unwrap() DUE TO BUG https://github.com/kumar303/mohawk/issues/21
    link, id = client.send(data)
    Log.note("Success! Located at {{link}} id={{id}}", link=link, id=id)

    # FILL THE REST OF THE FILE
    Log.note("Adding {{num}} more...", num=99 - id)
    for i in range(id + 1, storage.BATCH_SIZE):
        l, k = client.send(data)
        if l != link:
            Log.error("Expecting rest of data to have same link")

    # TEST LINK HAS DATA
    raw_content = requests.get(link).content
    content = convert.zip2bytes(raw_content)
    for line in convert.utf82unicode(content).split("\n"):
        data = convert.json2value(line)
        if data.etl.id == id:
            Log.note("Data {{id}} found", id=id)
            break
    else:
        Log.error("Expecting to find data at link")

def __init__(self, filename, host="fake", index="fake", settings=None):
    self.settings = settings
    self.filename = settings.filename
    try:
        self.data = convert.json2value(File(self.filename).read())
    except Exception:
        self.data = Dict()

def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    response = get(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))

def get_active_data(settings):
    query = {
        "limit": 100000,
        "from": "unittest",
        "where": {"and": [
            {"eq": {"result.ok": False}},
            {"gt": {"run.timestamp": RECENT.milli}}
        ]},
        "select": [
            "result.ok",
            "build.branch",
            "build.platform",
            "build.release",
            "build.revision",
            "build.type",
            "build.revision",
            "build.date",
            "run.timestamp",
            "run.suite",
            "run.chunk",
            "result.test",
            "run.stats.status.test_status"
        ],
        "format": "table"
    }

    result = http.post("http://activedata.allizom.org/query", data=convert.unicode2utf8(convert.value2json(query)))
    query_result = convert.json2value(convert.utf82unicode(result.all_content))

    tab = convert.table2tab(query_result.header, query_result.data)
    File(settings.output.activedata).write(tab)

def test_branch_count(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {"aggregate": "count"},
        ],
        "edges": [
            "build.branch"
        ],
        "where": {"or": [
            {"missing": "build.id"}
            # {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
        ]},
        "format": "table"
    }})
    query = convert.unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.service_url, data=query)
        if response.status_code != 200:
            error(response)
    result = convert.json2value(convert.utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})

def pop(self, wait=SECOND, till=None):
    m = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
    if not m:
        return None

    self.pending.append(m)
    return convert.json2value(m.get_body())

def accumulate_logs(source_key, file_name, lines, please_stop):
    accumulator = LogSummary()
    for line in lines:
        if please_stop:
            Log.error("Shutdown detected. Structured log iterator is stopped.")
        accumulator.stats.bytes += len(line) + 1  # INCLUDE THE \n THAT WOULD HAVE BEEN AT END OF EACH LINE
        line = strings.strip(line)

        if line == "":
            continue
        try:
            accumulator.stats.lines += 1
            log = convert.json2value(line)
            log.time = log.time / 1000
            accumulator.stats.start_time = Math.min(accumulator.stats.start_time, log.time)
            accumulator.stats.end_time = Math.max(accumulator.stats.end_time, log.time)

            # FIX log.test TO BE A STRING
            if isinstance(log.test, list):
                log.test = " ".join(log.test)

            accumulator.__getattribute__(log.action)(log)
            if log.subtest:
                accumulator.last_subtest = log.time
        except Exception, e:
            accumulator.stats.bad_lines += 1

def get_env(ref, url):
    # GET ENVIRONMENT VARIABLES
    ref = ref.host
    try:
        new_value = _convert.json2value(os.environ[ref])
    except Exception, e:
        new_value = os.environ[ref]

def __init__(self, settings):
    self.settings = wrap({"host": "fake", "index": "fake"})
    self.filename = settings.filename
    try:
        self.data = convert.json2value(File(self.filename).read())
    except IOError:
        self.data = Dict()

def test_multiple_agg_on_same_field(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {
                "name": "max_bytes",
                "value": "run.stats.bytes",
                "aggregate": "max"
            },
            {
                "name": "count",
                "value": "run.stats.bytes",
                "aggregate": "count"
            }
        ]
    }})
    query = convert.unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.service_url, data=query)
        if response.status_code != 200:
            error(response)
    result = convert.json2value(convert.utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})

def get_env(ref, url):
    # GET ENVIRONMENT VARIABLES
    ref = ref.host
    try:
        new_value = convert.json2value(os.environ[ref])
    except Exception, e:
        new_value = os.environ[ref]

def solve():
    try:
        data = convert.json2value(convert.utf82unicode(flask.request.data))
        solved = noop.solve(data)
        response_data = convert.unicode2utf8(convert.value2json(solved))

        return Response(
            response_data,
            direct_passthrough=True,  # FOR STREAMING
            status=200,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )
    except Exception, e:
        e = Except.wrap(e)
        Log.warning("Could not process", cause=e)
        e = e.as_dict()

        return Response(
            convert.unicode2utf8(convert.value2json(e)),
            status=400,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )

def delete_index(self, index_name):
    if not isinstance(index_name, unicode):
        Log.error("expecting an index name")

    if self.debug:
        Log.note("Deleting index {{index}}", index=index_name)

    # REMOVE ALL ALIASES TOO
    aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
    if aliases:
        self.post(
            path="/_aliases",
            data={"actions": [{"remove": a} for a in aliases]}
        )

    url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
    try:
        response = http.delete(url)
        if response.status_code != 200:
            Log.error("Expecting a 200")
        details = convert.json2value(utf82unicode(response.content))
        if self.debug:
            Log.note("delete response {{response}}", response=details)
        return response
    except Exception, e:
        Log.error("Problem with call to {{url}}", url=url, cause=e)

def _get_queue(self, row):
    row = wrap(row)
    if row.json:
        row.value, row.json = convert.json2value(row.json), None
    timestamp = Date(self.rollover_field(wrap(row).value))
    if timestamp == None or timestamp < Date.today() - self.rollover_max:
        return Null

    rounded_timestamp = timestamp.floor(self.rollover_interval)
    queue = self.known_queues.get(rounded_timestamp.unix)
    if queue == None:
        candidates = jx.run({
            "from": self.cluster.get_aliases(),
            "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}},
            "sort": "index"
        })
        best = None
        for c in candidates:
            c = wrap(c)
            c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
            if timestamp > c.date:
                best = c
        if not best or rounded_timestamp > best.date:
            if rounded_timestamp < wrap(candidates[-1]).date:
                es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, settings=self.settings)
            else:
                try:
                    es = self.cluster.create_index(create_timestamp=rounded_timestamp, settings=self.settings)
                    es.add_alias(self.settings.index)
                except Exception, e:
                    if "IndexAlreadyExistsException" not in e:
                        Log.error("Problem creating index", cause=e)
                    return self._get_queue(row)  # TRY AGAIN
        else:
            es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, settings=self.settings)

        with suppress_exception:
            es.set_refresh_interval(seconds=60 * 10, timeout=5)

        self._delete_old_indexes(candidates)
        queue = self.known_queues[rounded_timestamp.unix] = es.threaded_queue(
            max_size=self.settings.queue_size,
            batch_size=self.settings.batch_size,
            silent=True
        )

def expand_json(rows):
    # CONVERT JSON TO VALUES
    for r in rows:
        for k, json in list(r.items()):
            if isinstance(json, basestring) and json[0:1] in ("[", "{"):
                with suppress_exception:
                    value = convert.json2value(json)
                    r[k] = value

def _get_url(url, branch, **kwargs):
    with Explanation("get push from {{url}}", url=url):
        response = http.get(url, **kwargs)
        data = convert.json2value(response.content.decode("utf8"))
        if isinstance(data, basestring) and data.startswith("unknown revision"):
            Log.error("Unknown push {{revision}}", revision=strings.between(data, "'", "'"))
        branch.url = _trim(url)  # RECORD THIS SUCCESS IN THE BRANCH
        return data

def create_index(self, index, alias=None, schema=None, limit_replicas=None, settings=None):
    if not settings.alias:
        settings.alias = settings.index
        settings.index = proto_name(settings.alias)

    if settings.alias == settings.index:
        Log.error("Expecting index name to conform to pattern")

    if settings.schema_file:
        Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead')

    if schema == None:
        Log.error("Expecting a schema")
    elif isinstance(schema, basestring):
        schema = convert.json2value(schema, paths=True)
    else:
        schema = convert.json2value(convert.value2json(schema), paths=True)

    if limit_replicas:
        # DO NOT ASK FOR TOO MANY REPLICAS
        health = self.get("/_cluster/health")
        if schema.settings.index.number_of_replicas >= health.number_of_nodes:
            Log.warning(
                "Reduced number of replicas: {{from}} requested, {{to}} realized",
                {"from": schema.settings.index.number_of_replicas},
                to=health.number_of_nodes - 1
            )
            schema.settings.index.number_of_replicas = health.number_of_nodes - 1

    self._post(
        "/" + settings.index,
        data=convert.value2json(schema).encode("utf8"),
        headers={"Content-Type": "application/json"}
    )
    while True:
        time.sleep(1)
        try:
            self.head("/" + settings.index)
            break
        except Exception, _:
            Log.note("{{index}} does not exist yet", index=settings.index)

def delete(self, path, **kwargs):
    url = self.settings.host + ":" + unicode(self.settings.port) + path
    try:
        response = convert.json2value(utf82unicode(http.delete(url, **kwargs).content))
        if self.debug:
            Log.note("delete response {{response}}", response=response)
        return response
    except Exception, e:
        Log.error("Problem with call to {{url}}", url=url, cause=e)

def read_json(self, encoding="utf8"): from pyLibrary.jsons import ref content = self.read(encoding=encoding) value = convert.json2value(content, flexible=True, leaves=True) abspath = self.abspath if os.sep == "\\": abspath = "/" + abspath.replace(os.sep, "/") return ref.expand(value, "file://" + abspath)
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    kwargs["data"] = convert.unicode2utf8(convert.value2json(kwargs["data"]))

    response = post(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))

def test_timing(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {"name": "count", "value": "run.duration", "aggregate": "count"},
            {"name": "total", "value": "run.duration", "aggregate": "sum"}
        ],
        "edges": [
            {"name": "chunk", "value": ["run.suite", "run.chunk"]},
            "result.ok"
        ],
        "where": {"and": [
            {"lt": {"timestamp": Date.floor(Date.now()).milli / 1000}},
            {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
        ]},
        "format": "cube",
        "samples": {
            "limit": 30
        }
    }})
    query = convert.unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.service_url, data=query)
        if response.status_code != 200:
            error(response)
    result = convert.json2value(convert.utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})

def get_http(ref, url):
    from pyLibrary.env import http

    params = url.query
    new_value = convert.json2value(http.get(ref), params=params, flexible=True, paths=True)
    return new_value

def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    kwargs["data"] = convert.unicode2utf8(convert.value2json(kwargs["data"]))

    response = post(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))

def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}').replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = convert.json2value(suite_json)
                suite = convert.value2json(suite.name)
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = convert.json2value(line)
        if len(line) > 100000:
            value.result.subtests = [s for s in value.result.subtests if s.ok is False]
            value.result.missing_subtests = True

        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > 100000:
        value = convert.json2value(line)
        value.result.subtests = [s for s in value.result.subtests if s.ok is False]
        value.result.missing_subtests = True

        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find("\"resource_usage\":") != -1:
        value = convert.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False

def full_etl(settings):
    schema = convert.json2value(convert.value2json(SCHEMA), leaves=True)
    Cluster(settings.destination).get_or_create_index(settings=settings.destination, schema=schema, limit_replicas=True)
    destq = FromES(settings.destination)
    if settings.incremental:
        min_bug_id = destq.query({
            "from": coalesce(settings.destination.alias, settings.destination.index),
            "select": {"name": "max_bug_id", "value": "bug_id", "aggregate": "max"}
        })
        min_bug_id = int(MAX(min_bug_id - 1000, 0))
    else:
        min_bug_id = 0

    sourceq = FromES(settings.source)
    max_bug_id = sourceq.query({
        "from": coalesce(settings.source.alias, settings.source.index),
        "select": {"name": "max_bug_id", "value": "bug_id", "aggregate": "max"}
    }) + 1
    max_bug_id = int(coalesce(max_bug_id, 0))

    # FIRST, GET ALL MISSING BUGS
    for s, e in qb.reverse(list(qb.intervals(min_bug_id, max_bug_id, 10000))):
        with Timer("pull {{start}}..{{end}} from ES", {"start": s, "end": e}):
            children = sourceq.query({
                "from": settings.source.alias,
                "select": ["bug_id", "dependson", "blocked", "modified_ts", "expires_on"],
                "where": {"and": [
                    {"range": {"bug_id": {"gte": s, "lt": e}}},
                    {"or": [
                        {"exists": "dependson"},
                        {"exists": "blocked"}
                    ]}
                ]},
                "limit": 10000
            })

        with Timer("fixpoint work"):
            to_fix_point(settings, destq, children.data)

    # PROCESS RECENT CHANGES
    with Timer("pull recent dependencies from ES"):
        children = sourceq.query({
            "from": settings.source.alias,
            "select": ["bug_id", "dependson", "blocked"],
            "where": {"and": [
                {"range": {"modified_ts": {"gte": convert.datetime2milli(datetime.utcnow() - timedelta(days=7))}}},
                {"or": [
                    {"exists": "dependson"},
                    {"exists": "blocked"}
                ]}
            ]},
            "limit": 100000
        })

    to_fix_point(settings, destq, children.data)

def delete(self, path, **kwargs):
    url = self.settings.host + ":" + unicode(self.settings.port) + path
    try:
        response = convert.json2value(utf82unicode(http.delete(url, **kwargs).content))
        if self.debug:
            Log.note("delete response {{response}}", response=response)
        return response
    except Exception, e:
        Log.error("Problem with call to {{url}}", url=url, cause=e)

def __eq__(self, other):
    Log.warning("expensive")

    from pyLibrary.testing.fuzzytestcase import assertAlmostEqual

    try:
        assertAlmostEqual(convert.json2value(self.json), other)
        return True
    except Exception:
        return False

def copy(self, keys, source, sample_only_filter=None, sample_size=None):
    num_keys = 0
    for key in keys:
        try:
            for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                if rownum == 0:
                    value = convert.json2value(line)
                    if len(line) > 1000000:
                        # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests)", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                        value.result.subtests = None
                        value.result.missing_subtests = True

                    _id, value = _fix(value)
                    row = {"id": _id, "value": value}
                    if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and qb.filter([value], sample_only_filter):
                        # INDEX etl.id==0, BUT NO MORE
                        if value.etl.id != 0:
                            Log.error("Expecting etl.id==0")
                        num_keys += 1
                        self.queue.add(row)
                        break
                elif len(line) > 1000000:
                    value = convert.json2value(line)
                    # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests).", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                    value.result.subtests = None
                    value.result.missing_subtests = True

                    _id, value = _fix(value)
                    row = {"id": _id, "value": value}
                else:
                    # FAST
                    _id = strings.between(line, "_id\": \"", "\"")  # AVOID DECODING JSON
                    row = {"id": _id, "json": line}
                num_keys += 1
                self.queue.add(row)
        except Exception, e:
            Log.warning("Could not get queue for {{key}}", key=key, cause=e)

def pop_message(self, wait=SECOND, till=None): """ RETURN THE MESSAGE, CALLER IS RESPONSIBLE FOR CALLING delete_message() WHEN DONE """ m = self.queue.read(wait_time_seconds=Math.floor(wait.seconds)) if not m: return None output = convert.json2value(m.get_body()) return output
def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None): """ settings ARE FOR THE ELASTICSEARCH INDEX """ self.es = Cluster(settings).get_or_create_index( schema=convert.json2value(convert.value2json(SCHEMA), paths=True), limit_replicas=True, settings=settings ) self.queue = self.es.threaded_queue(max_size=max_size, batch_size=batch_size)
def pop(self, wait=SECOND, till=None):
    if till is not None and not isinstance(till, Signal):
        Log.error("Expecting a signal")

    m = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
    if not m:
        return None

    self.pending.append(m)
    output = convert.json2value(m.get_body())
    return output

def pop_message(self, wait=SECOND, till=None): """ RETURN TUPLE (message, payload) CALLER IS RESPONSIBLE FOR CALLING message.delete() WHEN DONE """ message = self.queue.read(wait_time_seconds=Math.floor(wait.seconds)) if not message: return None message.delete = lambda: self.queue.delete_message(message) payload = convert.json2value(message.get_body()) return message, payload
def test_bad_long_json(self):
    test = pypy_json_encode({"values": [i for i in range(1000)]})
    test = test[:1000] + "|" + test[1000:]

    expected = u"Can not decode JSON at:\n\t..., 216, 217, 218, 219|, 220, 221, 222, 22...\n\t ^\n"
    # expected = u'Can not decode JSON at:\n\t...9,270,271,272,273,27|4,275,276,277,278,2...\n\t ^\n'

    try:
        output = convert.json2value(test)
        Log.error("Expecting error")
    except Exception, e:
        if e.message != expected:
            Log.error("Expecting good error message", cause=e)

def to_esfilter(self):
    if not isinstance(self.lhs, Variable) or not isinstance(self.rhs, Literal) or self.op in BinaryOp.algebra_ops:
        return {"script": {"script": self.to_ruby()}}

    if self.op in ["eq", "term"]:
        return {"term": {self.lhs.var: self.rhs.to_esfilter()}}
    elif self.op in ["ne", "neq"]:
        return {"not": {"term": {self.lhs.var: self.rhs.to_esfilter()}}}
    elif self.op in BinaryOp.ineq_ops:
        return {"range": {self.lhs.var: {self.op: convert.json2value(self.rhs.json)}}}
    else:
        Log.error("Logic error")

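# Illustrative outputs only (an assumption based on the branches above, using a
# hypothetical comparison of the variable "build.date" against the literal 1447551322):
example_eq_filter = {"term": {"build.date": 1447551322}}           # op in ("eq", "term")
example_ne_filter = {"not": {"term": {"build.date": 1447551322}}}  # op in ("ne", "neq")
example_gt_filter = {"range": {"build.date": {"gt": 1447551322}}}  # op in BinaryOp.ineq_ops
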
def error(response):
    response = convert.utf82unicode(response.content)

    try:
        e = Except.new_instance(convert.json2value(response))
    except Exception:
        e = None

    if e:
        Log.error("Failed request", e)
    else:
        Log.error("Failed request\n {{response}}", {"response": response})

def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}').replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = mo_json.json2value(suite_json)
                suite = convert.value2json(coalesce(suite.fullname, suite.name))
                line = line.replace(suite_json, suite)

    if source.name.startswith("active-data-codecoverage"):
        d = convert.json2value(line)
        if d.source.file.total_covered > 0:
            return {"id": d._id, "json": line}, False
        else:
            return None, False

    if rownum == 0:
        value = mo_json.json2value(line)
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        value = mo_json.json2value(line)
        _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find('"resource_usage":') != -1:
        value = mo_json.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False

def test_simple_query(self):
    if self.not_real_service():
        return

    query = convert.unicode2utf8(convert.value2json({"from": "unittest"}))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.service_url, data=query)
        if response.status_code != 200:
            error(response)
    result = convert.json2value(convert.utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})