def solve():
    try:
        data = convert.json2value(convert.utf82unicode(flask.request.data))
        solved = noop.solve(data)
        response_data = convert.unicode2utf8(convert.value2json(solved))

        return Response(
            response_data,
            direct_passthrough=True,  # FOR STREAMING
            status=200,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )
    except Exception, e:
        e = Except.wrap(e)
        Log.warning("Could not process", cause=e)
        e = e.as_dict()
        return Response(
            convert.unicode2utf8(convert.value2json(e)),
            status=400,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )
def add_alias(self, alias=None): if alias: self.cluster_state = None self.cluster._post("/_aliases", data=convert.unicode2utf8( convert.value2json({ "actions": [{ "add": { "index": self.settings.index, "alias": alias } }] })), timeout=coalesce(self.settings.timeout, 30)) else: # SET ALIAS ACCORDING TO LIFECYCLE RULES self.cluster_state = None self.cluster._post("/_aliases", data=convert.unicode2utf8( convert.value2json({ "actions": [{ "add": { "index": self.settings.index, "alias": self.settings.alias } }] })), timeout=coalesce(self.settings.timeout, 30))
def _worker(self, please_stop): curr = "0.0" acc = [] last_count_written = -1 next_write = Date.now() while not please_stop: d = self.temp_queue.pop(timeout=MINUTE) if d == None: if not acc: continue # WRITE THE INCOMPLETE DATA TO S3, BUT NOT TOO OFTEN next_write = Date.now() + MINUTE try: if last_count_written != len(acc): if DEBUG: Log.note("write incomplete data ({{num}} lines) to {{uid}} in S3 next (time = {{next_write}})", uid=curr, next_write=next_write, num=len(acc)) self.bucket.write_lines(curr, (convert.value2json(a) for a in acc)) last_count_written = len(acc) except Exception, e: Log.note("Problem with write to S3", cause=e) elif d[UID_PATH] != curr: # WRITE acc TO S3 IF WE ARE MOVING TO A NEW KEY try: if acc: if DEBUG: Log.note("write complete data ({{num}} lines) to {{curr}} in S3", num=len(acc), curr=curr) self.bucket.write_lines(curr, (convert.value2json(a) for a in acc)) last_count_written = 0 curr = d[UID_PATH] acc = [d] except Exception, e: Log.warning("Can not store data", cause=e) Thread.sleep(30*MINUTE)
def not_done_test1(self):
    esquery = self.esq.query({
        "from": "private_bugs",
        "select": "*",
        "where": {"and": [
            {"range": {"expires_on": {"gte": 1393804800000}}},
            {"range": {"modified_ts": {"lte": 1394074529000}}},
            {"term": {"changes.field_name": "assigned_to"}},
            {"term": {"changes.new_value": "klahnakoski"}}
        ]},
        "limit": 10
    })

    expecting = {}
    assert convert.value2json(esquery, pretty=True) == convert.value2json(expecting, pretty=True)
def set_refresh_interval(self, seconds): if seconds <= 0: interval = -1 else: interval = unicode(seconds) + "s" if self.cluster.version.startswith("0.90."): response = self.cluster.put( "/" + self.settings.index + "/_settings", data='{"index":{"refresh_interval":' + convert.value2json(interval) + '}}' ) result = convert.json2value(utf82unicode(response.all_content)) if not result.ok: Log.error("Can not set refresh interval ({{error}})", { "error": utf82unicode(response.all_content) }) elif any(map(self.cluster.version.startswith, ["1.4.", "1.5.", "1.6.", "1.7."])): response = self.cluster.put( "/" + self.settings.index + "/_settings", data=convert.unicode2utf8('{"index":{"refresh_interval":' + convert.value2json(interval) + '}}') ) result = convert.json2value(utf82unicode(response.all_content)) if not result.acknowledged: Log.error("Can not set refresh interval ({{error}})", { "error": utf82unicode(response.all_content) }) else: Log.error("Do not know how to handle ES version {{version}}", version=self.cluster.version)
def close(self): self.please_stop.go() with self.lock: if self.db is None: return self.add(Thread.STOP) if self.db.status.end == self.start: if DEBUG: Log.note("persistent queue clear and closed") self.file.delete() else: if DEBUG: Log.note("persistent queue closed with {{num}} items left", num=len(self)) try: self._add_pending({"add": {"status.start": self.start}}) for i in range(self.db.status.start, self.start): self._add_pending({"remove": str(i)}) self.file.write( convert.value2json({"add": self.db}) + "\n" + ("\n".join(convert.value2json(p) for p in self.pending)) + "\n" ) self._apply_pending() except Exception, e: raise e self.db = None
def extend(self, records): """ records - MUST HAVE FORM OF [{"value":value}, ... {"value":value}] OR [{"json":json}, ... {"json":json}] OPTIONAL "id" PROPERTY IS ALSO ACCEPTED """ lines = [] try: for r in records: id = r.get("id") if id == None: id = Random.hex(40) if "json" in r: json = r["json"] elif "value" in r: json = convert.value2json(r["value"]) else: json = None Log.error("Expecting every record given to have \"value\" or \"json\" property") lines.append('{"index":{"_id": ' + convert.value2json(id) + '}}') lines.append(json) del records if not lines: return try: data_bytes = "\n".join(lines) + "\n" data_bytes = data_bytes.encode("utf8") except Exception, e: Log.error("can not make request body from\n{{lines|indent}}", lines= lines, cause=e) response = self.cluster._post( self.path + "/_bulk", data=data_bytes, headers={"Content-Type": "text"}, timeout=self.settings.timeout ) items = response["items"] for i, item in enumerate(items): if self.cluster.version.startswith("0.90."): if not item.index.ok: Log.error("{{error}} while loading line:\n{{line}}", error= item.index.error, line= lines[i * 2 + 1]) elif self.cluster.version.startswith("1.4."): if item.index.status not in [200, 201]: Log.error("{{error}} while loading line:\n{{line}}", error= item.index.error, line= lines[i * 2 + 1]) else: Log.error("version not supported {{version}}", version=self.cluster.version) if self.debug: Log.note("{{num}} documents added", num= len(items))
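# Hedged sketch (not part of the original code) of the newline-delimited body that
# extend() above sends to the Elasticsearch _bulk endpoint, using the standard json
# module in place of convert.value2json; the record values and the "RANDOM-40-HEX"
# placeholder are made up for illustration (the original falls back to Random.hex(40)).
import json

records = [{"id": "abc", "value": {"name": "first"}}, {"value": {"name": "no id given"}}]
lines = []
for r in records:
    _id = r.get("id") or "RANDOM-40-HEX"                # stand-in for Random.hex(40)
    lines.append(json.dumps({"index": {"_id": _id}}))   # action line
    lines.append(json.dumps(r["value"]))                # source document line
body = "\n".join(lines) + "\n"                          # _bulk requires a trailing newline
# extend() POSTs a body shaped like this to <index_path>/_bulk, then checks each item's status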
def get_treeherder_job(self): try: with Timer("Process Request"): args = Dict(**flask.request.args) # IS THE branch/revision PENDING? result = self.get_markup(unwraplist(args.branch), unwraplist(args.revision), unwraplist(args.task_id), unwraplist(args.buildername), unwraplist(args.timestamp)) response_data = convert.unicode2utf8( convert.value2json(result)) return Response(response_data, status=200, headers={ "access-control-allow-origin": "*", "content-type": "text/plain" }) except Exception, e: e = Except.wrap(e) Log.warning("Could not process", cause=e) e = e.as_dict() return Response(convert.unicode2utf8(convert.value2json(e)), status=400, headers={ "access-control-allow-origin": "*", "content-type": "application/json" })
def _shorten(value, source):
    if source.name.startswith("active-data-test-result"):
        value.result.subtests = [s for s in value.result.subtests if s.ok is False]
        value.result.missing_subtests = True
        value.repo.changeset.files = None

    shorter_length = len(convert.value2json(value))
    if shorter_length > MAX_RECORD_LENGTH:
        result_size = len(convert.value2json(value.result))
        if source.name == "active-data-test-result":
            if result_size > MAX_RECORD_LENGTH:
                Log.warning("Epic test failure in {{name}} results in big record for {{id}} of length {{length}}", id=value._id, name=source.name, length=shorter_length)
            else:
                pass  # NOT A PROBLEM
        else:
            Log.warning("Monstrous {{name}} record {{id}} of length {{length}}", id=value._id, name=source.name, length=shorter_length)
def to_sql(self, schema, not_null=False, boolean=False):
    term = self.term.partial_eval()
    if isinstance(term, Literal):
        val = term.value
        if isinstance(val, text_type):
            return wrap([{"name": ".", "sql": {"n": convert.value2json(len(val))}}])
        elif isinstance(val, (float, int)):
            return wrap([{"name": ".", "sql": {"n": convert.value2json(len(convert.value2json(val)))}}])
        else:
            return Null

    value = term.to_sql(schema)[0].sql.s
    return wrap([{"name": ".", "sql": {"n": "LENGTH" + sql_iso(value)}}])
def test_rest_get(self): settings = self.utils.fill_container({ "data": [{ "a": 0, "b": 0 }, { "a": 0, "b": 1 }, { "a": 1, "b": 0 }, { "a": 1, "b": 1 }], "query": { "from": "" } # DUMMY LINE }) url = URL(self.utils.service_url) url.path = "json/" + settings.index url.query = {"a": 1} response = self.utils.try_till_response(str(url), data=b"") self.assertEqual(response.status_code, 200) # ORDER DOES NOT MATTER, TEST EITHER expected1 = convert.unicode2utf8( convert.value2json([{ "a": 1, "b": 0 }, { "a": 1, "b": 1 }], pretty=True)) expected2 = convert.unicode2utf8( convert.value2json([{ "a": 1, "b": 1 }, { "a": 1, "b": 0 }], pretty=True)) try: self.assertEqual(response.all_content, expected1) except Exception: self.assertEqual(response.all_content, expected2)
def put(self, path, **kwargs): url = self.settings.host + ":" + unicode(self.settings.port) + path data = kwargs.get(b'data') if data == None: pass elif isinstance(data, Mapping): kwargs[b'data'] = data = convert.unicode2utf8(convert.value2json(data)) elif not isinstance(kwargs["data"], str): Log.error("data must be utf8 encoded string") if self.debug: sample = kwargs.get(b'data', "")[:300] Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample) # try: response = http.put(url, **kwargs) if response.status_code not in [200]: Log.error(response.reason+": "+response.all_content) if self.debug: Log.note("response: {{response}}", response= utf82unicode(response.all_content)[0:300:]) details = mo_json.json2value(utf82unicode(response.content)) if details.error: Log.error(convert.quote2string(details.error)) if details._shards.failed > 0: Log.error("Shard failures {{failures|indent}}", failures="---\n".join(r.replace(";", ";\n") for r in details._shards.failures.reason) ) return details
def stream():
    # IT'S A LOT OF PRICES, STREAM THEM TO FILE
    prefix = "[\n"
    for p in new_prices:
        yield prefix
        yield convert.value2json(p)
        prefix = ",\n"
    yield "]"
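# Hedged, self-contained sketch (assumption: standard json module in place of
# convert.value2json) of the prefix trick stream() above uses to emit a JSON array
# one element at a time; like the original, it assumes at least one element.
import json

def stream_json_array(items):
    prefix = "[\n"
    for item in items:
        yield prefix             # "[" before the first element, "," before the rest
        yield json.dumps(item)
        prefix = ",\n"
    yield "]"

# "".join(stream_json_array([{"price": 1}, {"price": 2}]))
# -> '[\n{"price": 1},\n{"price": 2}]'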
def get_raw_json(path): active_data_timer = Timer("total duration") body = flask.request.get_data() try: with active_data_timer: args = wrap(Data(**flask.request.args)) limit = args.limit if args.limit else 10 args.limit = None frum = wrap_from(path) result = jx.run( { "from": path, "where": { "eq": args }, "limit": limit, "format": "list" }, frum) if isinstance( result, Container ): #TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers result = result.format("list") result.meta.active_data_response_time = active_data_timer.duration response_data = convert.unicode2utf8( convert.value2json(result.data, pretty=True)) Log.note("Response is {{num}} bytes", num=len(response_data)) return Response(response_data, status=200) except Exception, e: e = Except.wrap(e) return _send_error(active_data_timer, body, e)
def delete_record(self, filter): if self.settings.read_only: Log.error("Index opened in read only mode, no changes allowed") self.cluster.get_metadata() if self.cluster.cluster_state.version.number.startswith("0.90"): query = {"filtered": { "query": {"match_all": {}}, "filter": filter }} elif self.cluster.cluster_state.version.number.startswith("1."): query = {"query": {"filtered": { "query": {"match_all": {}}, "filter": filter }}} else: raise NotImplementedError if self.debug: Log.note("Delete bugs:\n{{query}}", query= query) result = self.cluster.delete( self.path + "/_query", data=convert.value2json(query), timeout=600, params={"consistency": self.settings.consistency} ) for name, status in result._indices.items(): if status._shards.failed > 0: Log.error("Failure to delete from {{index}}", index=name)
def test_branch_count(self): if self.not_real_service(): return test = wrap({"query": { "from": { "type": "elasticsearch", "settings": { "host": ES_CLUSTER_LOCATION, "index": "unittest", "type": "test_result" } }, "select": [ {"aggregate": "count"}, ], "edges": [ "build.branch" ], "where": {"or": [ {"missing": "build.id"} # {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}} ]}, "format": "table" }}) query = convert.unicode2utf8(convert.value2json(test.query)) # EXECUTE QUERY with Timer("query"): response = http.get(self.service_url, data=query) if response.status_code != 200: error(response) result = convert.json2value(convert.utf82unicode(response.all_content)) Log.note("result\n{{result|indent}}", {"result": result})
def test_multiple_agg_on_same_field(self): if self.not_real_service(): return test = wrap({ "query": { "from": { "type": "elasticsearch", "settings": { "host": ES_CLUSTER_LOCATION, "index": "unittest", "type": "test_result" } }, "select": [{ "name": "max_bytes", "value": "run.stats.bytes", "aggregate": "max" }, { "name": "count", "value": "run.stats.bytes", "aggregate": "count" }] } }) query = unicode2utf8(convert.value2json(test.query)) # EXECUTE QUERY with Timer("query"): response = http.get(self.testing.query, data=query) if response.status_code != 200: error(response) result = json2value(utf82unicode(response.all_content)) Log.note("result\n{{result|indent}}", {"result": result})
def __init__(self, host, index, type="query", max_size=10, batch_size=10, kwargs=None): """ settings ARE FOR THE ELASTICSEARCH INDEX """ es = Cluster(kwargs).get_or_create_index(schema=convert.json2value( convert.value2json(SCHEMA), leaves=True), limit_replicas=True, kwargs=kwargs) #ENSURE THE TYPE EXISTS FOR PROBING try: es.add({ "id": "dummy", "value": { "hash": "dummy", "create_time": Date.now(), "last_used": Date.now(), "query": {} } }) except Exception, e: Log.warning("Problem saving query", cause=e)
def __new__(cls, value=None, **kwargs):
    output = object.__new__(cls)
    if value == None:
        if kwargs:
            output.milli = datetime.timedelta(**kwargs).total_seconds() * 1000
            output.month = 0
            return output
        else:
            return None

    if Math.is_number(value):
        output._milli = float(value) * 1000
        output.month = 0
        return output
    elif isinstance(value, basestring):
        return parse(value)
    elif isinstance(value, Duration):
        output.milli = value.milli
        output.month = value.month
        return output
    elif isinstance(value, float) and Math.is_nan(value):
        return None
    else:
        from pyLibrary import convert
        from pyLibrary.debugs.logs import Log

        Log.error("Do not know type of object (" + convert.value2json(value) + ") to make a Duration")
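# Hedged check of the kwargs branch of Duration.__new__ above: the milliseconds come
# straight from datetime.timedelta, so the arithmetic can be verified without the
# library itself (Duration, parse and Math are assumed to come from the surrounding code).
from datetime import timedelta

assert timedelta(seconds=30).total_seconds() * 1000 == 30000.0            # Duration(seconds=30).milli
assert timedelta(days=1, hours=6).total_seconds() * 1000 == 108000000.0   # Duration(days=1, hours=6).milli

# Other branches visible above:
#   Duration(2.5)          -> numeric branch: _milli = 2.5 * 1000
#   Duration("day")        -> string branch: delegated to parse("day")
#   Duration(float("nan")) -> returns None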
def commit(self): with self.lock: if self.closed: Log.error("Queue is closed, commit not allowed") try: self._add_pending({"add": {"status.start": self.start}}) for i in range(self.db.status.start, self.start): self._add_pending({"remove": str(i)}) if ( self.db.status.end - self.start < 10 or Random.range(0, 1000) == 0 ): # FORCE RE-WRITE TO LIMIT FILE SIZE # SIMPLY RE-WRITE FILE if DEBUG: Log.note( "Re-write {{num_keys}} keys to persistent queue", num_keys=self.db.status.end - self.start ) for k in self.db.keys(): if k == "status" or int(k) >= self.db.status.start: continue Log.error("Not expecting {{key}}", key=k) self._commit() self.file.write(convert.value2json({"add": self.db}) + "\n") else: self._commit() except Exception, e: raise e
def delete_record(self, filter): if self.settings.read_only: Log.error("Index opened in read only mode, no changes allowed") self.cluster.get_metadata() if self.cluster.cluster_state.version.number.startswith("0.90"): query = {"filtered": { "query": {"match_all": {}}, "filter": filter }} elif self.cluster.cluster_state.version.number.startswith("1.0"): query = {"query": {"filtered": { "query": {"match_all": {}}, "filter": filter }}} else: raise NotImplementedError if self.debug: Log.note("Delete bugs:\n{{query}}", query= query) result = self.cluster.delete( self.path + "/_query", data=convert.value2json(query), timeout=60 ) for name, status in result._indices.items(): if status._shards.failed > 0: Log.error("Failure to delete from {{index}}", index=name)
def _index_columns(self, columns): # INDEX ALL COLUMNS, ESPECIALLY THOSE FUNCTION RESULTS indexed_values = [None] * len(columns) for i, s in enumerate(columns): index = self._index.get(s.value, None) if index is not None: indexed_values[i] = index continue function_name = convert.value2json(s.value.to_dict(), sort_keys=True) index = self._index.get(function_name, None) indexed_values[i] = index if index is not None: continue indexed_values[i] = index = self._index[function_name] = {} accessor = jx.get(s.value) for k, ii in self._unique_index.items(): v = accessor(self._source[ii]) j = index.get(v) if j is None: j = index[v] = set() j |= {ii} return indexed_values
def create_index( self, index, alias=None, schema=None, limit_replicas=None, settings=None ): if not settings.alias: settings.alias = settings.index settings.index = proto_name(settings.alias) if settings.alias == settings.index: Log.error("Expecting index name to conform to pattern") if settings.schema_file: Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead') if schema == None: Log.error("Expecting a schema") elif isinstance(schema, basestring): schema = convert.json2value(schema, paths=True) else: schema = convert.json2value(convert.value2json(schema), paths=True) if limit_replicas: # DO NOT ASK FOR TOO MANY REPLICAS health = self.get("/_cluster/health") if schema.settings.index.number_of_replicas >= health.number_of_nodes: Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized", {"from": schema.settings.index.number_of_replicas}, to= health.number_of_nodes - 1 ) schema.settings.index.number_of_replicas = health.number_of_nodes - 1 self._post( "/" + settings.index, data=convert.value2json(schema).encode("utf8"), headers={"Content-Type": "application/json"} ) while True: time.sleep(1) try: self.head("/" + settings.index) break except Exception, _: Log.note("{{index}} does not exist yet", index= settings.index)
def store_data(path): try: request = flask.request auth = request.headers.get('Authorization') if not auth: # USE PATTERN MATCHING AUTH for c in all_creds: if c.path == path: return store_public_data(path, c) raise Log.error( "No authentication provided. path={{path}} data.length={{length}}", path=path, length=len(request.get_data()), ) try: receiver = Receiver( lookup_credentials, auth, request.url, request.method, content=request.get_data(), content_type=request.headers['Content-Type'], seen_nonce=seen_nonce ) except Exception, e: e = Except.wrap(e) raise Log.error( "Authentication failed. path={{path}} data.length={{length}}\n{{auth|indent}}", path=path, length=len(request.get_data()), auth=auth, cause=e ) permissions = lookup_user(receiver.parsed_header["id"]) if path not in listwrap(permissions.resources): Log.error("{{user}} not allowed access to {{resource}}", user=permissions.hawk.id, resource=path) link, id = submit_data(path, permissions, request.json) response_content = convert.unicode2utf8(convert.value2json({ "link": link, "etl": {"id": id} })) receiver.respond( content=response_content, content_type=RESPONSE_CONTENT_TYPE ) return Response( response_content, status=200, headers={ b'Server-Authorization': receiver.response_header, b'content-type': RESPONSE_CONTENT_TYPE } )
def ping(self):
    self.ping_time = Date.now()
    self.synch.write(convert.value2json({
        "action": "ping",
        "next_key": self.next_key,
        "source_key": self.source_key,
        "timestamp": self.ping_time.milli
    }))
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if b"json" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"json"]))
    elif b'data' in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"data"]))
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    c = response.content
    try:
        details = convert.json2value(convert.utf82unicode(c))
    except Exception, e:
        Log.error("Unexpected return value {{content}}", content=c, cause=e)
def handle(self, record):
    o = convert.value2json(DataObject(record))
    if "this is a problem" not in o:
        Log.error("We expect Python to, at least, report the first order problem")
    if "this is the root cause" in o:
        Log.error("We do not expect Python to report exception chains")
def create_index( self, index, alias=None, create_timestamp=None, schema=None, limit_replicas=None, read_only=False, tjson=False, kwargs=None ): if not alias: alias = kwargs.alias = kwargs.index index = kwargs.index = proto_name(alias, create_timestamp) if kwargs.alias == index: Log.error("Expecting index name to conform to pattern") if kwargs.schema_file: Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead') if schema == None: Log.error("Expecting a schema") elif isinstance(schema, basestring): schema = mo_json.json2value(schema, leaves=True) else: schema = mo_json.json2value(convert.value2json(schema), leaves=True) if limit_replicas: # DO NOT ASK FOR TOO MANY REPLICAS health = self.get("/_cluster/health") if schema.settings.index.number_of_replicas >= health.number_of_nodes: Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized", {"from": schema.settings.index.number_of_replicas}, to= health.number_of_nodes - 1 ) schema.settings.index.number_of_replicas = health.number_of_nodes - 1 self.put( "/" + index, data=schema, headers={"Content-Type": "application/json"} ) # CONFIRM INDEX EXISTS while True: try: state = self.get("/_cluster/state", retry={"times": 5}, timeout=3) if index in state.metadata.indices: break Log.note("Waiting for index {{index}} to appear", index=index) except Exception as e: Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e) Till(seconds=1).wait() Log.alert("Made new index {{index|quote}}", index=index) es = Index(kwargs=kwargs) return es
def create_index( self, index, alias=None, create_timestamp=None, schema=None, limit_replicas=None, read_only=False, tjson=False, kwargs=None ): if not alias: alias = kwargs.alias = kwargs.index index = kwargs.index = proto_name(alias, create_timestamp) if kwargs.alias == index: Log.error("Expecting index name to conform to pattern") if kwargs.schema_file: Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead') if schema == None: Log.error("Expecting a schema") elif isinstance(schema, basestring): schema = mo_json.json2value(schema, leaves=True) else: schema = mo_json.json2value(convert.value2json(schema), leaves=True) if limit_replicas: # DO NOT ASK FOR TOO MANY REPLICAS health = self.get("/_cluster/health") if schema.settings.index.number_of_replicas >= health.number_of_nodes: Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized", {"from": schema.settings.index.number_of_replicas}, to= health.number_of_nodes - 1 ) schema.settings.index.number_of_replicas = health.number_of_nodes - 1 self.post( "/" + index, data=schema, headers={"Content-Type": "application/json"} ) # CONFIRM INDEX EXISTS while True: try: state = self.get("/_cluster/state", retry={"times": 5}, timeout=3) if index in state.metadata.indices: break Log.note("Waiting for index {{index}} to appear", index=index) except Exception as e: Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e) Till(seconds=1).wait() Log.alert("Made new index {{index|quote}}", index=index) es = Index(kwargs=kwargs) return es
def test_timing(self): if self.not_real_service(): return test = wrap({ "query": { "from": { "type": "elasticsearch", "settings": { "host": ES_CLUSTER_LOCATION, "index": "unittest", "type": "test_result" } }, "select": [{ "name": "count", "value": "run.duration", "aggregate": "count" }, { "name": "total", "value": "run.duration", "aggregate": "sum" }], "edges": [{ "name": "chunk", "value": ["run.suite", "run.chunk"] }, "result.ok"], "where": { "and": [{ "lt": { "timestamp": Date.floor(Date.now()).milli / 1000 } }, { "gte": { "timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000 } }] }, "format": "cube", "samples": { "limit": 30 } } }) query = unicode2utf8(convert.value2json(test.query)) # EXECUTE QUERY with Timer("query"): response = http.get(self.testing.query, data=query) if response.status_code != 200: error(response) result = json2value(utf82unicode(response.all_content)) Log.note("result\n{{result|indent}}", {"result": result})
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    kwargs["data"] = convert.unicode2utf8(convert.value2json(kwargs["data"]))

    response = post(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))
def test_missing_auth(self):
    # MAKE SOME DATA
    data = {
        "constant": "this is a test",
        "random-data": convert.bytes2base64(Random.bytes(100))
    }

    response = requests.post(settings.bad_url, data=convert.unicode2utf8(convert.value2json(data)))
    self.assertEqual(response.status_code, 403)
def _shorten(value, source):
    value.result.subtests = [s for s in value.result.subtests if s.ok is False]
    value.result.missing_subtests = True
    if source.name.startswith("active-data-test-result"):
        value.repo.changeset.files = None

    shorter_length = len(convert.value2json(value))
    if shorter_length > MAX_RECORD_LENGTH:
        Log.warning("Monstrous {{name}} record {{id}} of length {{length}}", id=value._id, name=source.name, length=shorter_length)
def log_loop(settings, synch, queue, bucket, please_stop): with aws.Queue(settings.work_queue) as work_queue: for i, g in qb.groupby(queue, size=settings.param.size): Log.note( "Preparing {{num}} pulse messages to bucket={{bucket}}", num=len(g), bucket=bucket.name ) full_key = unicode(synch.next_key) + ":" + unicode(MIN(g.select("_meta.count"))) try: output = [ set_default( d, {"etl": { "name": "Pulse block", "bucket": settings.destination.bucket, "timestamp": Date.now().unix, "id": synch.next_key, "source": { "name": "pulse.mozilla.org", "id": d._meta.count, "count": d._meta.count, "message_id": d._meta.message_id, "sent": Date(d._meta.sent), }, "type": "aggregation" }} ) for i, d in enumerate(g) if d != None # HAPPENS WHEN PERSISTENT QUEUE FAILS TO LOG start ] bucket.write(full_key, "\n".join(convert.value2json(d) for d in output)) synch.advance() synch.source_key = MAX(g.select("_meta.count")) + 1 now = Date.now() work_queue.add({ "bucket": bucket.name, "key": full_key, "timestamp": now.unix, "date/time": now.format() }) synch.ping() queue.commit() Log.note("Wrote {{num}} pulse messages to bucket={{bucket}}, key={{key}} ", num= len(g), bucket= bucket.name, key= full_key) except Exception, e: queue.rollback() if not queue.closed: Log.warning("Problem writing {{key}} to S3", key=full_key, cause=e) if please_stop: break
def shutdown(self):
    self.pinger_thread.stop()
    self.pinger_thread.join()
    self.synch.write(convert.value2json({
        "action": "shutdown",
        "next_key": self.next_key,
        "source_key": self.source_key,
        "timestamp": Date.now().milli
    }))
def test_select_w_dot(self):
    data = [{
        "sustained_result": {"diff": 2.2356859541328733, "confidence": 0.85313030049257099},
        "point_result": {"diff": -1.3195117613213274, "confidence": 0.15889902861667249}
    }]

    result = jx.select(data, "point_result.confidence")
    assert result[0] == 0.15889902861667249, "problem pulling deep values"

    result = jx.select(data, ["point_result.confidence", "sustained_result.confidence"])
    expected = {
        "point_result": {"confidence": 0.15889902861667249},
        "sustained_result": {"confidence": 0.85313030049257099}
    }
    assert convert.value2json(result[0]) == convert.value2json(expected)
class Json2Redshift(object): @use_settings def __init__( self, host, user, password, table, meta, # REDSHIFT COPY COMMAND REQUIRES A BUCKET TO HOLD PARAMETERS database=None, port=5439, settings=None): self.settings = settings self.db = Redshift(settings) INDEX_CACHE[settings.table] = wrap( {"name": settings.table}) # HACK TO GET parse_columns TO WORK columns = parse_columns(settings.table, settings.mapping.test_result.properties) nested = [c.name for c in columns if c.type == "nested"] self.columns = wrap([ c for c in columns if c.type not in ["object"] and not any( c.name.startswith(n + ".") for n in nested) ]) try: self.db.execute(""" CREATE TABLE {{table_name}} ( "_id" character varying UNIQUE, {{columns}} )""", { "table_name": self.db.quote_column(settings.table), "columns": SQL(",\n".join( self.db.quote_column(c.name) + " " + self.db.es_type2pg_type(c.type) for c in self.columns)) }, retry=False) except Exception, e: if "already exists" in e: Log.alert("Table {{table}} exists in Redshift", table=settings.table) else: Log.error("Could not make table", e) # MAKE jsonpaths FOR COPY COMMAND jsonpaths = { "jsonpaths": [ "$" + "".join("[" + convert.string2quote(p) + "]" for p in split_field(c.name)) for c in self.columns ] } content = convert.value2json(jsonpaths) content = content.replace("\\\"", "'") # PUSH TO S3 s3.Bucket(meta).write(meta.jsonspath, content)
def full_etl(settings): schema = convert.json2value(convert.value2json(SCHEMA), leaves=True) Cluster(settings.destination).get_or_create_index(settings=settings.destination, schema=schema, limit_replicas=True) destq = FromES(settings.destination) if settings.incremental: min_bug_id = destq.query({ "from": coalesce(settings.destination.alias, settings.destination.index), "select": {"name": "max_bug_id", "value": "bug_id", "aggregate": "max"} }) min_bug_id = int(MAX(min_bug_id-1000, 0)) else: min_bug_id = 0 sourceq = FromES(settings.source) max_bug_id = sourceq.query({ "from": coalesce(settings.source.alias, settings.source.index), "select": {"name": "max_bug_id", "value": "bug_id", "aggregate": "max"} }) + 1 max_bug_id = int(coalesce(max_bug_id, 0)) # FIRST, GET ALL MISSING BUGS for s, e in qb.reverse(list(qb.intervals(min_bug_id, max_bug_id, 10000))): with Timer("pull {{start}}..{{end}} from ES", {"start": s, "end": e}): children = sourceq.query({ "from": settings.source.alias, "select": ["bug_id", "dependson", "blocked", "modified_ts", "expires_on"], "where": {"and": [ {"range": {"bug_id": {"gte": s, "lt": e}}}, {"or": [ {"exists": "dependson"}, {"exists": "blocked"} ]} ]}, "limit": 10000 }) with Timer("fixpoint work"): to_fix_point(settings, destq, children.data) # PROCESS RECENT CHANGES with Timer("pull recent dependancies from ES"): children = sourceq.query({ "from": settings.source.alias, "select": ["bug_id", "dependson", "blocked"], "where": {"and": [ {"range": {"modified_ts": {"gte": convert.datetime2milli(datetime.utcnow() - timedelta(days=7))}}}, {"or": [ {"exists": "dependson"}, {"exists": "blocked"} ]} ]}, "limit": 100000 }) to_fix_point(settings, destq, children.data)
def update(self, command): """ EXPECTING command == {"set":term, "where":where} THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES THE where CLAUSE IS AN ES FILTER """ command = wrap(command) schema = self._es.get_schema() # GET IDS OF DOCUMENTS results = self._es.search( { "fields": listwrap(schema._routing.path), "query": { "filtered": {"query": {"match_all": {}}, "filter": jx_expression(command.where).to_esfilter()} }, "size": 200000, } ) # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT) scripts = FlatList() for k, v in command.set.items(): if not is_keyword(k): Log.error("Only support simple paths for now") if isinstance(v, Mapping) and v.doc: scripts.append({"doc": v.doc}) else: scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()}) if results.hits.hits: updates = [] for h in results.hits.hits: for s in scripts: updates.append( { "update": { "_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)]), } } ) updates.append(s) content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode("utf-8") response = self._es.cluster.post( self._es.path + "/_bulk", data=content, headers={"Content-Type": "application/json"}, timeout=self.settings.timeout, params={"consistency": self.settings.consistency}, ) if response.errors: Log.error( "could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)], )
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if b"json" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"json"]))
    elif b'data' in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"data"]))
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    c = response.content
    try:
        details = mo_json.json2value(convert.utf82unicode(c))
    except Exception, e:
        Log.error("Unexpected return value {{content}}", content=c, cause=e)
def quote_value(self, value):
    if value == None:
        return SQL("NULL")
    if isinstance(value, list):
        json = convert.value2json(value)
        return self.quote_value(json)

    if isinstance(value, basestring) and len(value) > 256:
        value = value[:256]
    return SQL(adapt(value))
def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None): """ settings ARE FOR THE ELASTICSEARCH INDEX """ self.es = Cluster(settings).get_or_create_index( schema=convert.json2value(convert.value2json(SCHEMA), paths=True), limit_replicas=True, settings=settings ) self.queue = self.es.threaded_queue(max_size=max_size, batch_size=batch_size)
def _deep_json_to_string(value, depth):
    """
    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE
    """
    if isinstance(value, Mapping):
        if depth == 0:
            return strings.limit(convert.value2json(value), LOG_STRING_LENGTH)
        return {k: _deep_json_to_string(v, depth - 1) for k, v in value.items()}
    elif isinstance(value, list):
        return strings.limit(convert.value2json(value), LOG_STRING_LENGTH)
    elif isinstance(value, (float, int, long)):
        return value
    elif isinstance(value, basestring):
        return strings.limit(value, LOG_STRING_LENGTH)
    else:
        return strings.limit(convert.value2json(value), LOG_STRING_LENGTH)
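# Hedged, standard-library-only sketch of the same depth-limiting idea as
# _deep_json_to_string above; json.dumps and plain slicing stand in for
# convert.value2json and strings.limit, and LOG_STRING_LENGTH is an assumed value.
import json

LOG_STRING_LENGTH = 2000  # assumption; the real constant is defined elsewhere

def deep_json_to_string(value, depth):
    if isinstance(value, dict):
        if depth == 0:
            return json.dumps(value)[:LOG_STRING_LENGTH]   # too deep: stringify the whole mapping
        return {k: deep_json_to_string(v, depth - 1) for k, v in value.items()}
    if isinstance(value, list):
        return json.dumps(value)[:LOG_STRING_LENGTH]
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):                              # basestring under Python 2, as above
        return value[:LOG_STRING_LENGTH]
    return json.dumps(value, default=str)[:LOG_STRING_LENGTH]

# deep_json_to_string({"a": {"b": {"c": 1}}}, depth=1)
# -> {"a": '{"b": {"c": 1}}'}   (everything below depth 1 becomes a JSON string)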
def extend(self, records):
    """
    JUST SO WE MODEL A Queue
    """
    records = {v["id"]: v["value"] for v in records}
    unwrap(self.data).update(records)

    data_as_json = convert.value2json(self.data, pretty=True)
    File(self.filename).write(data_as_json)
    Log.note("{{num}} documents added", num=len(records))
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if b"json" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"json"]))
    elif b'data' in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"data"]))
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    c = response.content
    try:
        details = mo_json.json2value(convert.utf82unicode(c))
    except Exception as e:
        Log.error("Unexpected return value {{content}}", content=c, cause=e)

    if response.status_code not in [200, 201]:
        Log.error("Bad response", cause=Except.wrap(details))

    return details
def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None): """ settings ARE FOR THE ELASTICSEARCH INDEX """ self.es = Cluster(settings).get_or_create_index( schema=convert.json2value(convert.value2json(SCHEMA), leaves=True), limit_replicas=True, tjson=True, settings=settings ) self.batch_size=batch_size self.es.add_alias("debug") self.queue = Queue("debug logs to es", max=max_size, silent=True) Thread.run("add debug logs to es", self._insert_loop)