def monitor(self, please_stop):
    please_stop.on_go(lambda: self.todo.add(Thread.STOP))
    while not please_stop:
        try:
            if not self.todo:
                with self.columns.locker:
                    old_columns = filter(
                        lambda c: (c.last_updated == None or c.last_updated < Date.now() - TOO_OLD) and c.type not in ["object", "nested"],
                        self.columns
                    )
                    if old_columns:
                        self.todo.extend(old_columns)
                        # TEST CONSISTENCY
                        for c, d in product(list(self.todo.queue), list(self.todo.queue)):
                            if c.abs_name == d.abs_name and c.table == d.table and c != d:
                                Log.error("")
                    else:
                        Log.note("no more metadata to update")

            column = self.todo.pop(timeout=10 * MINUTE)
            if column:
                if column.type in ["object", "nested"]:
                    continue
                elif column.last_updated >= Date.now() - TOO_OLD:
                    continue
                try:
                    self._update_cardinality(column)
                    Log.note("updated {{column.name}}", column=column)
                except Exception, e:
                    Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
        except Exception, e:
            Log.warning("problem in cardinality monitor", cause=e)

def __init__(self, **desc):
    Domain.__init__(self, **desc)
    self.type = "time"
    self.NULL = Null
    self.min = Date(self.min)
    self.max = Date(self.max)
    self.interval = Duration(self.interval)

    if self.partitions:
        # IGNORE THE min, max, interval
        if not self.key:
            Log.error("Must have a key value")
        Log.error("not implemented yet")
        # VERIFY PARTITIONS DO NOT OVERLAP
        return
    elif not all([self.min, self.max, self.interval]):
        Log.error("Can not handle missing parameter")

    self.key = "min"
    self.partitions = wrap([
        {"min": v, "max": v + self.interval, "dataIndex": i}
        for i, v in enumerate(Date.range(self.min, self.max, self.interval))
    ])

def metas(self, prefix=None, limit=None, delimiter=None):
    """
    RETURN THE METADATA DESCRIPTORS FOR EACH KEY
    """
    keys = self.bucket.list(prefix=prefix, delimiter=delimiter)
    if limit:
        output = []
        for i, k in enumerate(keys):
            output.append({
                "key": strip_extension(k.key),
                "etag": convert.quote2string(k.etag),
                "expiry_date": Date(k.expiry_date),
                "last_modified": Date(k.last_modified)
            })
            if i >= limit:
                break
        return wrap(output)

    output = [
        {
            "key": strip_extension(k.key),
            "etag": convert.quote2string(k.etag),
            "expiry_date": Date(k.expiry_date),
            "last_modified": Date(k.last_modified)
        }
        for k in keys
    ]
    return wrap(output)

def __init__(self, host, index, type="query", max_size=10, batch_size=10, settings=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    es = Cluster(settings).get_or_create_index(
        schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        settings=settings
    )

    # ENSURE THE TYPE EXISTS FOR PROBING
    try:
        es.add({
            "id": "dummy",
            "value": {
                "hash": "dummy",
                "create_time": Date.now(),
                "last_used": Date.now(),
                "query": {}
            }
        })
    except Exception, e:
        Log.warning("Problem saving query", cause=e)

def not_monitor(self, please_stop):
    Log.alert("metadata scan has been disabled")
    please_stop.on_go(lambda: self.todo.add(Thread.STOP))
    while not please_stop:
        c = self.todo.pop()
        if c == Thread.STOP:
            break
        if not c.last_updated or c.last_updated >= Date.now() - TOO_OLD:
            continue

        with self.meta.columns.locker:
            self.meta.columns.update({
                "set": {
                    "last_updated": Date.now()
                },
                "clear": [
                    "count",
                    "cardinality",
                    "partitions",
                ],
                "where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
            })
        Log.note("Could not get {{col.es_index}}.{{col.es_column}} info", col=c)

def test_big_result_works(self):
    result = http.post_json(global_settings.service_url, data={
        "from": "unittest",
        "where": {"and": [
            {"gte": {"run.timestamp": Date.today() - DAY}},
            {"lt": {"run.timestamp": Date.today()}},
            {"eq": {"result.ok": False}}
        ]},
        "format": "list",
        "limit": 10000
    })
    if result.template:
        result = Except.new_instance(result)
        Log.error("problem with call", cause=result)
    Log.note("Got {{num}} test failures", num=len(result.data))

def get_columns(self, table_name, column_name=None, force=False):
    """
    RETURN METADATA COLUMNS
    """
    try:
        # LAST TIME WE GOT INFO FOR THIS TABLE
        short_name = join_field(split_field(table_name)[0:1])
        table = self.get_table(short_name)[0]

        if not table:
            table = Table(
                name=short_name,
                url=None,
                query_path=None,
                timestamp=Date.now()
            )
            with self.meta.tables.locker:
                self.meta.tables.add(table)
            self._get_columns(table=short_name)
        elif force or table.timestamp == None or table.timestamp < Date.now() - MAX_COLUMN_METADATA_AGE:
            table.timestamp = Date.now()
            self._get_columns(table=short_name)

        with self.meta.columns.locker:
            columns = self.meta.columns.find(table_name, column_name)
        if columns:
            columns = jx.sort(columns, "name")
            # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
            while len(self.todo) and not all(columns.get("last_updated")):
                Log.note(
                    "waiting for columns to update {{columns|json}}",
                    columns=[c.table + "." + c.es_column for c in columns if not c.last_updated]
                )
                Thread.sleep(seconds=1)
            return columns
    except Exception, e:
        Log.error("Not expected", cause=e)

def _worker(self, please_stop):
    curr = "0.0"
    acc = []
    last_count_written = -1
    next_write = Date.now()

    while not please_stop:
        d = self.temp_queue.pop(timeout=MINUTE)
        if d == None:
            if not acc:
                continue
            # WRITE THE INCOMPLETE DATA TO S3, BUT NOT TOO OFTEN
            next_write = Date.now() + MINUTE
            try:
                if last_count_written != len(acc):
                    if DEBUG:
                        Log.note(
                            "write incomplete data ({{num}} lines) to {{uid}} in S3 next (time = {{next_write}})",
                            uid=curr,
                            next_write=next_write,
                            num=len(acc)
                        )
                    self.bucket.write_lines(curr, (convert.value2json(a) for a in acc))
                    last_count_written = len(acc)
            except Exception, e:
                Log.note("Problem with write to S3", cause=e)
        elif d[UID_PATH] != curr:
            # WRITE acc TO S3 IF WE ARE MOVING TO A NEW KEY
            try:
                if acc:
                    if DEBUG:
                        Log.note("write complete data ({{num}} lines) to {{curr}} in S3", num=len(acc), curr=curr)
                    self.bucket.write_lines(curr, (convert.value2json(a) for a in acc))
                    last_count_written = 0
                curr = d[UID_PATH]
                acc = [d]
            except Exception, e:
                Log.warning("Can not store data", cause=e)
                Thread.sleep(30 * MINUTE)

def _get_queue(self, row):
    row = wrap(row)
    if row.json:
        row.value, row.json = convert.json2value(row.json), None
    timestamp = Date(self.rollover_field(wrap(row).value))
    if timestamp == None or timestamp < Date.today() - self.rollover_max:
        return Null

    rounded_timestamp = timestamp.floor(self.rollover_interval)
    queue = self.known_queues.get(rounded_timestamp.unix)
    if queue == None:
        candidates = jx.run({
            "from": self.cluster.get_aliases(),
            "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}},
            "sort": "index"
        })
        best = None
        for c in candidates:
            c = wrap(c)
            c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
            if timestamp > c.date:
                best = c
        if not best or rounded_timestamp > best.date:
            if rounded_timestamp < wrap(candidates[-1]).date:
                es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, settings=self.settings)
            else:
                try:
                    es = self.cluster.create_index(create_timestamp=rounded_timestamp, settings=self.settings)
                    es.add_alias(self.settings.index)
                except Exception, e:
                    if "IndexAlreadyExistsException" not in e:
                        Log.error("Problem creating index", cause=e)
                    return self._get_queue(row)  # TRY AGAIN
        else:
            es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, settings=self.settings)

        with suppress_exception:
            es.set_refresh_interval(seconds=60 * 10, timeout=5)

        self._delete_old_indexes(candidates)
        queue = self.known_queues[rounded_timestamp.unix] = es.threaded_queue(
            max_size=self.settings.queue_size,
            batch_size=self.settings.batch_size,
            silent=True
        )
    return queue

def test_timing(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {"name": "count", "value": "run.duration", "aggregate": "count"},
            {"name": "total", "value": "run.duration", "aggregate": "sum"}
        ],
        "edges": [
            {"name": "chunk", "value": ["run.suite", "run.chunk"]},
            "result.ok"
        ],
        "where": {"and": [
            {"lt": {"timestamp": Date.floor(Date.now()).milli / 1000}},
            {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
        ]},
        "format": "cube",
        "samples": {"limit": 30}
    }})

    query = convert.unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.service_url, data=query)
        if response.status_code != 200:
            error(response)
    result = convert.json2value(convert.utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})

def log_loop(settings, synch, queue, bucket, please_stop):
    with aws.Queue(settings.work_queue) as work_queue:
        for i, g in qb.groupby(queue, size=settings.param.size):
            Log.note(
                "Preparing {{num}} pulse messages to bucket={{bucket}}",
                num=len(g),
                bucket=bucket.name
            )

            full_key = unicode(synch.next_key) + ":" + unicode(MIN(g.select("_meta.count")))
            try:
                output = [
                    set_default(
                        d,
                        {"etl": {
                            "name": "Pulse block",
                            "bucket": settings.destination.bucket,
                            "timestamp": Date.now().unix,
                            "id": synch.next_key,
                            "source": {
                                "name": "pulse.mozilla.org",
                                "id": d._meta.count,
                                "count": d._meta.count,
                                "message_id": d._meta.message_id,
                                "sent": Date(d._meta.sent),
                            },
                            "type": "aggregation"
                        }}
                    )
                    for i, d in enumerate(g)
                    if d != None  # HAPPENS WHEN PERSISTENT QUEUE FAILS TO LOG start
                ]
                bucket.write(full_key, "\n".join(convert.value2json(d) for d in output))
                synch.advance()
                synch.source_key = MAX(g.select("_meta.count")) + 1

                now = Date.now()
                work_queue.add({
                    "bucket": bucket.name,
                    "key": full_key,
                    "timestamp": now.unix,
                    "date/time": now.format()
                })

                synch.ping()
                queue.commit()
                Log.note(
                    "Wrote {{num}} pulse messages to bucket={{bucket}}, key={{key}}",
                    num=len(g),
                    bucket=bucket.name,
                    key=full_key
                )
            except Exception, e:
                queue.rollback()
                if not queue.closed:
                    Log.warning("Problem writing {{key}} to S3", key=full_key, cause=e)

            if please_stop:
                break

def test_timeout(self):
    def test(please_stop):
        Till(seconds=10).wait()

    now = Date.now()
    thread = Thread.run("sleeper", test)
    Till(seconds=0.5).wait()
    thread.stop()
    self.assertGreater(now.unix + 1, Date.now().unix, "Expecting quick stop")
    Log.note("done")

def save(self, query):
    query.meta = None
    json = convert.value2json(query)
    hash = convert.unicode2utf8(json)

    # TRY MANY HASHES AT ONCE
    hashes = [None] * HASH_BLOCK_SIZE
    for i in range(HASH_BLOCK_SIZE):
        hash = hashlib.sha1(hash).digest()
        hashes[i] = hash

    short_hashes = [convert.bytes2base64(h[0:6]).replace("/", "_") for h in hashes]
    available = {h: True for h in short_hashes}

    existing = self.es.query({
        "from": {"type": "elasticsearch", "settings": self.es.settings},
        "where": {"terms": {"hash": short_hashes}},
        "meta": {"timeout": "2second"}
    })

    for e in Cube(select=existing.select, edges=existing.edges, data=existing.data).values():
        if e.query == json:
            return e.hash
        available[e.hash] = False

    # THIS WILL THROW AN ERROR IF THERE ARE NONE, HOW UNLUCKY!
    best = [h for h in short_hashes if available[h]][0]

    self.queue.add({
        "id": best,
        "value": {
            "hash": best,
            "create_time": Date.now(),
            "last_used": Date.now(),
            "query": json
        }
    })

    Log.note("Saved query as {{hash}}", hash=best)
    return best

def trigger_job(self):
    # RUN UNTIL A STOP IS REQUESTED
    while not self.please_stop:
        now = Date.now()
        next = now + DAY

        for j in self.jobs:
            if j.next_run_time < now:
                j.next_run_time = next_run(j)
                self.run_job(j)
            next = Date.min(next, j.next_run_time)

        Thread.sleep(till=next, please_stop=self.please_stop)

def next_run(job):
    if job.settings.start_next:
        formula = next_run(job.settings.start_next)
    elif job.settings.start_interval:
        formula = "now|" + job.settings.start_interval + "+" + job.settings.start_interval
    else:
        Log.error("Expecting `start_next` or `start_interval` for job {{job}}", job=job.name)

    now = Date.now()
    next = Date(formula)
    if next < now:
        Log.error("{{formula|quote}} does not calculate a future date", formula=formula)
    return next

def _send_email(self):
    try:
        if self.accumulation:
            with Emailer(self.settings) as emailer:
                emailer.send_email(
                    from_address=self.settings.from_address,
                    to_address=self.settings.to_address,
                    subject=self.settings.subject,
                    text_data="\n\n".join(self.accumulation)
                )

        self.next_send = Date.now() + self.settings.max_interval
        self.accumulation = []
    except Exception, e:
        self.next_send = Date.now() + self.settings.max_interval
        Log.warning("Could not send", e)

def startup(self):
    try:
        json = self.synch.read()
        if not json:
            Log.note("{{synchro_key}} does not exist. Starting.", synchro_key=SYNCHRONIZATION_KEY)
            return

        last_run = convert.json2value(json)
        self.next_key = last_run.next_key
        self.source_key = last_run.source_key
        if last_run.action == "shutdown":
            Log.note(
                "{{synchro_key}} shutdown detected. Starting at {{num}}",
                synchro_key=SYNCHRONIZATION_KEY,
                num=self.next_key
            )
        else:
            resume_time = Date(last_run.timestamp) + WAIT_FOR_ACTIVITY
            Log.note(
                "Shutdown not detected, waiting until {{time}} to see if existing pulse_logger is running...",
                time=resume_time
            )
            while resume_time > Date.now():
                Thread.sleep(seconds=10)
                json = self.synch.read()
                if json == None:
                    Log.note("{{synchro_key}} disappeared! Starting over.", synchro_key=SYNCHRONIZATION_KEY)
                    self._start()
                    self.pinger_thread = Thread.run("synch pinger", self._pinger)
                    return

                self.next_key = last_run.next_key
                self.source_key = last_run.source_key
                if last_run.action == "shutdown":
                    Log.note("Shutdown detected! Resuming...")
                    self._start()
                    self.pinger_thread = Thread.run("synch pinger", self._pinger)
                    return

                if last_run.timestamp > self.ping_time:
                    Log.error("Another instance of pulse_logger is running!")
                Log.note("No activity, still waiting...")

            Log.note("No activity detected! Resuming...")
    except Exception, e:
        Log.error("Can not start", e)

def test_time_variables(self):
    test = {
        "metadata": {},
        "data": test_data_1,
        "query": {
            "from": TEST_TABLE,
            "edges": [
                {
                    "value": "t",
                    "domain": {
                        "type": "time",
                        "min": "today-week",
                        "max": "today",
                        "interval": "day"
                    }
                }
            ],
            "select": {"value": "v", "aggregate": "sum"}
        },
        "expecting_list": {
            "meta": {"format": "list"},
            "data": [r for r in expected_list_1]
        },
        "expecting_table": {
            "meta": {"format": "table"},
            "header": ["t", "v"],
            "data": [[r.t, r.v] for r in expected_list_1]
        },
        "expecting_cube": {
            "meta": {"format": "cube"},
            "edges": [
                {
                    "name": "t",
                    "domain": {
                        "type": "time",
                        "key": "min",
                        "min": Date("today-week").unix,
                        "max": TODAY.unix,
                        "interval": DAY.seconds,
                        "partitions": [
                            {"min": r.t, "max": (Date(r.t) + DAY).unix}
                            for r in expected_list_1
                            if r.t != None
                        ]
                    }
                }
            ],
            "data": {"v": [r.v for r in expected_list_1]}
        }
    }
    self.utils.execute_es_tests(test)

def _send_email(self):
    try:
        if self.accumulation:
            with Emailer(self.settings) as emailer:
                emailer.send_email(
                    from_address=self.settings.from_address,
                    to_address=self.settings.to_address,
                    subject=self.settings.subject,
                    text_data="\n\n".join(self.accumulation)
                )

        self.next_send = Date.now() + self.settings.max_interval
        self.accumulation = []
    except Exception, e:
        self.next_send = Date.now() + self.settings.max_interval
        Log.warning("Could not send", e)

def record_request(request, query_, data, error):
    try:
        if request_log_queue == None:
            return

        if data and len(data) > 10000:
            data = data[:10000]

        log = wrap({
            "timestamp": Date.now(),
            "http_user_agent": request.headers.get("user_agent"),
            "http_accept_encoding": request.headers.get("accept_encoding"),
            "path": request.headers.environ["werkzeug.request"].full_path,
            "content_length": request.headers.get("content_length"),
            "remote_addr": request.remote_addr,
            "query": query_,
            "data": data,
            "error": error
        })
        log["from"] = request.headers.get("from")
        request_log_queue.add({"value": log})
    except Exception, e:
        Log.warning("Can not record", cause=e)

def __init__(self, host, index, alias=None, name=None, port=9200, settings=None):
    global _elasticsearch
    if hasattr(self, "settings"):
        return

    from pyLibrary.queries.containers.list_usingPythonList import ListContainer
    from pyLibrary.env import elasticsearch as _elasticsearch

    self.settings = settings
    self.default_name = coalesce(name, alias, index)
    self.default_es = _elasticsearch.Cluster(settings=settings)
    self.todo = Queue("refresh metadata", max=100000, unique=True)

    self.es_metadata = Null
    self.last_es_metadata = Date.now() - OLD_METADATA

    self.meta = Dict()
    table_columns = metadata_tables()
    column_columns = metadata_columns()
    self.meta.tables = ListContainer("meta.tables", [], wrap({c.name: c for c in table_columns}))
    self.meta.columns = ColumnList()
    self.meta.columns.insert(column_columns)
    self.meta.columns.insert(table_columns)
    # TODO: fix monitor so it does not bring down ES
    if ENABLE_META_SCAN:
        self.worker = Thread.run("refresh metadata", self.monitor)
    else:
        self.worker = Thread.run("refresh metadata", self.not_monitor)
    return

def __init__( self, from_address, to_address, subject, host, username, password, port=465, use_ssl=1, log_type="email", settings=None ): """ SEND WARNINGS AND ERRORS VIA EMAIL settings = { "log_type":"email", "from_address": "*****@*****.**", "to_address": "*****@*****.**", "subject": "Problem in Pulse Logger", "host": "mail.mozilla.com", "port": 465, "username": "******", "password": "******", "use_ssl": 1 } """ assert settings.log_type == "email", "Expecing settings to be of type 'email'" self.settings = settings self.accumulation = [] self.last_sent = Date.now()-Duration.YEAR self.locker = Lock()
def __init__(self, from_address, to_address, subject, host, username, password, port=465, use_ssl=1, log_type="email", max_interval=HOUR, settings=None): """ SEND WARNINGS AND ERRORS VIA EMAIL settings = { "log_type":"email", "from_address": "*****@*****.**", "to_address": "*****@*****.**", "subject": "Problem in Pulse Logger", "host": "mail.mozilla.com", "port": 465, "username": "******", "password": "******", "use_ssl": 1 } """ assert settings.log_type == "email", "Expecing settings to be of type 'email'" self.settings = settings self.accumulation = [] self.next_send = Date.now() + MINUTE self.locker = Lock() self.settings.max_interval = Duration(settings.max_interval)
def loop(self, please_stop):
    with self.work_queue:
        while not please_stop:
            if self.settings.wait_forever:
                todo = None
                while not please_stop and not todo:
                    if isinstance(self.work_queue, aws.Queue):
                        todo = self.work_queue.pop(wait=EXTRA_WAIT_TIME)
                    else:
                        todo = self.work_queue.pop()
            else:
                if isinstance(self.work_queue, aws.Queue):
                    todo = self.work_queue.pop()
                else:
                    todo = self.work_queue.pop(till=Date.now())
                if todo == None:
                    please_stop.go()
                    return

            try:
                is_ok = self._dispatch_work(todo)
                if is_ok:
                    self.work_queue.commit()
                else:
                    self.work_queue.rollback()
            except Exception, e:
                self.work_queue.rollback()
                Log.warning("could not process {{key}}. Returned back to work queue.", key=todo.key, cause=e)

def loop(self, please_stop):
    with self.work_queue:
        while not please_stop:
            if self.settings.wait_forever:
                todo = None
                while not please_stop and not todo:
                    if isinstance(self.work_queue, aws.Queue):
                        todo = self.work_queue.pop(wait=EXTRA_WAIT_TIME)
                    else:
                        todo = self.work_queue.pop()
            else:
                if isinstance(self.work_queue, aws.Queue):
                    todo = self.work_queue.pop()
                else:
                    todo = self.work_queue.pop(till=Date.now())
                if todo == None:
                    please_stop.go()
                    return

            try:
                is_ok = self._dispatch_work(todo)
                if is_ok:
                    self.work_queue.commit()
                else:
                    self.work_queue.rollback()
            except Exception, e:
                self.work_queue.rollback()
                Log.warning("could not process {{key}}. Returned back to work queue.", key=todo.key, cause=e)

def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        some_failures = http.post_json("http://activedata.allizom.org/query", data={
            "from": "unittest",
            "select": [
                {"name": "branch", "value": "build.branch"},
                {"name": "revision", "value": "build.revision12"},
                {"name": "suite", "value": "run.suite"},
                {"name": "chunk", "value": "run.chunk"},
                {"name": "test", "value": "result.test"}
            ],
            "where": {"and": [
                {"eq": {"result.ok": False}},
                {"gt": {"run.timestamp": Date.today() - WEEK}},
                {"missing": "treeherder.job.note"}
            ]},
            "format": "list",
            "limit": 10
        })

        th = TreeHerder(settings={})

        # th.get_job_classification("mozilla-inbound", "7380457b8ba0")
        for f in some_failures.data:
            th.get_job_classification(f.branch, f.revision)
    except Exception, e:
        Log.error("Problem with etl", e)

def _upsert_column(self, c):
    # ASSUMING THE self.meta.columns.locker IS HAD
    existing_columns = [
        r
        for r in self.meta.columns.data
        if r.table == c.table and r.name == c.name
    ]
    if not existing_columns:
        self.meta.columns.add(c)
        Log.note("todo: {{table}}.{{column}}", table=c.table, column=c.es_column)
        self.todo.add(c)

        # MARK meta.columns AS DIRTY TOO
        cols = [r for r in self.meta.columns.data if r.table == "meta.columns"]
        for cc in cols:
            cc.partitions = cc.cardinality = None
            cc.last_updated = Date.now()
        self.todo.extend(cols)
    else:
        canonical = existing_columns[0]
        if canonical.relative and not c.relative:
            return  # RELATIVE COLUMNS WILL SHADOW ABSOLUTE COLUMNS

        for key in Column.__slots__:
            canonical[key] = c[key]
        Log.note("todo: {{table}}.{{column}}", table=canonical.table, column=canonical.es_column)
        self.todo.add(canonical)

def write(self, template, params):
    with self.locker:
        if params.params.warning.template:
            self.accumulation.append(expand_template(template, params))
        if Date.now() > self.last_sent + WAIT_TO_SEND_MORE:
            self._send_email()

def setUpClass(self):
    # REMOVE OLD INDEXES
    cluster = elasticsearch.Cluster(global_settings.backend_es)
    aliases = cluster.get_aliases()
    for a in aliases:
        try:
            if a.index.startswith("testing_"):
                # EXAMPLE testing_0ef53e45b320160118_180420
                create_time = Date(a.index[-15:], "%Y%m%d_%H%M%S")
                if create_time < Date.now() - 10 * MINUTE:
                    cluster.delete_index(a.index)
        except Exception, e:
            Log.warning("Problem removing {{index|quote}}", index=a.index, cause=e)

def output(*args):
    with cache_store.locker:
        if using_self:
            self = args[0]
            args = args[1:]
        else:
            self = cache_store

        now = Date.now()
        try:
            _cache = getattr(self, attr_name)
        except Exception, _:
            _cache = {}
            setattr(self, attr_name, _cache)

        if Random.int(100) == 0:
            # REMOVE OLD CACHE
            _cache = {k: v for k, v in _cache.items() if v[0] == None or v[0] > now}
            setattr(self, attr_name, _cache)

        timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))

def find_some_work(th):
    # th.get_markup("fx-team", "036f62007472", "B8kS5IJ5Rom8l-kcSIRIlA")
    # th.get_markup("mozilla-inbound", "971c1ee26cad", "fNuzNmZxS6m3i_p9jDh8iA")

    # GET SOME TASKS
    result = http.post_json(url="http://activedata.allizom.org/query", data={
        "from": "task",
        "select": ["build.branch", "build.revision", "task.id"],
        "where": {"and": [
            {"gt": {"task.run.start_time": (Date.today() - DAY).unix}},
            {"exists": "build.revision"},
            {"exists": "build.branch"}
        ]},
        "format": "list"
    })

    # TRY TO GET THEM OUT OF OUR CACHE
    for r in result.data:
        Log.note("look for task {{task_id}}", task_id=r.task.id)
        th.get_markup(r.build.branch, r.build.revision, r.task.id)

def write(self, template, params):
    with self.locker:
        if params.context not in [NOTE, ALARM]:  # DO NOT SEND THE BORING STUFF
            self.accumulation.append(expand_template(template, params))

        if Date.now() > self.next_send:
            self._send_email()

def get_branches(hg, branches, use_cache=True, settings=None):
    if not settings.branches or not use_cache:
        found_branches = _get_branches_from_hg(hg)

        es = elasticsearch.Cluster(settings=branches).get_or_create_index(settings=branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in found_branches)
        es.flush()
        return found_branches

    # TRY ES
    try:
        es = elasticsearch.Cluster(settings=branches).get_index(settings=branches)
        query = {
            "query": {"match_all": {}},
            "size": 20000
        }
        docs = es.search(query).hits.hits._source

        # IF IT IS TOO OLD, THEN PULL FROM HG
        oldest = Date(Math.MAX(docs.etl.timestamp))
        if Date.now() - oldest > OLD_BRANCH:
            return get_branches(use_cache=False, settings=settings)

        try:
            return UniqueIndex(["name", "locale"], data=docs, fail_on_dup=False)
        except Exception, e:
            Log.error("Bad branch in ES index", cause=e)
    except Exception, e:
        if "Can not find index " in e:
            return get_branches(use_cache=False, settings=settings)
        Log.error("problem getting branches", cause=e)

def __init__(self, **desc):
    Domain.__init__(self, **desc)
    self.type = "time"
    self.NULL = Null
    self.min = Date(self.min)
    self.max = Date(self.max)
    self.interval = Duration(self.interval)

    if self.partitions:
        # IGNORE THE min, max, interval
        if not self.key:
            Log.error("Must have a key value")
        Log.error("not implemented yet")
        # VERIFY PARTITIONS DO NOT OVERLAP
        return
    elif not all([self.min, self.max, self.interval]):
        Log.error("Can not handle missing parameter")

    self.key = "min"
    self.partitions = wrap([
        {"min": v, "max": v + self.interval, "dataIndex": i}
        for i, v in enumerate(Date.range(self.min, self.max, self.interval))
    ])

def get_markup(self, branch, revision, task_id=None, buildername=None, timestamp=None):
    # TRY CACHE
    if not branch or not revision:
        Log.error("expecting branch and revision")

    if self.settings.use_cache:
        if task_id:
            _filter = {"term": {"task.id": task_id}}
        else:
            _filter = {"term": {"ref_data_name": buildername}}

        query = {
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"and": [
                    _filter,
                    {"term": {"repo.branch": branch}},
                    {"prefix": {"repo.revision": revision}},
                    {"or": [
                        {"range": {"etl.timestamp": {"gte": (Date.now() - HOUR).unix}}},
                        {"range": {"job.timing.last_modified": {"lt": (Date.now() - DAY).unix}}}
                    ]}
                ]}
            }},
            "size": 10000
        }

        try:
            docs = self.cache.search(query, timeout=120).hits.hits
        except Exception, e:
            docs = None
            Log.warning("Bad ES call, fall back to TH", cause=e)

        if not docs:
            pass
        elif len(docs) == 1:
            if DEBUG:
                Log.note("Used ES cache to get TH details on {{value|quote}}", value=coalesce(task_id, buildername))
            return docs[0]._source
        elif timestamp == None:
            Log.error("timestamp required to find best match")
        else:
            # MISSING docs._source.job.timing.end WHEN A PLACEHOLDER WAS ADDED
            # TODO: SHOULD DELETE OVERLAPPING PLACEHOLDER RECORDS
            timestamp = Date(timestamp).unix
            best_index = jx.sort(
                [(i, abs(coalesce(e, 0) - timestamp)) for i, e in enumerate(docs._source.job.timing.end)],
                1
            )[0][0]
            return docs[best_index]._source

def write(self, template, params):
    with self.locker:
        if params.context not in [NOTE, ALARM]:  # SEND ONLY THE NOT BORING STUFF
            self.accumulation.append(expand_template(template, params))

        if Date.now() > self.next_send:
            self._send_email()

def ping(self): self.ping_time = Date.now() self.synch.write(convert.value2json({ "action": "ping", "next_key": self.next_key, "source_key": self.source_key, "timestamp": self.ping_time.milli }))
def monitor(self, please_stop):
    please_stop.on_go(lambda: self.todo.add(Thread.STOP))
    while not please_stop:
        try:
            if not self.todo:
                with self.meta.columns.locker:
                    old_columns = [
                        c
                        for c in self.meta.columns
                        if (c.last_updated == None or c.last_updated < Date.now() - TOO_OLD) and c.type not in STRUCT
                    ]
                    if old_columns:
                        Log.note("Old columns with dates {{dates|json}}", dates=wrap(old_columns).last_updated)
                        self.todo.extend(old_columns)
                        # TEST CONSISTENCY
                        for c, d in product(list(self.todo.queue), list(self.todo.queue)):
                            if c.es_column == d.es_column and c.table == d.table and c != d:
                                Log.error("")
                    else:
                        Log.note("no more metadata to update")

            column = self.todo.pop(Till(timeout=10 * MINUTE))
            if column:
                Log.note("update {{table}}.{{column}}", table=column.table, column=column.es_column)
                if column.type in STRUCT:
                    with self.meta.columns.locker:
                        column.last_updated = Date.now()
                    continue
                elif column.last_updated >= Date.now() - TOO_OLD:
                    continue
                try:
                    self._update_cardinality(column)
                    if DEBUG and not column.table.startswith(TEST_TABLE_PREFIX):
                        Log.note("updated {{column.name}}", column=column)
                except Exception, e:
                    Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
        except Exception, e:
            Log.warning("problem in cardinality monitor", cause=e)

def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        some_failures = http.post_json("http://activedata.allizom.org/query", data={
            "from": "unittest",
            "select": [
                {"name": "branch", "value": "build.branch"},
                {"name": "revision", "value": "build.revision12"},
                {"name": "suite", "value": "run.suite"},
                {"name": "chunk", "value": "run.chunk"},
                {"name": "test", "value": "result.test"}
            ],
            "where": {"and": [
                {"eq": {"result.ok": False}},
                {"gt": {"run.timestamp": Date.today() - WEEK}},
                {"missing": "treeherder.job.note"}
            ]},
            "format": "list",
            "limit": 10
        })

        th = TreeHerder(settings={})

        # th.get_job_classification("mozilla-inbound", "7380457b8ba0")
        for f in some_failures.data:
            th.get_job_classification(f.branch, f.revision)
    except Exception, e:
        Log.error("Problem with etl", e)

def _get_columns(self, table=None):
    # TODO: HANDLE MORE THAN ONE ES, MAP TABLE SHORT_NAME TO ES INSTANCE
    meta = self.es_metadata.indices[table]
    if not meta or self.last_es_metadata < Date.now() - OLD_METADATA:
        self.es_metadata = self.default_es.get_metadata(force=True)
        meta = self.es_metadata.indices[table]

    for _, properties in meta.mappings.items():
        self._parse_properties(meta.index, properties, meta)

def ping(self):
    self.ping_time = Date.now()
    self.synch.write(convert.value2json({
        "action": "ping",
        "next_key": self.next_key,
        "source_key": self.source_key,
        "timestamp": self.ping_time.milli
    }))

def shutdown(self):
    self.pinger_thread.stop()
    self.pinger_thread.join()
    self.synch.write(convert.value2json({
        "action": "shutdown",
        "next_key": self.next_key,
        "source_key": self.source_key,
        "timestamp": Date.now().milli
    }))

def _delete_old_indexes(self, candidates):
    for c in candidates:
        timestamp = unicode2Date(c.index[-15:], "%Y%m%d_%H%M%S")
        if timestamp + self.rollover_interval < Date.today() - self.rollover_max:
            # Log.warning("Will delete {{index}}", index=c.index)
            try:
                self.cluster.delete_index(c.index)
            except Exception, e:
                Log.warning("could not delete index {{index}}", index=c.index, cause=e)

def add_column(c, query_path):
    c.last_updated = Date.now()
    c.table = join_field([c.es_index] + split_field(query_path[0]))

    with self.meta.columns.locker:
        self._upsert_column(c)
        for alias in meta.aliases:
            c = copy(c)
            c.table = join_field([alias] + split_field(query_path[0]))
            self._upsert_column(c)

def shutdown(self):
    self.pinger_thread.stop()
    self.pinger_thread.join()
    self.synch.write(convert.value2json({
        "action": "shutdown",
        "next_key": self.next_key,
        "source_key": self.source_key,
        "timestamp": Date.now().milli
    }))

def _get_queue(self, row):
    row = wrap(row)
    if row.json:
        row.value, row.json = convert.json2value(row.json), None
    timestamp = Date(self.rollover_field(wrap(row).value))
    if timestamp == None or timestamp < Date.today() - self.rollover_max:
        return Null

    rounded_timestamp = timestamp.floor(self.rollover_interval)
    queue = self.known_queues.get(rounded_timestamp.unix)
    if queue == None:
        candidates = jx.run({
            "from": self.cluster.get_aliases(),
            "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}},
            "sort": "index"
        })
        best = None
        for c in candidates:
            c = wrap(c)
            c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
            if timestamp > c.date:
                best = c
        if not best or rounded_timestamp > best.date:
            if rounded_timestamp < wrap(candidates[-1]).date:
                es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, settings=self.settings)
            else:
                try:
                    es = self.cluster.create_index(create_timestamp=rounded_timestamp, settings=self.settings)
                    es.add_alias(self.settings.index)
                except Exception, e:
                    if "IndexAlreadyExistsException" not in e:
                        Log.error("Problem creating index", cause=e)
                    return self._get_queue(row)  # TRY AGAIN
        else:
            es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, settings=self.settings)

        with suppress_exception:
            es.set_refresh_interval(seconds=60 * 10, timeout=5)

        self._delete_old_indexes(candidates)
        queue = self.known_queues[rounded_timestamp.unix] = es.threaded_queue(
            max_size=self.settings.queue_size,
            batch_size=self.settings.batch_size,
            silent=True
        )
    return queue

def _wait_for_stable(self, detect_function, timeout):
    """
    WAIT FOR RESULTS OF detect_function TO BE STABLE
    """
    if not isinstance(timeout, timedelta):
        Log.error("Expecting a timeout as a timedelta")

    detectTime = Date.now()
    oldValue = "probably never an initial value"
    newValue = detect_function()
    while True:
        now = Date.now()
        potentialValue = detect_function()
        if potentialValue != newValue:
            oldValue = newValue
            newValue = potentialValue
            detectTime = now
        if now - detectTime > timeout:
            return
        Thread.sleep(seconds=0.5)

def _rate_limited_get_json(self, *args, **kwargs):
    now = Date.now().unix
    with self.rate_locker:
        if self.request_times[self.request_pointer] >= now - 1:
            Log.note("Rate limiting")
            Thread.sleep(seconds=self.request_times[self.request_pointer] - now + 1)
        self.request_times[self.request_pointer] = now
        self.request_pointer += 1
        self.request_pointer %= len(self.request_times)

    return http.get_json(*args, **kwargs)

def __init__(self, synch):
    """
    synch HAS read() AND write() SO SEPARATE INSTANCES CAN DETERMINE IF OTHERS ARE ALIVE
    RAISE EXCEPTION IF SOME OTHER INSTANCE HAS BEEN DETECTED
    RETURN START OF COUNT (always >=1)
    """
    self.synch = synch
    self.pinger_thread = None
    self.next_key = 1
    self.ping_time = Date.now()
    self.source_key = 0

def add_column(c, query_path):
    c.last_updated = Date.now()
    c.table = join_field([c.es_index] + split_field(query_path[0]))

    with self.meta.columns.locker:
        self._upsert_column(c)
        for alias in meta.aliases:
            c = copy(c)
            c.table = join_field([alias] + split_field(query_path[0]))
            self._upsert_column(c)

def _pinger(self, please_stop):
    Log.note("pinger started")
    while not please_stop:
        Thread.sleep(till=self.ping_time + PING_PERIOD, please_stop=please_stop)
        if please_stop:
            # EXIT EARLY, OTHERWISE WE MAY OVERWRITE THE shutdown
            break
        if Date.now() < self.ping_time + PING_PERIOD:
            continue
        try:
            self.ping()
        except Exception, e:
            Log.warning("synchro.py could not ping", e)

def monitor(self, please_stop):
    please_stop.on_go(lambda: self.todo.add(Thread.STOP))
    while not please_stop:
        try:
            if not self.todo:
                with self.meta.columns.locker:
                    old_columns = [
                        c
                        for c in self.meta.columns
                        if (c.last_updated == None or c.last_updated < Date.now() - TOO_OLD) and c.type not in STRUCT
                    ]
                    if old_columns:
                        Log.note("Old columns with dates {{dates|json}}", dates=wrap(old_columns).last_updated)
                        self.todo.extend(old_columns)
                        # TEST CONSISTENCY
                        for c, d in product(list(self.todo.queue), list(self.todo.queue)):
                            if c.es_column == d.es_column and c.table == d.table and c != d:
                                Log.error("")
                    else:
                        Log.note("no more metadata to update")

            column = self.todo.pop(Till(timeout=10 * MINUTE))
            if column:
                Log.note("update {{table}}.{{column}}", table=column.table, column=column.es_column)
                if column.type in STRUCT:
                    with self.meta.columns.locker:
                        column.last_updated = Date.now()
                    continue
                elif column.last_updated >= Date.now() - TOO_OLD:
                    continue
                try:
                    self._update_cardinality(column)
                    if DEBUG and not column.table.startswith(TEST_TABLE_PREFIX):
                        Log.note("updated {{column.name}}", column=column)
                except Exception, e:
                    Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
        except Exception, e:
            Log.warning("problem in cardinality monitor", cause=e)

def _send_email(self):
    try:
        if self.accumulation:
            conn = connect_to_region(
                self.settings.region,
                aws_access_key_id=unwrap(self.settings.aws_access_key_id),
                aws_secret_access_key=unwrap(self.settings.aws_secret_access_key)
            )
            conn.send_email(
                source=self.settings.from_address,
                to_addresses=listwrap(self.settings.to_address),
                subject=self.settings.subject,
                body="\n\n".join(self.accumulation),
                format="text"
            )
            conn.close()

        self.next_send = Date.now() + WAIT_TO_SEND_MORE
        self.accumulation = []
    except Exception, e:
        self.next_send = Date.now() + WAIT_TO_SEND_MORE
        Log.warning("Could not send", e)

def add_to_queue(work_queue, redo, bucket_name):
    now = Date.now()
    for r in redo:
        k = literal_field(r)
        counter[k] += 1
        if counter[k] > 3:
            Log.error("Problem backfilling {{key}}: Tried >=3 times, giving up", key=r)
            continue

        work_queue.add({
            "bucket": bucket_name,
            "key": r,
            "timestamp": now.unix,
            "date/time": now.format()
        })

def not_monitor(self, please_stop):
    Log.alert("metadata scan has been disabled")
    please_stop.on_go(lambda: self.todo.add(Thread.STOP))
    while not please_stop:
        c = self.todo.pop()
        if c == Thread.STOP:
            break
        if not c.last_updated or c.last_updated >= Date.now() - TOO_OLD:
            continue

        with self.meta.columns.locker:
            self.meta.columns.update({
                "set": {
                    "last_updated": Date.now()
                },
                "clear": [
                    "count",
                    "cardinality",
                    "partitions",
                ],
                "where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
            })
        Log.note("Could not get {{col.es_index}}.{{col.es_column}} info", col=c)

def add_column(c, query_path):
    c.last_updated = Date.now()
    if query_path:
        c.table = c.es_index + "." + query_path.last()
    else:
        c.table = c.es_index

    with self.meta.columns.locker:
        self._upsert_column(c)
        for alias in meta.aliases:
            c = copy(c)
            if query_path:
                c.table = alias + "." + query_path.last()
            else:
                c.table = alias
            self._upsert_column(c)