class Process(object):
    def __init__(self, name, params, cwd=None, env=None, debug=False):
        self.name = name
        self.service_stopped = Signal("stopped signal for " + convert.string2quote(name))
        self.stdin = Queue("stdin for process " + convert.string2quote(name), silent=True)
        self.stdout = Queue("stdout for process " + convert.string2quote(name), silent=True)
        self.stderr = Queue("stderr for process " + convert.string2quote(name), silent=True)

        try:
            self.debug = debug or DEBUG
            self.service = service = subprocess.Popen(
                params,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                bufsize=-1,
                cwd=cwd,
                env=env
            )

            self.stopper = Signal()
            self.stopper.on_go(self._kill)
            self.thread_locker = Lock()
            self.children = [
                Thread.run(self.name + " waiter", self._monitor, parent_thread=self),
                Thread.run(self.name + " stdin", self._writer, service.stdin, self.stdin, please_stop=self.stopper, parent_thread=self),
                Thread.run(self.name + " stdout", self._reader, service.stdout, self.stdout, please_stop=self.stopper, parent_thread=self),
                # Thread.run(self.name + " stderr", self._reader, service.stderr, self.stderr, please_stop=self.stopper, parent_thread=self),
            ]
        except Exception, e:
            Log.error("Can not call", e)
def main(): try: settings = startup.read_settings(defs=[{ "name": ["--id"], "help": "id (prefix, really) to process", "type": str, "dest": "id", "required": False }]) constants.set(settings.constants) Log.start(settings.debug) queries.config.default = { "type": "elasticsearch", "settings": settings.elasticsearch.copy() } if settings.args.id: work_queue = Queue("local work queue") work_queue.extend(parse_id_argument(settings.args.id)) else: work_queue = aws.Queue(settings=settings.work_queue) Log.note("Listen to queue {{queue}}, and read off of {{s3}}", queue=settings.work_queue.name, s3=settings.source.bucket) es = MultiDayIndex(settings.elasticsearch, queue_size=100000) threads = [] please_stop = Signal() for _ in range(settings.threads): p = Thread.run("copy to es", copy2es, es, settings, work_queue, please_stop=please_stop) threads.append(p) def monitor_progress(please_stop): while not please_stop: Log.note("Remaining: {{num}}", num=len(work_queue)) Thread.sleep(seconds=10) Thread.run(name="monitor progress", target=monitor_progress, please_stop=please_stop) aws.capture_termination_signal(please_stop) Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True) please_stop.go() Log.note("Shutdown started") except Exception, e: Log.error("Problem with etl", e)
def query(self, command):
    """
    WILL BLOCK CALLING THREAD UNTIL THE command IS COMPLETED
    :param command: COMMAND FOR SQLITE
    :return: list OF RESULTS
    """
    signal = Signal()
    result = Data()
    self.queue.add((command, result, signal, None))
    signal.wait()

    if result.exception:
        Log.error("Problem with Sqlite call", cause=result.exception)
    return result
def query(self, command):
    """
    WILL BLOCK CALLING THREAD UNTIL THE command IS COMPLETED
    :param command: COMMAND FOR SQLITE
    :return: list OF RESULTS
    """
    signal = Signal()
    result = Dict()
    self.queue.add((command, result, signal, None))
    signal.wait_for_go()

    if result.exception:
        Log.error("Problem with Sqlite call", cause=result.exception)
    return result
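# A minimal sketch (not from the original source) of the worker side of the
# query() pattern above: a thread pops (command, result, signal, trace) tuples
# off self.queue, fills `result`, then releases the caller blocked on the
# signal.  The method name `_worker` and the connection attribute `self.db`
# are assumptions for illustration only.
def _worker(self, please_stop):
    while not please_stop:
        command, result, signal, trace = self.queue.pop()
        try:
            curs = self.db.execute(command)        # self.db ASSUMED TO BE A sqlite3 CONNECTION
            result.data = curs.fetchall()
        except Exception, e:
            result.exception = e
        finally:
            signal.go()                            # WAKE THE CALLING THREAD WAITING IN query()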
class Process(object):
    def __init__(self, name, params, cwd=None, env=None, debug=False):
        self.name = name
        self.service_stopped = Signal("stopped signal for " + convert.string2quote(name))
        self.stdin = Queue("stdin for process " + convert.string2quote(name), silent=True)
        self.stdout = Queue("stdout for process " + convert.string2quote(name), silent=True)
        self.stderr = Queue("stderr for process " + convert.string2quote(name), silent=True)

        try:
            self.debug = debug or DEBUG
            self.service = service = subprocess.Popen(
                params,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                bufsize=-1,
                cwd=cwd,
                env=env
            )

            self.stopper = Signal()
            self.stopper.on_go(self._kill)
            self.thread_locker = Lock()
            self.children = [
                Thread.run(self.name + " waiter", self._monitor, parent_thread=self),
                Thread.run(self.name + " stdin", self._writer, service.stdin, self.stdin, please_stop=self.stopper, parent_thread=self),
                Thread.run(self.name + " stdout", self._reader, service.stdout, self.stdout, please_stop=self.stopper, parent_thread=self),
                Thread.run(self.name + " stderr", self._reader, service.stderr, self.stderr, please_stop=self.stopper, parent_thread=self),
            ]
        except Exception, e:
            Log.error("Can not call", e)
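# A hedged usage sketch for the Process class above (not from the original
# source).  The command, and the assumption that lines are pushed and pulled
# as plain strings on the stdin/stdout Queues, are illustrative only.
p = Process("cat test", ["cat"])            # HYPOTHETICAL CHILD COMMAND
p.stdin.add("hello world")                  # QUEUED LINE IS WRITTEN BY THE stdin THREAD
echoed = p.stdout.pop()                     # BLOCKS UNTIL THE stdout THREAD READS A LINE
Log.note("child said {{line}}", line=echoed)
p.stopper.go()                              # ASK THE READER/WRITER THREADS (AND PROCESS) TO STOP
p.service_stopped.wait()                    # WAIT FOR THE _monitor THREAD TO SIGNAL EXIT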
def main(): try: settings = startup.read_settings(defs=[{ "name": ["--id"], "help": "id(s) to process. Use \"..\" for a range.", "type": str, "dest": "id", "required": False }]) constants.set(settings.constants) Log.start(settings.debug) if settings.args.id: etl_one(settings) return hg = HgMozillaOrg(settings=settings.hg) resources = Dict(hg=dictwrap(hg)) stopper = Signal() for i in range(coalesce(settings.param.threads, 1)): ETL(name="ETL Loop " + unicode(i), work_queue=settings.work_queue, resources=resources, workers=settings.workers, settings=settings.param, please_stop=stopper) Thread.wait_for_shutdown_signal(stopper, allow_exit=True) except Exception, e: Log.error("Problem with etl", e)
def main():
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    except Exception, e:
        Log.error("Problem with etl", cause=e)
def __init__(self, _file):
    """
    file - USES FILE FOR PERSISTENCE
    """
    self.file = File.new_instance(_file)
    self.lock = Lock("lock for persistent queue using file " + self.file.name)
    self.please_stop = Signal()
    self.db = Dict()
    self.pending = []

    if self.file.exists:
        for line in self.file:
            try:
                delta = convert.json2value(line)
                apply_delta(self.db, delta)
            except:
                pass
        if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
            self.db.status.start = 0
        self.start = self.db.status.start

        # SCRUB LOST VALUES
        lost = 0
        for k in self.db.keys():
            try:
                if k != "status" and int(k) < self.start:
                    self.db[k] = None
                    lost += 1
            except Exception:
                pass  # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
        if lost:
            Log.warning("queue file had {{num}} items lost", num=lost)

        if DEBUG:
            Log.note("Persistent queue {{name}} found with {{num}} items", name=self.file.abspath, num=len(self))
    else:
        self.db.status = Dict(start=0, end=0)
        self.start = self.db.status.start
        if DEBUG:
            Log.note("New persistent queue {{name}}", name=self.file.abspath)
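# The persistence above replays a file of JSON deltas through apply_delta().
# Below is a minimal sketch of what applying such a delta could look like; it
# is an assumption for illustration, not the library's actual implementation,
# and it treats db/delta as plain dicts (None clears a slot, nested dicts are
# merged, e.g. {"status": {"start": 5}}).
def apply_delta(db, delta):
    for k, v in delta.items():
        if isinstance(v, dict) and isinstance(db.get(k), dict):
            apply_delta(db[k], v)   # MERGE NESTED KEYS
        elif v is None:
            db.pop(k, None)         # None CLEARS THE SLOT
        else:
            db[k] = v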
def main():
    try:
        config = startup.read_settings()
        with startup.SingleInstance(flavor_id=config.args.filename):
            constants.set(config.constants)
            Log.start(config.debug)

            please_stop = Signal("main stop signal")
            coverage_index = elasticsearch.Cluster(config.source).get_index(settings=config.source)
            config.destination.schema = coverage_index.get_schema()
            coverage_summary_index = elasticsearch.Cluster(config.destination).get_or_create_index(read_only=False, settings=config.destination)
            coverage_summary_index.add_alias(config.destination.index)

            Thread.run(
                "processing loop",
                loop,
                config.source,
                coverage_summary_index,
                config,
                please_stop=please_stop
            )
            Thread.wait_for_shutdown_signal(please_stop)
    except Exception, e:
        Log.error("Problem with code coverage score calculation", cause=e)
def _get_job_results_from_th(self, branch, revision):
    output = []

    with self.locker:
        waiting_threads = self.pending.get((branch, revision))
        if waiting_threads is None:
            sig = None
            waiting_threads = self.pending[(branch, revision)] = [output]
        else:
            sig = Signal()
            waiting_threads.append(sig)  # APPEND THE SIGNAL WE WILL WAIT ON, SO THE WORKING THREAD CAN RELEASE US

    if sig is not None:
        Log.note("Holding thread for {{branch}}/{{revision}}", branch=branch, revision=revision)
        sig.wait_for_go()
        return waiting_threads[0]

    try:
        results = DictList()
        while True:
            response = self._rate_limited_get_json(expand_template(RESULT_SET_URL, {"branch": branch, "revision": revision[0:12]}))
            results.extend(response.results)
            if len(response.results) != 1000:
                break

        for g, repo_ids in jx.groupby(results.id, size=10):
            jobs = DictList()
            with Timer("Get {{num}} jobs", {"num": len(repo_ids)}, debug=DEBUG):
                while True:
                    response = self._rate_limited_get_json(expand_template(JOBS_URL, {
                        "branch": branch,
                        "offset": len(jobs),
                        "result_set_id": ",".join(map(unicode, repo_ids))
                    }))
                    jobs.extend(response.results)
                    if len(response.results) != 2000:
                        break

            with Timer("Get (up to {{num}}) details from TH", {"num": len(jobs)}, debug=DEBUG):
                details = []
                for _, ids in jx.groupby(jobs.id, size=40):
                    details.extend(self._rate_limited_get_json(
                        url=expand_template(DETAILS_URL, {"branch": branch, "job_id": ",".join(map(unicode, ids))}),
                        retry={"times": 3}
                    ).results)
                details = {k.job_guid: list(v) for k, v in jx.groupby(details, "job_guid")}

            with Timer("Get (up to {{num}}) stars from TH", {"num": len(jobs)}, debug=DEBUG):
                stars = []
                for _, ids in jx.groupby(jobs.id, size=40):
                    response = self._rate_limited_get_json(expand_template(JOB_BUG_MAP, {
                        "branch": branch,
                        "job_id": "&job_id=".join(map(unicode, ids))
                    }))
                    stars.extend(response)
                stars = {k.job_id: list(v) for k, v in jx.groupby(stars, "job_id")}

            with Timer("Get notes from TH", debug=DEBUG):
                notes = []
                for jid in set([j.id for j in jobs if j.failure_classification_id != 1] + stars.keys()):
                    response = self._rate_limited_get_json(expand_template(NOTES_URL, {"branch": branch, "job_id": unicode(jid)}))
                    notes.extend(response)
                notes = {k.job_id: list(v) for k, v in jx.groupby(notes, "job_id")}

            for j in jobs:
                output.append(self._normalize_job_result(branch, revision, j, details, notes, stars))

        if output:
            with Timer("Write to ES cache", debug=DEBUG):
                self.cache.extend({"id": "-".join([c.repo.branch, unicode(c.job.id)]), "value": c} for c in output)
                try:
                    self.cache.flush()
                except Exception, e:
                    Log.warning("problem flushing. nevermind.", cause=e)
    finally:
        with self.locker:
            for p in waiting_threads[1:]:
                if DEBUG:
                    Log.note("releasing thread for {{branch}}/{{revision}}", branch=branch, revision=revision)
                p.go()
            self.pending[(branch, revision)] = None

    return output
# Author: Kyle Lahnakoski ([email protected])
#
###############################################################################
# Intended to test exit behaviour from timeout, SIGINT (CTRL-C), or "exit"
###############################################################################

from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

from pyLibrary.debugs.logs import Log
from pyLibrary.thread.threads import Thread, Signal
from pyLibrary.thread.till import Till

please_stop = Signal()


def timeout(please_stop):
    (Till(seconds=20) | please_stop).wait()
    please_stop.go()

Thread.run("timeout", target=timeout, please_stop=please_stop)

Log.note("you must type 'exit', and press Enter, or wait 20 seconds")
Thread.wait_for_shutdown_signal(allow_exit=True, please_stop=please_stop)

if not please_stop:
    Log.note("'exit' detected")
else:
    Log.note("timeout (or CTRL-C) detected")
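# The (Till(seconds=N) | please_stop).wait() idiom above also works as an
# interruptible sleep inside a worker loop: the wait returns as soon as either
# the timer fires or the stop signal goes.  A small sketch follows; the worker
# name and the 10-second period are illustrative, not from the original source.
def heartbeat(please_stop):
    while not please_stop:
        Log.note("still alive")
        (Till(seconds=10) | please_stop).wait()   # SLEEP, BUT WAKE EARLY ON SHUTDOWN

Thread.run("heartbeat", target=heartbeat, please_stop=please_stop)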
            continue
        try:
            for i in parse_id_argument(settings.args.id):
                data = source.get_key(i)
                if data != None:
                    already_in_queue.add(id(source))
                    queue.add(Dict(bucket=w.source.bucket, key=i))
        except Exception, e:
            if "Key {{key}} does not exist" in e:
                already_in_queue.add(id(source))
                queue.add(Dict(bucket=w.source.bucket, key=settings.args.id))
            Log.warning("Problem", cause=e)

    resources = Dict(hg=HgMozillaOrg(settings=settings.hg))

    stopper = Signal()
    ETL(
        name="ETL Loop Test",
        work_queue=queue,
        workers=settings.workers,
        settings=settings.param,
        resources=resources,
        please_stop=stopper
    )

    aws.capture_termination_signal(stopper)
    Thread.wait_for_shutdown_signal(stopper, allow_exit=True)


def parse_id_argument(id):
    if id.find("..") >= 0:
        # range of ids
        min_, max_ = map(int, map(strings.trim, id.split("..")))