Example #1
class Process(object):
    def __init__(self, name, params, cwd=None, env=None, debug=False):
        self.name = name
        self.service_stopped = Signal("stopped signal for " + convert.string2quote(name))
        self.stdin = Queue("stdin for process " + convert.string2quote(name), silent=True)
        self.stdout = Queue("stdout for process " + convert.string2quote(name), silent=True)
        self.stderr = Queue("stderr for process " + convert.string2quote(name), silent=True)

        try:
            self.debug = debug or DEBUG
            self.service = service = subprocess.Popen(
                params,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                bufsize=-1,
                cwd=cwd,
                env=env
            )

            self.stopper = Signal()
            self.stopper.on_go(self._kill)
            self.thread_locker = Lock()
            self.children = [
                Thread.run(self.name + " waiter", self._monitor, parent_thread=self),
                Thread.run(self.name + " stdin", self._writer, service.stdin, self.stdin, please_stop=self.stopper, parent_thread=self),
                Thread.run(self.name + " stdout", self._reader, service.stdout, self.stdout, please_stop=self.stopper, parent_thread=self),
                # Thread.run(self.name + " stderr", self._reader, service.stderr, self.stderr, please_stop=self.stopper, parent_thread=self),
            ]
        except Exception as e:
            Log.error("Can not call", e)
Example #2
def main():
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id (prefix, really) to process",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        queries.config.default = {
            "type": "elasticsearch",
            "settings": settings.elasticsearch.copy()
        }

        if settings.args.id:
            work_queue = Queue("local work queue")
            work_queue.extend(parse_id_argument(settings.args.id))
        else:
            work_queue = aws.Queue(settings=settings.work_queue)

        Log.note("Listen to queue {{queue}}, and read off of {{s3}}",
                 queue=settings.work_queue.name,
                 s3=settings.source.bucket)

        es = MultiDayIndex(settings.elasticsearch, queue_size=100000)

        threads = []
        please_stop = Signal()
        for _ in range(settings.threads):
            p = Thread.run("copy to es",
                           copy2es,
                           es,
                           settings,
                           work_queue,
                           please_stop=please_stop)
            threads.append(p)

        def monitor_progress(please_stop):
            while not please_stop:
                Log.note("Remaining: {{num}}", num=len(work_queue))
                Thread.sleep(seconds=10)

        Thread.run(name="monitor progress",
                   target=monitor_progress,
                   please_stop=please_stop)

        aws.capture_termination_signal(please_stop)
        Thread.wait_for_shutdown_signal(please_stop=please_stop,
                                        allow_exit=True)
        please_stop.go()
        Log.note("Shutdown started")
    except Exception as e:
        Log.error("Problem with etl", e)
Example #3
    def query(self, command):
        """
        WILL BLOCK CALLING THREAD UNTIL THE command IS COMPLETED
        :param command: COMMAND FOR SQLITE
        :return: list OF RESULTS
        """
        signal = Signal()
        result = Data()
        self.queue.add((command, result, signal, None))
        signal.wait()
        if result.exception:
            Log.error("Problem with Sqlite call", cause=result.exception)
        return result
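
The worker side of this handshake is not shown. A sketch of the loop that would service the queue, matching the (command, result, signal, arg) tuples added above (execute_sql is a hypothetical stand-in for the real Sqlite execution):

def _worker(self, please_stop):
    while not please_stop:
        command, result, signal, arg = self.queue.pop()
        try:
            result.data = execute_sql(command)   # hypothetical: run the command, keep the rows
        except Exception as e:
            result.exception = e                 # query() checks this once the signal fires
        finally:
            signal.go()                          # releases signal.wait() in query()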
Example #4
    def query(self, command):
        """
        WILL BLOCK CALLING THREAD UNTIL THE command IS COMPLETED
        :param command: COMMAND FOR SQLITE
        :return: list OF RESULTS
        """
        signal = Signal()
        result = Dict()
        self.queue.add((command, result, signal, None))
        signal.wait_for_go()
        if result.exception:
            Log.error("Problem with Sqlite call", cause=result.exception)
        return result
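
Example #4 repeats the blocking-query pattern of Example #3 against an older pyLibrary API surface: Dict() in place of Data(), and signal.wait_for_go() in place of signal.wait(). The handshake is otherwise identical.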
Example #5
class Process(object):
    def __init__(self, name, params, cwd=None, env=None, debug=False):
        self.name = name
        self.service_stopped = Signal("stopped signal for " +
                                      convert.string2quote(name))
        self.stdin = Queue("stdin for process " + convert.string2quote(name),
                           silent=True)
        self.stdout = Queue("stdout for process " + convert.string2quote(name),
                            silent=True)
        self.stderr = Queue("stderr for process " + convert.string2quote(name),
                            silent=True)

        try:
            self.debug = debug or DEBUG
            self.service = service = subprocess.Popen(params,
                                                      stdin=subprocess.PIPE,
                                                      stdout=subprocess.PIPE,
                                                      stderr=subprocess.PIPE,
                                                      bufsize=-1,
                                                      cwd=cwd,
                                                      env=env)

            self.stopper = Signal()
            self.stopper.on_go(self._kill)
            self.thread_locker = Lock()
            self.children = [
                Thread.run(self.name + " waiter",
                           self._monitor,
                           parent_thread=self),
                Thread.run(self.name + " stdin",
                           self._writer,
                           service.stdin,
                           self.stdin,
                           please_stop=self.stopper,
                           parent_thread=self),
                Thread.run(self.name + " stdout",
                           self._reader,
                           service.stdout,
                           self.stdout,
                           please_stop=self.stopper,
                           parent_thread=self),
                Thread.run(self.name + " stderr",
                           self._reader,
                           service.stderr,
                           self.stderr,
                           please_stop=self.stopper,
                           parent_thread=self),
            ]
        except Exception as e:
            Log.error("Can not call", e)
Example #6
def main():
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id(s) to process.  Use \"..\" for a range.",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        if settings.args.id:
            etl_one(settings)
            return

        hg = HgMozillaOrg(settings=settings.hg)
        resources = Dict(hg=dictwrap(hg))
        stopper = Signal()
        for i in range(coalesce(settings.param.threads, 1)):
            ETL(name="ETL Loop " + unicode(i),
                work_queue=settings.work_queue,
                resources=resources,
                workers=settings.workers,
                settings=settings.param,
                please_stop=stopper)

        Thread.wait_for_shutdown_signal(stopper, allow_exit=True)
    except Exception as e:
        Log.error("Problem with etl", e)
Example #7
def main():
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    except Exception as e:
        Log.error("Problem with etl", cause=e)
Example #8
    def __init__(self, _file):
        """
        file - USES FILE FOR PERSISTENCE
        """
        self.file = File.new_instance(_file)
        self.lock = Lock("lock for persistent queue using file " +
                         self.file.name)
        self.please_stop = Signal()
        self.db = Dict()
        self.pending = []

        if self.file.exists:
            for line in self.file:
                try:
                    delta = convert.json2value(line)
                    apply_delta(self.db, delta)
                except Exception:
                    pass
            if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
                self.db.status.start = 0
            self.start = self.db.status.start

            # SCRUB LOST VALUES
            lost = 0
            for k in self.db.keys():
                try:
                    if k != "status" and int(k) < self.start:
                        self.db[k] = None
                        lost += 1
                except Exception:
                    pass  # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
            if lost:
                Log.warning("queue file had {{num}} items lost", num=lost)

            if DEBUG:
                Log.note("Persistent queue {{name}} found with {{num}} items",
                         name=self.file.abspath,
                         num=len(self))
        else:
            self.db.status = Dict(start=0, end=0)
            self.start = self.db.status.start
            if DEBUG:
                Log.note("New persistent queue {{name}}",
                         name=self.file.abspath)
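
apply_delta is not defined in this snippet. A minimal sketch of replaying one JSON delta into the in-memory Dict (hypothetical; the real persistence format may carry more structure):

def apply_delta(db, delta):
    for k, v in delta.items():
        db[k] = v   # assigning None clears a key, matching the scrub loop above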
Example #9
def main():
    try:
        config = startup.read_settings()
        with startup.SingleInstance(flavor_id=config.args.filename):
            constants.set(config.constants)
            Log.start(config.debug)

            please_stop = Signal("main stop signal")
            coverage_index = elasticsearch.Cluster(config.source).get_index(settings=config.source)
            config.destination.schema = coverage_index.get_schema()
            coverage_summary_index = elasticsearch.Cluster(config.destination).get_or_create_index(read_only=False, settings=config.destination)
            coverage_summary_index.add_alias(config.destination.index)
            Thread.run(
                "processing loop",
                loop,
                config.source,
                coverage_summary_index,
                config,
                please_stop=please_stop
            )
            Thread.wait_for_shutdown_signal(please_stop)
    except Exception as e:
        Log.error("Problem with code coverage score calculation", cause=e)
Example #10
    def _get_job_results_from_th(self, branch, revision):
        output = []

        with self.locker:
            waiting_threads = self.pending.get((branch, revision))
            if waiting_threads is None:
                sig = None
                waiting_threads = self.pending[(branch, revision)] = [output]
            else:
                sig = Signal()
                waiting_threads.append(sig)

        if sig is not None:
            Log.note("Holding thread for {{branch}}/{{revision}}",
                     branch=branch,
                     revision=revision)
            sig.wait_for_go()
            return waiting_threads[0]

        try:
            results = DictList()
            while True:
                response = self._rate_limited_get_json(
                    expand_template(RESULT_SET_URL, {
                        "branch": branch,
                        "revision": revision[0:12:]
                    }))
                results.extend(response.results)
                if len(response.results) != 1000:
                    break

            for g, repo_ids in jx.groupby(results.id, size=10):
                jobs = DictList()
                with Timer("Get {{num}} jobs", {"num": len(repo_ids)},
                           debug=DEBUG):
                    while True:
                        response = self._rate_limited_get_json(
                            expand_template(JOBS_URL, {
                                "branch": branch,
                                "offset": len(jobs),
                                "result_set_id": ",".join(map(unicode, repo_ids))
                            }))
                        jobs.extend(response.results)
                        if len(response.results) != 2000:
                            break

                with Timer("Get (up to {{num}}) details from TH",
                           {"num": len(jobs)},
                           debug=DEBUG):
                    details = []
                    for _, ids in jx.groupby(jobs.id, size=40):
                        details.extend(self._rate_limited_get_json(
                            url=expand_template(DETAILS_URL, {
                                "branch": branch,
                                "job_id": ",".join(map(unicode, ids))
                            }),
                            retry={"times": 3}
                        ).results)
                    details = {
                        k.job_guid: list(v)
                        for k, v in jx.groupby(details, "job_guid")
                    }

                with Timer("Get (up to {{num}}) stars from TH",
                           {"num": len(jobs)},
                           debug=DEBUG):
                    stars = []
                    for _, ids in jx.groupby(jobs.id, size=40):
                        response = self._rate_limited_get_json(
                            expand_template(JOB_BUG_MAP, {
                                "branch": branch,
                                "job_id": "&job_id=".join(map(unicode, ids))
                            }))
                        stars.extend(response)
                    stars = {
                        k.job_id: list(v)
                        for k, v in jx.groupby(stars, "job_id")
                    }

                with Timer("Get notes from TH", debug=DEBUG):
                    notes = []
                    for jid in set([j.id for j in jobs if j.failure_classification_id != 1] + stars.keys()):
                        response = self._rate_limited_get_json(
                            expand_template(NOTES_URL, {
                                "branch": branch,
                                "job_id": unicode(jid)
                            }))
                        notes.extend(response)
                    notes = {
                        k.job_id: list(v)
                        for k, v in jx.groupby(notes, "job_id")
                    }

                for j in jobs:
                    output.append(
                        self._normalize_job_result(branch, revision, j,
                                                   details, notes, stars))

            if output:
                with Timer("Write to ES cache", debug=DEBUG):
                    self.cache.extend(
                        {
                            "id": "-".join([c.repo.branch,
                                            unicode(c.job.id)]),
                            "value": c
                        } for c in output)
                    try:
                        self.cache.flush()
                    except Exception as e:
                        Log.warning("problem flushing. nevermind.", cause=e)
        finally:
            with self.locker:
                for p in waiting_threads[1:]:
                    if DEBUG:
                        Log.note(
                            "releasing thread for {{branch}}/{{revision}}",
                            branch=branch,
                            revision=revision)
                    p.go()
                self.pending[(branch, revision)] = None

        return output
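
The method above is a request-coalescing pattern: the first thread to ask for a given (branch, revision) does the fetching, while later threads park on a personal Signal and reuse the first thread's output list. Distilled to a skeleton, using only the Signal and Lock calls shown in these examples (fetch is a hypothetical placeholder for the Treeherder calls):

def coalesce(self, key):
    output = []
    with self.locker:
        waiters = self.pending.get(key)
        if waiters is None:
            sig = None
            waiters = self.pending[key] = [output]  # first caller publishes the shared output list
        else:
            sig = Signal()
            waiters.append(sig)                     # later callers register a Signal to wait on

    if sig is not None:
        sig.wait_for_go()                           # park until the first caller finishes
        return waiters[0]                           # then return its result

    try:
        output.extend(fetch(key))                   # hypothetical: the expensive remote work
    finally:
        with self.locker:
            for s in waiters[1:]:
                s.go()                              # release every parked caller
            self.pending[key] = None
    return output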
Example #11
    def _get_job_results_from_th(self, branch, revision):
        output = []

        with self.locker:
            waiting_threads = self.pending.get((branch, revision))
            if waiting_threads is None:
                sig = None
                waiting_threads = self.pending[(branch, revision)] = [output]
            else:
                sig = Signal()
                waiting_threads.append(sig)

        if sig is not None:
            Log.note("Holding thread for {{branch}}/{{revision}}", branch=branch, revision=revision)
            sig.wait_for_go()
            return waiting_threads[0]

        try:
            results = DictList()
            while True:
                response = self._rate_limited_get_json(expand_template(RESULT_SET_URL, {"branch": branch, "revision": revision[0:12]}))
                results.extend(response.results)
                if len(response.results) != 1000:
                    break

            for g, repo_ids in jx.groupby(results.id, size=10):
                jobs = DictList()
                with Timer("Get {{num}} jobs", {"num": len(repo_ids)}, debug=DEBUG):
                    while True:
                        response = self._rate_limited_get_json(expand_template(JOBS_URL, {"branch": branch, "offset": len(jobs), "result_set_id": ",".join(map(unicode, repo_ids))}))
                        jobs.extend(response.results)
                        if len(response.results) != 2000:
                            break

                with Timer("Get (up to {{num}}) details from TH", {"num": len(jobs)}, debug=DEBUG):
                    details = []
                    for _, ids in jx.groupby(jobs.id, size=40):
                        details.extend(self._rate_limited_get_json(
                            url=expand_template(DETAILS_URL, {"branch": branch, "job_id": ",".join(map(unicode, ids))}),
                            retry={"times": 3}
                        ).results)
                    details = {k.job_guid: list(v) for k, v in jx.groupby(details, "job_guid")}

                with Timer("Get (up to {{num}}) stars from TH", {"num": len(jobs)}, debug=DEBUG):
                    stars = []
                    for _, ids in jx.groupby(jobs.id, size=40):
                        response = self._rate_limited_get_json(expand_template(JOB_BUG_MAP, {"branch": branch, "job_id": "&job_id=".join(map(unicode, ids))}))
                        stars.extend(response)
                    stars = {k.job_id: list(v) for k, v in jx.groupby(stars, "job_id")}

                with Timer("Get notes from TH", debug=DEBUG):
                    notes = []
                    for jid in set([j.id for j in jobs if j.failure_classification_id != 1] + stars.keys()):
                        response = self._rate_limited_get_json(expand_template(NOTES_URL, {"branch": branch, "job_id": unicode(jid)}))
                        notes.extend(response)
                    notes = {k.job_id: list(v) for k, v in jx.groupby(notes, "job_id")}

                for j in jobs:
                    output.append(self._normalize_job_result(branch, revision, j, details, notes, stars))

            if output:
                with Timer("Write to ES cache", debug=DEBUG):
                    self.cache.extend({"id": "-".join([c.repo.branch, unicode(c.job.id)]), "value": c} for c in output)
                    try:
                        self.cache.flush()
                    except Exception as e:
                        Log.warning("problem flushing. nevermind.", cause=e)
        finally:
            with self.locker:
                for p in waiting_threads[1:]:
                    if DEBUG:
                        Log.note("releasing thread for {{branch}}/{{revision}}", branch=branch, revision=revision)
                    p.go()
                self.pending[(branch, revision)] = None

        return output
Example #12
# Author: Kyle Lahnakoski ([email protected])
#

###############################################################################
# Intended to test exit behaviour from timeout, SIGINT (CTRL-C), or "exit"
###############################################################################

from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

from pyLibrary.debugs.logs import Log
from pyLibrary.thread.threads import Thread, Signal
from pyLibrary.thread.till import Till

please_stop = Signal()


def timeout(please_stop):
    (Till(seconds=20) | please_stop).wait()
    please_stop.go()


Thread.run("timeout", target=timeout, please_stop=please_stop)

Log.note("you must type 'exit', and press Enter, or wait 20seconds")
Thread.wait_for_shutdown_signal(allow_exit=True, please_stop=please_stop)

if not please_stop:
    Log.note("'exit' detected")
else:
    Log.note("timeout detected")
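
The | in (Till(seconds=20) | please_stop).wait() composes signals: the combined Signal fires as soon as either operand does. The same trick yields an interruptible periodic loop (a sketch against the pyLibrary API used above):

def heartbeat(please_stop):
    while not please_stop:
        Log.note("still alive")
        # sleep 5 seconds, but wake immediately when asked to stop
        (Till(seconds=5) | please_stop).wait()

Thread.run("heartbeat", target=heartbeat, please_stop=please_stop)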
Example #13
            continue
        try:
            for i in parse_id_argument(settings.args.id):
                data = source.get_key(i)
                if data != None:
                    already_in_queue.add(id(source))
                    queue.add(Dict(bucket=w.source.bucket, key=i))
        except Exception as e:
            if "Key {{key}} does not exist" in e:
                already_in_queue.add(id(source))
                queue.add(Dict(bucket=w.source.bucket, key=settings.args.id))
            Log.warning("Problem", cause=e)

    resources = Dict(hg=HgMozillaOrg(settings=settings.hg))

    stopper = Signal()
    ETL(name="ETL Loop Test",
        work_queue=queue,
        workers=settings.workers,
        settings=settings.param,
        resources=resources,
        please_stop=stopper)

    aws.capture_termination_signal(stopper)
    Thread.wait_for_shutdown_signal(stopper, allow_exit=True)


def parse_id_argument(id):
    if id.find("..") >= 0:
        # RANGE OF IDS
        min_, max_ = map(int, map(strings.trim, id.split("..")))