Example #1
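The examples below all rely on a few utility helpers (`sizeof_fmt`, `timesofar`, `norm`) imported from elsewhere in the codebase and not shown here. A minimal sketch of plausible stand-ins so the snippets can be read on their own; the exact output formats and `norm`'s truncation behavior are assumptions:

import time

def sizeof_fmt(num, suffix="B"):
    # human-readable byte count (the classic recipe): 1536 -> "1.5KiB"
    for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, "Yi", suffix)

def timesofar(t0):
    # elapsed time since epoch timestamp t0, as a short human-readable string
    secs = int(time.time() - t0)
    mins, secs = divmod(secs, 60)
    hours, mins = divmod(mins, 60)
    return "%dh%dm%ds" % (hours, mins, secs) if hours else "%dm%ds" % (mins, secs)

def norm(value, maxlen):
    # truncate a string to at most maxlen characters (assumed behavior)
    value = str(value)
    return value if len(value) <= maxlen else value[:maxlen - 3] + "..."
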
 def extract_worker_info(self, worker):
     # Summarize a worker: job info, process stats and currently open files
     info = OrderedDict()
     proc = worker.get("process")
     err = " !" if worker.get("err") else ""
     info["pid"] = str(worker["info"]["id"]) + err
     info["source"] = norm(worker["info"].get("source") or "", 25)
     info["category"] = norm(worker["info"].get("category") or "", 10)
     info["step"] = norm(worker["info"].get("step") or "", 20)
     info["description"] = norm(worker["info"].get("description") or "", 30)
     info["mem"] = proc and sizeof_fmt(proc.memory_info().rss)
     info["cpu"] = proc and "%.1f%%" % proc.cpu_percent()
     info["started_at"] = worker.get("started_at") or ""
     if worker.get("duration"):
         info["duration"] = worker["duration"]
     else:
         info["duration"] = timesofar(worker.get("started_at", 0))
     info["files"] = []
     if proc:
         for pfile in proc.open_files():
             # skip files opened in append mode (e.g. the logger); only report reads
             if pfile.mode == 'r':
                 finfo = OrderedDict()
                 finfo["path"] = pfile.path
                 finfo["read"] = sizeof_fmt(pfile.position)
                 size = os.path.getsize(pfile.path)
                 finfo["size"] = sizeof_fmt(size)
                 info["files"].append(finfo)
     return info
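A hypothetical invocation, with a hand-built worker dict shaped the way the method expects (names like `monitor` are illustrative only; passing `process=None` exercises the branches that skip the psutil lookups):

import time

worker = {
    "process": None,   # would normally be a psutil.Process; None skips mem/cpu/files
    "err": None,
    "info": {"id": 12345, "source": "mysource", "category": "dump",
             "step": "download", "description": "fetch release files"},
    "started_at": time.time() - 42,
}
info = monitor.extract_worker_info(worker)  # 'monitor' is a hypothetical instance
print(info["pid"], info["source"], info["duration"])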
Example #2
    def __init__(self,
                 loop,
                 process_queue=None,
                 thread_queue=None,
                 max_memory_usage=None):
        self.loop = loop
        self.process_queue = process_queue
        self.thread_queue = thread_queue
        self.ok_to_run = asyncio.Semaphore()

        if max_memory_usage == "auto":
            # try to find a nice limit...
            limited = int(psutil.virtual_memory().available * .6)
            logger.info("Auto-setting memory usage limit to %s" %
                        sizeof_fmt(limited))
            max_memory_usage = limited
        elif max_memory_usage:
            logger.info("Setting memory usage to %s" %
                        sizeof_fmt(max_memory_usage))
        self.max_memory_usage = max_memory_usage
        self.avail_memory = int(psutil.virtual_memory().available)
        self._phub = None
        donedir = os.path.join(config.RUN_DIR, "done")
        if not os.path.exists(donedir):
            os.makedirs(donedir)
Example #3
    def __init__(self,
                 loop,
                 process_queue=None,
                 thread_queue=None,
                 max_memory_usage=None,
                 num_workers=None,
                 num_threads=None,
                 default_executor="thread",
                 auto_recycle=True):
        if not os.path.exists(config.RUN_DIR):
            logger.info("Creating RUN_DIR directory '%s'", config.RUN_DIR)
            os.makedirs(config.RUN_DIR)
        self.loop = loop
        self.num_workers = num_workers
        if self.num_workers == 0:
            logger.debug("Adjusting number of worker to 1")
            self.num_workers = 1
        self.num_threads = num_threads or self.num_workers
        self.process_queue = process_queue or concurrent.futures.ProcessPoolExecutor(
            max_workers=self.num_workers)
        # TODO: limit the number of threads (as argument) ?
        self.thread_queue = thread_queue or concurrent.futures.ThreadPoolExecutor(
            max_workers=self.num_threads)
        if default_executor == "thread":
            self.loop.set_default_executor(self.thread_queue)
        else:
            self.loop.set_default_executor(self.process_queue)
        self.ok_to_run = asyncio.Semaphore()
        # auto-create RUN_DIR
        if not os.path.exists(config.RUN_DIR):
            os.makedirs(config.RUN_DIR)

        if max_memory_usage == "auto":
            # try to find a nice limit...
            limited = int(psutil.virtual_memory().available * .6)
            logger.info("Auto-setting memory usage limit to %s",
                        sizeof_fmt(limited))
            max_memory_usage = limited
        elif max_memory_usage:
            logger.info("Setting memory usage to %s",
                        sizeof_fmt(max_memory_usage))
        else:
            logger.info("No memory limit set")
        self.max_memory_usage = max_memory_usage
        self.avail_memory = int(psutil.virtual_memory().available)
        self._phub = None
        self.auto_recycle = auto_recycle  # currently active setting
        self.auto_recycle_setting = auto_recycle  # keep the original value so it can be restored
        self.jobs = {}  # all active jobs (thread/process)
        self._pchildren = []
        self.clean_staled()
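For context, a hypothetical instantiation of the enclosing manager class (the class name is not shown in the snippet; `JobManager` and the surrounding setup are assumptions):

import asyncio

loop = asyncio.get_event_loop()
manager = JobManager(loop,
                     num_workers=4,              # size of the process pool
                     default_executor="thread",  # run plain jobs in the thread pool
                     max_memory_usage="auto")    # cap at ~60% of available RAM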
Example #4
 def checkmem(self, pinfo=None):
     # Cooperatively wait (via asyncio sleeps) until memory and queue constraints allow the job to run
     mem_req = pinfo and pinfo.get("__reqs__", {}).get("mem") or 0
     t0 = time.time()
     waited = False
     sleep_time = 5
     if mem_req:
         logger.info("Job {cat:%s,source:%s,step:%s} requires %s memory, checking if available" % \
                 (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(mem_req)))
     if self.max_memory_usage:
         hub_mem = self.hub_memory
         while hub_mem >= self.max_memory_usage:
             logger.info("Hub is using too much memory to launch job {cat:%s,source:%s,step:%s} (%s used, more than max allowed %s), wait a little (job's already been postponed for %s)" % \
                     (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(hub_mem),
                      sizeof_fmt(self.max_memory_usage),timesofar(t0)))
             yield from asyncio.sleep(sleep_time)
             waited = True
             hub_mem = self.hub_memory
     if mem_req:
         # the max allowed memory is either the configured limit or the OS limit
         max_mem = self.max_memory_usage or self.avail_memory
         # TODO: check projected memory (jobs with mem requirements currently running
         # as those jobs may not have reached their max mem usage yet)
         hub_mem = self.hub_memory
         while mem_req >= (max_mem - hub_mem):
             logger.info("Job {cat:%s,source:%s,step:%s} needs %s to run, not enough to launch it (hub consumes %s while max allowed is %s), wait a little  (job's already been postponed for %s)" % \
                     (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(mem_req), sizeof_fmt(hub_mem),
                      sizeof_fmt(max_mem), timesofar(t0)))
             yield from asyncio.sleep(sleep_time)
             waited = True
             # refresh limits and usage (the manager can be modified from the hub,
             # so the memory limit can change on-the-fly)
             hub_mem = self.hub_memory
             max_mem = self.max_memory_usage or self.avail_memory
     pendings = len(self.process_queue._pending_work_items) - config.HUB_MAX_WORKERS
     while pendings >= config.MAX_QUEUED_JOBS:
         if not waited:
             logger.info("Can't run job {cat:%s,source:%s,step:%s} right now, too much pending jobs in the queue (max: %s), will retry until possible" % \
                     (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), config.MAX_QUEUED_JOBS))
         yield from asyncio.sleep(sleep_time)
         pendings = len(self.process_queue._pending_work_items) - config.HUB_MAX_WORKERS
         waited = True
     if waited:
         logger.info(
             "Job {cat:%s,source:%s,step:%s} now can be launched (total waiting time: %s)"
             % (pinfo.get("category"), pinfo.get("source"),
                pinfo.get("step"), timesofar(t0)))
Example #5
 def extract_worker_info(self, worker):
     # Variant reading pre-collected stats from dicts (no live psutil.Process)
     info = OrderedDict()
     proc = worker.get("process", worker)
     err = " !" if worker.get("err") else ""
     info["pid"] = str(worker["job"]["id"]) + err
     info["source"] = norm(worker["job"].get("source") or "", 25)
     info["category"] = norm(worker["job"].get("category") or "", 10)
     info["step"] = norm(worker["job"].get("step") or "", 20)
     info["description"] = norm(worker["job"].get("description") or "", 30)
     info["mem"] = sizeof_fmt(proc.get("memory", {}).get("size", 0.0))
     info["cpu"] = "%.1f%%" % proc.get("cpu", {}).get("percent", 0.0)
     info["started_at"] = worker["job"]["started_at"]
     if worker.get("duration"):
         info["duration"] = worker["duration"]
     else:
         info["duration"] = timesofar(worker["job"]["started_at"])
     # for now, don't display files used by the process
     info["files"] = []
     #if proc:
     #    for pfile in proc.open_files():
     #        # skip 'a' (logger)
     #        if pfile.mode == 'r':
     #            finfo = OrderedDict()
     #            finfo["path"] = pfile.path
     #            finfo["read"] = sizeof_fmt(pfile.position)
     #            size = os.path.getsize(pfile.path)
     #            finfo["size"] = sizeof_fmt(size)
     #            #info["files"].append(finfo)
     return info
Example #6
 def recycled(f):
     res = f.result()  # re-raise any exception from the recycling task
     # still out of memory?
     avail_mem = self.max_memory_usage - self.hub_memory
     if avail_mem <= 0:
         logger.error(
             "After recycling the process queue, "
             "memory usage is still too high (needs at least %s more); "
             "now turning auto-recycling off to prevent infinite recycling...",
             sizeof_fmt(abs(avail_mem)))
         self.auto_recycle = False
Example #7
    def prepare(self):
        # sanity check + extract boundary
        ct = self.request.headers.get("Content-Type")
        if not ct:
            self.write_error(
                400, exc_info=[None, "No Content-Type header found", None])
            return
        try:
            ct, boundary = ct.split(";")
        except Exception as e:
            self.write_error(400, exc_info=[None, str(e), None])
            return
        ct = ct.strip()
        if ct != "multipart/form-data":
            self.write_error(
                400,
                exc_info=[
                    None,
                    "Excepting 'Content-Type: multipart/form-data', got %s" %
                    ct, None
                ])
            return
        boundname, boundary = boundary.strip().split("=")
        if boundname != "boundary":
            self.write_error(
                400,
                exc_info=[None, "No boundary field found in headers", None])
        self.boundary = boundary
        cl = self.request.headers.get("Content-Length")
        if cl:
            self.contentlength = sizeof_fmt(int(cl))
        else:
            self.contentlength = "unknown size"

        self.begin = False
        self.src_name = self.request.uri.split("/")[-1]
        self.fp = None
        self.head = ""
Example #8
    def check_constraints(self, pinfo=None):
        # Cooperatively wait until memory, queue and predicate constraints allow the job to run
        mem_req = pinfo and pinfo.get("__reqs__", {}).get("mem") or 0
        t0 = time.time()
        waited = False
        sleep_time = 5
        if mem_req:
            logger.info("Job {cat:%s,source:%s,step:%s} requires %s memory, checking if available" % \
                    (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(mem_req)))
        if self.max_memory_usage:
            hub_mem = self.hub_memory
            while hub_mem >= self.max_memory_usage:
                if self.auto_recycle:
                    pworkers = self.get_pid_files()
                    tworkers = self.get_thread_files()
                    if len(pworkers) == 0 and len(tworkers) == 0:
                        logger.info(
                            "No worker running, recycling the process queue..."
                        )
                        fut = self.recycle_process_queue()

                        def recycled(f):
                            res = f.result()  # re-raise any exception from the recycling task
                            # still out of memory?
                            avail_mem = self.max_memory_usage - self.hub_memory
                            if avail_mem <= 0:
                                logger.error("After recycling the process queue, " + \
                                             "memory usage is still too high (needs at least %s more); " % sizeof_fmt(abs(avail_mem)) + \
                                             "now turning auto-recycling off to prevent infinite recycling...")
                                self.auto_recycle = False

                        fut.add_done_callback(recycled)
                logger.info("Hub is using too much memory to launch job {cat:%s,source:%s,step:%s} (%s used, more than max allowed %s), wait a little (job's already been postponed for %s)" % \
                        (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(hub_mem),
                         sizeof_fmt(self.max_memory_usage),timesofar(t0)))
                yield from asyncio.sleep(sleep_time)
                waited = True
                hub_mem = self.hub_memory
        if mem_req:
            # the max allowed memory is either the configured limit or the OS limit
            max_mem = self.max_memory_usage or self.avail_memory
            # TODO: check projected memory (jobs with mem requirements currently running
            # as those jobs may not have reached their max mem usage yet)
            hub_mem = self.hub_memory
            while mem_req >= (max_mem - hub_mem):
                logger.info("Job {cat:%s,source:%s,step:%s} needs %s to run, not enough to launch it (hub consumes %s while max allowed is %s), wait a little  (job's already been postponed for %s)" % \
                        (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(mem_req), sizeof_fmt(hub_mem),
                         sizeof_fmt(max_mem), timesofar(t0)))
                yield from asyncio.sleep(sleep_time)
                waited = True
                # refresh limits and usage (the manager can be modified from the hub,
                # so the memory limit can change on-the-fly)
                hub_mem = self.hub_memory
                max_mem = self.max_memory_usage or self.avail_memory
        pendings = len(self.process_queue._pending_work_items) - config.HUB_MAX_WORKERS
        while pendings >= config.MAX_QUEUED_JOBS:
            if not waited:
                logger.info("Can't run job {cat:%s,source:%s,step:%s} right now, too much pending jobs in the queue (max: %s), will retry until possible" % \
                        (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), config.MAX_QUEUED_JOBS))
            yield from asyncio.sleep(sleep_time)
            pendings = len(self.process_queue._pending_work_items) - config.HUB_MAX_WORKERS
            waited = True
        # finally check custom predicates (guard against pinfo=None, which would
        # otherwise make the for loop below iterate over None)
        predicates = (pinfo or {}).get("__predicates__", [])
        failed_predicate = None
        while True:
            for predicate in predicates:
                if not predicate(self):
                    failed_predicate = predicate
                    break  # for loop (most inner one)
                else:
                    # reset flag
                    failed_predicate = None
            if failed_predicate:
                logger.info("Can't run job {cat:%s,source:%s,step:%s} right now, predicate %s failed, will retry until possible" % \
                        (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"),failed_predicate))
                yield from asyncio.sleep(sleep_time)
                waited = True
            else:
                break  # while loop
        if waited:
            logger.info(
                "Job {cat:%s,source:%s,step:%s} now can be launched (total waiting time: %s)"
                % (pinfo.get("category"), pinfo.get("source"),
                   pinfo.get("step"), timesofar(t0)))
            # auto-recycle may have been temporarily disabled while waiting for more memory.
            # If the job can now run, we had enough memory, so restore the original
            # recycling setting (a no-op if auto_recycle was False to begin with)
            if self.auto_recycle_setting:
                self.auto_recycle = self.auto_recycle_setting
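The `__predicates__` entries checked in the last loop are callables that receive the manager and return a boolean; the job is postponed until all of them pass. A hypothetical `pinfo` combining a memory requirement and a predicate (names are illustrative):

def few_active_jobs(manager):
    # only launch when fewer than 2 jobs are currently tracked (illustrative)
    return len(manager.jobs) < 2

pinfo = {"category": "dump", "source": "mysource", "step": "download",
         "__reqs__": {"mem": 2 * 1024**3},       # needs ~2GiB of free memory
         "__predicates__": [few_active_jobs]}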