def extract_worker_info(self, worker):
    """Build an ordered, display-ready summary for one worker.

    Reports pid (flagged with " !" on error), normalized job fields,
    memory/CPU figures from the psutil process, timing info and the
    list of files the process currently reads.
    """
    summary = OrderedDict()
    process = worker.get("process")
    err_flag = " !" if worker.get("err") else ""
    job = worker["info"]
    summary["pid"] = str(job["id"]) + err_flag
    # normalize textual fields to fixed display widths
    for field, width in (("source", 25), ("category", 10),
                         ("step", 20), ("description", 30)):
        summary[field] = norm(job.get(field) or "", width)
    # mem/cpu stay None when no process is attached
    summary["mem"] = sizeof_fmt(process.memory_info().rss) if process else process
    summary["cpu"] = ("%.1f%%" % process.cpu_percent()) if process else process
    summary["started_at"] = worker.get("started_at") or ""
    if worker.get("duration"):
        summary["duration"] = worker["duration"]
    else:
        summary["duration"] = timesofar(worker.get("started_at", 0))
    summary["files"] = []
    if process:
        for opened in process.open_files():
            # mode 'a' belongs to the logger: only report files the job reads
            if opened.mode == 'r':
                entry = OrderedDict()
                entry["path"] = opened.path
                entry["read"] = sizeof_fmt(opened.position)
                entry["size"] = sizeof_fmt(os.path.getsize(opened.path))
                summary["files"].append(entry)
    return summary
def __init__(self, loop, process_queue=None, thread_queue=None, max_memory_usage=None):
    """Store the event loop and executors, resolve the memory limit and
    make sure the "done" sub-directory of RUN_DIR exists.

    max_memory_usage: bytes, or the string "auto" to derive a limit from
    currently available OS memory, or None for no limit.
    """
    self.loop = loop
    self.process_queue = process_queue
    self.thread_queue = thread_queue
    self.ok_to_run = asyncio.Semaphore()
    if max_memory_usage == "auto":
        # keep a comfortable margin: 60% of what the OS reports as free
        max_memory_usage = int(psutil.virtual_memory().available * .6)
        logger.info("Auto-setting memory usage limit to %s" % sizeof_fmt(max_memory_usage))
    elif max_memory_usage:
        logger.info("Setting memory usage to %s" % sizeof_fmt(max_memory_usage))
    self.max_memory_usage = max_memory_usage
    self.avail_memory = int(psutil.virtual_memory().available)
    self._phub = None
    # folder where finished-job markers are stored
    done_folder = os.path.join(config.RUN_DIR, "done")
    if not os.path.exists(done_folder):
        os.makedirs(done_folder)
def __init__(self, loop, process_queue=None, thread_queue=None, max_memory_usage=None,
             num_workers=None, num_threads=None, default_executor="thread",
             auto_recycle=True):
    """Set up the job manager: executors, memory limit, recycling state.

    loop: asyncio event loop used to schedule jobs.
    process_queue/thread_queue: optional pre-built executors; created with
        num_workers/num_threads workers otherwise.
    max_memory_usage: bytes, "auto" (60% of available memory) or None.
    default_executor: "thread" or "process", installed as the loop's default.
    auto_recycle: whether the process queue may be recycled when memory runs low.
    """
    # auto-create RUN_DIR (pid/job files live there)
    # NOTE: this was previously checked twice in this constructor; the
    # redundant second check has been removed.
    if not os.path.exists(config.RUN_DIR):
        logger.info("Creating RUN_DIR directory '%s'", config.RUN_DIR)
        os.makedirs(config.RUN_DIR)
    self.loop = loop
    self.num_workers = num_workers
    if self.num_workers == 0:
        # a pool needs at least one worker to make progress
        logger.debug("Adjusting number of worker to 1")
        self.num_workers = 1
    # default the thread count to the worker count when not given
    self.num_threads = num_threads or self.num_workers
    self.process_queue = process_queue or concurrent.futures.ProcessPoolExecutor(
        max_workers=self.num_workers)
    self.thread_queue = thread_queue or concurrent.futures.ThreadPoolExecutor(
        max_workers=self.num_threads)
    if default_executor == "thread":
        self.loop.set_default_executor(self.thread_queue)
    else:
        self.loop.set_default_executor(self.process_queue)
    self.ok_to_run = asyncio.Semaphore()
    if max_memory_usage == "auto":
        # try to find a nice limit: 60% of currently available memory
        limited = int(psutil.virtual_memory().available * .6)
        logger.info("Auto-setting memory usage limit to %s", sizeof_fmt(limited))
        max_memory_usage = limited
    elif max_memory_usage:
        logger.info("Setting memory usage to %s", sizeof_fmt(max_memory_usage))
    else:
        logger.info("No memory limit set")
    self.max_memory_usage = max_memory_usage
    self.avail_memory = int(psutil.virtual_memory().available)
    self._phub = None
    self.auto_recycle = auto_recycle  # active flag (may be toggled at runtime)
    self.auto_recycle_setting = auto_recycle  # original setting, kept to restore it
    self.jobs = {}  # all active jobs (thread/process)
    self._pchildren = []
    self.clean_staled()
def checkmem(self, pinfo=None):
    """
    Old-style coroutine (``yield from``) that blocks until the job described
    by *pinfo* may be launched. Three constraints are polled, each every few
    seconds:

      1. global hub memory usage must be below ``self.max_memory_usage``;
      2. if the job declares a memory requirement
         (``pinfo["__reqs__"]["mem"]``, in bytes), enough memory must be free;
      3. the number of pending jobs in the process queue must stay below
         ``config.MAX_QUEUED_JOBS``.

    pinfo: optional dict describing the job (keys "category", "source",
    "step", optionally "__reqs__").
    """
    # declared memory requirement in bytes, 0 when none was given
    mem_req = pinfo and pinfo.get("__reqs__", {}).get("mem") or 0
    t0 = time.time()  # used to report total postponed time in logs
    waited = False
    sleep_time = 5  # seconds between two constraint checks
    if mem_req:
        logger.info("Job {cat:%s,source:%s,step:%s} requires %s memory, checking if available" % \
                (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(mem_req)))
    # 1. global limit: wait while the hub as a whole uses too much memory
    if self.max_memory_usage:
        hub_mem = self.hub_memory
        while hub_mem >= self.max_memory_usage:
            logger.info("Hub is using too much memory to launch job {cat:%s,source:%s,step:%s} (%s used, more than max allowed %s), wait a little (job's already been postponed for %s)" % \
                    (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(hub_mem), sizeof_fmt(self.max_memory_usage), timesofar(t0)))
            yield from asyncio.sleep(sleep_time)
            waited = True
            hub_mem = self.hub_memory
    # 2. per-job requirement: wait until enough memory is free for this job
    if mem_req:
        # max allowed mem is either the limit we gave or the OS limit
        max_mem = self.max_memory_usage and self.max_memory_usage or self.avail_memory
        # TODO: check projected memory (jobs with mem requirements currently running
        # as those jobs may not have reached their max mem usage yet)
        hub_mem = self.hub_memory
        while mem_req >= (max_mem - hub_mem):
            logger.info("Job {cat:%s,source:%s,step:%s} needs %s to run, not enough to launch it (hub consumes %s while max allowed is %s), wait a little (job's already been postponed for %s)" % \
                    (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(mem_req), sizeof_fmt(hub_mem), sizeof_fmt(max_mem), timesofar(t0)))
            yield from asyncio.sleep(sleep_time)
            waited = True
            # refresh limits and usage on each iteration (the manager can be
            # modified from the hub, thus limits can change on-the-fly)
            hub_mem = self.hub_memory
            max_mem = self.max_memory_usage and self.max_memory_usage or self.avail_memory
    # 3. queue pressure: don't exceed the max number of queued jobs
    # NOTE(review): relies on ProcessPoolExecutor's private _pending_work_items
    pendings = len(self.process_queue._pending_work_items.keys()) - config.HUB_MAX_WORKERS
    while pendings >= config.MAX_QUEUED_JOBS:
        if not waited:
            # only log the first time to avoid flooding the logs
            logger.info("Can't run job {cat:%s,source:%s,step:%s} right now, too much pending jobs in the queue (max: %s), will retry until possible" % \
                    (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), config.MAX_QUEUED_JOBS))
        yield from asyncio.sleep(sleep_time)
        pendings = len(self.process_queue._pending_work_items.keys()) - config.HUB_MAX_WORKERS
        waited = True
    if waited:
        logger.info(
            "Job {cat:%s,source:%s,step:%s} now can be launched (total waiting time: %s)" %
            (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), timesofar(t0)))
def extract_worker_info(self, worker):
    """Summarize one worker entry (dict-based process info) for display.

    Unlike the psutil-based variant, *worker* carries plain dicts: job
    details under "job" and process figures under "process" (falling back
    to the worker dict itself).
    """
    process = worker.get("process", worker)
    job = worker["job"]
    summary = OrderedDict()
    summary["pid"] = str(job["id"]) + (" !" if worker.get("err") else "")
    # normalize textual fields to fixed display widths
    for key, width in (("source", 25), ("category", 10),
                       ("step", 20), ("description", 30)):
        summary[key] = norm(job.get(key) or "", width)
    summary["mem"] = sizeof_fmt(process.get("memory", {}).get("size", 0.0))
    summary["cpu"] = "%.1f%%" % process.get("cpu", {}).get("percent", 0.0)
    summary["started_at"] = job["started_at"]
    duration = worker.get("duration")
    summary["duration"] = duration if duration else timesofar(job["started_at"])
    # open files used by the process are intentionally not reported for now
    summary["files"] = []
    return summary
def recycled(f):
    """Done-callback for the recycle future: if the hub is still above its
    memory limit once the process queue has been recycled, turn
    auto-recycling off to avoid an endless recycle loop.
    """
    f.result()  # propagate any exception raised while recycling
    # still out of memory ?
    avail_mem = self.max_memory_usage - self.hub_memory
    if avail_mem <= 0:
        # bug fix: the literals previously lacked a separator, producing
        # "...more)now turn..." in the log output
        logger.error(
            "After recycling process queue, "
            "memory usage is still too high (needs at least %s more), "
            "now turn auto-recycling off to prevent infinite recycling...",
            sizeof_fmt(abs(avail_mem)))
        self.auto_recycle = False
def prepare(self):
    """Validate the multipart upload request and initialize upload state.

    Checks the Content-Type header is "multipart/form-data" and extracts the
    boundary token; any violation answers a 400 via write_error() and aborts.
    On success, sets self.boundary, self.contentlength (human-readable or
    "unknown size"), self.begin, self.src_name (last URI path segment),
    self.fp and self.head.
    """
    # sanity check + extract boundary
    ct = self.request.headers.get("Content-Type")
    if not ct:
        self.write_error(
            400, exc_info=[None, "No Content-Type header found", None])
        return
    try:
        ct, boundary = ct.split(";")
    except Exception as e:
        # not exactly "type; boundary=..." -> reject
        self.write_error(400, exc_info=[None, str(e), None])
        return
    ct = ct.strip()
    if ct != "multipart/form-data":
        # typo fix: message previously read "Excepting"
        self.write_error(
            400, exc_info=[
                None,
                "Expecting 'Content-Type: multipart/form-data', got %s" % ct,
                None
            ])
        return
    # maxsplit=1 so a boundary value containing '=' doesn't raise
    boundname, boundary = boundary.strip().split("=", 1)
    if boundname != "boundary":
        self.write_error(
            400, exc_info=[None, "No boundary field found in headers", None])
        return  # bug fix: previously fell through and kept processing
    self.boundary = boundary
    cl = self.request.headers.get("Content-Length")
    if cl:
        self.contentlength = sizeof_fmt(int(cl))
    else:
        self.contentlength = "unknown size"
    self.begin = False  # body parsing hasn't started yet
    self.src_name = self.request.uri.split("/")[-1]
    self.fp = None  # destination file handle, opened later
    self.head = ""
def check_constraints(self, pinfo=None):
    """
    Old-style coroutine (``yield from``) that blocks until the job described
    by *pinfo* may be launched. Constraints are polled every few seconds:

      1. global hub memory usage below ``self.max_memory_usage`` — while over
         the limit and no worker is running, the process queue may be
         recycled (when ``self.auto_recycle`` is on) to reclaim memory;
      2. if the job declares a memory requirement
         (``pinfo["__reqs__"]["mem"]``, in bytes), enough memory must be free;
      3. pending jobs in the process queue below ``config.MAX_QUEUED_JOBS``;
      4. all custom predicates in ``pinfo["__predicates__"]`` must pass.

    Once the job can run, the auto-recycle setting is restored in case it
    had been temporarily disabled.
    """
    # declared memory requirement in bytes, 0 when none was given
    mem_req = pinfo and pinfo.get("__reqs__", {}).get("mem") or 0
    t0 = time.time()  # used to report total postponed time in logs
    waited = False
    sleep_time = 5  # seconds between two constraint checks
    if mem_req:
        logger.info("Job {cat:%s,source:%s,step:%s} requires %s memory, checking if available" % \
                (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(mem_req)))
    # 1. global limit: wait while the hub as a whole uses too much memory
    if self.max_memory_usage:
        hub_mem = self.hub_memory
        while hub_mem >= self.max_memory_usage:
            if self.auto_recycle:
                # no running worker means the memory is held by the (idle)
                # pool processes themselves: recycling the queue frees it
                pworkers = self.get_pid_files()
                tworkers = self.get_thread_files()
                if len(pworkers) == 0 and len(tworkers) == 0:
                    logger.info(
                        "No worker running, recycling the process queue..."
                    )
                    fut = self.recycle_process_queue()
                    def recycled(f):
                        res = f.result()
                        # still out of memory ?
                        avail_mem = self.max_memory_usage - self.hub_memory
                        if avail_mem <= 0:
                            # disable auto-recycling to prevent recycling forever
                            logger.error("After recycling process queue, " + \
                                "memory usage is still too high (needs at least %s more)" % sizeof_fmt(abs(avail_mem)) + \
                                "now turn auto-recycling off to prevent infinite recycling...")
                            self.auto_recycle = False
                    fut.add_done_callback(recycled)
            logger.info("Hub is using too much memory to launch job {cat:%s,source:%s,step:%s} (%s used, more than max allowed %s), wait a little (job's already been postponed for %s)" % \
                    (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(hub_mem), sizeof_fmt(self.max_memory_usage), timesofar(t0)))
            yield from asyncio.sleep(sleep_time)
            waited = True
            hub_mem = self.hub_memory
    # 2. per-job requirement: wait until enough memory is free for this job
    if mem_req:
        # max allowed mem is either the limit we gave or the OS limit
        max_mem = self.max_memory_usage and self.max_memory_usage or self.avail_memory
        # TODO: check projected memory (jobs with mem requirements currently running
        # as those jobs may not have reached their max mem usage yet)
        hub_mem = self.hub_memory
        while mem_req >= (max_mem - hub_mem):
            logger.info("Job {cat:%s,source:%s,step:%s} needs %s to run, not enough to launch it (hub consumes %s while max allowed is %s), wait a little (job's already been postponed for %s)" % \
                    (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), sizeof_fmt(mem_req), sizeof_fmt(hub_mem), sizeof_fmt(max_mem), timesofar(t0)))
            yield from asyncio.sleep(sleep_time)
            waited = True
            # refresh limits and usage on each iteration (the manager can be
            # modified from the hub, thus limits can change on-the-fly)
            hub_mem = self.hub_memory
            max_mem = self.max_memory_usage and self.max_memory_usage or self.avail_memory
    # 3. queue pressure: don't exceed the max number of queued jobs
    # NOTE(review): relies on ProcessPoolExecutor's private _pending_work_items
    pendings = len(self.process_queue._pending_work_items.keys()) - config.HUB_MAX_WORKERS
    while pendings >= config.MAX_QUEUED_JOBS:
        if not waited:
            # only log the first time to avoid flooding the logs
            logger.info("Can't run job {cat:%s,source:%s,step:%s} right now, too much pending jobs in the queue (max: %s), will retry until possible" % \
                    (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), config.MAX_QUEUED_JOBS))
        yield from asyncio.sleep(sleep_time)
        pendings = len(self.process_queue._pending_work_items.keys()) - config.HUB_MAX_WORKERS
        waited = True
    # 4. finally check custom predicates
    predicates = pinfo and pinfo.get("__predicates__", [])
    failed_predicate = None
    while True:
        for predicate in predicates:
            if not predicate(self):
                failed_predicate = predicate
                break  # for loop (most inner one)
        else:
            # all predicates passed: reset flag
            failed_predicate = None
        if failed_predicate:
            logger.info("Can't run job {cat:%s,source:%s,step:%s} right now, predicate %s failed, will retry until possible" % \
                    (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), failed_predicate))
            yield from asyncio.sleep(sleep_time)
            waited = True
        else:
            break  # while loop
    if waited:
        logger.info(
            "Job {cat:%s,source:%s,step:%s} now can be launched (total waiting time: %s)" %
            (pinfo.get("category"), pinfo.get("source"), pinfo.get("step"), timesofar(t0)))
    # auto-recycle could have been temporarily disabled until more mem is assigned.
    # if we've been able to run the job, it means we had enough mem so restore
    # the recycling setting (if auto_recycle was originally False, it's ignored)
    if self.auto_recycle_setting:
        self.auto_recycle = self.auto_recycle_setting