def main(args=None):
    warn("the usage of this tool is deprecated and will be removed in a future release.", DeprecationWarning)
    usage = "Usage: %(prog)s [options]"
    description = "run watchdog"
    parser = ArgumentParser(usage=usage, description=description)
    parser.add_argument("--site", dest="site", type=str, default=None, help='name of site', required=False)
    parser.add_argument("--dry", dest="dry", action='store_true', default=False, help='test-run')
    parser.add_argument("--force", dest="force", action='store_true', default=False,
                        help='run regardless of being deprecated')
    opts = parser.parse_args(args)
    ratio_cpu_max = float(cfg.get("watchdog", "ratio_cpu"))
    ratio_mem_max = float(cfg.get("watchdog", "ratio_mem"))
    log = getLogger("script")
    batchEngine = HPC.BatchEngine()
    batchEngine.update()
    batchsite = BATCH_DEFAULTS['name'] if opts.site is None else opts.site
    # first, get running jobs.
    jobs = []
    if opts.force:
        jobs = __getRunningJobs(batchsite)
    log.info("watchdog settings: max_cpu %1.2f max_mem %1.2f (ratio with respect to max. allocated)",
             ratio_cpu_max, ratio_mem_max)
    for j in jobs:
        # the cpu limit comes from 'max_cpu' and the memory limit from 'max_mem'
        # (these keys were swapped in an earlier revision)
        max_cpu = float(j['max_cpu'])
        max_mem = float(j['max_mem'])
        if max_cpu in [-1., 0.]:
            max_cpu = float(convertHHMMtoSec(BATCH_DEFAULTS['cputime']))
        if max_mem in [-1., 0.]:
            max_mem = float(BATCH_DEFAULTS['memory'])
        bj = HPC.BatchJob(name="%s-%s" % (j['t_id'], getSixDigits(j['inst_id'])),
                          batchId=j['batchId'], defaults=BATCH_DEFAULTS)
        bid = str(bj.batchId)
        if bid in batchEngine.allJobs:
            current_cpu = batchEngine.getCPUtime(bid)
            current_mem = batchEngine.getMemory(bid, unit='MB')
            ratio_cpu = current_cpu / max_cpu
            ratio_mem = current_mem / max_mem
            __updateStatus([j, bid, current_mem, current_cpu], batchEngine=batchEngine, dry=opts.dry)
            if (ratio_cpu >= ratio_cpu_max) or (ratio_mem >= ratio_mem_max):
                log.info('%s cpu %1.1f mem %1.1f', bid, ratio_cpu, ratio_mem)
                log.warning('Watchdog identified job %s to exceed its limits, sending kill signal', bid)
                if opts.dry:
                    continue
                try:
                    bj.kill()
                except Exception as err:
                    log.exception("could not schedule job for removal, reason below\n%s", err)
                if bj.status == "Failed":
                    __reportKilledJob(j)
    log.info("completed cycle")
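# Entry-point sketch (not part of the fragment above): the tool is deprecated,
# so jobs are only scanned when --force is given; --dry logs offending jobs
# without sending kill signals. "mysite" is a hypothetical site name.
if __name__ == '__main__':
    main(["--site", "mysite", "--dry", "--force"])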
def registerDS(self, filename=None, overwrite=False):
    site = cfg.get("site", "name")
    if filename is None:
        files = [fi['target'] for fi in self.OutputFiles]
    else:
        files = [filename]
    for fi in files:
        tg = oPath.expandvars(fi)
        res = Rpost("%s/datacat/" % DAMPE_WORKFLOW_URL,
                    data={"filename": tg, "site": site, "action": "register", "overwrite": str(overwrite)})
        res.raise_for_status()
        if res.json().get("result", "nok") != "ok":
            raise Exception(res.json().get("error", "No error provided."))
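# Usage sketch (hypothetical job instance and file name): register a single
# output file with the datacat service instead of everything in OutputFiles;
# environment variables in the path are expanded locally before the POST.
#
#   job.registerDS(filename="$DAMPE_WORKFLOW_ROOT/data/output.root", overwrite=True)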
from DmpWorkflow.config.defaults import cfg
from DmpWorkflow import version
from socket import getfqdn

kind = cfg.get("global", "installation")
if kind == 'server':
    from flask import Flask
    from flask.ext.mongoengine import MongoEngine

    app = Flask(__name__)
    app.config.update(LOGGER_NAME="core")
    app.config['MONGODB_DB'] = cfg.get("database", "name")
    app.config['MONGODB_USERNAME'] = cfg.get("database", "user")
    app.config['MONGODB_PASSWORD'] = cfg.get("database", "password")
    app.config['MONGODB_HOST'] = cfg.get("database", "host")
    app.config['MONGODB_PORT'] = int(cfg.get("database", "port"))
    app.config["SECRET_KEY"] = "KeepThisS3cr3t"
    db = MongoEngine(app)

    def register_blueprints(app):
        # Prevents circular imports
        from DmpWorkflow.core.views import jobs
        from DmpWorkflow.core.admin import admin
        app.register_blueprint(jobs)
        app.register_blueprint(admin)

    register_blueprints(app)


def main():
    app.logger.info("started DmpWorkflow Server Version: %s on %s", version, getfqdn())
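# Config sketch (hypothetical values) for the [database] section consumed above;
# the option names mirror the cfg.get() calls, the values are placeholders.
#
#   [database]
#   name = dampe_workflow
#   user = dampe
#   password = s3cr3t
#   host = localhost
#   port = 27017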
if unit in defaults['memory']:
    defaults['memory'] = float(defaults['memory'].replace(unit, ""))
max_mem = float(defaults['memory'])
if max_mem >= 1e6:  # must be in kB!
    max_mem /= 1024.
# get the max ratios
try:
    executor.job.updateStatus("Running", "PreparingJob", hostname=gethostname(),
                              body=executor.job.getJSONbody(), batchId=executor.batchId,
                              cpu_max=max_cpu, mem_max=max_mem)
except Exception as err:
    executor.logThis("EXCEPTION: %s", err)
executor.logThis('Watchdog: maximum cpu: %s -- maximum memory: %s', str(max_cpu), str(max_mem))
ratio_cpu_max = float(cfg.get("watchdog", "ratio_cpu"))
ratio_mem_max = float(cfg.get("watchdog", "ratio_mem"))
now = datetime.utcnow()
proc = Process(target=executor.execute)
proc.start()
ps = ps_proc(proc.pid)
prm = ProcessResourceMonitor(ps)  # this monitor uses psutil for its information.
while proc.is_alive():
    syst_cpu = prm.getCpuTime()
    memory = prm.getMemory()
    # check time-out conditions
    executor.logThis('Watchdog: current cpu: %s -- current memory: %s', str(syst_cpu), str(memory))
    if syst_cpu / max_cpu >= ratio_cpu_max:
        killJob = True
        reason = "exceeding CPU time"
    if memory / max_mem >= ratio_mem_max:
        killJob = True
        reason = "exceeding memory"
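# Minimal sketch of the ProcessResourceMonitor used above, assuming it wraps the
# psutil.Process handle it is given (the actual class body is not part of this
# fragment; method names are taken from the call sites in the watchdog loop).
from psutil import Process as ps_proc


class ProcessResourceMonitor(object):
    def __init__(self, ps):
        self.ps = ps  # psutil.Process of the forked executor

    def getCpuTime(self):
        # accumulated user + system CPU seconds of the monitored process
        times = self.ps.cpu_times()
        return times.user + times.system

    def getMemory(self):
        # resident set size, converted from bytes to MB
        return self.ps.memory_info().rss / 1048576.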
""" Created on Mar 10, 2016 @author: zimmer """ from DmpWorkflow.config.defaults import cfg from functools import wraps from flask import request, Response USERNAME = cfg.get("server", "admin_user") PASSWORD = cfg.get("server", "admin_password") def check_auth(username, password): """This function is called to check if a username / password combination is valid. """ return username == USERNAME and password == PASSWORD def authenticate(): """Sends a 401 response that enables basic auth""" return Response( 'Could not verify your access level for that URL.\n' 'You have to login with proper credentials', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'}) def requires_auth(f): @wraps(f)
from os.path import abspath, join as oPjoin, dirname
from sys import path as sys_path
sys_path.append(abspath(oPjoin(dirname(__file__), '..')))
from flask.ext.script import Manager, Server, Shell
import DmpWorkflow.core.models as DmpWorkflowModels
from DmpWorkflow.core import app, db
from DmpWorkflow.config.defaults import cfg


def _make_context():
    return dict(app=app, db=db, models=DmpWorkflowModels)

manager = Manager(app)

# Turn on debugger and reloader by default
manager.add_command("runserver",
                    Server(use_debugger=cfg.getboolean("server", "use_debugger"),
                           use_reloader=cfg.getboolean("server", "use_reloader"),
                           host=cfg.get("server", "host")))
manager.add_command("shell", Shell(make_context=_make_context))


def main():
    manager.run()

if __name__ == "__main__":
    main()
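# Usage sketch, assuming this script is saved as manage.py: Flask-Script exposes
# the two registered commands on the command line.
#
#   python manage.py runserver   # serve the app; debugger/reloader follow cfg
#   python manage.py shell       # interactive shell with app, db and models bound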
def getWorkDir(self):
    wdROOT = cfg.get("site", "workdir")
    wd = oPath.join(wdROOT, str(self.title), str(self.type), self.getSixDigits(asPath=True))
    return wd
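# Path sketch (hypothetical values), assuming getSixDigits(asPath=True) renders
# the instance id as a zero-padded directory name: a job titled "MyTask" of
# type "Other" with instance id 12 would map to
#
#   <site workdir>/MyTask/Other/000012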
import os.path as oPath
from ast import literal_eval
from os import environ
from jsonpickle import encode as Jencode, decode as Jdecode
from json import dumps
from time import ctime
from requests import post as Rpost
from importlib import import_module
from copy import deepcopy
from DmpWorkflow.config.defaults import FINAL_STATII, DAMPE_WORKFLOW_URL, DAMPE_WORKFLOW_ROOT, BATCH_DEFAULTS, cfg
from DmpWorkflow.utils.tools import mkdir, touch, rm, safe_copy, parseJobXmlToDict, getSixDigits, ResourceMonitor
from DmpWorkflow.utils.shell import run, make_executable  # , source_bash

HPC = import_module("DmpWorkflow.hpc.%s" % BATCH_DEFAULTS['system'])
PYTHONBIN = ""
ExtScript = cfg.get("site", "ExternalsScript")
NUMLINES_LOG = 20

# todo2: add cfg parsing variables.


class DmpJob(object):
    def __init__(self, job_id, body=None, **kwargs):
        self.wd = oPath.abspath(".")
        self.title = None
        self.jobId = str(job_id)
        self.instanceId = None
        self.batchId = None
        self.InputFiles = []
        self.OutputFiles = []
        self.MetaData = []
        self.type = None
        self.release = None
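# Interface sketch for a batch back-end module loaded through import_module above
# (the module name, e.g. DmpWorkflow.hpc.lsf, comes from BATCH_DEFAULTS['system'];
# the concrete name is an assumption). The method set is inferred from the call
# sites in the watchdog, not copied from an actual back-end implementation.
class BatchEngine(object):
    """tracks all jobs currently known to the scheduler"""
    def __init__(self):
        self.allJobs = {}  # batchId -> scheduler record

    def update(self):
        """refresh allJobs from the batch system"""

    def getCPUtime(self, batchId):
        """accumulated CPU seconds of the job with this batchId"""

    def getMemory(self, batchId, unit='MB'):
        """current memory footprint of the job, in the requested unit"""


class BatchJob(object):
    """handle on a single scheduled job"""
    def __init__(self, name, batchId, defaults):
        self.name = name
        self.batchId = batchId
        self.defaults = defaults
        self.status = "Running"

    def kill(self):
        """ask the scheduler to terminate this job"""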