예제 #1
0
def main(args=None):
    """Run one watchdog cycle over the running jobs of a batch site.

    The tool is deprecated: a DeprecationWarning is always issued, and the list
    of running jobs is only fetched (and therefore inspected) when ``--force``
    is given.  For every job known to the batch engine the current CPU time and
    memory are divided by the job's limits; a job whose ratio reaches one of the
    thresholds configured in the ``watchdog`` section is killed, unless
    ``--dry`` is set.

    :param args: command-line arguments handed to ``ArgumentParser.parse_args``
                 (``None`` makes argparse read ``sys.argv``).
    """
    warn("the usage of this tool is deprecated and will be removed in a future release.", DeprecationWarning)
    usage = "Usage: %(prog)s [options]"
    description = "run watchdog"
    parser = ArgumentParser(usage=usage, description=description)
    parser.add_argument("--site", dest="site", type=str, default=None, help='name of site', required=False)
    parser.add_argument("--dry", dest="dry", action='store_true', default=False, help='test-run')
    parser.add_argument("--force", dest="force", action='store_true', default=False,
                        help='run regardless of being deprecated')
    opts = parser.parse_args(args)

    # usage ratios (current / maximum) at or above which a job gets killed
    ratio_cpu_max = float(cfg.get("watchdog", "ratio_cpu"))
    ratio_mem_max = float(cfg.get("watchdog", "ratio_mem"))

    log = getLogger("script")
    batchEngine = HPC.BatchEngine()
    batchEngine.update()
    batchsite = BATCH_DEFAULTS['name'] if opts.site is None else opts.site
    # first, get running jobs; without --force the list stays empty and nothing is inspected.
    jobs = []
    if opts.force:
        jobs = __getRunningJobs(batchsite)
        log.info("watchdog settings: max_cpu %1.2f max_mem %1.2f (ratio with respect to max. allocated)",
                 ratio_cpu_max, ratio_mem_max)
    for j in jobs:
        # per-job limits: the CPU limit comes from 'max_cpu', the memory limit from 'max_mem'
        # (the two keys used to be swapped, so each ratio was computed against the wrong limit).
        max_cpu = float(j['max_cpu'])
        max_mem = float(j['max_mem'])
        # limits of -1 or 0 are replaced by the batch defaults
        if max_cpu in [-1., 0.]:
            max_cpu = float(convertHHMMtoSec(BATCH_DEFAULTS['cputime']))
        if max_mem in [-1., 0.]:
            max_mem = float(BATCH_DEFAULTS['memory'])
        bj = HPC.BatchJob(name="%s-%s" % (j['t_id'], getSixDigits(j['inst_id'])),
                          batchId=j['batchId'], defaults=BATCH_DEFAULTS)
        bid = str(bj.batchId)
        if bid in batchEngine.allJobs:
            current_cpu = batchEngine.getCPUtime(bid)
            current_mem = batchEngine.getMemory(bid, unit='MB')
            ratio_cpu = current_cpu / max_cpu
            ratio_mem = current_mem / max_mem
            # the status update is sent for every inspected job, not only for offenders
            __updateStatus([j, bid, current_mem, current_cpu], batchEngine=batchEngine, dry=opts.dry)
            if (ratio_cpu >= ratio_cpu_max) or (ratio_mem >= ratio_mem_max):
                log.info('%s cpu %1.1f mem %1.1f', bid, ratio_cpu, ratio_mem)
                log.warning('Watchdog identified job %s to exceed its sending kill signal', bid)
                if opts.dry:
                    # dry run: report only, never kill
                    continue
                try:
                    bj.kill()
                except Exception as err:
                    log.exception("could not schedule job for removal, reason below\n%s", err)
                if bj.status == "Failed":
                    __reportKilledJob(j)
    log.info("completed cycle")
예제 #2
0
 def registerDS(self, filename=None, overwrite=False):
     """Register one file, or all output files of this job, with the data catalogue.

     For each file an HTTP POST with ``action=register`` is sent to
     ``<DAMPE_WORKFLOW_URL>/datacat/``; environment variables in the file
     name are expanded first.

     :param filename: single file to register; when ``None`` the ``target``
                      entry of every item in ``self.OutputFiles`` is used.
     :param overwrite: forwarded to the server as ``str(overwrite)``.
     :raises Exception: when the server reply does not carry ``result == "ok"``;
                        the message is the reply's ``error`` field if present.
                        Errors raised by ``raise_for_status`` propagate unchanged.
     """
     site_name = cfg.get("site", "name")
     if filename is not None:
         targets = [filename]
     else:
         targets = [entry['target'] for entry in self.OutputFiles]
     for target in targets:
         expanded = oPath.expandvars(target)
         payload = {"filename": expanded, "site": site_name,
                    "action": "register",
                    "overwrite": str(overwrite)}
         response = Rpost("%s/datacat/" % DAMPE_WORKFLOW_URL, data=payload)
         response.raise_for_status()
         reply = response.json()
         if reply.get("result", "nok") != "ok":
             raise Exception(reply.get("error", "No error provided."))
예제 #3
0
from DmpWorkflow.config.defaults import cfg
from DmpWorkflow import version
from socket import getfqdn
# Installation flavour from the "global" configuration section; the Flask
# application below is only created for a 'server' installation.
kind = cfg.get("global", "installation")

if kind == 'server':

    from flask import Flask
    try:
        # The ``flask.ext`` redirect namespace was removed in Flask 1.0,
        # so import the extension under its real package name first.
        from flask_mongoengine import MongoEngine
    except ImportError:
        # Legacy installations that only expose the extension via flask.ext.
        from flask.ext.mongoengine import MongoEngine
    app = Flask(__name__)
    app.config.update(LOGGER_NAME="core")
    # MongoDB connection settings come from the "database" configuration section.
    app.config['MONGODB_DB'] = cfg.get("database", "name")
    app.config['MONGODB_USERNAME'] = cfg.get("database", "user")
    app.config['MONGODB_PASSWORD'] = cfg.get("database", "password")
    app.config['MONGODB_HOST'] = cfg.get("database", "host")
    app.config['MONGODB_PORT'] = int(cfg.get("database", "port"))
    # NOTE(review): the secret key is hard-coded in the source; consider loading
    # it from the configuration instead. Left unchanged to preserve behavior.
    app.config["SECRET_KEY"] = "KeepThisS3cr3t"
    db = MongoEngine(app)

    def register_blueprints(app):
        """Attach the ``jobs`` and ``admin`` blueprints to *app*."""
        # Imported here rather than at module level: prevents circular imports.
        from DmpWorkflow.core.views import jobs
        from DmpWorkflow.core.admin import admin
        app.register_blueprint(jobs)
        app.register_blueprint(admin)

    register_blueprints(app)

    def main():
        # announce the server version and the fully qualified host name in the application log
        app.logger.info("started DmpWorkflow Server Version: %s on %s",version,getfqdn())
예제 #4
0
from DmpWorkflow.config.defaults import cfg
from DmpWorkflow import version
from socket import getfqdn
# Installation flavour from the "global" configuration section; the Flask
# application below is only created for a 'server' installation.
kind = cfg.get("global", "installation")

if kind == 'server':

    from flask import Flask
    try:
        # The ``flask.ext`` redirect namespace was removed in Flask 1.0,
        # so import the extension under its real package name first.
        from flask_mongoengine import MongoEngine
    except ImportError:
        # Legacy installations that only expose the extension via flask.ext.
        from flask.ext.mongoengine import MongoEngine
    app = Flask(__name__)
    app.config.update(LOGGER_NAME="core")
    # MongoDB connection settings come from the "database" configuration section.
    app.config['MONGODB_DB'] = cfg.get("database", "name")
    app.config['MONGODB_USERNAME'] = cfg.get("database", "user")
    app.config['MONGODB_PASSWORD'] = cfg.get("database", "password")
    app.config['MONGODB_HOST'] = cfg.get("database", "host")
    app.config['MONGODB_PORT'] = int(cfg.get("database", "port"))
    # NOTE(review): the secret key is hard-coded in the source; consider loading
    # it from the configuration instead. Left unchanged to preserve behavior.
    app.config["SECRET_KEY"] = "KeepThisS3cr3t"
    db = MongoEngine(app)

    def register_blueprints(app):
        """Attach the ``jobs`` and ``admin`` blueprints to *app*."""
        # Imported here rather than at module level: prevents circular imports.
        from DmpWorkflow.core.views import jobs
        from DmpWorkflow.core.admin import admin
        app.register_blueprint(jobs)
        app.register_blueprint(admin)

    register_blueprints(app)

    def main():
        # announce the server version and the fully qualified host name in the application log
        app.logger.info("started DmpWorkflow Server Version: %s on %s",
                        version, getfqdn())
예제 #5
0
        if unit in defaults['memory']:
            defaults['memory']=float(defaults['memory'].replace(unit,""))
    max_mem = float(defaults['memory'])
    if max_mem >= 1e6:
        # must be in kB!
        max_mem/=1024 
    # get the max ratios
    try:
        executor.job.updateStatus("Running", "PreparingJob", hostname=gethostname(), body=executor.job.getJSONbody(),
                                  batchId=executor.batchId, cpu_max=max_cpu, mem_max=max_mem)
    except Exception as err:
        executor.logThis("EXCEPTION: %s", err)

    
    executor.logThis('Watchdog: maximum cpu: %s -- maximum memory: %s',str(max_cpu),str(max_mem))
    ratio_cpu_max = float(cfg.get("watchdog", "ratio_cpu"))
    ratio_mem_max = float(cfg.get("watchdog", "ratio_mem"))
    now = datetime.utcnow()
    proc = Process(target=executor.execute)
    proc.start()
    ps = ps_proc(proc.pid)
    prm = ProcessResourceMonitor(ps) #this monitor uses psutil for its information.
    while proc.is_alive():
        syst_cpu = prm.getCpuTime()
        memory = prm.getMemory()
        ## check time out conditions
        executor.logThis('Watchdog: current cpu: %s -- current memory: %s', str(syst_cpu),str(memory))
        if (syst_cpu / max_cpu >= ratio_cpu_max):
            killJob = True
            reason = "exceeding CPU time"
        if (memory/max_mem >= ratio_mem_max):
예제 #6
0
"""
Created on Mar 10, 2016

@author: zimmer
"""
from DmpWorkflow.config.defaults import cfg
from functools import wraps
from flask import request, Response

# Administrator credentials, read once at import time from the "server"
# section of the configuration; check_auth() compares against these.
USERNAME = cfg.get("server", "admin_user")
PASSWORD = cfg.get("server", "admin_password")


def check_auth(username, password):
    """Tell whether the supplied credentials match the configured ones.

    Both values are compared for equality against the module-level
    USERNAME and PASSWORD; the password is only looked at when the
    user name already matches.
    """
    if not username == USERNAME:
        return False
    return password == PASSWORD


def authenticate():
    """Build the 401 response whose header asks the client for basic-auth credentials."""
    message = ('Could not verify your access level for that URL.\n'
               'You have to login with proper credentials')
    challenge = {'WWW-Authenticate': 'Basic realm="Login Required"'}
    return Response(message, 401, challenge)


def requires_auth(f):
    @wraps(f)
예제 #7
0
from sys import path as sys_path

sys_path.append(abspath(oPjoin(dirname(__file__), '..')))
from flask.ext.script import Manager, Server, Shell
import DmpWorkflow.core.models as DmpWorkflowModels
from DmpWorkflow.core import app, db
from DmpWorkflow.config.defaults import cfg


def _make_context():
    """Return the mapping of names (app, db, models) exposed as shell context."""
    return {"app": app, "db": db, "models": DmpWorkflowModels}


# Command manager wrapping the Flask application.
manager = Manager(app)
# "runserver" command: whether the debugger and the reloader are enabled, and
# which host is used, are all read from the "server" configuration section.
manager.add_command(
    "runserver",
    Server(use_debugger=cfg.getboolean("server", "use_debugger"),
           use_reloader=cfg.getboolean("server", "use_reloader"),
           host=cfg.get("server", "host")))

# "shell" command: its context is supplied by _make_context().
manager.add_command("shell", Shell(make_context=_make_context))


def main():
    """Entry point: run the command manager."""
    manager.run()


# Only start the manager when this module is executed as a script.
if __name__ == "__main__":
    main()
예제 #8
0
from os.path import abspath, join as oPjoin, dirname
from sys import path as sys_path

sys_path.append(abspath(oPjoin(dirname(__file__), '..')))
from flask.ext.script import Manager, Server, Shell
import DmpWorkflow.core.models as DmpWorkflowModels
from DmpWorkflow.core import app, db
from DmpWorkflow.config.defaults import cfg


def _make_context():
    """Return the mapping of names (app, db, models) exposed as shell context."""
    return {"app": app, "db": db, "models": DmpWorkflowModels}


# Command manager wrapping the Flask application.
manager = Manager(app)
# "runserver" command: whether the debugger and the reloader are enabled, and
# which host is used, are all read from the "server" configuration section.
manager.add_command("runserver", Server(use_debugger=cfg.getboolean("server", "use_debugger"),
                                        use_reloader=cfg.getboolean("server", "use_reloader"),
                                        host=cfg.get("server", "host"))
                    )

# "shell" command: its context is supplied by _make_context().
manager.add_command("shell", Shell(make_context=_make_context))


def main():
    """Entry point: run the command manager."""
    manager.run()


# Only start the manager when this module is executed as a script.
if __name__ == "__main__":
    main()
예제 #9
0
 def getWorkDir(self):
     """Return the working directory path of this job.

     The path is built by joining the ``workdir`` option of the ``site``
     configuration section with ``str(self.title)``, ``str(self.type)`` and
     the value of ``self.getSixDigits(asPath=True)``.
     """
     root = cfg.get("site", "workdir")
     components = (str(self.title), str(self.type), self.getSixDigits(asPath=True))
     return oPath.join(root, *components)
예제 #10
0
import os.path as oPath
from ast import literal_eval
from os import environ
from jsonpickle import encode as Jencode, decode as Jdecode
from json import dumps
from time import ctime
from requests import post as Rpost
from importlib import import_module
from copy import deepcopy
from DmpWorkflow.config.defaults import FINAL_STATII, DAMPE_WORKFLOW_URL, DAMPE_WORKFLOW_ROOT, BATCH_DEFAULTS, cfg
from DmpWorkflow.utils.tools import mkdir, touch, rm, safe_copy, parseJobXmlToDict, getSixDigits, ResourceMonitor
from DmpWorkflow.utils.shell import run, make_executable  # , source_bash

# Batch-system backend module, selected at import time by BATCH_DEFAULTS['system'].
HPC = import_module("DmpWorkflow.hpc.%s" % BATCH_DEFAULTS['system'])
# NOTE(review): empty at module load; presumably filled in elsewhere — confirm.
PYTHONBIN = ""
# Value of the "ExternalsScript" option in the "site" configuration section.
ExtScript = cfg.get("site", "ExternalsScript")
# NOTE(review): presumably the number of log lines to keep/report — confirm against its users.
NUMLINES_LOG = 20

# todo2: add cfg parsing variables.
class DmpJob(object):
    def __init__(self, job_id, body=None, **kwargs):
        self.wd = oPath.abspath(".")
        self.title = None
        self.jobId = str(job_id)
        self.instanceId = None
        self.batchId = None
        self.InputFiles = []
        self.OutputFiles = []
        self.MetaData = []
        self.type = None
        self.release = None