Ejemplo n.º 1
0
def createJob(workflow, cmssw_ver, arch):
  """Generate the runTheMatrix output directory for a single workflow.

  Runs ``runTheMatrix.py --maxSteps=0 -l workflow`` inside a scratch
  directory named after the workflow, then moves the first produced
  ``workflow_*`` directory into the current directory, storing the matrix
  report inside it as ``workflow.log`` and creating an empty ``cmdLog``.

  Args:
    workflow: workflow identifier. It is interpolated unquoted into shell
      command lines.
    cmssw_ver: release name, passed to GetMatrixOptions and FixWFArgs.
    arch: architecture, passed to GetMatrixOptions and FixWFArgs.

  Returns:
    None. Failures are reported on stdout and the partial output is
    removed; no exception is propagated from the post-processing step.
  """
  workflow_args = FixWFArgs(cmssw_ver, arch, workflow, GetMatrixOptions(cmssw_ver, arch))
  # das-utils is put first on PATH for the runTheMatrix.py invocation only.
  cmd = format("rm -rf %(workflow)s %(workflow)s_*; mkdir %(workflow)s; cd %(workflow)s; PATH=%(das_utils)s:$PATH runTheMatrix.py --maxSteps=0 -l %(workflow)s %(workflow_args)s",workflow=workflow,workflow_args=workflow_args, das_utils=CMS_BOT_DIR+"/das-utils")
  # Single-argument print(...) behaves the same on Python 2 and 3; the
  # double space reproduces the output of the former `print "Running ",cmd`.
  print("Running  %s" % cmd)
  # The exit status is not inspected: a failed run is detected below, when
  # no workflow_* directory is found.
  getstatusoutput(cmd)
  try:
    # [0] raises IndexError when nothing was generated; handled below.
    workflow_dir = glob.glob(format("%(workflow)s/%(workflow)s_*", workflow=workflow))[0]
    getstatusoutput(format("mv %(workflow)s/runall-report-step123-.log %(workflow_dir)s/workflow.log; touch %(workflow_dir)s/cmdLog; mv %(workflow_dir)s .; rm -rf %(workflow)s", workflow=workflow, workflow_dir=workflow_dir))
    print("Commands for workflow %s generated" % workflow)
  except Exception as e:
    print("ERROR: Creating workflow job: %s %s" % (workflow, str(e)))
    # Remove both the scratch directory and any partially generated output.
    getstatusoutput("rm -rf %s %s_*" % (workflow,workflow))
Ejemplo n.º 2
0
def createJob(workflow, cmssw_ver, arch):
  """Generate the runTheMatrix output directory for a single workflow.

  Runs ``runTheMatrix.py --maxSteps=0 -l workflow`` inside a scratch
  directory named after the workflow, then moves the first produced
  ``workflow_*`` directory into the current directory, storing the matrix
  report inside it as ``workflow.log`` and creating an empty ``cmdLog``.

  Args:
    workflow: workflow identifier. It is interpolated unquoted into shell
      command lines.
    cmssw_ver: release name, passed to GetMatrixOptions and FixWFArgs.
    arch: architecture, passed to GetMatrixOptions and FixWFArgs.

  Returns:
    None. Failures are reported on stdout and the partial output is
    removed; no exception is propagated from the post-processing step.
  """
  workflow_args = FixWFArgs(cmssw_ver, arch, workflow, GetMatrixOptions(cmssw_ver, arch))
  # das-utils is put first on PATH for the runTheMatrix.py invocation only.
  cmd = format("rm -rf %(workflow)s %(workflow)s_*; mkdir %(workflow)s; cd %(workflow)s; PATH=%(das_utils)s:$PATH runTheMatrix.py --maxSteps=0 -l %(workflow)s %(workflow_args)s",workflow=workflow,workflow_args=workflow_args, das_utils=CMS_BOT_DIR+"/das-utils")
  # Single-argument print(...) behaves the same on Python 2 and 3; the
  # double space reproduces the output of the former `print "Running ",cmd`.
  print("Running  %s" % cmd)
  # The exit status is not inspected: a failed run is detected below, when
  # no workflow_* directory is found.
  getstatusoutput(cmd)
  try:
    # [0] raises IndexError when nothing was generated; handled below.
    workflow_dir = glob.glob(format("%(workflow)s/%(workflow)s_*", workflow=workflow))[0]
    getstatusoutput(format("mv %(workflow)s/runall-report-step123-.log %(workflow_dir)s/workflow.log; touch %(workflow_dir)s/cmdLog; mv %(workflow_dir)s .; rm -rf %(workflow)s", workflow=workflow, workflow_dir=workflow_dir))
    print("Commands for workflow %s generated" % workflow)
  except Exception as e:
    print("ERROR: Creating workflow job: %s %s" % (workflow, str(e)))
    # Remove both the scratch directory and any partially generated output.
    getstatusoutput("rm -rf %s %s_*" % (workflow,workflow))
Ejemplo n.º 3
0
def getWorkflowStatsFromES(release='*', arch='*', lastNdays=7, page_size=0):
    """Return the ES hits for successful relval runs in a recent time window.

    The query keeps documents with ``exit_code:0`` and a non-zero
    ``cpu_max`` whose release matches ``release`` followed by ``_*`` and
    whose architecture matches ``arch``.

    Args:
        release: release-cycle prefix; ``"_*"`` is appended before querying.
        arch: architecture pattern, used verbatim in the query.
        lastNdays: width of the time window in days, ending now.
        page_size: accepted for interface compatibility; not used here.

    Returns:
        The ``['hits']['hits']`` entry of the ``es_query`` response.
    """
    seconds_per_day = 86400
    es_filter = format(
        '(NOT cpu_max:0) AND (exit_code:0) AND release:%(release_cycle)s AND architecture:%(architecture)s',
        release_cycle=release + "_*",
        architecture=arch,
    )
    # Window bounds are whole seconds scaled by 1000.
    window_start = int(time() - (seconds_per_day * lastNdays)) * 1000
    window_end = int(time()) * 1000
    response = es_query(index='relvals_stats_*',
                        query=es_filter,
                        start_time=window_start,
                        end_time=window_end,
                        scroll=True)
    return response['hits']['hits']
Ejemplo n.º 4
0
def getWorkflowStatsFromES(release='*', arch='*', lastNdays=7, page_size=0):
    """Query ES for successful relval statistics over the last ``lastNdays`` days.

    Only documents with ``exit_code:0`` and a non-zero ``cpu_max`` are
    selected, restricted to releases matching ``release`` + ``"_*"`` and to
    the architecture pattern ``arch``.

    Args:
        release: release-cycle prefix that ``"_*"`` is appended to.
        arch: architecture pattern inserted as-is into the query.
        lastNdays: number of days to look back from the current time.
        page_size: not used by this function.

    Returns:
        Whatever the ``es_query`` response holds under ``['hits']['hits']``.
    """
    query_text = format(
        '(NOT cpu_max:0) AND (exit_code:0) AND release:%(release_cycle)s AND architecture:%(architecture)s',
        release_cycle=release + "_*",
        architecture=arch)
    # Both bounds are truncated to whole seconds, then multiplied by 1000.
    lookback_seconds = 86400 * lastNdays
    since = 1000 * int(time() - lookback_seconds)
    until = 1000 * int(time())
    result = es_query(index='relvals_stats_*', query=query_text,
                      start_time=since, end_time=until, scroll=True)
    hits = result['hits']['hits']
    return hits
Ejemplo n.º 5
0
    t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch))
    thrds.append(t)
    t.start()
# Block until every job-creation thread still tracked in thrds has finished.
for t in thrds:
    t.join()

# Get workflow stats from ES, looking back 30 days from now.
print("Getting Workflow stats from ES.....")
stats = {}
# First guess at the release cycle: everything before the first "_X_" in the
# release name plus "_X", lower-cased.
release_cycle = (cmssw_ver.split("_X_")[0] + "_X").lower()
while True:
    es_filter = format(
        '(NOT cpu_max:0) AND exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)',
        release_cycle=release_cycle + "_*",
        architecture=arch,
        # Skip the first four characters of wf_query (presumably a leading
        # " OR " separator -- confirm where wf_query is built).
        workflows=wf_query[4:])
    window_start = 1000 * int(time() - (86400 * 30))
    window_end = 1000 * int(time())
    stats = es_query(index='relvals_stats_*',
                     query=es_filter,
                     start_time=window_start,
                     end_time=window_end,
                     scroll=True)
    if 'hits' in stats and 'hits' in stats['hits'] and stats['hits']['hits']:
        # Got at least one hit; keep this response.
        break
    # No hits: fall back to a cycle built from only the first three
    # "_"-separated fields of the release name, and query once more.
    xrelease_cycle = ("_".join(cmssw_ver.split("_", 4)[0:3]) + "_X").lower()
    if xrelease_cycle == release_cycle:
        # The fallback is what was just tried; give up with the empty result.
        break
    release_cycle = xrelease_cycle
    print("Retry: Setting release cycle to ", release_cycle)
Ejemplo n.º 6
0
    while len(thrds) >= jobs:
        sleep(1)
        thrds = [t for t in thrds if t.is_alive()]
    t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch))
    thrds.append(t)
    t.start()
# Wait for every job-creation thread still tracked in thrds to finish.
for t in thrds:
    t.join()

# Get workflow stats from ES: exit_code:0 documents for the requested
# workflows, this release cycle and architecture, over the last 10 days.
# print(...) with one string argument is valid on both Python 2 and 3.
print("Getting Workflow stats from ES.....")
stats = es_query(
    index='relvals_stats_*',
    query=format(
        'exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)',
        # Everything before the first "_X_" in the release name, plus "_X_*".
        release_cycle=cmssw_ver.split("_X_")[0] + "_X_*",
        architecture=arch,
        # Skip the first four characters of wf_query (presumably a leading
        # " OR " separator -- confirm where wf_query is built).
        workflows=wf_query[4:]),
    start_time=1000 * int(time() - (86400 * 10)),
    end_time=1000 * int(time()))

wf_stats = es_workflow_stats(stats)

# Create jobs
print("Creating jobs.json file ....")
# NOTE: this rebinds jobs, which the thread-throttling loop above compares
# against len(thrds); from here on it is the job-description dict.
jobs = {}
jobs["final_job"] = "echo All Done"
jobs["final_per_group"] = {
    "command": SCRIPT_DIR + "/workflow_final.py %(jobs_results)s",
    "cpu": 10,
    "rss": 10 * 1024 * 1024,
Ejemplo n.º 7
0
    "architecture": "slc6_amd64_gcc530",
    "release_cycle": "CMSSW_9_3_X_*",
    "workflows": ""
}
# One {"workflow", "weight"} record per workflow that has stats in ES.
workflows = []
# Requested workflows arrive comma-separated in the first command-line
# argument; empty entries are dropped.
wfs = [w for w in sys.argv[1].split(",") if w]

# Query ES in batches of at most 50 workflows per request.
while wfs:
    batch, wfs = wfs[:50], wfs[50:]
    queryInfo["workflows"] = " OR ".join(batch)
    es_filter = format(
        'release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)',
        **queryInfo)
    # Seven-day window ending now; both bounds are time() scaled by 1000.
    window_start = int(time() * 1000) - int(86400 * 1000 * 7)
    window_end = int(time() * 1000)
    wf_hits = es_query(index='relvals_stats_*',
                       query=es_filter,
                       start_time=window_start,
                       end_time=window_end)
    stats = es_workflow_stats(wf_hits)
    for wf in stats:
        # A workflow's weight is the sum of the "cpu" value of each of its steps.
        wf_weight = sum(stats[wf][step]["cpu"] for step in stats[wf])
        workflows.append({"workflow": wf, "weight": wf_weight})

# [workflow, weight] pairs, heaviest first.
order_workflows = [
    [entry["workflow"], entry["weight"]]
    for entry in sorted(workflows, key=itemgetter("weight"), reverse=True)
]

wfs = []
Ejemplo n.º 8
0
    sleep(1)
    thrds = [ t for t in thrds if t.is_alive() ]
  t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch))
  thrds.append(t)
  t.start()
# Wait for every job-creation thread still tracked in thrds to finish.
for t in thrds:
  t.join()

# Get workflow stats from ES, looking back 30 days from now.
# print(...) with one string argument is valid on both Python 2 and 3.
print("Getting Workflow stats from ES.....")
stats = {}
# First guess at the release cycle: everything before the first "_X_" in the
# release name plus "_X", lower-cased.
release_cycle = (cmssw_ver.split("_X_")[0]+"_X").lower()
while True:
  stats = es_query(index='relvals_stats_*',
                 query=format('(NOT cpu_max:0) AND exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)',
                              release_cycle=release_cycle+"_*",
                              architecture=arch,
                              # Skip the first four characters of wf_query
                              # (presumably a leading " OR " separator).
                              workflows=wf_query[4:]
                             ),
                 start_time=1000*int(time()-(86400*30)),
                 end_time=1000*int(time()))
  if ('hits' in stats) and ('hits' in stats['hits']) and stats['hits']['hits']:
    # Got at least one hit; keep this response.
    break
  # No hits: fall back to a cycle built from only the first three
  # "_"-separated fields of the release name, and query once more.
  xrelease_cycle = ("_".join(cmssw_ver.split("_",4)[0:3])+"_X").lower()
  if xrelease_cycle == release_cycle:
    # The fallback is what was just tried; give up with the empty result.
    break
  release_cycle = xrelease_cycle
  # The double space reproduces the output of the former two-argument
  # print statement.
  print("Retry: Setting release cycle to  %s" % release_cycle)

wf_stats = es_workflow_stats(stats)

#Create Jobs
Ejemplo n.º 9
0
    t.join()

# Get workflow stats from ES
# print(...) with one string argument is valid on both Python 2 and 3.
print("Getting Workflow stats from ES.....")

stats = {}
# Release cycle: everything before the first "_X_" in the release name,
# with "_X" appended again.
release_cycle = cmssw_ver.split("_X_")[0] + "_X"
# Query window: the last 10 days up to now, as whole seconds scaled by 1000.
st = 1000 * int(time() - (86400 * 10))
et = 1000 * int(time())
# Switched on below for "_DEVEL_" releases, where it selects the
# es_krb_query_exe query path.
use_krb = False

while True:

    es_q = format(
        'exit_code:0 AND release:%(release_cycle)s* AND architecture:%(architecture)s AND (%(workflows)s)',
        release_cycle=release_cycle,
        architecture=arch,
        workflows=wf_query[4:])
    if '_DEVEL_' in cmssw_ver:
        use_krb = True
        release_cycle = (cmssw_ver.split("_X_")[0] + "_X").lower()
        es_q = format(
            'exit_code:0 AND release:/%(release_cycle)s.*/ AND architecture:/%(architecture)s.*/ AND (%(workflows)s)',
            release_cycle=release_cycle,
            architecture=arch,
            workflows=wf_query[4:])

    if use_krb:
        stats = es_krb_query_exe(index='cmssdt-relvals_stats_summary*',
                                 query=es_q,
                                 start_time=st,
Ejemplo n.º 10
0
# Start one createJob thread per comma-separated workflow id given in the
# first command-line argument, never exceeding `jobs` tracked threads.
for wf in sys.argv[1].split(","):
    # Each workflow adds one " OR workflow:ID" clause to the ES filter.
    wf_query = wf_query + " OR workflow:" + wf
    # At the limit: poll once per second, dropping finished threads from
    # thrds until a slot frees up.
    while len(thrds) >= jobs:
        sleep(1)
        thrds = [t for t in thrds if t.is_alive()]
    t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch))
    thrds.append(t)
    t.start()

# Wait for the threads that are still being tracked.
for t in thrds:
    t.join()

# Get workflow stats from ES: exit_code:0 documents for the requested
# workflows, this release cycle and architecture, over the last 10 days.
# print(...) with one string argument is valid on both Python 2 and 3.
print("Getting Workflow stats from ES.....")
stats = es_query(index='relvals_stats_*',
                 query=format('exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)',
                              # Everything before the first "_X_" in the
                              # release name, plus "_X_*".
                              release_cycle=cmssw_ver.split("_X_")[0]+"_X_*",
                              architecture=arch,
                              # wf_query is built as repeated
                              # " OR workflow:ID" clauses; [4:] drops the
                              # leading " OR ".
                              workflows=wf_query[4:]
                             ),
                 start_time=1000*int(time()-(86400*10)),
                 end_time=1000*int(time()))

wf_stats = es_workflow_stats(stats)

# Create jobs
# print(...) with one string argument is valid on both Python 2 and 3.
print("Creating jobs.json file ....")
# NOTE: this rebinds jobs, which the thread-throttling loop above compares
# against len(thrds); from here on it is the job-description dict.
jobs = {}
jobs["final_job"] = "echo All Done"
# %(jobs_results)s is left unexpanded in the command string here.
jobs["final_per_group"] = {"command": SCRIPT_DIR+"/workflow_final.py %(jobs_results)s", "cpu": 10,  "rss": 10*1024*1024, "time" : 30}
jobs["env"]={}
jobs["jobs"]=[]
# Find every regular file named workflow.log below the current directory;
# sed strips the leading "./" from each path. e is the exit status, o the
# command output.
e , o = getstatusoutput ("find . -name workflow.log -type f | sed 's|^./||'")