def createJob(workflow, cmssw_ver, arch): workflow_args = FixWFArgs(cmssw_ver, arch, workflow, GetMatrixOptions(cmssw_ver, arch)) cmd = format("rm -rf %(workflow)s %(workflow)s_*; mkdir %(workflow)s; cd %(workflow)s; PATH=%(das_utils)s:$PATH runTheMatrix.py --maxSteps=0 -l %(workflow)s %(workflow_args)s",workflow=workflow,workflow_args=workflow_args, das_utils=CMS_BOT_DIR+"/das-utils") print "Running ",cmd getstatusoutput(cmd) try: workflow_dir = glob.glob(format("%(workflow)s/%(workflow)s_*", workflow=workflow))[0] getstatusoutput(format("mv %(workflow)s/runall-report-step123-.log %(workflow_dir)s/workflow.log; touch %(workflow_dir)s/cmdLog; mv %(workflow_dir)s .; rm -rf %(workflow)s", workflow=workflow, workflow_dir=workflow_dir)) print "Commands for workflow %s generated" % workflow except Exception, e: print "ERROR: Creating workflow job:",workflow,str(e) getstatusoutput("rm -rf %s %s_*" % (workflow,workflow))
def getWorkflowStatsFromES(release='*', arch='*', lastNdays=7, page_size=0):
    """Fetch successful relval workflow stats from Elasticsearch.

    Queries the relvals_stats_* indices for entries of the given release
    cycle and architecture over the last *lastNdays* days, keeping only
    records with exit_code 0 and a non-zero cpu_max.  *page_size* is kept
    for interface compatibility but is not used.  Returns the raw hit list.
    """
    es_q = format(
        '(NOT cpu_max:0) AND (exit_code:0) AND release:%(release_cycle)s AND architecture:%(architecture)s',
        release_cycle=release + "_*",
        architecture=arch)
    # Time window in milliseconds since epoch; time() is sampled twice,
    # matching the original argument-evaluation order.
    window_start = 1000 * int(time() - (86400 * lastNdays))
    result = es_query(index='relvals_stats_*',
                      query=es_q,
                      start_time=window_start,
                      end_time=1000 * int(time()),
                      scroll=True)
    return result['hits']['hits']
def getWorkflowStatsFromES(release='*', arch='*', lastNdays=7, page_size=0):
    """Return raw ES hits for successful relval stats.

    NOTE(review): this redefines getWorkflowStatsFromES and shadows any
    earlier definition in the file; the query it issues is the same.
    *page_size* is accepted but unused.
    """
    query_str = format(
        '(NOT cpu_max:0) AND (exit_code:0) AND release:%(release_cycle)s AND architecture:%(architecture)s',
        release_cycle=release + "_*",
        architecture=arch)
    start_ms = 1000 * int(time() - (86400 * lastNdays))  # N days back, in ms
    end_ms = 1000 * int(time())
    return es_query(index='relvals_stats_*',
                    query=query_str,
                    start_time=start_ms,
                    end_time=end_ms,
                    scroll=True)['hits']['hits']
# NOTE(review): mangled fragment (original newlines lost).  Spawns one
# createJob thread per workflow (the leading statements presumably sit inside
# a loop over workflows whose header is not visible here -- confirm), joins
# all threads, then queries relvals_stats_* for successful runs over the last
# 30 days.  If the query returns no hits it retries once with a release cycle
# rebuilt from the first three version components (e.g. CMSSW_11_2_X) before
# giving up.  Python 3 print() style, unlike sibling fragments in this file.
t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch)) thrds.append(t) t.start() for t in thrds: t.join() #Get Workflow stats from ES print("Getting Workflow stats from ES.....") stats = {} release_cycle = str.lower(cmssw_ver.split("_X_")[0] + "_X") while True: stats = es_query( index='relvals_stats_*', query=format( '(NOT cpu_max:0) AND exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)', release_cycle=release_cycle + "_*", architecture=arch, workflows=wf_query[4:]), start_time=1000 * int(time() - (86400 * 30)), end_time=1000 * int(time()), scroll=True) if (not 'hits' in stats) or (not 'hits' in stats['hits']) or ( not stats['hits']['hits']): xrelease_cycle = str.lower("_".join(cmssw_ver.split("_", 4)[0:3]) + "_X") if xrelease_cycle != release_cycle: release_cycle = xrelease_cycle print("Retry: Setting release cycle to ", release_cycle) continue break
# NOTE(review): mangled fragment (original newlines lost); it also ends
# mid-dict-literal, so it is incomplete at the tail.  Throttles thread
# creation to at most `jobs` concurrent createJob threads (pruning dead ones
# each second), joins them all, then fetches 10 days of exit_code:0 relval
# stats from ES and starts building the jobs.json structure.  `wf_query[4:]`
# presumably strips a leading " OR " from an OR-joined workflow clause --
# confirm against the fragment that builds wf_query.
while len(thrds) >= jobs: sleep(1) thrds = [t for t in thrds if t.is_alive()] t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch)) thrds.append(t) t.start() for t in thrds: t.join() #Get Workflow stats from ES print "Getting Workflow stats from ES....." stats = es_query( index='relvals_stats_*', query=format( 'exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)', release_cycle=cmssw_ver.split("_X_")[0] + "_X_*", architecture=arch, workflows=wf_query[4:]), start_time=1000 * int(time() - (86400 * 10)), end_time=1000 * int(time())) wf_stats = es_workflow_stats(stats) #Create Jobs print "Creating jobs.json file ...." jobs = {} jobs["final_job"] = "echo All Done" jobs["final_per_group"] = { "command": SCRIPT_DIR + "/workflow_final.py %(jobs_results)s", "cpu": 10, "rss": 10 * 1024 * 1024,
"architecture": "slc6_amd64_gcc530", "release_cycle": "CMSSW_9_3_X_*", "workflows": "" } workflows = [] wfs = [] for wf in [w for w in sys.argv[1].split(",") if w]: wfs.append(wf) while wfs: queryInfo["workflows"] = " OR ".join(wfs[0:50]) wfs = wfs[50:] wf_hits = es_query( index='relvals_stats_*', query=format( 'release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)', **queryInfo), start_time=int(time() * 1000) - int(86400 * 1000 * 7), end_time=int(time() * 1000)) stats = es_workflow_stats(wf_hits) for wf in stats: wf_weight = 0 for step in stats[wf]: stat = stats[wf][step] wf_weight += stat["cpu"] workflows.append({"workflow": wf, "weight": wf_weight}) order_workflows = [] for item in sorted(workflows, key=itemgetter("weight"), reverse=True): order_workflows.append([item["workflow"], item["weight"]]) wfs = []
# NOTE(review): mangled fragment starting mid-loop (the enclosing while/for
# headers are not visible).  Python 2 variant of the fragment on the earlier
# line: prunes finished threads, spawns createJob threads, joins them, then
# queries 30 days of stats with a one-shot retry using a release cycle
# rebuilt from the first three version components when no hits come back.
# Differs from the py3 variant by not passing scroll=True -- presumably an
# older revision; confirm which one is current.
sleep(1) thrds = [ t for t in thrds if t.is_alive() ] t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch)) thrds.append(t) t.start() for t in thrds: t.join() #Get Workflow stats from ES print "Getting Workflow stats from ES....." stats = {} release_cycle=str.lower(cmssw_ver.split("_X_")[0]+"_X") while True: stats = es_query(index='relvals_stats_*', query=format('(NOT cpu_max:0) AND exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)', release_cycle=release_cycle+"_*", architecture=arch, workflows=wf_query[4:] ), start_time=1000*int(time()-(86400*30)), end_time=1000*int(time())) if (not 'hits' in stats) or (not 'hits' in stats['hits']) or (not stats['hits']['hits']): xrelease_cycle = str.lower("_".join(cmssw_ver.split("_",4)[0:3])+"_X") if xrelease_cycle!=release_cycle: release_cycle=xrelease_cycle print "Retry: Setting release cycle to ",release_cycle continue break wf_stats = es_workflow_stats(stats) #Create Jobs
# NOTE(review): mangled fragment, incomplete at both ends (starts mid-for
# over threads, ends mid-call to es_krb_query_exe).  Builds the stats query
# over a 10-day window; for _DEVEL_ releases it switches to a Kerberos-
# authenticated query against cmssdt-relvals_stats_summary* using lowercase
# regex matches on release and architecture.  The use_krb flag is set inside
# the while loop but the loop's exit is outside this view -- confirm the
# retry semantics against the full file.
t.join() #Get Workflow stats from ES print "Getting Workflow stats from ES....." stats = {} release_cycle = cmssw_ver.split("_X_")[0] + "_X" st = 1000 * int(time() - (86400 * 10)) et = 1000 * int(time()) use_krb = False while True: es_q = format( 'exit_code:0 AND release:%(release_cycle)s* AND architecture:%(architecture)s AND (%(workflows)s)', release_cycle=release_cycle, architecture=arch, workflows=wf_query[4:]) if '_DEVEL_' in cmssw_ver: use_krb = True release_cycle = (cmssw_ver.split("_X_")[0] + "_X").lower() es_q = format( 'exit_code:0 AND release:/%(release_cycle)s.*/ AND architecture:/%(architecture)s.*/ AND (%(workflows)s)', release_cycle=release_cycle, architecture=arch, workflows=wf_query[4:]) if use_krb: stats = es_krb_query_exe(index='cmssdt-relvals_stats_summary*', query=es_q, start_time=st,
# NOTE(review): mangled fragment (newlines lost, so the nesting of the
# while/thread-spawn statements under the for loop is ambiguous -- presumably
# the throttle and spawn run once per workflow; confirm against the original
# layout).  Builds an " OR workflow:<wf>" query clause per CLI workflow,
# spawns throttled createJob threads, joins them, fetches 10 days of
# exit_code:0 stats, then starts assembling jobs.json and lists the generated
# workflow.log files via find.
for wf in sys.argv[1].split(","): wf_query+=" OR workflow:"+wf while len(thrds)>=jobs: sleep(1) thrds = [ t for t in thrds if t.is_alive() ] t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch)) thrds.append(t) t.start() for t in thrds: t.join() #Get Workflow stats from ES print "Getting Workflow stats from ES....." stats = es_query(index='relvals_stats_*', query=format('exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)', release_cycle=cmssw_ver.split("_X_")[0]+"_X_*", architecture=arch, workflows=wf_query[4:] ), start_time=1000*int(time()-(86400*10)), end_time=1000*int(time())) wf_stats = es_workflow_stats(stats) #Create Jobs print "Creating jobs.json file ...." jobs = {} jobs["final_job"] = "echo All Done" jobs["final_per_group"] = {"command": SCRIPT_DIR+"/workflow_final.py %(jobs_results)s", "cpu": 10, "rss": 10*1024*1024, "time" : 30} jobs["env"]={} jobs["jobs"]=[] e , o = getstatusoutput ("find . -name workflow.log -type f | sed 's|^./||'")