def getWorkflowStatsFromES(release='*', arch='*', lastNdays=7, page_size=0):
    """Return raw ES hits for successful relval workflow steps.

    Queries the ``relvals_stats_*`` indices for documents with exit_code 0
    and a non-zero ``cpu_max``, restricted to the given release cycle and
    architecture, within the last ``lastNdays`` days.

    release:   release-cycle prefix; ``_*`` is appended so any IB of the
               cycle matches.
    arch:      SCRAM architecture pattern (ES query-string syntax).
    lastNdays: look-back window in days.
    page_size: scroll page size forwarded to ``es_query`` when > 0.
               NOTE(review): this parameter was previously accepted but
               silently ignored; 0 preserves the old behavior exactly
               (es_query's own default page size is used).

    Returns the list of hit documents (``stats['hits']['hits']``).
    """
    extra = {}
    if page_size > 0:
        # Forward an explicit page size only when the caller asked for one,
        # keeping the default call identical to the previous behavior.
        extra['page_size'] = page_size
    stats = es_query(
        index='relvals_stats_*',
        query=format(
            '(NOT cpu_max:0) AND (exit_code:0) AND release:%(release_cycle)s AND architecture:%(architecture)s',
            release_cycle=release + "_*",
            architecture=arch),
        start_time=1000 * int(time() - (86400 * lastNdays)),
        end_time=1000 * int(time()),
        scroll=True,
        **extra)
    return stats['hits']['hits']
def getWorkflowStatsFromES(release='*', arch='*', lastNdays=7, page_size=0):
    """Fetch successful relval workflow stats from ElasticSearch.

    Searches the ``relvals_stats_*`` indices for steps with exit_code 0 and
    a non-zero cpu_max, matching the given release cycle and architecture,
    over the last ``lastNdays`` days.  ``page_size`` is accepted for
    signature compatibility but not used by this variant.

    Returns the raw hit list (``hits.hits``) from the ES response.
    """
    cycle_pattern = '%s_*' % release
    es_filter = format(
        '(NOT cpu_max:0) AND (exit_code:0) AND release:%(release_cycle)s AND architecture:%(architecture)s',
        release_cycle=cycle_pattern,
        architecture=arch)
    window_seconds = 86400 * lastNdays
    result = es_query(
        index='relvals_stats_*',
        query=es_filter,
        start_time=1000 * int(time() - window_seconds),
        end_time=1000 * int(time()),
        scroll=True)
    return result['hits']['hits']
# Fragment (original indentation lost; kept byte-identical rather than
# guessing structure).  Purpose: spawn createJob threads per workflow,
# wait for all of them, then pull per-workflow relval stats from ES.
# The while-True loop retries the ES query once with a narrower release
# cycle (first three "_"-separated fields of cmssw_ver plus "_X") when the
# first query returns no hits, then aggregates via es_workflow_stats.
# NOTE(review): the leading threading.Thread creation presumably sits inside
# a per-workflow loop in the full file (see sibling variants) — confirm
# before reformatting.
t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch)) thrds.append(t) t.start() for t in thrds: t.join() #Get Workflow stats from ES print("Getting Workflow stats from ES.....") stats = {} release_cycle = str.lower(cmssw_ver.split("_X_")[0] + "_X") while True: stats = es_query( index='relvals_stats_*', query=format( '(NOT cpu_max:0) AND exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)', release_cycle=release_cycle + "_*", architecture=arch, workflows=wf_query[4:]), start_time=1000 * int(time() - (86400 * 30)), end_time=1000 * int(time()), scroll=True) if (not 'hits' in stats) or (not 'hits' in stats['hits']) or ( not stats['hits']['hits']): xrelease_cycle = str.lower("_".join(cmssw_ver.split("_", 4)[0:3]) + "_X") if xrelease_cycle != release_cycle: release_cycle = xrelease_cycle print("Retry: Setting release cycle to ", release_cycle) continue break wf_stats = es_workflow_stats(stats)
# Fragment starting mid-if/else (original indentation lost; kept
# byte-identical).  Purpose: locate the cmsbot directory to import es_utils,
# then, when run as a script, parse release/arch/days/job/page options,
# derive the production SCRAM_ARCH for a release from config.map (falling
# back to slc6_amd64_gcc530 on grep failure), and dump the matching
# 'ib-dataset-*' ES documents as JSON over the last N days.
# NOTE(review): uses a Python 2 print statement — this variant predates the
# py3 siblings in this file.
else: cmsbot_dir=dirname(dirname(abspath(argv[0]))) sys.path.insert(0,cmsbot_dir) from es_utils import es_query if __name__ == "__main__": from optparse import OptionParser parser = OptionParser(usage="%prog ") parser.add_option("-r", "--release", dest="release", help="Release filter", type=str, default=".*") parser.add_option("-a", "--architecture", dest="arch", help="SCRAM_ARCH filter. Production arch for a release cycle is used if found otherwise slc6_amd64_gcc530", type=str, default=None) parser.add_option("-d", "--days", dest="days", help="Files access in last n days", type=int, default=7) parser.add_option("-j", "--job", dest="job", help="Parallel jobs to run", type=int, default=4) parser.add_option("-p", "--page", dest="page_size", help="Page size, default 0 means no page and get all data in one go", type=int, default=0) opts, args = parser.parse_args() if not opts.arch: if opts.release==".*": opts.arch=".*" else: script_path = abspath(dirname(argv[0])) err, out = getstatusoutput("grep 'RELEASE_QUEUE=%s;' %s/config.map | grep -v 'DISABLED=1;' | grep 'PROD_ARCH=1;' | tr ';' '\n' | grep 'SCRAM_ARCH=' | sed 's|.*=||'" % (opts.release, script_path)) if err: opts.arch="slc6_amd64_gcc530" else: opts.arch=out if opts.release!=".*": opts.release=opts.release+".*" end_time = int(time()*1000) start_time = end_time -int(86400*1000*opts.days) query = "release:/%s/ AND architecture:/%s/" % (opts.release.lower(), opts.arch) es_data = es_query('ib-dataset-*', query, start_time,end_time,scroll=True,page_size=10000) print json.dumps(es_data, indent=2, sort_keys=True, separators=(',',': '))
# Fragment (indentation lost; kept byte-identical; ends mid-dict — the
# "final_per_group" literal continues past this chunk).  Purpose: throttle
# createJob threads to at most `jobs` concurrent workers, join them all,
# query ES for 10 days of successful relval step stats for the release
# cycle, aggregate with es_workflow_stats, then start building the
# jobs.json structure (final_job command plus per-group finalization job
# resource limits).  Python 2 print statements.
# NOTE(review): unlike the L3/L11 variants this one has no release-cycle
# retry and no "(NOT cpu_max:0)" filter — presumably an older revision.
while len(thrds) >= jobs: sleep(1) thrds = [t for t in thrds if t.is_alive()] t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch)) thrds.append(t) t.start() for t in thrds: t.join() #Get Workflow stats from ES print "Getting Workflow stats from ES....." stats = es_query( index='relvals_stats_*', query=format( 'exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)', release_cycle=cmssw_ver.split("_X_")[0] + "_X_*", architecture=arch, workflows=wf_query[4:]), start_time=1000 * int(time() - (86400 * 10)), end_time=1000 * int(time())) wf_stats = es_workflow_stats(stats) #Create Jobs print "Creating jobs.json file ...." jobs = {} jobs["final_job"] = "echo All Done" jobs["final_per_group"] = { "command": SCRIPT_DIR + "/workflow_final.py %(jobs_results)s", "cpu": 10, "rss": 10 * 1024 * 1024,
# Fragment starting mid-option-setup (indentation lost; kept byte-identical).
# Purpose: finish CLI parsing (days/offset), query ES over the
# [now-offset-days, now-offset] window (seconds converted to ms for ES),
# filter hits whose 'exception' field mentions TBranchElement::GetBasket,
# and print them sorted by '@timestamp'.  Python 2 print statement.
# See the complete variant of this script at the L7 chunk of this file set.
parser.add_option("-d", "--days", dest="days", default=7, type="int", help="Number of days to search data for, default is 7") parser.add_option( "-o", "--offset", dest="offset", default=0, type="int", help="Number of days to offset from the current day. Default is 0") opts, args = parser.parse_args() end_time = int(time()) - (opts.offset * 86400) stats = es_query(index=opts.index, query=opts.query, start_time=1000 * (end_time - (86400 * opts.days)), end_time=end_time * 1000) matched = [] for h in stats['hits']['hits']: hit = h["_source"] if 'TBranchElement::GetBasket' in hit['exception']: matched.append(hit) for hit in sorted(matched, key=itemgetter('@timestamp')): print "Release:", hit['release'], "\nArchitecture:", hit[ 'architecture'], "\nWorkflow:", hit['workflow'] + "/" + hit[ 'step'], "\nException:", hit[ 'exception'], "\n-----------------------------------------"
#!/usr/bin/env python from operator import itemgetter from time import sleep, time from es_utils import es_query, format, es_workflow_stats from optparse import OptionParser parser = OptionParser(usage="%prog [-m|--memory <memory>] [-c|--cpu <cpu>] [-j|--jobs <jobs-json-file>]") parser.add_option("-i", "--index", dest="index", default=None, help="Name of the ElasticSearch Index e.g. ib-matrix-*") parser.add_option("-q", "--query", dest="query", default=None, help="Query string e.g. release:RELEASE AND architecture:ARCH") parser.add_option("-d", "--days", dest="days", default=7, type="int", help="Number of days to search data for, default is 7") parser.add_option("-o", "--offset", dest="offset", default=0, type="int", help="Number of days to offset from the current day. Default is 0") opts, args = parser.parse_args() end_time=int(time())-(opts.offset*86400) stats = es_query(index=opts.index,query=opts.query,start_time=1000*(end_time-(86400*opts.days)),end_time=end_time*1000) matched=[] for h in stats['hits']['hits']: hit = h["_source"] if 'TBranchElement::GetBasket' in hit['exception']: matched.append(hit) for hit in sorted(matched,key=itemgetter('@timestamp')): print "Release:",hit['release'],"\nArchitecture:",hit['architecture'],"\nWorkflow:",hit['workflow']+"/"+hit['step'],"\nException:",hit['exception'],"\n-----------------------------------------"
# Fragment starting mid-add_option call (indentation lost; kept
# byte-identical).  Purpose: finish CLI parsing, resolve the production
# SCRAM_ARCH for the requested release from config.map via run_cmd
# (fallback slc6_amd64_gcc530), build a regex-style release/architecture
# query and dump matching 'ib-dataset-*' documents as JSON.
# This variant restricts returned fields to ["lfn"]; the L9 sibling
# requests full documents with page_size=10000 instead.
"--page", dest="page_size", help="Page size, default 0 means no page and get all data in one go", type=int, default=0) opts, args = parser.parse_args() if not opts.arch: if opts.release == ".*": opts.arch = ".*" else: script_path = abspath(dirname(argv[0])) err, out = run_cmd( "grep 'RELEASE_QUEUE=%s;' %s/config.map | grep -v 'DISABLED=1;' | grep 'PROD_ARCH=1;' | tr ';' '\n' | grep 'SCRAM_ARCH=' | sed 's|.*=||'" % (opts.release, script_path)) if err: opts.arch = "slc6_amd64_gcc530" else: opts.arch = out if opts.release != ".*": opts.release = opts.release + ".*" end_time = int(time() * 1000) start_time = end_time - int(86400 * 1000 * opts.days) query = "release:/%s/ AND architecture:/%s/" % (opts.release.lower(), opts.arch) es_data = es_query('ib-dataset-*', query, start_time, end_time, scroll=True, fields=["lfn"]) print(json.dumps(es_data, indent=2, sort_keys=True, separators=(',', ': ')))
# Fragment starting mid-add_option call (indentation lost; kept
# byte-identical).  Same flow as the L8 chunk: parse options, resolve the
# production SCRAM_ARCH from config.map (fallback slc6_amd64_gcc530),
# query 'ib-dataset-*' over the last N days and print the result as JSON.
# This variant scrolls full documents with page_size=10000 instead of
# restricting to the "lfn" field.
"--page", dest="page_size", help="Page size, default 0 means no page and get all data in one go", type=int, default=0) opts, args = parser.parse_args() if not opts.arch: if opts.release == ".*": opts.arch = ".*" else: script_path = abspath(dirname(argv[0])) err, out = run_cmd( "grep 'RELEASE_QUEUE=%s;' %s/config.map | grep -v 'DISABLED=1;' | grep 'PROD_ARCH=1;' | tr ';' '\n' | grep 'SCRAM_ARCH=' | sed 's|.*=||'" % (opts.release, script_path)) if err: opts.arch = "slc6_amd64_gcc530" else: opts.arch = out if opts.release != ".*": opts.release = opts.release + ".*" end_time = int(time() * 1000) start_time = end_time - int(86400 * 1000 * opts.days) query = "release:/%s/ AND architecture:/%s/" % (opts.release.lower(), opts.arch) es_data = es_query('ib-dataset-*', query, start_time, end_time, scroll=True, page_size=10000) print(json.dumps(es_data, indent=2, sort_keys=True, separators=(',', ': ')))
# Fragment starting mid-dict literal (indentation lost; kept byte-identical).
# Purpose: take a comma-separated workflow list from argv[1], query ES in
# batches of 50 workflows over the last 7 days, sum each workflow's per-step
# "cpu" stat into a weight, and build order_workflows sorted by descending
# weight ([workflow, weight] pairs).
# NOTE(review): the trailing `wfs = []` reset suggests a second batching
# loop follows this chunk — confirm against the full file.
"architecture": "slc6_amd64_gcc530", "release_cycle": "CMSSW_9_3_X_*", "workflows": "" } workflows = [] wfs = [] for wf in [w for w in sys.argv[1].split(",") if w]: wfs.append(wf) while wfs: queryInfo["workflows"] = " OR ".join(wfs[0:50]) wfs = wfs[50:] wf_hits = es_query( index='relvals_stats_*', query=format( 'release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)', **queryInfo), start_time=int(time() * 1000) - int(86400 * 1000 * 7), end_time=int(time() * 1000)) stats = es_workflow_stats(wf_hits) for wf in stats: wf_weight = 0 for step in stats[wf]: stat = stats[wf][step] wf_weight += stat["cpu"] workflows.append({"workflow": wf, "weight": wf_weight}) order_workflows = [] for item in sorted(workflows, key=itemgetter("weight"), reverse=True): order_workflows.append([item["workflow"], item["weight"]]) wfs = []
# Fragment starting mid-throttle-loop (indentation lost; kept byte-identical).
# Python 2 variant of the L3 chunk: reap finished createJob threads, spawn
# the next one, join all, then query 30 days of relval stats for the release
# cycle, retrying once with a narrower cycle ("_".join of the first three
# version fields + "_X") when no hits are returned, and aggregate with
# es_workflow_stats before building jobs.json.
# NOTE(review): unlike L3 this es_query call omits scroll=True — presumably
# an older revision; confirm before unifying.
sleep(1) thrds = [ t for t in thrds if t.is_alive() ] t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch)) thrds.append(t) t.start() for t in thrds: t.join() #Get Workflow stats from ES print "Getting Workflow stats from ES....." stats = {} release_cycle=str.lower(cmssw_ver.split("_X_")[0]+"_X") while True: stats = es_query(index='relvals_stats_*', query=format('(NOT cpu_max:0) AND exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)', release_cycle=release_cycle+"_*", architecture=arch, workflows=wf_query[4:] ), start_time=1000*int(time()-(86400*30)), end_time=1000*int(time())) if (not 'hits' in stats) or (not 'hits' in stats['hits']) or (not stats['hits']['hits']): xrelease_cycle = str.lower("_".join(cmssw_ver.split("_",4)[0:3])+"_X") if xrelease_cycle!=release_cycle: release_cycle=xrelease_cycle print "Retry: Setting release cycle to ",release_cycle continue break wf_stats = es_workflow_stats(stats) #Create Jobs print "Creating jobs.json file ...."
# Fragment (indentation lost; kept byte-identical).  Purpose: build the
# relval-stats query once, then fetch it either through the Kerberos path
# (es_krb_query_exe against 'cmssdt-relvals_stats_summary*') or the plain
# es_query path, depending on the use_krb flag; on empty results retry with
# a narrower release cycle, otherwise break and aggregate.
# NOTE(review): the `continue`/`break` here belong to an enclosing while
# loop that is outside this chunk; `st`/`et` are also defined upstream —
# do not reformat without that context.  Python 2 print statements.
use_krb = True release_cycle = (cmssw_ver.split("_X_")[0] + "_X").lower() es_q = format( 'exit_code:0 AND release:/%(release_cycle)s.*/ AND architecture:/%(architecture)s.*/ AND (%(workflows)s)', release_cycle=release_cycle, architecture=arch, workflows=wf_query[4:]) if use_krb: stats = es_krb_query_exe(index='cmssdt-relvals_stats_summary*', query=es_q, start_time=st, end_time=et) else: stats = es_query(index='relvals_stats_*', query=es_q, start_time=st, end_time=et) if (not 'hits' in stats) or (not 'hits' in stats['hits']) or ( not stats['hits']['hits']): xrelease_cycle = "_".join(cmssw_ver.split("_", 4)[0:3]) + "_X" if xrelease_cycle != release_cycle: release_cycle = xrelease_cycle print "Retry: Setting release cycle to ", release_cycle continue break wf_stats = es_workflow_stats(stats) #Create Jobs print "Creating jobs.json file ...."
# Fragment (indentation lost; kept byte-identical; ends mid-loop over
# workflow.log files).  Purpose: build an "OR workflow:<id>" query string
# from argv[1] while throttling createJob threads to `jobs` workers, join
# them, fetch 10 days of relval stats, aggregate with es_workflow_stats,
# then start assembling jobs.json: the final echo job, the per-group
# workflow_final.py finalization job with cpu/rss/time limits, and the
# per-workflow job list discovered from workflow.log files on disk.
# Python 2 print statements; wf_query[4:] strips the leading " OR ".
for wf in sys.argv[1].split(","): wf_query+=" OR workflow:"+wf while len(thrds)>=jobs: sleep(1) thrds = [ t for t in thrds if t.is_alive() ] t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch)) thrds.append(t) t.start() for t in thrds: t.join() #Get Workflow stats from ES print "Getting Workflow stats from ES....." stats = es_query(index='relvals_stats_*', query=format('exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)', release_cycle=cmssw_ver.split("_X_")[0]+"_X_*", architecture=arch, workflows=wf_query[4:] ), start_time=1000*int(time()-(86400*10)), end_time=1000*int(time())) wf_stats = es_workflow_stats(stats) #Create Jobs print "Creating jobs.json file ...." jobs = {} jobs["final_job"] = "echo All Done" jobs["final_per_group"] = {"command": SCRIPT_DIR+"/workflow_final.py %(jobs_results)s", "cpu": 10, "rss": 10*1024*1024, "time" : 30} jobs["env"]={} jobs["jobs"]=[] e , o = getstatusoutput ("find . -name workflow.log -type f | sed 's|^./||'") for cmds_log in o.split("\n"):