def getWorkflowStatsFromES(release='*', arch='*', lastNdays=7, page_size=0):
    """Fetch relval workflow stat hits from ElasticSearch.

    Searches the 'relvals_stats_*' indices over the last *lastNdays* days,
    keeping only documents with a non-zero cpu_max and a zero exit code for
    the requested release cycle and architecture, and returns the raw hits.
    """
    # NOTE(review): page_size is accepted but not forwarded to es_query.
    release_pattern = release + "_*"
    query_str = format(
        '(NOT cpu_max:0) AND (exit_code:0) AND release:%(release_cycle)s AND architecture:%(architecture)s',
        release_cycle=release_pattern,
        architecture=arch)
    result = es_query(index='relvals_stats_*',
                      query=query_str,
                      start_time=1000 * int(time() - (86400 * lastNdays)),
                      end_time=1000 * int(time()),
                      scroll=True)
    return result['hits']['hits']
def getWorkflowStatsFromES(release='*', arch='*', lastNdays=7, page_size=0):
    """Return raw ES hits for successful relval steps in the given window.

    Restricts the 'relvals_stats_*' search to the given release cycle and
    architecture over the last *lastNdays* days; only documents with
    cpu_max != 0 and exit_code == 0 are matched.
    """
    # NOTE(review): page_size is accepted for API symmetry but never used.
    lucene_query = format(
        '(NOT cpu_max:0) AND (exit_code:0) AND release:%(release_cycle)s AND architecture:%(architecture)s',
        release_cycle='%s_*' % release,
        architecture=arch)
    window_start = 1000 * int(time() - (86400 * lastNdays))
    hits = es_query(index='relvals_stats_*',
                    query=lucene_query,
                    start_time=window_start,
                    end_time=1000 * int(time()),
                    scroll=True)
    return hits['hits']['hits']
Exemple #3
0
    t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch))
    thrds.append(t)
    t.start()
# Wait for all job-creation threads started above to complete.
for t in thrds:
    t.join()

# Get Workflow stats from ES: pull the last 30 days of relval step stats for
# this release cycle/architecture, retrying once with a shorter release-cycle
# prefix when the first query returns no hits.
print("Getting Workflow stats from ES.....")
stats = {}
# Lower-cased release cycle, e.g. "cmssw_12_0_x".
# Assumes cmssw_ver contains "_X_" -- TODO confirm for all IB version strings.
release_cycle = str.lower(cmssw_ver.split("_X_")[0] + "_X")
while True:
    stats = es_query(
        index='relvals_stats_*',
        # wf_query[4:] strips the leading " OR " from the accumulated
        # "workflow:..." clauses (presumably built by the caller above).
        query=format(
            '(NOT cpu_max:0) AND exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)',
            release_cycle=release_cycle + "_*",
            architecture=arch,
            workflows=wf_query[4:]),
        # es_query takes millisecond timestamps; window is the last 30 days.
        start_time=1000 * int(time() - (86400 * 30)),
        end_time=1000 * int(time()),
        scroll=True)
    if (not 'hits' in stats) or (not 'hits' in stats['hits']) or (
            not stats['hits']['hits']):
        # No hits: retry once using the cycle built from the first three
        # "_"-separated components of cmssw_ver (e.g. "cmssw_12_0_x").
        xrelease_cycle = str.lower("_".join(cmssw_ver.split("_", 4)[0:3]) +
                                   "_X")
        if xrelease_cycle != release_cycle:
            release_cycle = xrelease_cycle
            print("Retry: Setting release cycle to ", release_cycle)
            continue
    break

# Aggregate the raw ES hits into per-workflow/per-step statistics.
wf_stats = es_workflow_stats(stats)
Exemple #4
0
else: cmsbot_dir=dirname(dirname(abspath(argv[0])))
sys.path.insert(0,cmsbot_dir)

from es_utils import es_query

# Entry point: dump raw 'ib-dataset-*' ES records for a release/architecture
# over the last N days as pretty-printed JSON on stdout.
if __name__ == "__main__":
  from optparse import OptionParser
  parser = OptionParser(usage="%prog ")
  parser.add_option("-r", "--release",      dest="release", help="Release filter",   type=str, default=".*")
  parser.add_option("-a", "--architecture", dest="arch",    help="SCRAM_ARCH filter. Production arch for a release cycle is used if found otherwise slc6_amd64_gcc530",   type=str, default=None)
  parser.add_option("-d", "--days",         dest="days",    help="Files access in last n days",   type=int, default=7)
  parser.add_option("-j", "--job",          dest="job",     help="Parallel jobs to run",   type=int, default=4)
  parser.add_option("-p", "--page",         dest="page_size", help="Page size, default 0 means no page and get all data in one go",  type=int, default=0)
  opts, args = parser.parse_args()

  # No arch given: for a concrete release cycle, look up its production
  # SCRAM_ARCH in config.map; fall back to slc6_amd64_gcc530 on failure.
  if not opts.arch:
    if opts.release==".*": opts.arch=".*"
    else:
      script_path = abspath(dirname(argv[0]))
      err, out = getstatusoutput("grep 'RELEASE_QUEUE=%s;' %s/config.map | grep -v 'DISABLED=1;' | grep 'PROD_ARCH=1;' | tr ';' '\n' | grep 'SCRAM_ARCH=' | sed 's|.*=||'" % (opts.release, script_path))
      if err: opts.arch="slc6_amd64_gcc530"
      else: opts.arch=out
  # Turn a concrete release cycle into a prefix pattern for the regex query.
  if opts.release!=".*": opts.release=opts.release+".*"

  # ES expects millisecond timestamps.
  end_time = int(time()*1000)
  start_time = end_time -int(86400*1000*opts.days)
  query = "release:/%s/ AND architecture:/%s/" % (opts.release.lower(), opts.arch)
  es_data = es_query('ib-dataset-*', query, start_time,end_time,scroll=True,page_size=10000)
  # NOTE(review): Python 2 print statement -- this snippet is Py2-only.
  print json.dumps(es_data, indent=2, sort_keys=True, separators=(',',': '))

Exemple #5
0
    while len(thrds) >= jobs:
        sleep(1)
        thrds = [t for t in thrds if t.is_alive()]
    t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch))
    thrds.append(t)
    t.start()
# Wait for all job-creation threads started above to finish.
for t in thrds:
    t.join()

# Get Workflow stats from ES: last 10 days of successful relval steps for
# this release cycle / architecture / workflow set. (Python 2 snippet.)
print "Getting Workflow stats from ES....."
stats = es_query(
    index='relvals_stats_*',
    # wf_query[4:] drops the leading " OR " from the accumulated
    # "workflow:..." clauses built by the caller.
    query=format(
        'exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)',
        release_cycle=cmssw_ver.split("_X_")[0] + "_X_*",
        architecture=arch,
        workflows=wf_query[4:]),
    # es_query takes millisecond timestamps.
    start_time=1000 * int(time() - (86400 * 10)),
    end_time=1000 * int(time()))

# Aggregate raw ES hits into per-workflow/per-step statistics.
wf_stats = es_workflow_stats(stats)

# Create Jobs: start building the jobs.json description.
print "Creating jobs.json file ...."
jobs = {}
jobs["final_job"] = "echo All Done"
jobs["final_per_group"] = {
    "command": SCRIPT_DIR + "/workflow_final.py %(jobs_results)s",
    "cpu": 10,
    "rss": 10 * 1024 * 1024,
Exemple #6
0
# Command-line window options (parser is created earlier in this script).
parser.add_option("-d",
                  "--days",
                  dest="days",
                  default=7,
                  type="int",
                  help="Number of days to search data for, default is 7")
parser.add_option(
    "-o",
    "--offset",
    dest="offset",
    default=0,
    type="int",
    help="Number of days to offset from the current day. Default is 0")
opts, args = parser.parse_args()

# Query window: [now - offset - days, now - offset]; ES takes milliseconds.
end_time = int(time()) - (opts.offset * 86400)
stats = es_query(index=opts.index,
                 query=opts.query,
                 start_time=1000 * (end_time - (86400 * opts.days)),
                 end_time=end_time * 1000)
# Keep only hits whose exception text mentions TBranchElement::GetBasket.
matched = []
for h in stats['hits']['hits']:
    hit = h["_source"]
    if 'TBranchElement::GetBasket' in hit['exception']: matched.append(hit)

# Report matches in chronological order. (Python 2 print statement.)
for hit in sorted(matched, key=itemgetter('@timestamp')):
    print "Release:", hit['release'], "\nArchitecture:", hit[
        'architecture'], "\nWorkflow:", hit['workflow'] + "/" + hit[
            'step'], "\nException:", hit[
                'exception'], "\n-----------------------------------------"
Exemple #7
0
#!/usr/bin/env python
from operator import itemgetter
from time import sleep, time
from es_utils import es_query, format, es_workflow_stats
from optparse import OptionParser
parser = OptionParser(usage="%prog [-m|--memory <memory>] [-c|--cpu <cpu>] [-j|--jobs <jobs-json-file>]")
parser.add_option("-i", "--index",  dest="index",  default=None, help="Name of the ElasticSearch Index e.g. ib-matrix-*")
parser.add_option("-q", "--query",  dest="query",  default=None, help="Query string e.g. release:RELEASE AND architecture:ARCH")
parser.add_option("-d", "--days",   dest="days",   default=7, type="int", help="Number of days to search data for, default is 7")
parser.add_option("-o", "--offset", dest="offset", default=0, type="int", help="Number of days to offset from the current day. Default is 0")
opts, args = parser.parse_args()

end_time=int(time())-(opts.offset*86400)
stats = es_query(index=opts.index,query=opts.query,start_time=1000*(end_time-(86400*opts.days)),end_time=end_time*1000)
matched=[]
for h in stats['hits']['hits']:
  hit = h["_source"]
  if 'TBranchElement::GetBasket' in hit['exception']: matched.append(hit)

for hit in sorted(matched,key=itemgetter('@timestamp')):
    print "Release:",hit['release'],"\nArchitecture:",hit['architecture'],"\nWorkflow:",hit['workflow']+"/"+hit['step'],"\nException:",hit['exception'],"\n-----------------------------------------"

Exemple #8
0
        "--page",
        dest="page_size",
        help="Page size, default 0 means no page and get all data in one go",
        type=int,
        default=0)
    opts, args = parser.parse_args()

    # Arch not given: for a concrete release, pick its production SCRAM_ARCH
    # from config.map, defaulting to slc6_amd64_gcc530 on lookup failure.
    if not opts.arch:
        if opts.release == ".*": opts.arch = ".*"
        else:
            script_path = abspath(dirname(argv[0]))
            err, out = run_cmd(
                "grep 'RELEASE_QUEUE=%s;' %s/config.map | grep -v 'DISABLED=1;' | grep 'PROD_ARCH=1;' | tr ';' '\n' | grep 'SCRAM_ARCH=' | sed 's|.*=||'"
                % (opts.release, script_path))
            if err: opts.arch = "slc6_amd64_gcc530"
            else: opts.arch = out
    # Turn a concrete release into a prefix pattern for the regex query.
    if opts.release != ".*": opts.release = opts.release + ".*"

    # Millisecond window covering the last opts.days days.
    end_time = int(time() * 1000)
    start_time = end_time - int(86400 * 1000 * opts.days)
    query = "release:/%s/ AND architecture:/%s/" % (opts.release.lower(),
                                                    opts.arch)
    # Only the 'lfn' field of each matching document is fetched.
    es_data = es_query('ib-dataset-*',
                       query,
                       start_time,
                       end_time,
                       scroll=True,
                       fields=["lfn"])
    print(json.dumps(es_data, indent=2, sort_keys=True,
                     separators=(',', ': ')))
Exemple #9
0
        "--page",
        dest="page_size",
        help="Page size, default 0 means no page and get all data in one go",
        type=int,
        default=0)
    opts, args = parser.parse_args()

    # Arch not given: for a concrete release, pick its production SCRAM_ARCH
    # from config.map, defaulting to slc6_amd64_gcc530 on lookup failure.
    if not opts.arch:
        if opts.release == ".*": opts.arch = ".*"
        else:
            script_path = abspath(dirname(argv[0]))
            err, out = run_cmd(
                "grep 'RELEASE_QUEUE=%s;' %s/config.map | grep -v 'DISABLED=1;' | grep 'PROD_ARCH=1;' | tr ';' '\n' | grep 'SCRAM_ARCH=' | sed 's|.*=||'"
                % (opts.release, script_path))
            if err: opts.arch = "slc6_amd64_gcc530"
            else: opts.arch = out
    # Turn a concrete release into a prefix pattern for the regex query.
    if opts.release != ".*": opts.release = opts.release + ".*"

    # Millisecond window covering the last opts.days days.
    end_time = int(time() * 1000)
    start_time = end_time - int(86400 * 1000 * opts.days)
    query = "release:/%s/ AND architecture:/%s/" % (opts.release.lower(),
                                                    opts.arch)
    # Scroll through all matching documents, 10000 per page.
    es_data = es_query('ib-dataset-*',
                       query,
                       start_time,
                       end_time,
                       scroll=True,
                       page_size=10000)
    print(json.dumps(es_data, indent=2, sort_keys=True,
                     separators=(',', ': ')))
Exemple #10
0
    "architecture": "slc6_amd64_gcc530",
    "release_cycle": "CMSSW_9_3_X_*",
    "workflows": ""
}
# Rank workflows by their total CPU cost over the last 7 days of ES data.
workflows = []
wfs = [w for w in sys.argv[1].split(",") if w]

# Query ES in batches of at most 50 workflows per request.
while wfs:
    batch, wfs = wfs[0:50], wfs[50:]
    queryInfo["workflows"] = " OR ".join(batch)
    wf_hits = es_query(
        index='relvals_stats_*',
        query=format(
            'release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)',
            **queryInfo),
        start_time=int(time() * 1000) - int(86400 * 1000 * 7),
        end_time=int(time() * 1000))
    stats = es_workflow_stats(wf_hits)
    # Weight of a workflow = sum of the per-step "cpu" statistics.
    for wf in stats:
        total_cpu = 0
        for step in stats[wf]:
            total_cpu += stats[wf][step]["cpu"]
        workflows.append({"workflow": wf, "weight": total_cpu})

# Heaviest workflows first, as [workflow, weight] pairs.
order_workflows = [[item["workflow"], item["weight"]]
                   for item in sorted(
                       workflows, key=itemgetter("weight"), reverse=True)]

wfs = []
Exemple #11
0
    sleep(1)
    thrds = [ t for t in thrds if t.is_alive() ]
  t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch))
  thrds.append(t)
  t.start()
# Wait for all job-creation threads started above. (Python 2 snippet.)
for t in thrds: t.join()

# Get Workflow stats from ES: last 30 days of relval step stats, retrying
# once with a shorter release-cycle prefix if nothing matched.
print "Getting Workflow stats from ES....."
stats = {}
# Lower-cased release cycle, e.g. "cmssw_12_0_x"; assumes cmssw_ver
# contains "_X_" -- TODO confirm for all IB version strings.
release_cycle=str.lower(cmssw_ver.split("_X_")[0]+"_X")
while True:
  stats = es_query(index='relvals_stats_*',
                 # wf_query[4:] drops the leading " OR " from the
                 # accumulated "workflow:..." clauses built by the caller.
                 query=format('(NOT cpu_max:0) AND exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)',
                              release_cycle=release_cycle+"_*",
                              architecture=arch,
                              workflows=wf_query[4:]
                             ),
                 # es_query takes millisecond timestamps.
                 start_time=1000*int(time()-(86400*30)),
                 end_time=1000*int(time()))
  if (not 'hits' in stats) or (not 'hits' in stats['hits']) or (not stats['hits']['hits']):
    # No hits: retry once with the cycle built from the first three
    # "_"-separated components of cmssw_ver.
    xrelease_cycle = str.lower("_".join(cmssw_ver.split("_",4)[0:3])+"_X")
    if xrelease_cycle!=release_cycle:
      release_cycle=xrelease_cycle
      print "Retry: Setting release cycle to ",release_cycle
      continue
  break

# Aggregate the raw ES hits into per-workflow/per-step statistics.
wf_stats = es_workflow_stats(stats)

# Create Jobs: start building the jobs.json description.
print "Creating jobs.json file ...."
Exemple #12
0
        # Kerberos-authenticated path: query the summary index instead.
        use_krb = True
        release_cycle = (cmssw_ver.split("_X_")[0] + "_X").lower()
        # Regex match on release cycle and architecture prefixes;
        # wf_query[4:] drops the leading " OR " from the workflow clauses.
        es_q = format(
            'exit_code:0 AND release:/%(release_cycle)s.*/ AND architecture:/%(architecture)s.*/ AND (%(workflows)s)',
            release_cycle=release_cycle,
            architecture=arch,
            workflows=wf_query[4:])

    # Run the query either via the kerberized endpoint or plain es_query.
    if use_krb:
        stats = es_krb_query_exe(index='cmssdt-relvals_stats_summary*',
                                 query=es_q,
                                 start_time=st,
                                 end_time=et)
    else:
        stats = es_query(index='relvals_stats_*',
                         query=es_q,
                         start_time=st,
                         end_time=et)

    if (not 'hits' in stats) or (not 'hits' in stats['hits']) or (
            not stats['hits']['hits']):
        # No hits: retry the enclosing loop once with the cycle built from
        # the first three "_"-separated components of cmssw_ver.
        xrelease_cycle = "_".join(cmssw_ver.split("_", 4)[0:3]) + "_X"
        if xrelease_cycle != release_cycle:
            release_cycle = xrelease_cycle
            print "Retry: Setting release cycle to ", release_cycle
            continue
    break

# Aggregate the raw ES hits into per-workflow/per-step statistics.
wf_stats = es_workflow_stats(stats)

# Create Jobs: start building the jobs.json description. (Python 2 snippet.)
print "Creating jobs.json file ...."
# Spawn one job-creation thread per requested workflow, throttled to at
# most `jobs` live threads, while accumulating the ES workflow query.
for wf in sys.argv[1].split(","):
  wf_query+=" OR workflow:"+wf
  while len(thrds)>=jobs:
    sleep(1)
    thrds = [ t for t in thrds if t.is_alive() ]
  t = threading.Thread(target=createJob, args=(wf, cmssw_ver, arch))
  thrds.append(t)
  t.start()
# Wait for all job-creation threads to finish.
for t in thrds: t.join()

# Get Workflow stats from ES: last 10 days of successful relval steps for
# this release cycle / architecture / workflow set. (Python 2 snippet.)
print "Getting Workflow stats from ES....."
stats = es_query(index='relvals_stats_*',
                 # wf_query[4:] strips the leading " OR " added above.
                 query=format('exit_code:0 AND release:%(release_cycle)s AND architecture:%(architecture)s AND (%(workflows)s)',
                              release_cycle=cmssw_ver.split("_X_")[0]+"_X_*",
                              architecture=arch,
                              workflows=wf_query[4:]
                             ),
                 # es_query takes millisecond timestamps.
                 start_time=1000*int(time()-(86400*10)),
                 end_time=1000*int(time()))

# Aggregate the raw ES hits into per-workflow/per-step statistics.
wf_stats = es_workflow_stats(stats)

# Create Jobs: build the jobs.json description consumed downstream.
print "Creating jobs.json file ...."
jobs = {}
jobs["final_job"] = "echo All Done"
jobs["final_per_group"] = {"command": SCRIPT_DIR+"/workflow_final.py %(jobs_results)s", "cpu": 10,  "rss": 10*1024*1024, "time" : 30}
jobs["env"]={}
jobs["jobs"]=[]
# Collect every workflow.log produced by the relval jobs under cwd.
e , o = getstatusoutput ("find . -name workflow.log -type f | sed 's|^./||'")
for cmds_log in o.split("\n"):