print ' number of events = ' + str(args.events) print ' use McM prepID = ' + str(mcm) print ' skipexisting = ' + str(skipexisting) print das_cmd = "/cvmfs/cms.cern.ch/common/dasgoclient" # if mcm is specified, retrieve dataset name from prepID: if mcm: if "/" in str(args.inputdataset): print "not a McM prepID format, please check" sys.exit(1) # load McM sys.path.append('/afs/cern.ch/cms/PPD/PdmV/tools/McM/') from rest import restful mcm = restful() # retrieve request with given prepid temp = sys.stdout f = open('/dev/null', 'w') sys.stdout = f request = mcm.getA('requests', str(args.inputdataset)) sys.stdout = temp if debug: print 'request prepid',request['prepid'] # search dataset name as returned by mcm dataset_used = str(request['output_dataset'][0]) primary_dataset_name = dataset_used.split('/')[1] else: # search dataset name as name + campaign + datatier primary_dataset_name = args.inputdataset.split('/')[1] command=das_cmd+" --limit=0 --query=\"dataset dataset=/"+primary_dataset_name+"/*"+args.campaign+"*/"+args.datatier+"\"" dataset_used = commands.getstatusoutput(command)[1].split("\n")
#!/usr/bin/env python import argparse, getpass, re, sys parser = argparse.ArgumentParser() parser.add_argument("--user", default=getpass.getuser()) parser.add_argument("-v", "--verbose", action="store_true") args = parser.parse_args() sys.path.append('/afs/cern.ch/cms/PPD/PdmV/tools/McM/') import rest mcm = rest.restful() requests = mcm.get("requests", query="prepid=HIG-RunIIFall17wmLHEGS-*&actor=" + args.user) firstchain = "HIG-chain_RunIIFall17wmLHEGS_flowRunIIFall17DRPremix_flowRunIIFall17MiniAODv2_flowRunIIFall17NanoAOD-[0-9]*" secondchain = "HIG-chain_RunIIFall17wmLHEGS_flowRunIIFall17DRPremixPU2017_flowRunIIFall17MiniAODv2_flowRunIIFall17NanoAOD-[0-9]*" for req in requests: if not req["member_of_chain"]: continue chains = [_ for _ in req["member_of_chain"] if "NanoAOD" in _] chains.sort(reverse=True) if len(chains) == 2 and re.match(firstchain, chains[0]) and re.match( secondchain, chains[1]): print "good:", req["prepid"] else: print "bad: ", req["prepid"] if args.verbose: if len(chains) == 1: print " ", "only one chain"
import os import sys #if not os.path.isfile('rest.py'): # print "getting the mcm rest api" # os.system('ln -s /afs/cern.ch/cms/PPD/PdmV/tools/McM/rest.py') from rest import restful McM = restful(dev=False) dataset = sys.argv[1] rs = McM.getA('requests', query='produce=%s' % dataset) if len(rs) > 1: print "this cannot really be" elif len(rs) == 0: print dataset, "is not produced via mcm" else: print dataset, "produced by", rs[0]['prepid'] r = rs[0] ## pull out the chains crs = McM.getA('chained_requests', query='contains=%s' % r['prepid']) if len(crs) > 1: print "unlikely to have more than one chain for an analysis dataset" elif len(crs) == 0: print r['prepid'], "is not in any chain" else: infos = [] for r in reversed(crs[0]['chain']): rr = McM.getA('requests', r)
def main_do(options): logger.info("Running main") if options.check: logger.info('Check') # we check if this script is already running with same parameters# checks = ['ps -f -u $USER'] for arg in sys.argv[1:]: checks.append('grep "%s"' % (arg.split('/')[-1].replace('--', ''))) checks.append('grep -v grep') check = filter(None, os.popen("|".join(checks)).read().split('\n')) if len(check) != 1: logger.error("Already running with that exact setting") logger.info(check) sys.exit(1) else: logger.info("ok to operate") start_time = time.asctime() global statsCouch, docs, FORCE # interface to the couchDB statsCouch = Interface(options.db + ':5984/stats') # get from stats couch the list of requests view = 'yearAgo' if options.do == 'update' else 'all' # in case we want to force update even older workflows if options.force: view = 'all' logger.info("Getting all stats ...") allDocs = statsCouch.get_view(view) docs = set([doc['id'] for doc in allDocs['rows']]) # remove the _design/stats if view == 'all': docs = set(filter(lambda doc: not doc.startswith('_'), docs)) logger.info("... done") nproc = 4 limit = None if options.test: limit = 10 if options.do == 'insert': logger.info('do = insert') # get from wm couch from statsMonitoring import parallel_test, get_requests_list logger.info("Getting all req ...") req_list = get_requests_list() logger.info("... 
done") # insert new requests, not already in stats couch into stats couch # insertAll(req_list,docs,options.search,limit) logger.info('Will filter') if options.search: req_list = filter(lambda req: options.search in req["request_name"], req_list) logger.info('%d requests after search' % (len(req_list))) # print "req_list: " % (req_list) # skip malformated ones req_list = filter(lambda req: "status" in req, req_list) logger.info('%d requests after skipping malformed' % (len(req_list))) # take only the ones not already in there req_list = filter(lambda req: req["request_name"] not in docs, req_list) logger.info('%d after taking only those not already in there' % (len(req_list))) # skip trying to insert aborted and rejected or failed # req_list = filter( lambda req : not req["status"] in ['aborted','rejected','failed','aborted-archived','rejected-archived','failed-archived'], req_list ) req_list = filter(lambda req: not req["status"] in ['aborted', 'rejected', 'failed', None], req_list) logger.info('%d after skipping aborted, rejected, failed and None' % (len(req_list))) # do not update TaskChain request statuses # req_list = filter( lambda req : 'type' in req and req['type']!='TaskChain', req_list) # logger.info('Requests %d' % (len(req_list))) if limit: req_list = req_list[0:limit] logger.info('%d after limiting' % (len(req_list))) logger.info('Dispatching %d requests to %d processes' % (len(req_list), nproc)) pool = multiprocessing.Pool(nproc) results = pool.map(insertOne, req_list) logger.info('End dispatching') results = filter(lambda item: item is not False, results) logger.info('%d inserted' % (len(results))) logger.info(str(results)) """ showme='' for r in results: showme+='\t'+r+'\n' print showme """ elif options.do == 'kill' or options.do == 'list': logger.info('do = kill OR do = list') # get from wm couch from statsMonitoring import parallel_test, get_requests_list logger.info("Getting all req ...") req_list = get_requests_list() logger.info("... 
done") removed = [] if options.search: req_list = filter(lambda req: options.search in req["request_name"], req_list) for r in req_list: logger.info("Found %s in status %s?" % (r['request_name'], (r['status'] if 'status' in r else 'undef'))) if options.do == 'kill': # print "killing",r['request_name'],"in status",(r['status'] if 'status' in r else 'undef'),"?" docid = r['request_name'] if docid in docs and docid not in removed: thisDoc = statsCouch.get_file_info(docid) logger.info("Removing record for docid %s" % (docid)) statsCouch.delete_file_info(docid, thisDoc['_rev']) removed.append(docid) else: logger.info("Nothing to kill") elif options.do == 'update': logger.info('do = update') __newest = True if options.search: __newest = False # get from wm couch from statsMonitoring import get_requests_list logger.info("Getting all req ...") req_list = get_requests_list(not_in_wmstats=options.nowmstats, newest=__newest) logger.info("... done") cookie_path = '/home/pdmvserv/private/prod_cookie.txt' if options.mcm: sys.path.append('/afs/cern.ch/cms/PPD/PdmV/tools/McM/') from rest import restful mcm = restful(dev=False, cookie=cookie_path) rs = mcm.getA('requests', query='status=submitted') rids = map(lambda d: d['prepid'], rs) logger.info("Got %d to update from mcm" % (len(rids))) # print len(docs),len(req_list) # print map( lambda docid : any( map(lambda rid : rid in doc, rids)), docs) docs = filter(lambda docid: any(map(lambda rid: rid in docid, rids)), docs) if len(docs): # req_list = filter(lambda req: any(map(lambda rid: rid in req["request_name"], rids)), req_list) req_list = filter(lambda req: req['request_name'] in docs, req_list) if options.search: if options.force: FORCE = True docs = filter(lambda docid: options.search in docid, docs) if len(docs): # req_list = filter(lambda req: options.search in req["request_name"], req_list) req_list = filter(lambda req: req['request_name'] in docs, req_list) if len(req_list): pprint.pprint(req_list) if limit: req_list = 
req_list[0:limit] request_dict = {} for request in req_list: if request['request_name'] in request_dict: request_dict[request['request_name']].append(request) logger.info('APPEND! %s' % (request['request_name'])) else: request_dict[request['request_name']] = [request] logger.info("Dispaching %d requests to %d processes..." % (len(request_dict), nproc)) pool = multiprocessing.Pool(nproc) results = pool.map(updateOneIt, request_dict.iteritems()) logger.info("End dispatching") if options.search: dump = dumpSome(docs, limit) logger.info("Result from update with search") pprint.pprint(dump) results = filter(lambda item: item is not False, results) logger.info('%d updated' % (len(results))) logger.info(str(results)) print "\n\n" # for r in results: # try: # withRevisions = statsCouch.get_file_info_withrev(r) # # we shouldnt trigger mcm for ReRecos or Relvals which doesnt exist there # if any(el in withRevisions['pdmv_prep_id'].lower() for el in ['relval', 'rereco']): # logger.info("NOT bothering McM for rereco or relval: %s" % (withRevisions['pdmv_prep_id'])) # continue # # he we should trigger McM update if request is in done. # # because inspection on done doesn't exists. 
# if (withRevisions['pdmv_type'] != 'Resubmission' and # withRevisions['pdmv_prep_id'].strip() not in ['No-Prepid-Found', '', 'None', '_'] and # withRevisions['pdmv_status_from_reqmngr'] == "normal-archived"): # # we should trigger this only if events_in_das was updated for done # logger.info("Triggering McM completed_evts syncing for a done request %s" % (r)) # update_comm = 'curl -s -k -L --cookie %s https://cms-pdmv.cern.ch/mcm/restapi/requests/fetch_stats_by_wf/%s' % (cookie_path, r) # os.system(update_comm) # else: # logger.info('%s type (%s) is either Resubmission OR prepid (%s) is bad OR it\'s not normal-archived (%s)' % (r, # withRevisions['pdmv_type'], # withRevisions['pdmv_prep_id'], # withRevisions['pdmv_status_from_reqmngr'])) # except: # logger.error("failed to update growth for %s" % (r)) # logger.error(str(traceback.format_exc())) print "\n\n" # set in the log file # serves as forceupdated ! logger.info("start time: %s" % str(start_time)) logger.info("logging updating time: %s" % str(time.asctime())) log_file = open('stats.log', 'a') log_file.write(time.asctime() + '\n') log_file.close()
print 'This is DRYRUN!' else: print 'WARNING!'*10 print 'REAL QUERIES WILL BE MADE!!!' print 'WARNING!'*10 if is_dev_instance: cookie_file = 'dev-cookie.txt' #dev print 'Running on dev instance!' else: cookie_file = 'cookie.txt' #prod print 'WARNING!'*10 print 'Running on prod instance!!!' print 'WARNING!'*10 mcm=restful(dev=is_dev_instance, cookie=cookie_file, debug=True) pwgs=mcm.get('restapi/users/get_pwg')['results'] # submit only these groups #pwgs=['B2G','BPH','BTV','EXO'] print pwgs ochain = '' dchain = '' N_REQUESTS_PER_TICKET = 30 PRIORITY_BLOCK = 1 TICKET_NOTE = "Fall17MiniAOD to Fall17MiniAODv2+NanoAOD central migration" #[2] Choose one campaign types ochain = 'chain_RunIIFall17wmLHEGS_flowRunIIFall17DRPremixPU2017_flowRunIIFall17MiniAOD'
def init(self, url=None):
    """Instantiate ``restful`` with no arguments and keep it on ``self.rest``.

    ``url`` is accepted but is not used by this method.
    """
    self.rest = restful()
import os import sys #if not os.path.isfile('rest.py'): # print "getting the mcm rest api" # os.system('ln -s /afs/cern.ch/cms/PPD/PdmV/tools/McM/rest.py') from rest import restful McM = restful(dev=False) dataset=sys.argv[1] rs = McM.getA('requests',query='produce=%s'% dataset) if len(rs)>1: print "this cannot really be" elif len(rs)==0: print dataset,"is not produced via mcm" else: print dataset,"produced by",rs[0]['prepid'] r = rs[0] ## pull out the chains crs = McM.getA('chained_requests',query='contains=%s'% r['prepid']) if len(crs)>1: print "unlikely to have more than one chain for an analysis dataset" elif len(crs)==0: print r['prepid'],"is not in any chain" else: infos=[] for r in reversed(crs[0]['chain']): rr = McM.getA('requests',r)
#!/usr/bin/env python
import sys
import math
import getopt
import time
from collections import defaultdict
import pprint
import copy
import json
import csv

# The McM REST helper is imported from this shared tools directory.
sys.path.append('/afs/cern.ch/cms/PPD/PdmV/tools/McM/')
from rest import restful

mcm = restful(dev=False)


def get_rootID(prepid):
    """Return the first request of the first chain that contains `prepid`.

    The chained requests are queried from McM with ``contains=<prepid>``.
    When the query yields no chained request, `prepid` itself is returned.
    """
    crs = mcm.getA('chained_requests', query='contains=%s' % (prepid))
    for cr in crs:
        # Only the first chained request is ever considered.
        return cr['chain'][0]
    return prepid


# Campaign names grouped under the keys 'pu1' .. 'pu6'; each group lists a
# base campaign followed by its 'wmLHE' and 'pLHE' variants.
chains = {
    'pu1': [
        'RunIISpring15DR74Startup25ns',
        'RunIISpring15DR74Startup25nswmLHE',
        'RunIISpring15DR74Startup25nspLHE',
    ],
    'pu2': [
        'RunIISpring15DR74Startup50ns',
        'RunIISpring15DR74Startup50nswmLHE',
        'RunIISpring15DR74Startup50nspLHE',
    ],
    'pu3': [
        'RunIISpring15DR74Startup25nsRaw',
        'RunIISpring15DR74Startup25nsRawwmLHE',
        'RunIISpring15DR74Startup25nsRawpLHE',
    ],
    'pu4': [
        'RunIISpring15DR74Startup50nsRaw',
        'RunIISpring15DR74Startup50nsRawwmLHE',
        'RunIISpring15DR74Startup50nsRawpLHE',
    ],
    'pu5': [
        'RunIISpring15DR74AsymptFlat10to5025nsRaw',
        'RunIISpring15DR74AsymptFlat10to5025nsRawwmLHE',
        'RunIISpring15DR74AsymptFlat10to5025nsRawpLHE',
    ],
    'pu6': [
        'RunIISpring15DR74StartupFlat10to5050nsRaw',
        'RunIISpring15DR74StartupFlat10to5050nsRawwmLHE',
        'RunIISpring15DR74StartupFlat10to5050nsRawpLHE',
    ],
}
def main_do( options ): if options.check: #we check if this script is already running with same parameters# checks=['ps -f -u $USER'] for arg in sys.argv[1:]: checks.append('grep "%s"'%(arg.split('/')[-1].replace('--',''))) checks.append('grep -v grep') c = " | ".join(checks) check=filter(None,os.popen("|".join(checks)).read().split('\n')) if len(check)!=1: print "already running with that exact setting" print check sys.exit(1) else: print "ok to operate" start_time = time.asctime() global statsCouch, docs, FORCE #interface to the couchDB statsCouch = Interface(options.db+':5984/stats') ## get from stats couch the list of requests print "Getting all stats ..." allDocs = statsCouch.get_view('all') docs = [doc['id'] for doc in allDocs['rows']] #remove the _design/stats docs = filter(lambda doc : not doc.startswith('_'), docs) print "... done" nproc = 5 limit = None if options.test: limit = 10 if options.do == 'insert': ## get from wm couch from statsMonitoring import parallel_test,get_requests_list print "Getting all req ..." req_list = get_requests_list() print "... 
done" ## insert new requests, not already in stats couch into stats couch #insertAll(req_list,docs,options.search,limit) if options.search: req_list = filter( lambda req : options.search in req["request_name"], req_list ) #print len(req_list) #skip malformated ones req_list = filter( lambda req : "status" in req, req_list ) #print len(req_list) #take only the ones not already in there req_list = filter( lambda req : req["request_name"] not in docs, req_list ) #print len(req_list) #skip trying to insert aborted and rejected or failed #req_list = filter( lambda req : not req["status"] in ['aborted','rejected','failed','aborted-archived','rejected-archived','failed-archived'], req_list ) req_list = filter( lambda req : not req["status"] in ['aborted','rejected','failed'], req_list ) #print len(req_list) #do not update TaskChain request statuses #req_list = filter( lambda req : 'type' in req and req['type']!='TaskChain', req_list) #print len(req_list) pprint.pprint(req_list) if limit: req_list = req_list[0:limit] #print len(req_list) newentries = 0 print "Dispaching", len(req_list), "requests to", str(nproc), "processes..." pool = multiprocessing.Pool(nproc) results = pool.map(insertOne, req_list) print "End dispatching!" results = filter(lambda item : item != False, results) print len(results), "inserted" print str(results) """ showme='' for r in results: showme+='\t'+r+'\n' print showme """ elif options.do =='kill' or options.do =='list' : ## get from wm couch from statsMonitoring import parallel_test,get_requests_list print "Getting all req ..." req_list = get_requests_list() print "... done" removed = [] if options.search: req_list = filter(lambda req : options.search in req["request_name"], req_list) for r in req_list: print "Found", r['request_name'], "in status", (r['status'] if 'status' in r else 'undef'), "?" if options.do == 'kill': #print "killing",r['request_name'],"in status",(r['status'] if 'status' in r else 'undef'),"?" 
docid = r['request_name'] if docid in docs and not docid in removed: thisDoc = statsCouch.get_file_info(docid) print "removing record for docid" statsCouch.delete_file_info(docid, thisDoc['_rev']) removed.append(docid) else: print "nothing to kill" elif options.do == 'update': __newest = True if options.search: __newest = False ## get from wm couch from statsMonitoring import parallel_test,get_requests_list print "Getting all req ..." req_list = get_requests_list(not_in_wmstats=options.nowmstats, newest=__newest) print "... done" ## unthreaded #updateSeveral(docs,req_list,pattern=None) if options.mcm: sys.path.append('/afs/cern.ch/cms/PPD/PdmV/tools/McM/') from rest import restful mcm = restful(dev=False, cookie='/afs/cern.ch/user/p/pdmvserv/private/prod-cookie.txt') rs = mcm.getA('requests', query='status=submitted') rids = map(lambda d : d['prepid'], rs) print "Got", len(rids), "to update from mcm" #print len(docs),len(req_list) #print map( lambda docid : any( map(lambda rid : rid in doc, rids)), docs) docs = filter(lambda docid : any(map(lambda rid : rid in docid, rids)), docs) if not len(docs): req_list = filter(lambda req : any(map(lambda rid : rid in req["request_name"], rids)), req_list) if options.search: if options.force: FORCE = True docs = filter(lambda docid : options.search in docid, docs) if not len(docs): req_list = filter(lambda req : options.search in req["request_name"], req_list) if len(req_list): pprint.pprint(req_list) if limit: docs = docs[0:limit] repeated_req_list = itertools.repeat(req_list, len(docs)) print "Dispaching", len(docs), "requests to ", str(nproc), "processes..." pool = multiprocessing.Pool(nproc) results = pool.map(updateOneIt, itertools.izip(docs, repeated_req_list)) print "End dispatching!" 
if options.search: dump = dumpSome(docs, limit) print "Result from update with search" pprint.pprint(dump) results = filter( lambda item : item != False, results) print len(results), "updated" print results print "\n\n" ##udpdate the growth plots ??? from growth import plotGrowth for r in results: try: withRevisions = statsCouch.get_file_info_withrev(r) plotGrowth(withRevisions,statsCouch,force=FORCE) ## notify McM for update !! if (withRevisions['pdmv_prep_id'].strip() not in ['No-Prepid-Found','','None']) and options.inspect and '_' not in withRevisions['pdmv_prep_id']: inspect = 'curl -s -k --cookie ~/private/prod-cookie.txt https://cms-pdmv.cern.ch/mcm/restapi/requests/inspect/%s' % withRevisions['pdmv_prep_id'] os.system(inspect) ## he we should trigger McM update if request is in done. ## because inspection on done doesn't exists. if (withRevisions['pdmv_type'] != 'Resubmission' and withRevisions['pdmv_prep_id'].strip() not in ['No-Prepid-Found', '', 'None', '_'] and withRevisions['pdmv_status_from_reqmngr'] == "normal-archived"): ## we should trigger this only if events_in_das was updated for done update_comm = 'curl -s -k --cookie ~/private/prod-cookie.txt https://cms-pdmv.cern.ch/mcm/restapi/requests/update_stats/%s/no_refresh' % withRevisions['pdmv_prep_id'] print "Triggering McM completed_evts syncing for a done request %s" % ( withRevisions['pdmv_prep_id']) os.system(update_comm) except: print "failed to update growth for", r print traceback.format_exc() print "\n\n" ## set in the log file #serves as forceupdated ! print "start time: ", start_time print "logging updating time:", time.asctime() l = open('stats.log','a') l.write(time.asctime()+'\n') l.close()