        print '                number of events      = ' + str(args.events)
        print '                use McM prepID        = ' + str(mcm)
        print '                skipexisting          = ' + str(skipexisting)
        print

    das_cmd = "/cvmfs/cms.cern.ch/common/dasgoclient"
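    # dasgoclient is the command-line client for CMS DAS (the Data
    # Aggregation System), used below to resolve dataset names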

    # if mcm is specified, retrieve dataset name from prepID:
    if mcm:
        if "/" in str(args.inputdataset): 
            print "not in McM prepID format, please check"
            sys.exit(1)
        # load McM
        sys.path.append('/afs/cern.ch/cms/PPD/PdmV/tools/McM/')
        from rest import restful
        mcm = restful()
        # retrieve request with given prepid
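        # temporarily redirect stdout to /dev/null: the McM rest client
        # prints progress messages that would clutter the output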
        temp = sys.stdout
        f = open('/dev/null', 'w')
        sys.stdout = f
        request = mcm.getA('requests', str(args.inputdataset))
        sys.stdout = temp
        if debug: print 'request prepid', request['prepid']
        # search dataset name as returned by mcm
        dataset_used = str(request['output_dataset'][0])
        primary_dataset_name = dataset_used.split('/')[1]
    else:
        # search dataset name as name + campaign + datatier
        primary_dataset_name = args.inputdataset.split('/')[1]
        command=das_cmd+" --limit=0 --query=\"dataset dataset=/"+primary_dataset_name+"/*"+args.campaign+"*/"+args.datatier+"\""
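        # dasgoclient prints one matching dataset name per line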
        dataset_used = commands.getstatusoutput(command)[1].split("\n")
Example #2
#!/usr/bin/env python

import argparse, getpass, re, sys
parser = argparse.ArgumentParser()
parser.add_argument("--user", default=getpass.getuser())
parser.add_argument("-v", "--verbose", action="store_true")
args = parser.parse_args()

sys.path.append('/afs/cern.ch/cms/PPD/PdmV/tools/McM/')
import rest

mcm = rest.restful()

requests = mcm.get("requests",
                   query="prepid=HIG-RunIIFall17wmLHEGS-*&actor=" + args.user)

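# the two chained-campaign patterns each request is expected to belong to:
# the flow without extra pileup and the PU2017 pileup flow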
firstchain = "HIG-chain_RunIIFall17wmLHEGS_flowRunIIFall17DRPremix_flowRunIIFall17MiniAODv2_flowRunIIFall17NanoAOD-[0-9]*"
secondchain = "HIG-chain_RunIIFall17wmLHEGS_flowRunIIFall17DRPremixPU2017_flowRunIIFall17MiniAODv2_flowRunIIFall17NanoAOD-[0-9]*"

for req in requests:
    if not req["member_of_chain"]: continue
    chains = [_ for _ in req["member_of_chain"] if "NanoAOD" in _]
    chains.sort(reverse=True)
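    # reverse lexicographic sort puts the no-PU chain first ('_' sorts
    # after 'P'), matching the order of the two patterns above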
    if len(chains) == 2 and re.match(firstchain, chains[0]) and re.match(
            secondchain, chains[1]):
        print "good:", req["prepid"]
    else:
        print "bad: ", req["prepid"]
        if args.verbose:
            if len(chains) == 1:
                print " ", "only one chain"
Example #3
import os
import sys

#if not os.path.isfile('rest.py'):
#    print "getting the mcm rest api"
#    os.system('ln -s /afs/cern.ch/cms/PPD/PdmV/tools/McM/rest.py')

from rest import restful

McM = restful(dev=False)

dataset = sys.argv[1]

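# look up the McM request whose output dataset matches the given one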
rs = McM.getA('requests', query='produce=%s' % dataset)
if len(rs) > 1:
    print "this cannot really be"
elif len(rs) == 0:
    print dataset, "is not produced via mcm"
else:
    print dataset, "produced by", rs[0]['prepid']
    r = rs[0]
    ## pull out the chains
    crs = McM.getA('chained_requests', query='contains=%s' % r['prepid'])
    if len(crs) > 1:
        print "unlikely to have more than one chain for an analysis dataset"
    elif len(crs) == 0:
        print r['prepid'], "is not in any chain"
    else:
        infos = []
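        # walk the chain from the last step back to the root request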
        for r in reversed(crs[0]['chain']):
            rr = McM.getA('requests', r)
Example #4
def main_do(options):
    logger.info("Running main")
    if options.check:
        logger.info('Check')
        # check whether this script is already running with the same parameters
        checks = ['ps -f -u $USER']
        for arg in sys.argv[1:]:
            checks.append('grep "%s"' % (arg.split('/')[-1].replace('--', '')))
        checks.append('grep -v grep')
        check = filter(None, os.popen("|".join(checks)).read().split('\n'))
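        # exactly one surviving line (this process itself) means no other
        # instance is running with the same arguments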
        if len(check) != 1:
            logger.error("Already running with that exact setting")
            logger.info(check)
            sys.exit(1)
        else:
            logger.info("ok to operate")

    start_time = time.asctime()
    global statsCouch, docs, FORCE
    # interface to the couchDB
    statsCouch = Interface(options.db + ':5984/stats')

    # get from stats couch the list of requests
    view = 'yearAgo' if options.do == 'update' else 'all'
    # in case we want to force update even older workflows
    if options.force:
        view = 'all'

    logger.info("Getting all stats ...")
    allDocs = statsCouch.get_view(view)
    docs = set([doc['id'] for doc in allDocs['rows']])
    # remove the _design/stats
    if view == 'all':
        docs = set(filter(lambda doc: not doc.startswith('_'), docs))

    logger.info("... done")

    nproc = 4
    limit = None
    if options.test:
        limit = 10

    if options.do == 'insert':
        logger.info('do = insert')
        # get from wm couch
        from statsMonitoring import parallel_test, get_requests_list
        logger.info("Getting all req ...")
        req_list = get_requests_list()
        logger.info("... done")

        # insert new requests, not already in stats couch into stats couch
        # insertAll(req_list,docs,options.search,limit)

        logger.info('Will filter')
        if options.search:
            req_list = filter(lambda req: options.search in req["request_name"], req_list)
            logger.info('%d requests after search' % (len(req_list)))

        # print "req_list: %s" % (req_list)
        # skip malformed ones
        req_list = filter(lambda req: "status" in req, req_list)
        logger.info('%d requests after skipping malformed' % (len(req_list)))

        # take only the ones not already in there
        req_list = filter(lambda req: req["request_name"] not in docs, req_list)
        logger.info('%d after taking only those not already in there' % (len(req_list)))

        # skip trying to insert aborted and rejected or failed
        # req_list = filter( lambda req : not req["status"] in ['aborted','rejected','failed','aborted-archived','rejected-archived','failed-archived'], req_list )
        req_list = filter(lambda req: not req["status"] in ['aborted', 'rejected', 'failed', None], req_list)
        logger.info('%d after skipping aborted, rejected, failed and None' % (len(req_list)))

        # do not update TaskChain request statuses
        # req_list = filter( lambda req : 'type' in req and req['type']!='TaskChain', req_list)
        # logger.info('Requests %d' % (len(req_list)))

        if limit:
            req_list = req_list[0:limit]
            logger.info('%d after limiting' % (len(req_list)))

        logger.info('Dispatching %d requests to %d processes' % (len(req_list), nproc))
        pool = multiprocessing.Pool(nproc)
        results = pool.map(insertOne, req_list)
        logger.info('End dispatching')

        results = filter(lambda item: item is not False, results)
        logger.info('%d inserted' % (len(results)))
        logger.info(str(results))
        """
        showme=''
        for r in results:
            showme+='\t'+r+'\n'
        print showme
        """
    elif options.do == 'kill' or options.do == 'list':
        logger.info('do = kill OR do = list')
        # get from wm couch
        from statsMonitoring import parallel_test, get_requests_list
        logger.info("Getting all req ...")
        req_list = get_requests_list()
        logger.info("... done")

        removed = []
        if options.search:
            req_list = filter(lambda req: options.search in req["request_name"], req_list)
            for r in req_list:
                logger.info("Found %s in status %s?" % (r['request_name'], (r['status'] if 'status' in r else 'undef')))
                if options.do == 'kill':
                    # print "killing",r['request_name'],"in status",(r['status'] if 'status' in r else 'undef'),"?"
                    docid = r['request_name']
                    if docid in docs and docid not in removed:
                        thisDoc = statsCouch.get_file_info(docid)
                        logger.info("Removing record for docid %s" % (docid))
                        statsCouch.delete_file_info(docid, thisDoc['_rev'])
                        removed.append(docid)
                    else:
                        logger.info("Nothing to kill")

    elif options.do == 'update':
        logger.info('do = update')
        __newest = True
        if options.search:
            __newest = False
        # get from wm couch
        from statsMonitoring import get_requests_list
        logger.info("Getting all req ...")
        req_list = get_requests_list(not_in_wmstats=options.nowmstats, newest=__newest)
        logger.info("... done")

        cookie_path = '/home/pdmvserv/private/prod_cookie.txt'
        if options.mcm:
            sys.path.append('/afs/cern.ch/cms/PPD/PdmV/tools/McM/')
            from rest import restful
            mcm = restful(dev=False, cookie=cookie_path)
            rs = mcm.getA('requests', query='status=submitted')
            rids = map(lambda d: d['prepid'], rs)

            logger.info("Got %d to update from mcm" % (len(rids)))
            # print len(docs),len(req_list)
            # print map( lambda docid : any( map(lambda rid : rid in doc, rids)), docs)
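            # keep only the stats documents whose workflow name contains
            # one of the submitted prepids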
            docs = filter(lambda docid: any(map(lambda rid: rid in docid, rids)), docs)
            if len(docs):
                # req_list = filter(lambda req: any(map(lambda rid: rid in req["request_name"], rids)), req_list)
                req_list = filter(lambda req: req['request_name'] in docs, req_list)

        if options.search:
            if options.force:
                FORCE = True
            docs = filter(lambda docid: options.search in docid, docs)
            if len(docs):
                # req_list = filter(lambda req: options.search in req["request_name"], req_list)
                req_list = filter(lambda req: req['request_name'] in docs, req_list)
                if len(req_list):
                    pprint.pprint(req_list)

        if limit:
            req_list = req_list[0:limit]

        request_dict = {}
        for request in req_list:
            if request['request_name'] in request_dict:
                request_dict[request['request_name']].append(request)
                logger.info('APPEND! %s' % (request['request_name']))
            else:
                request_dict[request['request_name']] = [request]

        logger.info("Dispatching %d requests to %d processes..." % (len(request_dict), nproc))
        pool = multiprocessing.Pool(nproc)
        results = pool.map(updateOneIt, request_dict.iteritems())

        logger.info("End dispatching")

        if options.search:
            dump = dumpSome(docs, limit)
            logger.info("Result from update with search")
            pprint.pprint(dump)

        results = filter(lambda item: item is not False, results)
        logger.info('%d updated' % (len(results)))
        logger.info(str(results))

        print "\n\n"
        # for r in results:
        #     try:
        #         withRevisions = statsCouch.get_file_info_withrev(r)
        #         # we shouldn't trigger mcm for ReRecos or RelVals, which don't exist there
        #         if any(el in withRevisions['pdmv_prep_id'].lower() for el in ['relval', 'rereco']):
        #             logger.info("NOT bothering McM for rereco or relval: %s" % (withRevisions['pdmv_prep_id']))
        #             continue

        #         # here we should trigger a McM update if the request is done,
        #         # because inspection doesn't exist for done requests.
        #         if (withRevisions['pdmv_type'] != 'Resubmission' and
        #                 withRevisions['pdmv_prep_id'].strip() not in ['No-Prepid-Found', '', 'None', '_'] and
        #                 withRevisions['pdmv_status_from_reqmngr'] == "normal-archived"):
        #             # we should trigger this only if events_in_das was updated for done
        #             logger.info("Triggering McM completed_evts syncing for a done request %s" % (r))
        #             update_comm = 'curl -s -k -L --cookie %s https://cms-pdmv.cern.ch/mcm/restapi/requests/fetch_stats_by_wf/%s' % (cookie_path, r)
        #             os.system(update_comm)
        #         else:
        #             logger.info('%s type (%s) is either Resubmission OR prepid (%s) is bad OR it\'s not normal-archived (%s)' % (r,
        #                                                                                                                          withRevisions['pdmv_type'],
        #                                                                                                                          withRevisions['pdmv_prep_id'],
        #                                                                                                                          withRevisions['pdmv_status_from_reqmngr']))
        #     except:
        #         logger.error("failed to update growth for %s" % (r))
        #         logger.error(str(traceback.format_exc()))

        print "\n\n"
        # set in the log file
        # serves as forceupdated !
        logger.info("start time: %s" % str(start_time))
        logger.info("logging updating time: %s" % str(time.asctime()))
        log_file = open('stats.log', 'a')
        log_file.write(time.asctime() + '\n')
        log_file.close()
Example #5
        print 'This is DRYRUN!'
else:
        print 'WARNING!'*10
        print 'REAL QUERIES WILL BE MADE!!!'
        print 'WARNING!'*10

if is_dev_instance:
        cookie_file = 'dev-cookie.txt'  #dev
        print 'Running on dev instance!'
else:
        cookie_file = 'cookie.txt'      #prod
        print 'WARNING!'*10
        print 'Running on prod instance!!!'
        print 'WARNING!'*10

mcm=restful(dev=is_dev_instance, cookie=cookie_file, debug=True)

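# fetch the list of physics working groups (PWGs) known to McM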
pwgs=mcm.get('restapi/users/get_pwg')['results']
# submit only these groups
#pwgs=['B2G','BPH','BTV','EXO']
print pwgs

ochain = ''
dchain = ''

N_REQUESTS_PER_TICKET = 30
PRIORITY_BLOCK = 1
TICKET_NOTE = "Fall17MiniAOD to Fall17MiniAODv2+NanoAOD central migration" 

#[2] Choose one campaign type
ochain = 'chain_RunIIFall17wmLHEGS_flowRunIIFall17DRPremixPU2017_flowRunIIFall17MiniAOD'
Example #6
File: prox.py Project: lundstrj/TTS

def init(self, url=None):
    self.rest = restful()
Example #8
#!/usr/bin/env python

import sys, math, getopt
import time
from collections import defaultdict
import pprint
import copy
import json
import csv


sys.path.append('/afs/cern.ch/cms/PPD/PdmV/tools/McM/')
from rest import restful
mcm=restful(dev=False)


def get_rootID(prepid):
    root_id = prepid 
    crs = mcm.getA('chained_requests', query='contains=%s'%(prepid))
    for cr in crs:
        root_id = cr['chain'][0]
        break
    return root_id
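
# example (illustrative prepid): get_rootID('TOP-RunIISpring15DR74-00001')
# returns the first prepid of the first chained_request containing it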

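# map pileup-scenario keys to their member campaigns
# (plain, wmLHE and pLHE variants of each DR74 campaign)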
chains = {'pu1':['RunIISpring15DR74Startup25ns','RunIISpring15DR74Startup25nswmLHE','RunIISpring15DR74Startup25nspLHE'], 
          'pu2':['RunIISpring15DR74Startup50ns','RunIISpring15DR74Startup50nswmLHE','RunIISpring15DR74Startup50nspLHE'], 
          'pu3':['RunIISpring15DR74Startup25nsRaw','RunIISpring15DR74Startup25nsRawwmLHE','RunIISpring15DR74Startup25nsRawpLHE'],           
          'pu4':['RunIISpring15DR74Startup50nsRaw','RunIISpring15DR74Startup50nsRawwmLHE','RunIISpring15DR74Startup50nsRawpLHE'],
          'pu5':['RunIISpring15DR74AsymptFlat10to5025nsRaw','RunIISpring15DR74AsymptFlat10to5025nsRawwmLHE','RunIISpring15DR74AsymptFlat10to5025nsRawpLHE'],
          'pu6':['RunIISpring15DR74StartupFlat10to5050nsRaw','RunIISpring15DR74StartupFlat10to5050nsRawwmLHE','RunIISpring15DR74StartupFlat10to5050nsRawpLHE']
          }
Example #9
def main_do( options ):

    if options.check:
        # check whether this script is already running with the same parameters
        checks=['ps -f -u $USER']
        for arg in sys.argv[1:]:
            checks.append('grep "%s"'%(arg.split('/')[-1].replace('--','')))
        checks.append('grep -v grep')
        check = filter(None, os.popen("|".join(checks)).read().split('\n'))
        if len(check)!=1:
            print "already running with that exact setting"
            print check
            sys.exit(1)
        else:
            print "ok to operate"

    start_time = time.asctime()
    global statsCouch, docs, FORCE
    #interface to the couchDB
    statsCouch = Interface(options.db+':5984/stats')


    ## get from stats couch the list of requests
    print "Getting all stats ..."
    allDocs = statsCouch.get_view('all')
    docs = [doc['id'] for doc in allDocs['rows']]
    #remove the _design/stats
    docs = filter(lambda doc : not doc.startswith('_'), docs)
    print "... done"

    nproc = 5
    limit = None
    if options.test:
        limit = 10

    if options.do == 'insert':
        ## get from wm couch
        from statsMonitoring import parallel_test,get_requests_list
        print "Getting all req ..."
        req_list = get_requests_list()
        print "... done"

        ## insert new requests, not already in stats couch into stats couch
        #insertAll(req_list,docs,options.search,limit)

        if options.search:
            req_list = filter( lambda req : options.search in req["request_name"], req_list )
            #print len(req_list)

        #skip malformed ones
        req_list = filter( lambda req : "status" in req, req_list )
        #print len(req_list)

        #take only the ones not already in there
        req_list = filter( lambda req : req["request_name"] not in docs, req_list )
        #print len(req_list)

        #skip trying to insert aborted and rejected or failed
        #req_list = filter( lambda req : not req["status"] in ['aborted','rejected','failed','aborted-archived','rejected-archived','failed-archived'], req_list )
        req_list = filter( lambda req : not req["status"] in ['aborted','rejected','failed'], req_list )
        #print len(req_list)

        #do not update TaskChain request statuses
        #req_list = filter( lambda req : 'type' in req and req['type']!='TaskChain', req_list)
        #print len(req_list)

        pprint.pprint(req_list)

        if limit:
            req_list = req_list[0:limit]
            #print len(req_list)

        newentries = 0
        print "Dispatching", len(req_list), "requests to", str(nproc), "processes..."
        pool = multiprocessing.Pool(nproc)
        results = pool.map(insertOne, req_list)
        print "End dispatching!"

        results = filter(lambda item : item != False, results)
        print len(results), "inserted"
        print str(results)
        """
        showme=''
        for r in results:
            showme+='\t'+r+'\n'
        print showme
        """
    elif options.do == 'kill' or options.do == 'list':
        ## get from wm couch
        from statsMonitoring import parallel_test,get_requests_list
        print "Getting all req ..."
        req_list = get_requests_list()
        print "... done"

        removed = []
        if options.search:
            req_list = filter(lambda req : options.search in req["request_name"], req_list)
            for r in req_list:
                print "Found", r['request_name'], "in status", (r['status'] if 'status' in r else 'undef'), "?"
                if options.do == 'kill':
                    #print "killing",r['request_name'],"in status",(r['status'] if 'status' in r else 'undef'),"?"
                    docid = r['request_name']
                    if docid in docs and not docid in removed:
                        thisDoc = statsCouch.get_file_info(docid)
                        print "removing record for docid", docid
                        statsCouch.delete_file_info(docid, thisDoc['_rev'])
                        removed.append(docid)
                    else:
                        print "nothing to kill"

    elif options.do == 'update':
        __newest = True
        if options.search:
            __newest = False
        ## get from wm couch
        from statsMonitoring import parallel_test,get_requests_list
        print "Getting all req ..."
        req_list = get_requests_list(not_in_wmstats=options.nowmstats, newest=__newest)
        print "... done"

        ## unthreaded
        #updateSeveral(docs,req_list,pattern=None)

        if options.mcm:
            sys.path.append('/afs/cern.ch/cms/PPD/PdmV/tools/McM/')
            from rest import restful
            mcm = restful(dev=False, cookie='/afs/cern.ch/user/p/pdmvserv/private/prod-cookie.txt')
            rs = mcm.getA('requests', query='status=submitted')
            rids = map(lambda d : d['prepid'], rs)

            print "Got", len(rids), "to update from mcm"
            #print len(docs),len(req_list)
            #print map( lambda docid : any( map(lambda rid : rid in doc, rids)), docs)
            docs = filter(lambda docid : any(map(lambda rid : rid in docid, rids)), docs)
            if not len(docs):
                req_list = filter(lambda req : any(map(lambda rid : rid in req["request_name"], rids)), req_list)

        if options.search:
            if options.force:
                FORCE = True
            docs = filter(lambda docid : options.search in docid, docs)
            if not len(docs):
                req_list = filter(lambda req : options.search in req["request_name"], req_list)
                if len(req_list):
                    pprint.pprint(req_list)
        if limit:
            docs = docs[0:limit]


        repeated_req_list = itertools.repeat(req_list, len(docs))
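        # pair every doc id with the full request list, one tuple per worker task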

        print "Dispatching", len(docs), "requests to", str(nproc), "processes..."
        pool = multiprocessing.Pool(nproc)
        results = pool.map(updateOneIt, itertools.izip(docs, repeated_req_list))

        print "End dispatching!"

        if options.search:
            dump = dumpSome(docs, limit)
            print "Result from update with search"
            pprint.pprint(dump)

        results = filter( lambda item : item != False, results)
        print len(results), "updated"
        print results

        print "\n\n"
        ## update the growth plots ???
        from growth import plotGrowth
        for r in results:
            try:
                withRevisions = statsCouch.get_file_info_withrev(r)
                plotGrowth(withRevisions,statsCouch,force=FORCE)
                ## notify McM for update !!
                if (withRevisions['pdmv_prep_id'].strip() not in ['No-Prepid-Found','','None']) and options.inspect and '_' not in withRevisions['pdmv_prep_id']:
                    inspect = 'curl -s -k --cookie ~/private/prod-cookie.txt https://cms-pdmv.cern.ch/mcm/restapi/requests/inspect/%s' % withRevisions['pdmv_prep_id']
                    os.system(inspect)
                ## here we should trigger a McM update if the request is done,
                ## because inspection doesn't exist for done requests.
                if (withRevisions['pdmv_type'] != 'Resubmission' and
                    withRevisions['pdmv_prep_id'].strip() not in ['No-Prepid-Found',
                            '', 'None', '_'] and
                    withRevisions['pdmv_status_from_reqmngr'] == "normal-archived"):
                    ## we should trigger this only if events_in_das was updated for done
                    update_comm = 'curl -s -k --cookie ~/private/prod-cookie.txt https://cms-pdmv.cern.ch/mcm/restapi/requests/update_stats/%s/no_refresh' % withRevisions['pdmv_prep_id']
                    print "Triggering McM completed_evts syncing for a done request %s" % (
                            withRevisions['pdmv_prep_id'])

                    os.system(update_comm)
            except:
                print "failed to update growth for", r
                print traceback.format_exc()


        print "\n\n"
        ## set in the log file
        #serves as forceupdated !
        print "start time: ", start_time
        print "logging updating time:", time.asctime()
        l = open('stats.log','a')
        l.write(time.asctime()+'\n')
        l.close()