コード例 #1
0
def rejector(url, specific, options=None):

    up = componentInfo()

    if specific.startswith('/'):
        pass
    else:
        wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject",spec
            return
        results=[]
        wfi = workflowInfo(url, wfo.name)
        reqMgrClient.invalidateWorkflow(url, wfo.name, current_status=wfi.request['RequestStatus'])
        #if wfi.request['RequestStatus'] in ['assignment-approved','new','completed']:
        #    #results.append( reqMgrClient.rejectWorkflow(url, wfo.name))
        #    reqMgrClient.rejectWorkflow(url, wfo.name)
        #else:
        #    #results.append( reqMgrClient.abortWorkflow(url, wfo.name))
        #    reqMgrClient.abortWorkflow(url, wfo.name)
        
        datasets = wfi.request['OutputDatasets']
        for dataset in datasets:
            if options.keep:
                print "keeping",dataset,"in its current status"
            else:
                results.append( setDatasetStatus(dataset, 'INVALID') )

        if all(map(lambda result : result in ['None',None,True],results)):
            wfo.status = 'forget'
            session.commit()
            print wfo.name,"and",datasets,"are rejected"
            if options and options.clone:
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    schema['ProcessingVersion']+=1
                else:
                    schema['ProcessingVersion']=2
                ##schema.pop('RequestDate') ## ok then, let's not reset the time stamp
                if options.Memory:
                    schema['Memory'] = options.Memory
                response = reqMgrClient.submitWorkflow(url, schema)
                m = re.search("details\/(.*)\'",response)
                if not m:
                    print "error in cloning",wfo.name
                    print response
                    return 
                newWorkflow = m.group(1)
                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfo.status = 'trouble'
                session.commit()
        else:
            print "error in rejecting",wfo.name,results
コード例 #2
0
ファイル: reject.py プロジェクト: amaltaro/WmAgentScripts
def main():
    usage = "\n       python %prog [options] [WORKFLOW_NAME]\n" \
            "WORKFLOW_NAME: if the list file is provided this should be empty\n"

    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--file', help='Text file of workflows to Reject and Clone', dest='file')
    parser.add_option('-c', '--clone', help='Are the workflows going to be cloned? The default value is False',action="store_true", dest='clone', default=False)
    parser.add_option('-i', '--invalidate', help='Invalidate datasets? The default value is False',action="store_true", dest='invalidate', default=False)
    parser.add_option("-u", "--user", dest="user",help="The user for creating the clone, if empty it will use the OS user running the script")
    parser.add_option("-g", "--group", dest="group", default='DATAOPS',help="The group for creating the clone, if empty it will, use 'DATAOPS' by default")
    (options, args) = parser.parse_args()

    # Check the arguments, get info from them
    if options.file:
        try:
            workflows = [l.strip() for l in open(options.file) if l.strip()]
        except:
            parser.error("Provide a valid file of workflows")
            sys.exit(1)
    elif len(args) >0:
        # name of workflow
        workflows = [args[0]]
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)

    if not options.user:
        # get os username by default
        uinfo = pwd.getpwuid(os.getuid())
        user = uinfo.pw_name
    else:
        user = options.user

    for workflow in workflows:
        try:
            workflowInfo = reqMgrClient.Workflow(workflow)
        except:
            print("The workflow name: "+ workflow+" is  not valid.")
            continue
        # invalidates workflow
        print("Invalidating the workflow: "+ workflow)
        reqMgrClient.invalidateWorkflow(url,workflow,workflowInfo.status)

        # invalidates datasets
        if options.invalidate:
            print("Invalidating datasets")
            datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
            for dataset in datasets:
                print(dataset)
                dbs3.setDatasetStatus(dataset, 'INVALID', files=True)

        # clones workflow
        if options.clone:
            print("Cloning workflow: "+ workflow)
            cloned = resubmit.cloneWorkflow(workflow, user, options.group)
    sys.exit(0);
コード例 #3
0
ファイル: completor.py プロジェクト: amaltaro/WmAgentScripts
def forceComplete(url, wfi):
    familly = getWorkflowById( url, wfi.request['PrepID'] ,details=True)
    for member in familly:
        ### if member['RequestName'] == wl['RequestName']: continue ## set himself out
        if member['RequestDate'] < wfi.request['RequestDate']: continue
        if member['RequestStatus'] in ['None',None]: continue
        ## then set force complete all members
        if member['RequestStatus'] in ['running-opened','running-closed']:
            #sendEmail("force completing","%s is worth force completing\n%s"%( member['RequestName'] , percent_completions))
            print "setting",member['RequestName'],"force-complete"
            reqMgrClient.setWorkflowForceComplete(url, member['RequestName'])
        elif member['RequestStatus'] in ['acquired','assignment-approved']:
            print "rejecting",member['RequestName']
            reqMgrClient.invalidateWorkflow(url, member['RequestName'], current_status=member['RequestStatus'])
コード例 #4
0
def forceComplete(url, wfi):
    familly = getWorkflowById(url, wfi.request['PrepID'], details=True)
    for member in familly:
        ### if member['RequestName'] == wl['RequestName']: continue ## set himself out
        if member['RequestDate'] < wfi.request['RequestDate']: continue
        if member['RequestStatus'] in ['None', None]: continue
        ## then set force complete all members
        if member['RequestStatus'] in ['running-opened', 'running-closed']:
            #sendEmail("force completing","%s is worth force completing\n%s"%( member['RequestName'] , percent_completions))
            print "setting", member['RequestName'], "force-complete"
            reqMgrClient.setWorkflowForceComplete(url, member['RequestName'])
        elif member['RequestStatus'] in ['acquired', 'assignment-approved']:
            print "rejecting", member['RequestName']
            reqMgrClient.invalidateWorkflow(
                url,
                member['RequestName'],
                current_status=member['RequestStatus'])
コード例 #5
0
def main():
    usage = "\n       python %prog [options] [WORKFLOW_NAME]\n" \
            "WORKFLOW_NAME: if the list file is provided this should be empty\n"

    parser = OptionParser(usage=usage)
    parser.add_option('-f',
                      '--file',
                      help='Text file of workflows to Reject and Clone',
                      dest='file')
    parser.add_option(
        '-c',
        '--clone',
        help='Are the workflows going to be cloned? The default value is False',
        action="store_true",
        dest='clone',
        default=False)
    parser.add_option('-i',
                      '--invalidate',
                      help='Invalidate datasets? The default value is False',
                      action="store_true",
                      dest='invalidate',
                      default=False)
    parser.add_option(
        "-u",
        "--user",
        dest="user",
        help=
        "The user for creating the clone, if empty it will use the OS user running the script"
    )
    parser.add_option(
        "-g",
        "--group",
        dest="group",
        default='DATAOPS',
        help=
        "The group for creating the clone, if empty it will, use 'DATAOPS' by default"
    )
    parser.add_option(
        "-m",
        "--memory",
        dest="memory",
        help=
        "Set max memory for the clone. At assignment, this will be used to calculate maxRSS = memory*1024"
    )

    (options, args) = parser.parse_args()

    # Check the arguments, get info from them
    if options.file:
        try:
            workflows = [l.strip() for l in open(options.file) if l.strip()]
        except:
            parser.error("Provide a valid file of workflows")
            sys.exit(1)
    elif len(args) > 0:
        # name of workflow
        workflows = [args[0]]
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)

    if not options.user:
        # get os username by default
        uinfo = pwd.getpwuid(os.getuid())
        user = uinfo.pw_name
    else:
        user = options.user

    for workflow in workflows:
        try:
            workflowInfo = reqMgrClient.Workflow(workflow)
        except:
            print("The workflow name: " + workflow + " is  not valid.")
            continue
        # invalidates workflow
        print("Invalidating the workflow: " + workflow)
        reqMgrClient.invalidateWorkflow(url, workflow, workflowInfo.status)

        # invalidates datasets
        if options.invalidate:
            print("Invalidating datasets")
            datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
            for dataset in datasets:
                print(dataset)
                dbs3.setDatasetStatus(dataset, 'INVALID', files=True)

        # clones workflow
        if options.clone:
            print("Cloning workflow: " + workflow)
            if options.memory:
                mem = options.memory
            else:
                mem = workflowInfo.info["Memory"]
            cloned = resubmit.cloneWorkflow(workflow,
                                            user,
                                            options.group,
                                            memory=mem)
    sys.exit(0)
コード例 #6
0
def invalidator(url, invalid_status='INVALID'):
    use_mcm = True
    up = componentInfo(mcm=use_mcm)
    if not up.check(): return
    mcm = McMClient(dev=False)

    invalids = mcm.getA('invalidations',query='status=announced')
    print len(invalids),"Object to be invalidated"
    text_to_batch = defaultdict(str)
    text_to_request = defaultdict(str)
    for invalid in invalids:
        acknowledge= False
        pid = invalid['prepid']
        batch_lookup = invalid['prepid']
        text = ""
        if invalid['type'] == 'request':
            wfn = invalid['object']
            print "need to invalidate the workflow",wfn
            wfo = session.query(Workflow).filter(Workflow.name == wfn).first()
            if wfo:
                ## set forget of that thing (although checkor will recover from it)
                print "setting the status of",wfo.status,"to forget"
                wfo.status = 'forget'
                session.commit()
            else:
                ## do not go on like this, do not acknoledge it
                print wfn,"is set to be rejected, but we do not know about it yet"
                #continue
            wfi = workflowInfo(url, wfn)
            success = "not rejected"
            ## to do, we should find a way to reject the workflow and any related acdc
            success = reqMgrClient.invalidateWorkflow(url, wfn, current_status = wfi.request['RequestStatus'])
            ## need to find the whole familly and reject the whole gang
            familly = getWorkflowById( url, wfi.request['PrepID'] , details=True)
            for fwl in familly:
                ## take out all acdc
                if fwl['RequestDate'] < wfi.request['RequestDate']:continue
                if fwl['RequestType']!='Resubmission': continue
                print "rejecting",fwl['RequestName']
                success = reqMgrClient.invalidateWorkflow(url, fwl['RequestName'], current_status=fwl['RequestStatus'])
                print success
            wfi.sendLog('invalidator',"rejection is performed from McM invalidations request")
            acknowledge= True
            text = "The workflow %s (%s) was rejected due to invalidation in McM" % ( wfn, pid )
            batch_lookup = wfn ##so that the batch id is taken as the one containing the workflow name
        elif invalid['type'] == 'dataset':
            dataset = invalid['object']

            if '?' in dataset: continue
            if 'None' in dataset: continue
            if 'None-' in dataset: continue
            if 'FAKE-' in dataset: continue

            print "setting",dataset,"to",invalid_status
            success = setDatasetStatus(dataset , invalid_status )
            if success:
                acknowledge= True
                text = "The dataset %s (%s) was set INVALID due to invalidation in McM" % ( dataset, pid )
            else:
                print "invalidation of",dataset,"did not go so well"
        else:
            print "\t\t",invalid['type']," type not recognized"

        if acknowledge:
            ## acknoldge invalidation in mcm, provided we can have the api
            print "acknowledgment to mcm"
            mcm.get('/restapi/invalidations/acknowledge/%s'%( invalid['_id'] ))
            # prepare the text for batches
            batches = []
            batches.extend(mcm.getA('batches',query='contains=%s'%batch_lookup))
            batches = filter(lambda b : b['status'] in ['announced','done','reset'], batches)
            if len(batches):
                bid = batches[-1]['prepid']
                print "batch nofication to",bid
                text_to_batch[bid] += text+"\n\n"
            # prepare the text for requests
            text_to_request[pid] += text+"\n\n"

    for bid,text in text_to_batch.items():    
        if not text: continue
        text += '\n This is an automated message'
        mcm.put('/restapi/batches/notify',{ "notes" : text, "prepid" : bid})
        pass
    for pid,text in text_to_request.items():
        if not text: continue
        text += '\n This is an automated message'
        mcm.put('/restapi/requests/notify',{ "message" : text, "prepids" : [pid]})
コード例 #7
0
ファイル: rejector.py プロジェクト: dabercro/WmAgentScripts
def rejector(url, specific, options=None):

    #use_mcm = True
    #up = componentInfo(mcm=use_mcm, soft=['mcm'])
    up = componentInfo()
    if not up.check(): return
    #use_mcm = up.status['mcm']
    #mcm = McMClient(dev=False) if use_mcm else None

    if specific and specific.startswith('/'):
        ## this is for a dataset
        print setDatasetStatus(specific, 'INVALID')
        return

    if options.filelist:
        wfs = []
        for line in filter(None, open(options.filelist).read().split('\n')):
            print line
            wfs.extend( session.query(Workflow).filter(Workflow.name.contains(line)).all())
    elif specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
    else:
        wfs = session.query(Workflow).filter(Workflow.status == 'assistance-clone').all()
        #wfs.extend( session.query(Workflow).filter(Workflow.status == 'assistance-reject').all())
        ## be careful then on clone case by case
        options.clone = True
        print "not supposed to function yet"
        return 

    print len(wfs),"to reject"

    if len(wfs)>1:
        print "\n".join( [wfo.name for wfo in wfs] )
        answer = raw_input('Reject these')
        if not answer.lower() in ['y','yes']:
            return
        
    for wfo in wfs:
        #wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject",spec
            return
        results=[]
        wfi = workflowInfo(url, wfo.name)

        datasets = set(wfi.request['OutputDatasets'])
        reqMgrClient.invalidateWorkflow(url, wfo.name, current_status=wfi.request['RequestStatus'])

        comment=""
        if options.comments: comment = ", reason: "+options.comments

        wfi.sendLog('rejector','invalidating the workflow by unified operator%s'%comment)
        ## need to find the whole familly and reject the whole gang
        familly = getWorkflowById( url, wfi.request['PrepID'] , details=True)
        for fwl in familly:
            if fwl['RequestDate'] < wfi.request['RequestDate']: continue
            if fwl['RequestType']!='Resubmission': continue
            ## does not work on second order acd
            if 'OriginalRequestName' in fwl and fwl['OriginalRequestName'] != wfi.request['RequestName']: continue
            print "rejecting",fwl['RequestName']
            reqMgrClient.invalidateWorkflow(url, fwl['RequestName'], current_status=fwl['RequestStatus'], cascade=False)
            datasets.update( fwl['OutputDatasets'] )

        for dataset in datasets:
            if options.keep:
                print "keeping",dataset,"in its current status"
            else:
                results.append( setDatasetStatus(dataset, 'INVALID') )
                pass


        if all(map(lambda result : result in ['None',None,True],results)):
            print wfo.name,"and",datasets,"are rejected"
            if options and options.clone:
                wfo.status = 'trouble'
                session.commit()                
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    schema['ProcessingVersion'] = int(schema['ProcessingVersion'])+1 ## dubious str->int conversion
                else:
                    schema['ProcessingVersion']=2
                for k in schema.keys():
                    if k.startswith('Team'):
                        schema.pop(k)
                    if k.startswith('checkbox'):
                        schema.pop(k)

                ## a few tampering of the original request
                if options.Memory:
                    if schema['RequestType'] == 'TaskChain':
                        it=1
                        while True:
                            t = 'Task%d'%it
                            it+=1
                            if t in schema:
                                schema[t]['Memory'] = options.Memory
                            else:
                                break
                    else:
                        schema['Memory'] = options.Memory
                        
                if options.Multicore:
                    ## to do : set it properly in taslchains
                    schema['Multicore'] = options.Multicore

                if options.deterministic:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['DeterministicPileup']  = True
                if options.EventsPerJob:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['EventsPerJob'] = options.EventsPerJob
                    else:
                        schema['EventsPerJob'] = options.EventsPerJob
                if options.EventAwareLumiBased:
                    schema['SplittingAlgo'] = 'EventAwareLumiBased'
                if options.TimePerEvent:
                    schema['TimePerEvent'] = options.TimePerEvent

                if options.ProcessingString:
                    schema['ProcessingString'] = options.ProcessingString
                if options.AcquisitionEra:
                    schema['AcquisitionEra'] = options.AcquisitionEra
                if options.runs:
                    schema['RunWhitelist'] = map(int,options.runs.split(','))
                if options.PrepID:
                    schema['PrepID'] =options.PrepID

                if schema['RequestType'] == 'TaskChain' and options.no_output:
                    ntask = schema['TaskChain']
                    for it in range(1,ntask-1):
                        schema['Task%d'%it]['KeepOutput'] = False
                    schema['TaskChain'] = ntask-1
                    schema.pop('Task%d'%ntask)

                ## update to the current priority
                schema['RequestPriority'] = wfi.request['RequestPriority']

                ## drop shit on the way to reqmgr2
                paramBlacklist = ['BlockCloseMaxEvents', 'BlockCloseMaxFiles', 'BlockCloseMaxSize', 'BlockCloseMaxWaitTime',
                                  'CouchWorkloadDBName', 'CustodialGroup', 'CustodialSubType', 'Dashboard',
                                  'GracePeriod', 'HardTimeout', 'InitialPriority', 'inputMode', 'MaxMergeEvents', 'MaxMergeSize',
                                  'MaxRSS', 'MaxVSize', 'MinMergeSize', 'NonCustodialGroup', 'NonCustodialSubType',
                                  'OutputDatasets', 'ReqMgr2Only', 'RequestDate' 'RequestorDN', 'RequestName', 'RequestStatus',
                                  'RequestTransition', 'RequestWorkflow', 'SiteWhitelist', 'SoftTimeout', 'SoftwareVersions',
                                  'SubscriptionPriority', 'Team', 'timeStamp', 'TrustSitelists', 'TrustPUSitelists',
                                  'TotalEstimatedJobs', 'TotalInputEvents', 'TotalInputLumis', 'TotalInputFiles']
                for p in paramBlacklist:
                    if p in schema:
                        schema.pop( p )
                        #pass
                print "submitting"
                if (options.to_stepchain and (schema['RequestType']=='TaskChain')):
                    ## transform the schema into StepChain schema
                    print "Transforming a TaskChain into a StepChain"
                    schema['RequestType'] = 'StepChain'
                    schema['StepChain'] = schema.pop('TaskChain')
                    step=1
                    while True:
                        if 'Task%d'%step in schema:
                            schema['Step%d'%step] = schema.pop('Task%d'%step)
                            schema['Step%d'%step]['StepName'] = schema['Step%d'%step].pop('TaskName')
                            if 'InputTask' in schema['Step%d'%step]:
                                schema['Step%d'%step]['InputStep'] = schema['Step%d'%step].pop('InputTask')
                            if not 'KeepOutput' in schema['Step%d'%step]:
                                ## this is a weird translation capability. Absence of keepoutput in step means : keep the output. while in TaskChain absence means : drop
                                schema['Step%d'%step]['KeepOutput'] = False
                            step+=1
                        else:
                            break


                print json.dumps( schema, indent=2 )
                newWorkflow = reqMgrClient.submitWorkflow(url, schema)
                if not newWorkflow:
                    print "error in cloning",wfo.name
                    print json.dumps( schema, indent=2 )
                    return 
                print newWorkflow

                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfi.sendLog('rejector','Cloned into %s by unified operator %s'%( newWorkflow, comment ))
                wfi.notifyRequestor('Cloned into %s by unified operator %s'%( newWorkflow, comment ),do_batch=False)
            else:
                wfo.status = 'forget'
                wfi.notifyRequestor('Rejected by unified operator %s'%( comment ),do_batch=False)
                session.commit()

        else:
            print "error in rejecting",wfo.name,results
コード例 #8
0
ファイル: actor.py プロジェクト: DAMason/WmAgentScripts
def actor(url,options=None):
    
    if userLock('actor'): return
    
    up = componentInfo(mcm=False, soft=['mcm'])
    if not up.check(): return
    
   # CI = campaignInfo()
    SI = siteInfo()
    UC = unifiedConfiguration()
    
    # Need to look at the actions page https://vocms0113.cern.ch:80/getaction (can add ?days=20) and perform any actions listed
    try:
        action_list = json.loads(os.popen('curl -s -k https://vocms0113.cern.ch:80/getaction?days=15').read())
        ## now we have a list of things that we can take action on
    except:
        print "Not able to load action list :("
        sendLog('actor','Not able to load action list', level='critical')
        return

    print action_list
    if not action_list:
        print "EMPTY!"
        return

    for wfname in action_list:
        print '-'*100
        print "Looking at",wfname,"for recovery options"

        to_clone = False
        to_acdc = False
        for key in action_list[wfname]:
            if key == 'Parameters':
                tasks =  action_list[wfname][key]
            elif key == 'Action' and action_list[wfname][key] == 'acdc':
                print "Going to create ACDCs for ", wfname
                to_acdc = True
            elif key == 'Action' and action_list[wfname][key] == 'clone':
                print "Going to clone ", wfname
                to_clone = True

        if not to_acdc and not to_clone:
            sendLog('actor','Action submitted for something other than acdc and clone for workflow %s'%wfname,level='critical')
            print "Can only do acdcs and clones! Skipping workflow ",wfname
            continue
        if not tasks:
            sendLog('actor','Empty action submitted for workflow %s'%wfname,level='critical')
            print "Moving on. Parameters is blank for " + wfname
            continue

        wfi = workflowInfo(url, wfname)

        recover = True
        message_to_ops = ""
        message_to_user = ""

#===========================================================
        if to_clone and options.do:
            print "Let's try kill and clone: "
            wfi.sendLog('actor','Going to clone %s'%wfname)
            results=[]
            datasets = set(wfi.request['OutputDatasets'])

            comment=""

            if 'comment' in tasks: comment = ", reason: "+ tasks['comment']
            wfi.sendLog('actor',"invalidating the workflow by traffic controller %s"%comment)

            #Reject all workflows in the family
            #first reject the original workflow.
            reqMgrClient.invalidateWorkflow(url, wfi.request['RequestName'], current_status=wfi.request['RequestStatus'], cascade=False)
            #Then reject any ACDCs associated with that workflow
            if 'ACDCs' in action_list[wfname]:
                children = action_list[wfname]['ACDCs']
                for child in children:
                    wfi.sendLog('actor',"rejecting %s"%child)
                    wfi_acdc = workflowInfo(url, child)
                    reqMgrClient.invalidateWorkflow(url, wfi_acdc.request['RequestName'], current_status=wfi_acdc.request['RequestStatus'], cascade=False)
                    datasets.update( wfi_acdc.request['OutputDatasets'] )
            #Invalidate all associated output datasets
            for dataset in datasets:
                results.append( setDatasetStatus(dataset, 'INVALID') )

            if all(map(lambda result : result in ['None',None,True],results)):
                wfi.sendLog('actor',"%s and children are rejected"%wfname)

            cloned = None
            try:    
                cloned =  singleClone(url, wfname, tasks, comment, options.do)
            except:
                sendLog('actor','Failed to create clone for %s! Check logs for more information. Action will need to be resubmitted.'%wfname,level='critical')
                wfi.sendLog('actor','Failed to create clone for %s!'%wfname)
                remove_action(wfname)
            if not cloned:
                recover = False
                wfi.sendLog('actor','Failed to create clone for %s!'%wfname)
                sendLog('actor','Failed to create clone for %s!'%wfname,level='critical')

            else:
                wfi.sendLog('actor',"Workflow %s cloned"%wfname)


#===========================================================
        elif to_acdc:
            if 'AllSteps' in tasks:
                allTasksDefaults = tasks['AllSteps']
                tasks.pop('AllSteps')
                for setting in allTasksDefaults:
                    for task in tasks:
                        if setting in tasks[task]:
                            tasks[task][setting] = allTasksDefaults[setting]
                        else:
                                tasks[task].append({setting:allTasksDefaults[setting]})
            print "Tasks is "
            print tasks

            all_tasks = wfi.getAllTasks()

            ## need a way to verify that this is the first round of ACDC, since the second round will have to be on the ACDC themselves
        
            try:
                WMErr = wfi.getWMErrors()
#               print WMErr
            except:
                sendLog('actor','Cannot create ACDCS for %s because WMErr cannot be reached.'%wfname,level='critical')
                continue
            if not WMErr:
                sendLog('actor','Cannot create ACDCS for %s because WMErr is blank.'%wfname,level='critical')
                print "Moving on. WMErr is blank"
                continue

            try:
                where_to_run, missing_to_run,missing_to_run_at =  wfi.getRecoveryInfo()
                print "Where to run = "
                print where_to_run
            except:
                sendLog('actor','Cannot create ACDCS for %s because recovery info cannot be found.'%wfname,level='critical')
                print "Moving on. Cannot access recovery info for " + wfname
                continue
            if not where_to_run:
                sendLog('actor','Cannot create ACDCS for %s because site list cannot be found.'%wfname,level='critical')
                print "Moving on. where to run is blank"
                continue

            message_to_ops = ""
            message_to_user = ""
        
            num_tasks_to_recover = 0
        
            for task in WMErr:
                if 'LogCollect' in task: continue
                if 'Cleanup' in task: continue
                if not 'jobfailed' in WMErr[task]:
                    continue
                else:
                    num_tasks_to_recover += 1
#                print "Task to recover: " + task

            if not num_tasks_to_recover:
                print "\tno error for",wfname
#            recover = False
        
            if 'LheInputFiles' in wfi.request and wfi.request['LheInputFiles']:
            ## we do not try to recover pLHE
                sendLog('actor','Cannot create ACDCS for %s because it is a pLHE workflow.'%wfname,level='critical')
                print "We don't try to recover pLHE. Moving on."
                recover = False
        #            sendEmail('cannot submit action', '%s is a pLHE workflow. We do not try to recover pLHE'%wfname)


#        if wfi.request['RequestType'] in ['ReReco']:
#            recover= False
#            print 'cannot submit action. ReReco'
        #   sendEmail('cannot submit action', '%s is request type ReReco'%wfname)

            recovering = set()
            for task in tasks:
                assign_to_sites = set()
                print "Task names is " + task
                fulltaskname = '/' + wfname + '/' + task
#                print "Full task name is " + fulltaskname
                wrong_task = False
                for task_info in all_tasks:
                    if fulltaskname == task_info.pathName:
                        if task_info.taskType not in ['Processing','Production','Merge']:
                            wrong_task=True
                            wfi.sendLog('actor', "Skipping task %s because the taskType is %s. Can only ACDC Processing, Production, or Merge tasks"%( fulltaskname, task_info.taskType))
                if wrong_task:
                    continue
                print tasks[task]
                actions = tasks[task]
                for action in actions:
                    if action.startswith('sites'):
                        if type(actions[action]) != list:
                            assign_to_sites=[SI.SE_to_CE(actions[action])]
                        else:
                            assign_to_sites=list(set([SI.SE_to_CE(site) for site in actions[action]]))
#                    if action.startswith('mem') and actions[action] != "" and actions[action] != 'Same' and wfi.request['RequestType'] in ['TaskChain']:
#                        recover = False;
#                        print  "Skipping %s for now until Allie fixes memory parameter for TaskChain ACDCs."%wfname
#                        wfi.sendLog('actor',"Skipping %s for now until Allie fixes memory parameter for TaskChain ACDCs."%wfname)
                if not 'sites' in actions:
                    assign_to_sites = list(set([SI.SE_to_CE(site) for site in where_to_run[task]]))
                    print "Found",sorted(assign_to_sites),"as sites where to run the ACDC at, from the acdc doc of ",wfname
                print "Going to run at",sorted(assign_to_sites)
                if recover:
                    print "Initiating recovery"
                    acdc = singleRecovery(url, fulltaskname, wfi.request, actions, do = options.do)
                    if not acdc:
                        if options.do:
                            if recovering:
                                print wfname + " has been partially ACDC'ed. Needs manual attention."
                                sendLog('actor', "%s has had %s/%s recoveries %s only"%( wfname, len(recovering), num_tasks_to_recover, list(recovering)), level='critical')
                                wfi.sendLog('actor', "%s has had %s/%s recoveries %s only"%( wfname, len(recovering), num_tasks_to_recover, list(recovering)))
                                break
                            else:
                                print wfname + " failed recovery once"
                                recover = False
                                break
                        else:
                            print "no action to take further"
#                        sendLog('recoveror', "ACDC for %s can be done automatically"% wfname, level='critical')
                            continue

                    else: #ACDC was made correctly. Now we have to assign it.
                        wfi.sendLog('actor','ACDC created for task %s. Actions taken \n%s'%(fulltaskname,list(actions)))
                        team = wfi.request['Teams'][0]
                        parameters={
                        'SiteWhitelist' : sorted(assign_to_sites),
                        'AcquisitionEra' : wfi.acquisitionEra(),
                        'ProcessingString' :  wfi.processingString(),
                        'MergedLFNBase' : wfi.request['MergedLFNBase'],
                        'ProcessingVersion' : wfi.request['ProcessingVersion'],
                        }
                    ## hackery for ACDC merge assignment
                        if wfi.request['RequestType'] == 'TaskChain' and 'Merge' in task.split('/')[-1]:
                            parameters['AcquisitionEra'] = None
                            parameters['ProcessingString'] = None

                ## xrootd setttings on primary and secondary 
                        if 'xrootd' in actions:
                            if actions['xrootd'] == 'enabled':
                                print "Going to assign via xrootd"
                                parameters['TrustSitelists'] = True
                            elif actions['xrootd'] == 'disabled':
                                parameters['TrustSitelists'] = False
                        elif ('TrustSitelists' in wfi.request and wfi.request['TrustSitelists']=='true'):
                            parameters['TrustSitelists'] = True
                        else:
                            parameters['TrustSitelists'] = False

                        if 'TrustPUSitelists' in wfi.request and wfi.request['TrustPUSitelists']:
                            parameters['TrustPUSitelists'] = True

                        if options.ass:
                            print "really doing the assignment of the ACDC",acdc
                            parameters['execute']=True
                            wfi.sendLog('actor',"%s  was assigned for recovery"% acdc)
                        else:
                            print "no assignment done with this ACDC",acdc
                            sendLog('actor',"%s needs to be assigned"%(acdc), level='critical')
                            continue
 #                       print parameters
                        result = reqMgrClient.assignWorkflow(url, acdc, team, parameters)
                        if not result:
                            print acdc,"was not assigned"
                            sendLog('actor',"%s needs to be assigned"%(acdc), level='critical')
                        else:
                            recovering.add( acdc )
                        wfi.sendLog('actor',"ACDCs created for %s"%wfname)
        #===========================================================
        
        
        if recover and options.do:
            remove_action(wfname)

        if message_to_user:
            print wfname,"to be notified to user(DUMMY)",message_to_user

        if message_to_ops:
            print 'message'
            #sendEmail( "notification in recoveror" , message_to_ops, destination=['*****@*****.**'])
        #            sendLog('recoveror',message_to_ops,level='warning')



    return
コード例 #9
0
def actor(url, options=None):

    if moduleLock(wait=False, silent=True)(): return
    if userLock('actor'): return

    up = componentInfo(soft=['mcm'])
    if not up.check(): return

    # CI = campaignInfo()
    SI = siteInfo()
    UC = unifiedConfiguration()
    WC = wtcClient()

    action_list = WC.get_actions()
    if action_list is None:
        print "Not able to load action list"
        sendLog('actor', 'Not able to load action list', level='critical')
        return

    if options.actions:
        action_list = json.loads(open(options.actions).read())

    print json.dumps(action_list, indent=2)
    if not action_list:
        print "EMPTY!"
        return

    wf_list = action_list.keys()
    print json.dumps(sorted(wf_list), indent=2)
    if options.spec:
        wf_list = [wf for wf in wf_list if options.spec in wf]

    max_per_round = UC.get('max_per_round').get('actor', None)
    if max_per_round:
        random.shuffle(wf_list)
        wf_list = wf_list[:max_per_round]

    for wfname in wf_list:
        print '-' * 100
        print "Looking at", wfname, "for recovery options"

        to_clone = False
        to_acdc = False
        to_force = False
        to_hold = False
        something_to_do = False
        tasks = action_list[wfname].get('Parameters', None)
        to_acdc = action_list[wfname].get('Action', None) == 'acdc'
        to_clone = action_list[wfname].get('Action', None) == 'clone'
        to_force = action_list[wfname].get(
            'Action', None) == 'special' and action_list[wfname].get(
                'Parameters', {}).get('action', None) in ['by-pass', 'bypass']
        to_hold = action_list[wfname].get(
            'Action', None) == 'special' and action_list[wfname].get(
                'Parameters', {}).get('action', None) in ['onhold', 'on-hold']

        if not to_acdc and not to_clone and not to_force and not to_hold:
            sendLog(
                'actor',
                'Action submitted for something other than acdc, clone, bypass or hold for workflow %s'
                % wfname,
                level='critical')
            print json.dumps(action_list[wfname], indent=2)
            continue

        if not tasks and to_acdc:
            sendLog('actor',
                    'Empty action submitted for workflow %s' % wfname,
                    level='critical')
            print "Moving on. Parameters is blank for " + wfname
            continue

        wfi = workflowInfo(url, wfname)

        recover = True
        message_to_ops = ""
        message_to_user = ""

        #===========================================================
        if to_clone and options.do:
            print "Let's try kill and clone: "
            wfi.sendLog('actor', 'Going to clone %s' % wfname)
            results = []
            datasets = set(wfi.request['OutputDatasets'])

            comment = ""

            if 'comment' in tasks: comment = ", reason: " + tasks['comment']
            wfi.sendLog(
                'actor',
                "invalidating the workflow by traffic controller %s" % comment)

            #Reject all workflows in the family
            #first reject the original workflow.
            reqMgrClient.invalidateWorkflow(
                url,
                wfi.request['RequestName'],
                current_status=wfi.request['RequestStatus'],
                cascade=False)
            #Then reject any ACDCs associated with that workflow
            family = getWorkflowById(url, wfi.request['PrepID'], details=True)
            for fwl in family:
                print "rejecting", fwl['RequestName'], fwl['RequestStatus']
                wfi.sendLog(
                    'actor', "rejecting %s, previous status %s" %
                    (fwl['RequestName'], fwl['RequestStatus']))
                reqMgrClient.invalidateWorkflow(
                    url,
                    fwl['RequestName'],
                    current_status=fwl['RequestStatus'],
                    cascade=False)
                datasets.update(fwl['OutputDatasets'])
            #Invalidate all associated output datasets
            for dataset in datasets:
                results.append(setDatasetStatus(dataset, 'INVALID'))

            if all(map(lambda result: result in ['None', None, True],
                       results)):
                wfi.sendLog('actor', "%s and children are rejected" % wfname)

            cloned = None
            try:
                cloned = singleClone(url, wfname, tasks, comment, options.do)
            except Exception as e:
                sendLog(
                    'actor',
                    'Failed to create clone for %s! Check logs for more information. Action will need to be resubmitted.'
                    % wfname,
                    level='critical')
                wfi.sendLog('actor', 'Failed to create clone for %s!' % wfname)
                print str(e)
                ##let's not remove the action other the workflow goes to "trouble" and the WTC cannot set the action again
            if not cloned:
                recover = False
                wfi.sendLog('actor', 'Failed to create clone for %s!' % wfname)
                sendLog('actor',
                        'Failed to create clone for %s!' % wfname,
                        level='critical')

            else:
                wfi.sendLog('actor', "Workflow %s cloned" % wfname)

#===========================================================
        elif to_force:
            wfi.sendLog('actor',
                        'Bypassing from workflow traffic controler request')
            forcing = json.loads(
                open(
                    '/afs/cern.ch/user/v/vlimant/public/ops/forcecomplete.json'
                ).read())
            forcing.append(wfname)
            open('/afs/cern.ch/user/v/vlimant/public/ops/forcecomplete.json',
                 'w').write(json.dumps(sorted(set(forcing))))
        elif to_hold:
            wfi.sendLog('actor',
                        'Holding on workflow traffic controler request')
            holding = json.loads(
                open('/afs/cern.ch/user/v/vlimant/public/ops/onhold.json').
                read())
            holding.append(wfname)
            open('/afs/cern.ch/user/v/vlimant/public/ops/onhold.json',
                 'w').write(json.dumps(sorted(set(holding))))
#===========================================================
        elif to_acdc:
            if 'AllSteps' in tasks:
                allTasksDefaults = tasks['AllSteps']
                tasks.pop('AllSteps')
                for setting in allTasksDefaults:
                    for task in tasks:
                        if setting in tasks[task]:
                            tasks[task][setting] = allTasksDefaults[setting]
                        else:
                            tasks[task].append(
                                {setting: allTasksDefaults[setting]})
            print "Tasks is "
            print json.dumps(tasks, indent=2)

            all_tasks = wfi.getAllTasks()

            ## need a way to verify that this is the first round of ACDC, since the second round will have to be on the ACDC themselves

            try:
                WMErr = wfi.getWMErrors()
#               print WMErr
            except:
                sendLog(
                    'actor',
                    'Cannot create ACDCS for %s because WMErr cannot be reached.'
                    % wfname,
                    level='critical')
                continue
            if not WMErr:
                wfi.sendLog('actor', 'WMErrors is blank for %s.' % wfname)
                print "FYI getWMErrors is blank. Presumably there are only unreported errors"
#                continue

            try:
                where_to_run, missing_to_run, missing_to_run_at = wfi.getRecoveryInfo(
                )
                print "Where to run = "
                print where_to_run
            except:
                sendLog(
                    'actor',
                    'Cannot create ACDCS for %s because recovery info cannot be found.'
                    % wfname,
                    level='critical')
                print "Moving on. Cannot access recovery info for " + wfname
                continue
            if not where_to_run:
                sendLog(
                    'actor',
                    'Cannot create ACDCS for %s because site list cannot be found.'
                    % wfname,
                    level='critical')
                print "Moving on. where to run is blank"
                continue

            message_to_ops = ""
            message_to_user = ""

            num_tasks_to_recover = 0

            if WMErr:
                for task in WMErr:
                    if 'LogCollect' in task: continue
                    if 'Cleanup' in task: continue
                    if not 'jobfailed' in WMErr[task]:
                        continue
                    else:
                        num_tasks_to_recover += 1
#                print "Task to recover: " + task

            if not num_tasks_to_recover:
                print "\tno error for", wfname
#            recover = False

            if 'LheInputFiles' in wfi.request and wfi.request['LheInputFiles']:
                ## we do not try to recover pLHE
                sendLog(
                    'actor',
                    'Cannot create ACDCS for %s because it is a pLHE workflow.'
                    % wfname,
                    level='critical')
                print "We don't try to recover pLHE. Moving on."
                recover = False
        #            sendEmail('cannot submit action', '%s is a pLHE workflow. We do not try to recover pLHE'%wfname)

#        if wfi.request['RequestType'] in ['ReReco']:
#            recover= False
#            print 'cannot submit action. ReReco'
#   sendEmail('cannot submit action', '%s is request type ReReco'%wfname)

            recovering = set()
            for task in tasks:
                assign_to_sites = set()
                print "Task names is " + task
                fulltaskname = '/' + wfname + '/' + task
                #                print "Full task name is " + fulltaskname
                wrong_task = False
                for task_info in all_tasks:
                    if fulltaskname == task_info.pathName:
                        if task_info.taskType not in [
                                'Processing', 'Production', 'Merge'
                        ]:
                            wrong_task = True
                            wfi.sendLog(
                                'actor',
                                "Skipping task %s because the taskType is %s. Can only ACDC Processing, Production, or Merge tasks"
                                % (fulltaskname, task_info.taskType))
                if wrong_task:
                    continue
                print tasks[task]
                actions = tasks[task]
                for action in actions:
                    if action.startswith('sites'):
                        if type(actions[action]) != list:
                            assign_to_sites = [SI.SE_to_CE(actions[action])]
                        else:
                            assign_to_sites = list(
                                set([
                                    SI.SE_to_CE(site)
                                    for site in actions[action]
                                ]))
#                    if action.startswith('mem') and actions[action] != "" and actions[action] != 'Same' and wfi.request['RequestType'] in ['TaskChain']:
#                        recover = False;
#                        print  "Skipping %s for now until Allie fixes memory parameter for TaskChain ACDCs."%wfname
#                        wfi.sendLog('actor',"Skipping %s for now until Allie fixes memory parameter for TaskChain ACDCs."%wfname)
                if not 'sites' in actions:
                    assign_to_sites = list(
                        set([SI.SE_to_CE(site)
                             for site in where_to_run[task]]))
                    print "Found", sorted(
                        assign_to_sites
                    ), "as sites where to run the ACDC at, from the acdc doc of ", wfname
                print "Going to run at", sorted(assign_to_sites)
                if recover:
                    print "Initiating recovery"
                    acdc = singleRecovery(url,
                                          fulltaskname,
                                          wfi.request,
                                          actions,
                                          do=options.do)
                    if not acdc:
                        if options.do:
                            if recovering:
                                print wfname + " has been partially ACDC'ed. Needs manual attention."
                                sendLog(
                                    'actor',
                                    "%s has had %s/%s recoveries %s only" %
                                    (wfname, len(recovering),
                                     num_tasks_to_recover, list(recovering)),
                                    level='critical')
                                wfi.sendLog(
                                    'actor',
                                    "%s has had %s/%s recoveries %s only" %
                                    (wfname, len(recovering),
                                     num_tasks_to_recover, list(recovering)))
                                break
                            else:
                                print wfname + " failed recovery once"
                                recover = False
                                break
                        else:
                            print "no action to take further"
                            #                        sendLog('recoveror', "ACDC for %s can be done automatically"% wfname, level='critical')
                            continue

                    else:  #ACDC was made correctly. Now we have to assign it.
                        wfi.sendLog(
                            'actor',
                            'ACDC created for task %s. Actions taken \n%s' %
                            (fulltaskname, json.dumps(actions)))
                        #team = wfi.request['Teams'][0]
                        team = 'production'
                        parameters = {
                            'SiteWhitelist': sorted(assign_to_sites),
                            'AcquisitionEra': wfi.acquisitionEra(),
                            'ProcessingString': wfi.processingString(),
                            'MergedLFNBase': wfi.request['MergedLFNBase'],
                            'ProcessingVersion':
                            wfi.request['ProcessingVersion'],
                        }
                        ## hackery for ACDC merge assignment
                        if wfi.request[
                                'RequestType'] == 'TaskChain' and 'Merge' in task.split(
                                    '/')[-1]:
                            parameters['AcquisitionEra'] = None
                            parameters['ProcessingString'] = None

                ## xrootd setttings on primary and secondary
                        if 'xrootd' in actions:
                            if actions['xrootd'] == 'enabled':
                                print "Going to assign via xrootd"
                                parameters['TrustSitelists'] = True
                            elif actions['xrootd'] == 'disabled':
                                parameters['TrustSitelists'] = False
                        elif ('TrustSitelists' in wfi.request
                              and wfi.request['TrustSitelists'] == 'true'):
                            parameters['TrustSitelists'] = True
                        else:
                            parameters['TrustSitelists'] = False

                        if 'secondary' in actions:
                            if actions['secondary'] == 'enabled':
                                print 'Enabling reading the secondary input via xrootd'
                                parameters['TrustPUSitelists'] = True
                            elif actions['secondary'] == 'disabled':
                                parameters['TrustPUSitelists'] = False
                            #in case secondary is blank or not set to enabled or disabled
                            elif 'TrustPUSitelists' in wfi.request and wfi.request[
                                    'TrustPUSitelists']:
                                parameters['TrustPUSitelists'] = True
                        elif 'TrustPUSitelists' in wfi.request and wfi.request[
                                'TrustPUSitelists']:
                            parameters['TrustPUSitelists'] = True

                        if options.ass:
                            print "really doing the assignment of the ACDC", acdc
                            parameters['execute'] = True
                            wfi.sendLog('actor',
                                        "%s  was assigned for recovery" % acdc)
                        else:
                            print "no assignment done with this ACDC", acdc
                            sendLog('actor',
                                    "%s needs to be assigned" % (acdc),
                                    level='critical')
                            continue

#                       print parameters
                        result = reqMgrClient.assignWorkflow(
                            url, acdc, team, parameters)
                        if not result:
                            print acdc, "was not assigned"
                            sendLog('actor',
                                    "%s needs to be assigned" % (acdc),
                                    level='critical')
                        else:
                            recovering.add(acdc)
                        wfi.sendLog('actor', "ACDCs created for %s" % wfname)
        #===========================================================

        if recover and options.do:
            r = WC.remove_action(wfname)
            if not r:
                sendLog(
                    'actor',
                    'not able to remove the action, interlocking the module',
                    level='critical')
                os.system('touch %s/actor.failed-%s.lock' %
                          (base_eos_dir, os.getpid()))
                sys.exit(-1)

        if message_to_user:
            print wfname, "to be notified to user(DUMMY)", message_to_user

        if message_to_ops:
            print 'message'
            #sendEmail( "notification in recoveror" , message_to_ops, destination=['*****@*****.**'])
        #            sendLog('recoveror',message_to_ops,level='warning')

    return
コード例 #10
0
    logs= filter(lambda change : change["Status"]==status, wf['RequestTransition'])
    if logs:
        return logs[-1]['UpdateTime']
    else:
        return None

delays={'assignment-approved' : (7,14),
        'new':(7,14),
        'completed':(14,21),
        'closed-out':(14,21),
        }

warnings=defaultdict(set)
for checkin,(warn,timeout) in delays.items():
    wfs = getWorkflows(url, checkin, user=None, details=True)
    for wf in wfs:
        if not 'backfill' in wf['RequestName'].lower(): continue
        transition = transitiontime(wf,checkin)
        if transition and (now - transition)>(timeout*24*60*60):
            ## that can go away
            print wf['RequestName'],"is old enough to be removed",wf['RequestStatus']
            reqMgrClient.invalidateWorkflow(url, wf['RequestName'], current_status=wf['RequestStatus'])
        elif transition and (now - transition)>(warn*24*60*60):
            ## warn requester
            print wf['RequestName'],"is old enough to be removed",wf['RequestStatus']
            warnings[wf['Requestor']].add( wf['RequestName'] )

for who in warnings:
    sendEmail('Old Backfill in the system','The following backfill should be removed or moved to rejected/announced\n\n%s'%('\n'.join(sorted(warnings[who]))), destination=[who+'@cern.ch'])

コード例 #11
0
def invalidator(url, invalid_status='INVALID'):
    use_mcm = True
    up = componentInfo(mcm=use_mcm)
    if not up.check(): return
    mcm = McMClient(dev=False)

    invalids = mcm.getA('invalidations', query='status=announced')
    print len(invalids), "Object to be invalidated"
    text_to_batch = defaultdict(str)
    text_to_request = defaultdict(str)
    for invalid in invalids:
        acknowledge = False
        pid = invalid['prepid']
        batch_lookup = invalid['prepid']
        text = ""
        if invalid['type'] == 'request':
            wfn = invalid['object']
            print "need to invalidate the workflow", wfn
            wfo = session.query(Workflow).filter(Workflow.name == wfn).first()
            if wfo:
                ## set forget of that thing (although checkor will recover from it)
                print "setting the status of", wfo.status, "to forget"
                wfo.status = 'forget'
                session.commit()
            else:
                ## do not go on like this, do not acknoledge it
                print wfn, "is set to be rejected, but we do not know about it yet"
                #continue
            wfi = workflowInfo(url, wfn)
            success = "not rejected"
            ## to do, we should find a way to reject the workflow and any related acdc
            success = reqMgrClient.invalidateWorkflow(
                url, wfn, current_status=wfi.request['RequestStatus'])
            ## need to find the whole familly and reject the whole gang
            familly = getWorkflowById(url, wfi.request['PrepID'], details=True)
            for fwl in familly:
                if fwl['RequestDate'] < wfi.request['RequestDate']: continue
                if fwl['RequestType'] != 'Resubmission': continue
                print "rejecting", fwl['RequestName']
                success = reqMgrClient.invalidateWorkflow(
                    url,
                    fwl['RequestName'],
                    current_status=fwl['RequestStatus'])

            print success
            acknowledge = True
            text = "The workflow %s (%s) was rejected due to invalidation in McM" % (
                wfn, pid)
            batch_lookup = wfn  ##so that the batch id is taken as the one containing the workflow name
        elif invalid['type'] == 'dataset':
            dataset = invalid['object']

            if '?' in dataset: continue
            if 'None' in dataset: continue
            if 'None-' in dataset: continue
            if 'FAKE-' in dataset: continue

            print "setting", dataset, "to", invalid_status
            success = setDatasetStatus(dataset, invalid_status)
            if success:
                acknowledge = True
                text = "The dataset %s (%s) was set INVALID due to invalidation in McM" % (
                    dataset, pid)
            else:
                print "invalidation of", dataset, "did not go so well"
        else:
            print "\t\t", invalid['type'], " type not recognized"

        if acknowledge:
            ## acknoldge invalidation in mcm, provided we can have the api
            print "acknowledgment to mcm"
            mcm.get('/restapi/invalidations/acknowledge/%s' % (invalid['_id']))
            # prepare the text for batches
            batches = []
            batches.extend(
                mcm.getA('batches', query='contains=%s' % batch_lookup))
            batches = filter(
                lambda b: b['status'] in ['announced', 'done', 'reset'],
                batches)
            if len(batches):
                bid = batches[-1]['prepid']
                print "batch nofication to", bid
                text_to_batch[bid] += text + "\n\n"
            # prepare the text for requests
            text_to_request[pid] += text + "\n\n"

    for bid, text in text_to_batch.items():
        if not text: continue
        text += '\n This is an automated message'
        mcm.put('/restapi/batches/notify', {"notes": text, "prepid": bid})
        pass
    for pid, text in text_to_request.items():
        if not text: continue
        text += '\n This is an automated message'
        mcm.put('/restapi/requests/notify', {
            "message": text,
            "prepids": [pid]
        })
コード例 #12
0
ファイル: rejector.py プロジェクト: menglu21/WmAgentScripts
def rejector(url, specific, options=None):

    up = componentInfo()

    if specific and specific.startswith('/'):
        return

    if options.filelist:
        wfs = []
        for line in filter(None, open(options.filelist).read().split('\n')):
            print line
            wfs.extend(
                session.query(Workflow).filter(
                    Workflow.name.contains(line)).all())
    elif specific:
        wfs = session.query(Workflow).filter(
            Workflow.name.contains(specific)).all()
    else:
        return

    print len(wfs), "to reject"

    if len(wfs) > 1:
        print "\n".join([wfo.name for wfo in wfs])
        answer = raw_input('Reject these')
        if not answer.lower() in ['y', 'yes']:
            return

    for wfo in wfs:
        #wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject", spec
            return
        results = []
        wfi = workflowInfo(url, wfo.name)

        datasets = set(wfi.request['OutputDatasets'])
        reqMgrClient.invalidateWorkflow(
            url, wfo.name, current_status=wfi.request['RequestStatus'])

        wfi.sendLog('rejector',
                    'invalidating the workflow by unified operator')
        ## need to find the whole familly and reject the whole gang
        familly = getWorkflowById(url, wfi.request['PrepID'], details=True)
        for fwl in familly:
            if fwl['RequestDate'] < wfi.request['RequestDate']: continue
            if fwl['RequestType'] != 'Resubmission': continue
            if 'OriginalRequestName' in fwl and fwl[
                    'OriginalRequestName'] != wfi.request['RequestName']:
                continue
            print "rejecting", fwl['RequestName']
            reqMgrClient.invalidateWorkflow(
                url, fwl['RequestName'], current_status=fwl['RequestStatus'])
            datasets.update(fwl['OutputDatasets'])

        for dataset in datasets:
            if options.keep:
                print "keeping", dataset, "in its current status"
            else:
                results.append(setDatasetStatus(dataset, 'INVALID'))

        #if wfi.request['RequestStatus'] in ['rejected','rejected-archived','aborted','aborted-archived']:
        #    print 'already',wfi.request['RequestStatus']
        #    if not options.clone:
        #        wfo.status = 'forget'
        #        session.commit()
        #        continue

        if all(map(lambda result: result in ['None', None, True], results)):
            wfo.status = 'forget'
            session.commit()
            print wfo.name, "and", datasets, "are rejected"
            if options and options.clone:
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    schema['ProcessingVersion'] = int(
                        schema['ProcessingVersion']
                    ) + 1  ## dubious str->int conversion
                else:
                    schema['ProcessingVersion'] = 2
                for k in schema.keys():
                    if k.startswith('Team'):
                        schema.pop(k)
                    if k.startswith('checkbox'):
                        schema.pop(k)

                ## a few tampering of the original request
                if options.Memory:
                    schema['Memory'] = options.Memory
                if options.deterministic:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['DeterministicPileup'] = True
                if options.EventsPerJob:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['EventsPerJob'] = options.EventsPerJob
                    else:
                        schema['EventsPerJob'] = options.EventsPerJob
                if options.EventAwareLumiBased:
                    schema['SplittingAlgo'] = 'EventAwareLumiBased'
                if options.TimePerEvent:
                    schema['TimePerEvent'] = options.TimePerEvent

                if options.ProcessingString:
                    schema['ProcessingString'] = options.ProcessingString
                if options.AcquisitionEra:
                    schema['AcquisitionEra'] = options.AcquisitionEra
                if options.runs:
                    schema['RunWhitelist'] = map(int, options.runs.split(','))
                if options.PrepID:
                    schema['PrepID'] = options.PrepID

                if schema['RequestType'] == 'TaskChain' and options.no_output:
                    ntask = schema['TaskChain']
                    for it in range(1, ntask - 1):
                        schema['Task%d' % it]['KeepOutput'] = False
                    schema['TaskChain'] = ntask - 1
                    schema.pop('Task%d' % ntask)

                ## update to the current priority
                schema['RequestPriority'] = wfi.request['RequestPriority']

                ## drop shit on the way to reqmgr2
                for p in [
                        'RequestStatus',
                        'RequestTransition',
                        'RequestorDN',
                        'RequestWorkflow',
                        'OutputDatasets',
                        'ReqMgr2Only',
                        #'Group',
                        'RequestDate',
                        #'ConfigCacheUrl',
                        'RequestName',
                        'timeStamp',
                        'SoftwareVersions',
                        'CouchURL'
                ]:
                    if p in schema:
                        schema.pop(p)
                        #pass
                print "submitting"
                print json.dumps(schema, indent=2)
                newWorkflow = reqMgrClient.submitWorkflow(url, schema)
                if not newWorkflow:
                    print "error in cloning", wfo.name
                    print json.dumps(schema, indent=2)
                    return
                print newWorkflow
                #m = re.search("details\/(.*)\'",response)
                #if not m:
                #    print "error in cloning",wfo.name
                #    print response
                #    print json.dumps( schema, indent=2 )
                #    return
                #newWorkflow = m.group(1)

                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfo.status = 'trouble'
                session.commit()
                wfi.sendLog(
                    'rejector',
                    'Cloned into %s by unified operator' % (newWorkflow))
        else:
            print "error in rejecting", wfo.name, results
コード例 #13
0
def rejector(url, specific, options=None):

    up = componentInfo()

    if specific and specific.startswith('/'):
        return

    if options.filelist:
        wfs = []
        for line in filter(None, open(options.filelist).read().split('\n')):
            print line
            wfs.extend( session.query(Workflow).filter(Workflow.name.contains(line)).all())
    elif specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
    else:
        return 

    print len(wfs),"to reject"

    if len(wfs)>1:
        print "\n".join( [wfo.name for wfo in wfs] )
        answer = raw_input('Reject these')
        if not answer.lower() in ['y','yes']:
            return
        
    for wfo in wfs:
        #wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject",spec
            return
        results=[]
        wfi = workflowInfo(url, wfo.name)

        datasets = set(wfi.request['OutputDatasets'])
        reqMgrClient.invalidateWorkflow(url, wfo.name, current_status=wfi.request['RequestStatus'])

        wfi.sendLog('rejector','invalidating the workflow by unified operator')
        ## need to find the whole familly and reject the whole gang
        familly = getWorkflowById( url, wfi.request['PrepID'] , details=True)
        for fwl in familly:
            if fwl['RequestDate'] < wfi.request['RequestDate']: continue
            if fwl['RequestType']!='Resubmission': continue
            if 'OriginalRequestName' in fwl and fwl['OriginalRequestName'] != wfi.request['RequestName']: continue
            print "rejecting",fwl['RequestName']
            reqMgrClient.invalidateWorkflow(url, fwl['RequestName'], current_status=fwl['RequestStatus'])
            datasets.update( fwl['OutputDatasets'] )

        for dataset in datasets:
            if options.keep:
                print "keeping",dataset,"in its current status"
            else:
                results.append( setDatasetStatus(dataset, 'INVALID') )

        #if wfi.request['RequestStatus'] in ['rejected','rejected-archived','aborted','aborted-archived']:
        #    print 'already',wfi.request['RequestStatus']
        #    if not options.clone:
        #        wfo.status = 'forget'
        #        session.commit()
        #        continue

        if all(map(lambda result : result in ['None',None,True],results)):
            wfo.status = 'forget'
            session.commit()
            print wfo.name,"and",datasets,"are rejected"
            if options and options.clone:
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    schema['ProcessingVersion'] = int(schema['ProcessingVersion'])+1 ## dubious str->int conversion
                else:
                    schema['ProcessingVersion']=2
                for k in schema.keys():
                    if k.startswith('Team'):
                        schema.pop(k)
                    if k.startswith('checkbox'):
                        schema.pop(k)

                ## a few tampering of the original request
                if options.Memory:
                    schema['Memory'] = options.Memory
                if options.deterministic:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['DeterministicPileup']  = True
                if options.EventsPerJob:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['EventsPerJob'] = options.EventsPerJob
                    else:
                        schema['EventsPerJob'] = options.EventsPerJob
                if options.EventAwareLumiBased:
                    schema['SplittingAlgo'] = 'EventAwareLumiBased'
                if options.TimePerEvent:
                    schema['TimePerEvent'] = options.TimePerEvent

                if options.ProcessingString:
                    schema['ProcessingString'] = options.ProcessingString
                if options.AcquisitionEra:
                    schema['AcquisitionEra'] = options.AcquisitionEra
                if options.runs:
                    schema['RunWhitelist'] = map(int,options.runs.split(','))
                if options.PrepID:
                    schema['PrepID'] =options.PrepID

                if schema['RequestType'] == 'TaskChain' and options.no_output:
                    ntask = schema['TaskChain']
                    for it in range(1,ntask-1):
                        schema['Task%d'%it]['KeepOutput'] = False
                    schema['TaskChain'] = ntask-1
                    schema.pop('Task%d'%ntask)

                ## update to the current priority
                schema['RequestPriority'] = wfi.request['RequestPriority']

                ## drop shit on the way to reqmgr2
                for p in ['RequestStatus',
                          'RequestTransition',
                          'RequestorDN',
                          'RequestWorkflow',
                          'OutputDatasets',
                          'ReqMgr2Only',
                          #'Group',
                          'RequestDate',
                          #'ConfigCacheUrl',
                          'RequestName',
                          'timeStamp',
                          'SoftwareVersions',
                          'CouchURL'
                          ]:
                    if p in schema:
                        schema.pop( p )
                        #pass
                print "submitting"
                print json.dumps( schema, indent=2 )
                newWorkflow = reqMgrClient.submitWorkflow(url, schema)
                if not newWorkflow:
                    print "error in cloning",wfo.name
                    print json.dumps( schema, indent=2 )
                    return 
                print newWorkflow
                #m = re.search("details\/(.*)\'",response)
                #if not m:
                #    print "error in cloning",wfo.name
                #    print response
                #    print json.dumps( schema, indent=2 )
                #    return 
                #newWorkflow = m.group(1)

                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfo.status = 'trouble'
                session.commit()
                wfi.sendLog('rejector','Cloned into %s by unified operator'%( newWorkflow ))
        else:
            print "error in rejecting",wfo.name,results
コード例 #14
0
def rejector(url, specific, options=None):

    up = componentInfo(soft=['wtc'])
    if not up.check(): return

    if specific and specific.startswith('/'):
        ## this is for a dataset
        print setDatasetStatus(specific, 'INVALID')
        return

    if options.filelist:
        wfs = []
        for line in filter(None, open(options.filelist).read().split('\n')):
            print line
            wfs.extend(
                session.query(Workflow).filter(
                    Workflow.name.contains(line)).all())
    elif specific:
        wfs = session.query(Workflow).filter(
            Workflow.name.contains(specific)).all()
        if not wfs:
            batches = json.loads(open('batches.json').read())
            for bname in batches:
                if specific == bname:
                    for wf in batches[bname]:
                        wfs.append(
                            session.query(Workflow).filter(
                                Workflow.name == wf).first())
    else:
        wfs = session.query(Workflow).filter(
            Workflow.status == 'assistance-clone').all()
        #wfs.extend( session.query(Workflow).filter(Workflow.status == 'assistance-reject').all())
        ## be careful then on clone case by case
        options.clone = True
        print "not supposed to function yet"
        return

    print len(wfs), "to reject"

    if len(wfs) > 1:
        print "\n".join([wfo.name for wfo in wfs])
        answer = raw_input('Reject these')
        if not answer.lower() in ['y', 'yes']:
            return

    for wfo in wfs:
        #wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject", spec
            return
        results = []
        wfi = workflowInfo(url, wfo.name)

        datasets = set(wfi.request['OutputDatasets'])
        reqMgrClient.invalidateWorkflow(
            url, wfo.name, current_status=wfi.request['RequestStatus'])

        comment = ""
        if options.comments: comment = ", reason: " + options.comments
        wfi.sendLog(
            'rejector',
            'invalidating the workflow by unified operator%s' % comment)
        ## need to find the whole familly and reject the whole gang
        familly = getWorkflowById(url, wfi.request['PrepID'], details=True)
        for fwl in familly:
            if fwl['RequestDate'] < wfi.request['RequestDate']: continue
            if fwl['RequestType'] != 'Resubmission': continue
            ## does not work on second order acd
            #if 'OriginalRequestName' in fwl and fwl['OriginalRequestName'] != wfi.request['RequestName']: continue
            print "rejecting", fwl['RequestName']
            reqMgrClient.invalidateWorkflow(
                url,
                fwl['RequestName'],
                current_status=fwl['RequestStatus'],
                cascade=False)
            datasets.update(fwl['OutputDatasets'])

        for dataset in datasets:
            if options.keep:
                print "keeping", dataset, "in its current status"
            else:
                results.append(setDatasetStatus(dataset, 'INVALID'))
                pass

        if all(map(lambda result: result in ['None', None, True], results)):
            print wfo.name, "and", datasets, "are rejected"
            if options and options.clone:
                wfo.status = 'trouble'
                session.commit()
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    schema['ProcessingVersion'] = int(
                        schema['ProcessingVersion']
                    ) + 1  ## dubious str->int conversion
                else:
                    schema['ProcessingVersion'] = 2
                for k in schema.keys():
                    if k.startswith('Team'):
                        schema.pop(k)
                    if k.startswith('checkbox'):
                        schema.pop(k)

                ## a few tampering of the original request
                if options.Memory:
                    if schema['RequestType'] == 'TaskChain':
                        it = 1
                        while True:
                            t = 'Task%d' % it
                            it += 1
                            if t in schema:
                                schema[t]['Memory'] = options.Memory
                            else:
                                break
                    else:
                        schema['Memory'] = options.Memory

                if options.Multicore:
                    ## to do : set it properly in taskchains
                    if schema['RequestType'] == 'TaskChain':
                        tasks, set_to = options.Multicore.split(
                            ':') if ':' in options.Multicore else (
                                "", options.Multicore)
                        set_to = int(set_to)
                        tasks = tasks.split(',') if tasks else ['Task1']
                        it = 1
                        while True:
                            tt = 'Task%d' % it
                            it += 1
                            if tt in schema:
                                tname = schema[tt]['TaskName']
                                if tname in tasks or tt in tasks:
                                    mem = schema[tt]['Memory']
                                    mcore = schema[tt].get('Multicore', 1)
                                    factor = (set_to / float(mcore))
                                    fraction_constant = 0.4
                                    mem_per_core_c = int(
                                        (1 - fraction_constant) * mem /
                                        float(mcore))
                                    print "mem per core", mem_per_core_c
                                    print "base mem", mem
                                    ## adjusting the parameter in the clone
                                    schema[tt]['Memory'] += (
                                        set_to - mcore) * mem_per_core_c
                                    schema[tt]['Multicore'] = set_to
                                    schema[tt]['TimePerEvent'] /= factor
                            else:
                                break
                    else:
                        schema['Multicore'] = options.Multicore
                if options.deterministic:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['DeterministicPileup'] = True
                if options.EventsPerJob:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['EventsPerJob'] = options.EventsPerJob
                    else:
                        schema['EventsPerJob'] = options.EventsPerJob
                if options.EventAwareLumiBased:
                    schema['SplittingAlgo'] = 'EventAwareLumiBased'
                if options.TimePerEvent:
                    schema['TimePerEvent'] = options.TimePerEvent

                if options.ProcessingString:
                    schema['ProcessingString'] = options.ProcessingString
                if options.AcquisitionEra:
                    schema['AcquisitionEra'] = options.AcquisitionEra
                if options.runs:
                    schema['RunWhitelist'] = map(int, options.runs.split(','))
                if options.PrepID:
                    schema['PrepID'] = options.PrepID

                if schema['RequestType'] == 'TaskChain' and options.no_output:
                    ntask = schema['TaskChain']
                    for it in range(1, ntask - 1):
                        schema['Task%d' % it]['KeepOutput'] = False
                    schema['TaskChain'] = ntask - 1
                    schema.pop('Task%d' % ntask)

                if options.priority:
                    schema['RequestPriority'] = options.priority

                ## update to the current priority
                schema['RequestPriority'] = wfi.request['RequestPriority']

                ## drop shit on the way to reqmgr2
                schema = reqMgrClient.purgeClonedSchema(schema)

                print "submitting"
                if (options.to_stepchain
                        and (schema['RequestType'] == 'TaskChain')):
                    ## transform the schema into StepChain schema
                    print "Transforming a TaskChain into a StepChain"
                    schema['RequestType'] = 'StepChain'
                    schema['StepChain'] = schema.pop('TaskChain')
                    schema['SizePerEvent'] = 0
                    schema['TimePerEvent'] = 0
                    step = 1
                    s_n = {}
                    while True:
                        if 'Task%d' % step in schema:
                            schema['Step%d' % step] = schema.pop('Task%d' %
                                                                 step)
                            schema['Step%d' % step]['StepName'] = schema[
                                'Step%d' % step].pop('TaskName')
                            s_n[schema['Step%d' %
                                       step]['StepName']] = 'Step%d' % step
                            if 'InputTask' in schema['Step%d' % step]:
                                schema['Step%d' % step]['InputStep'] = schema[
                                    'Step%d' % step].pop('InputTask')
                            eff = 1.
                            up_s = 'Step%d' % step
                            while True:
                                ## climb up a step. supposedely already all converted
                                up_s = s_n.get(
                                    schema[up_s].get('InputStep', None), None)
                                if up_s:
                                    ## multiply with the efficiency
                                    eff *= schema[up_s].get(
                                        'FilterEfficiency', 1.)
                                else:
                                    ## or stop there
                                    break

                            if not 'KeepOutput' in schema['Step%d' % step]:
                                ## this is a weird translation capability. Absence of keepoutput in step means : keep the output. while in TaskChain absence means : drop
                                schema['Step%d' % step]['KeepOutput'] = False
                            schema['TimePerEvent'] += eff * schema[
                                'Step%d' % step].pop('TimePerEvent')
                            schema['SizePerEvent'] += eff * schema[
                                'Step%d' % step].pop('SizePerEvent')
                            step += 1
                        else:
                            break

                print json.dumps(schema, indent=2)
                newWorkflow = reqMgrClient.submitWorkflow(url, schema)
                if not newWorkflow:
                    print "error in cloning", wfo.name
                    print json.dumps(schema, indent=2)
                    return
                print newWorkflow

                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfi.sendLog(
                    'rejector', 'Cloned into %s by unified operator %s' %
                    (newWorkflow, comment))
                wfi.notifyRequestor('Cloned into %s by unified operator %s' %
                                    (newWorkflow, comment),
                                    do_batch=False)
            else:
                wfo.status = 'trouble' if options.set_trouble else 'forget'
                wfi.notifyRequestor('Rejected by unified operator %s' %
                                    (comment),
                                    do_batch=False)
                session.commit()

        else:
            print "error in rejecting", wfo.name, results
コード例 #15
0
ファイル: reject.py プロジェクト: CMSCompOps/WmAgentScripts
def main():
    usage = (
        "\n       python %prog [options] [WORKFLOW_NAME]\n"
        "WORKFLOW_NAME: if the list file is provided this should be empty\n"
    )

    parser = OptionParser(usage=usage)
    parser.add_option("-f", "--file", help="Text file of workflows to Reject and Clone", dest="file")
    parser.add_option(
        "-c",
        "--clone",
        help="Are the workflows going to be cloned? The default value is False",
        action="store_true",
        dest="clone",
        default=False,
    )
    parser.add_option(
        "-i",
        "--invalidate",
        help="Invalidate datasets? The default value is False",
        action="store_true",
        dest="invalidate",
        default=False,
    )
    parser.add_option(
        "-u",
        "--user",
        dest="user",
        help="The user for creating the clone, if empty it will use the OS user running the script",
    )
    parser.add_option(
        "-g",
        "--group",
        dest="group",
        default="DATAOPS",
        help="The group for creating the clone, if empty it will, use 'DATAOPS' by default",
    )
    parser.add_option(
        "-m",
        "--memory",
        dest="memory",
        help="Set max memory for the clone. At assignment, this will be used to calculate maxRSS = memory*1024",
    )

    (options, args) = parser.parse_args()

    # Check the arguments, get info from them
    if options.file:
        try:
            workflows = [l.strip() for l in open(options.file) if l.strip()]
        except:
            parser.error("Provide a valid file of workflows")
            sys.exit(1)
    elif len(args) > 0:
        # name of workflow
        workflows = [args[0]]
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)

    if not options.user:
        # get os username by default
        uinfo = pwd.getpwuid(os.getuid())
        user = uinfo.pw_name
    else:
        user = options.user

    for workflow in workflows:
        try:
            workflowInfo = reqMgrClient.Workflow(workflow)
        except:
            print("The workflow name: " + workflow + " is  not valid.")
            continue
        # invalidates workflow
        print("Invalidating the workflow: " + workflow)
        reqMgrClient.invalidateWorkflow(url, workflow, workflowInfo.status)

        # invalidates datasets
        if options.invalidate:
            print("Invalidating datasets")
            datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
            for dataset in datasets:
                print(dataset)
                dbs3.setDatasetStatus(dataset, "INVALID", files=True)

        # clones workflow
        if options.clone:
            print("Cloning workflow: " + workflow)
            if options.memory:
                mem = float(options.memory)
            else:
                mem = workflowInfo.info["Memory"]
            cloned = resubmit.cloneWorkflow(workflow, user, options.group, memory=mem)
    sys.exit(0)