def rejector(url, specific, options=None):
    up = componentInfo()

    if specific.startswith('/'):
        ## this is a dataset name, not a workflow name
        pass
    else:
        wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject", specific
            return
        results = []
        wfi = workflowInfo(url, wfo.name)
        reqMgrClient.invalidateWorkflow(url, wfo.name, current_status=wfi.request['RequestStatus'])
        #if wfi.request['RequestStatus'] in ['assignment-approved','new','completed']:
        #    #results.append( reqMgrClient.rejectWorkflow(url, wfo.name))
        #    reqMgrClient.rejectWorkflow(url, wfo.name)
        #else:
        #    #results.append( reqMgrClient.abortWorkflow(url, wfo.name))
        #    reqMgrClient.abortWorkflow(url, wfo.name)
        datasets = wfi.request['OutputDatasets']
        for dataset in datasets:
            if options.keep:
                print "keeping", dataset, "in its current status"
            else:
                results.append(setDatasetStatus(dataset, 'INVALID'))

        if all(map(lambda result: result in ['None', None, True], results)):
            wfo.status = 'forget'
            session.commit()
            print wfo.name, "and", datasets, "are rejected"
            if options and options.clone:
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    schema['ProcessingVersion'] += 1
                else:
                    schema['ProcessingVersion'] = 2
                ##schema.pop('RequestDate') ## ok then, let's not reset the time stamp
                if options.Memory:
                    schema['Memory'] = options.Memory
                response = reqMgrClient.submitWorkflow(url, schema)
                m = re.search("details\/(.*)\'", response)
                if not m:
                    print "error in cloning", wfo.name
                    print response
                    return
                newWorkflow = m.group(1)
                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfo.status = 'trouble'
                session.commit()
        else:
            print "error in rejecting", wfo.name, results
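## --- Illustrative usage sketch (not part of the original module) ---
## rejector() expects an `options` object carrying at least .keep, .clone and
## .Memory. A minimal, hypothetical stand-in built with optparse, assuming
## only the attribute names read above:
from optparse import OptionParser

def _rejector_options_sketch():
    parser = OptionParser()
    parser.add_option('--keep', action='store_true', default=False,
                      help='keep output datasets in their current status')
    parser.add_option('--clone', action='store_true', default=False,
                      help='resubmit a clone after rejecting')
    parser.add_option('--Memory', type=int, default=None,
                      help='override Memory in the cloned schema')
    (opts, args) = parser.parse_args([])
    return opts
## e.g. rejector(url, 'pdmvserv_task_EXO-Example-00001_v0', options=_rejector_options_sketch())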
def closor(url, specific=None):
    if userLock(): return
    if duplicateLock(): return
    if not componentInfo().check(): return

    UC = unifiedConfiguration()
    CI = campaignInfo()

    all_late_files = []
    check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce')

    ## manually closed-out workflows should get to close with checkor
    if specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
    else:
        wfs = session.query(Workflow).filter(Workflow.status == 'close').all()

    held = set()

    max_per_round = UC.get('max_per_round').get('closor', None)
    random.shuffle(wfs)
    if max_per_round:
        wfs = wfs[:max_per_round]

    for wfo in wfs:
        if specific and not specific in wfo.name: continue

        ## what is the expected #lumis
        wfi = workflowInfo(url, wfo.name)
        wfo.wm_status = wfi.request['RequestStatus']

        if wfi.request['RequestStatus'] in ['announced', 'normal-archived']:
            ## manually announced ??
            wfo.status = 'done'
            wfo.wm_status = wfi.request['RequestStatus']
            wfi.sendLog('closor', '%s is announced already : %s' % (wfo.name, wfo.wm_status))
            session.commit()

        expected_lumis = 1
        if not 'TotalInputLumis' in wfi.request:
            print wfo.name, "has not been assigned yet, or the database is corrupted"
        else:
            expected_lumis = wfi.request['TotalInputLumis']

        ## what are the outputs
        outputs = wfi.request['OutputDatasets']
        ## check whether the number of lumis is as expected for each
        all_OK = defaultdict(lambda: False)
        #print outputs
        if len(outputs):
            print wfo.name, wfi.request['RequestStatus']
        for out in outputs:
            event_count, lumi_count = getDatasetEventsAndLumis(dataset=out)
            odb = session.query(Output).filter(Output.datasetname == out).first()
            if not odb:
                print "adding an output object", out
                odb = Output(datasetname=out)
                odb.workflow = wfo
                session.add(odb)
            odb.nlumis = lumi_count
            odb.nevents = event_count
            odb.workfow_id = wfo.id  ## (sic) matches the column name in the Output model
            if odb.expectedlumis < expected_lumis:
                odb.expectedlumis = expected_lumis
            else:
                expected_lumis = odb.expectedlumis
            odb.date = time.mktime(time.gmtime())
            session.commit()
            wfi.sendLog('closor', "\t%60s %d/%d = %3.2f%%" % (out, lumi_count, expected_lumis, lumi_count / float(expected_lumis) * 100.))
            #print wfo.fraction_for_closing, lumi_count, expected_lumis
            #fraction = wfo.fraction_for_closing
            #fraction = 0.0
            #all_OK.append((float(lumi_count) > float(expected_lumis*fraction)))
            all_OK[out] = True

        ## check for at least one full copy prior to moving on
        in_full = {}
        for out in outputs:
            in_full[out] = []
            presence = getDatasetPresence(url, out)
            where = [site for site, info in presence.items() if info[0]]
            if where:
                all_OK[out] = True
                print out, "is in full at", ",".join(where)
                in_full[out] = copy.deepcopy(where)
            else:
                going_to = wfi.request['NonCustodialSites'] + wfi.request['CustodialSites']
                wfi.sendLog('closor', "%s is not in full anywhere. send to %s" % (out, ",".join(sorted(going_to))))
                at_destination = dict([(k, v) for (k, v) in presence.items() if k in going_to])
                else_where = dict([(k, v) for (k, v) in presence.items() if not k in going_to])
                print json.dumps(at_destination)
                print json.dumps(else_where, indent=2)
                ## do the full stuck transfer study, missing files and all
                for there in going_to:
                    late_info = findLateFiles(url, out, going_to=there)
                    for l in late_info:
                        l.update({"workflow": wfo.name, "dataset": out})
                    all_late_files.extend(late_info)
                if check_fullcopy_to_announce:
                    ## only set this false if the check is relevant
                    all_OK[out] = False

        ## verify if we have to do harvesting
        (OK, requests) = spawn_harvesting(url, wfi, in_full)
        all_OK.update(OK)

        ## only that status can let me go into announced
        if all(all_OK.values()) and wfi.request['RequestStatus'] in ['closed-out']:
            print wfo.name, "to be announced"
            results = []  #'dummy']
            if not results:
                for out in outputs:
                    if all_OK[out]:
                        results.append(setDatasetStatus(out, 'VALID'))
                        tier = out.split('/')[-1]
                        campaign = None
                        try:
                            campaign = out.split('/')[2].split('-')[0]
                        except:
                            if 'Campaign' in wfi.request and wfi.request['Campaign']:
                                campaign = wfi.request['Campaign']
                        to_DDM = False
                        ## campaign override
                        if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']:
                            to_DDM = True
                        ## by typical enabling
                        if tier in UC.get("tiers_to_DDM"):
                            to_DDM = True
                        ## check for unitarity
                        if not tier in UC.get("tiers_no_DDM") + UC.get("tiers_to_DDM"):
                            print "tier", tier, "neither TO nor NO DDM for", out
                            results.append('Not recognized tier %s' % tier)
                            #sendEmail("failed DDM injection","could not recognize %s for injecting in DDM"% out)
                            sendLog('closor', "could not recognize %s for injecting in DDM" % out, level='critical')
                            continue

                        n_copies = 2
                        destinations = []
                        if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]:
                            ddm_instructions = CI.campaigns[campaign]['DDMcopies']
                            if type(ddm_instructions) == int:
                                n_copies = CI.campaigns[campaign]['DDMcopies']
                            elif type(ddm_instructions) == dict:
                                ## a more fancy configuration
                                for ddmtier, indication in ddm_instructions.items():
                                    if ddmtier == tier or ddmtier in ['*', 'all']:
                                        ## this is for us
                                        if 'N' in indication:
                                            n_copies = indication['N']
                                        if 'host' in indication:
                                            destinations = indication['host']

                        destination_spec = ""
                        if destinations:
                            destination_spec = "--destination=" + ",".join(destinations)

                        ## inject to DDM when necessary
                        if to_DDM:
                            #print "Sending",out," to DDM"
                            p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s --exec' % (n_copies, out, destination_spec))
                            print p.read()
                            status = p.close()
                            if status != None:
                                print "Failed DDM, retrying a second time"
                                p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s --exec' % (n_copies, out, destination_spec))
                                print p.read()
                                status = p.close()
                                if status != None:
                                    #sendEmail("failed DDM injection","could not add "+out+" to DDM pool. check closor logs.")
                                    sendLog('closor', "could not add " + out + " to DDM pool. check closor logs.", level='critical')
                            results.append(status)
                            if status == None:
                                ## argument order fixed: the dataset comes first in the message
                                wfi.sendLog('closor', '%s is sent to AnalysisOps DDM pool in %s copies %s' % (out, n_copies, destination_spec))
                    else:
                        print wfo.name, "no stats for announcing", out
                        results.append('No Stats')

                if all(map(lambda result: result in ['None', None, True], results)):
                    ## only announce if all previous are fine
                    res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                    if not res in ['None', None]:
                        ## check the status again, it might well have toggled
                        wl_bis = workflowInfo(url, wfo.name)
                        wfo.wm_status = wl_bis.request['RequestStatus']
                        session.commit()
                        if wl_bis.request['RequestStatus'] in ['announced', 'normal-archived']:
                            res = None
                        else:
                            ## retry ?
                            res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                    results.append(res)

            #print results
            if all(map(lambda result: result in ['None', None, True], results)):
                wfo.status = 'done'
                session.commit()
                wfi.sendLog('closor', "workflow is announced")
            else:
                print "ERROR with ", wfo.name, "to be announced", json.dumps(results)
        else:
            print wfo.name, "not good for announcing:", wfi.request['RequestStatus']
            wfi.sendLog('closor', "cannot be announced")
            held.add(wfo.name)

    days_late = 0.
    retries_late = 10

    really_late_files = [info for info in all_late_files if info['retries'] >= retries_late]
    really_late_files = [info for info in really_late_files if info['delay'] / (60 * 60 * 24.) >= days_late]

    if really_late_files:
        subject = 'These %d files are lagging for %d days and %d retries announcing dataset \n%s' % (len(really_late_files), days_late, retries_late, json.dumps(really_late_files, indent=2))
        #sendEmail('waiting for files to announce', subject)
        sendLog('closor', subject, level='warning')
        sendLog('closor', subject)
        print subject
        open('%s/stuck_files.json' % monitor_dir, 'w').write(json.dumps(really_late_files, indent=2))

    if held:
        #sendEmail("held from announcing","the workflows below are held up, please check the logs https://cmst2.web.cern.ch/cmst2/unified/logs/closor/last.log \n%s"%("\n".join( held )))
        sendLog('closor', "the workflows below are held up \n%s" % ("\n".join(held)), level='critical')
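## --- Illustrative helper (not in the original module) ---
## The success test above recurs throughout this file: an operation counts as
## "fine" when it returned True, None, or the string 'None'. A hypothetical
## helper capturing that predicate:
def _all_fine_sketch(results):
    ## True when every accumulated result signals success
    return all(result in ['None', None, True] for result in results)
## e.g. if _all_fine_sketch(results): wfo.status = 'done'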
def closor(url, specific=None):
    if not componentInfo().check(): return
    UC = unifiedConfiguration()
    CI = campaignInfo()
    #LI = lockInfo()

    all_late_files = []
    check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce')

    ## manually closed-out workflows should get to close with checkor
    if specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
    else:
        wfs = session.query(Workflow).filter(Workflow.status == 'close').all()

    held = set()

    for wfo in wfs:
        if specific and not specific in wfo.name: continue

        ## what is the expected #lumis
        wfi = workflowInfo(url, wfo.name)
        wfo.wm_status = wfi.request['RequestStatus']

        if wfi.request['RequestStatus'] in ['announced', 'normal-archived']:
            ## manually announced ??
            wfo.status = 'done'
            wfo.wm_status = wfi.request['RequestStatus']
            wfi.sendLog('closor', '%s is announced already : %s' % (wfo.name, wfo.wm_status))
            session.commit()

        expected_lumis = 1
        if not 'TotalInputLumis' in wfi.request:
            print wfo.name, "has not been assigned yet, or the database is corrupted"
        else:
            expected_lumis = wfi.request['TotalInputLumis']

        ## what are the outputs
        outputs = wfi.request['OutputDatasets']
        ## check whether the number of lumis is as expected for each
        all_OK = defaultdict(lambda: False)
        #print outputs
        if len(outputs):
            print wfo.name, wfi.request['RequestStatus']
        for out in outputs:
            event_count, lumi_count = getDatasetEventsAndLumis(dataset=out)
            odb = session.query(Output).filter(Output.datasetname == out).first()
            if not odb:
                print "adding an output object", out
                odb = Output(datasetname=out)
                odb.workflow = wfo
                session.add(odb)
            odb.nlumis = lumi_count
            odb.nevents = event_count
            odb.workfow_id = wfo.id
            if odb.expectedlumis < expected_lumis:
                odb.expectedlumis = expected_lumis
            else:
                expected_lumis = odb.expectedlumis
            odb.date = time.mktime(time.gmtime())
            session.commit()
            wfi.sendLog('closor', "\t%60s %d/%d = %3.2f%%" % (out, lumi_count, expected_lumis, lumi_count / float(expected_lumis) * 100.))
            #print wfo.fraction_for_closing, lumi_count, expected_lumis
            #fraction = wfo.fraction_for_closing
            #fraction = 0.0
            #all_OK.append((float(lumi_count) > float(expected_lumis*fraction)))
            all_OK[out] = True

        ## check for at least one full copy prior to moving on
        in_full = {}
        for out in outputs:
            in_full[out] = []
            presence = getDatasetPresence(url, out)
            where = [site for site, info in presence.items() if info[0]]
            if where:
                all_OK[out] = True
                print out, "is in full at", ",".join(where)
                in_full[out] = copy.deepcopy(where)
            else:
                going_to = wfi.request['NonCustodialSites'] + wfi.request['CustodialSites']
                wfi.sendLog('closor', "%s is not in full anywhere. send to %s" % (out, ",".join(sorted(going_to))))
                at_destination = dict([(k, v) for (k, v) in presence.items() if k in going_to])
                else_where = dict([(k, v) for (k, v) in presence.items() if not k in going_to])
                print json.dumps(at_destination)
                print json.dumps(else_where, indent=2)
                ## do the full stuck transfer study, missing files and all
                for there in going_to:
                    late_info = findLateFiles(url, out, going_to=there)
                    for l in late_info:
                        l.update({"workflow": wfo.name, "dataset": out})
                    all_late_files.extend(late_info)
                if check_fullcopy_to_announce:
                    ## only set this false if the check is relevant
                    all_OK[out] = False

        ## verify if we have to do harvesting
        (OK, requests) = spawn_harvesting(url, wfi, in_full)
        all_OK.update(OK)

        ## only that status can let me go into announced
        if all(all_OK.values()) and wfi.request['RequestStatus'] in ['closed-out']:
            print wfo.name, "to be announced"
            results = []  #'dummy']
            if not results:
                for out in outputs:
                    if all_OK[out]:
                        results.append(setDatasetStatus(out, 'VALID'))
                        tier = out.split('/')[-1]
                        campaign = None
                        try:
                            campaign = out.split('/')[2].split('-')[0]
                        except:
                            if 'Campaign' in wfi.request and wfi.request['Campaign']:
                                campaign = wfi.request['Campaign']
                        to_DDM = False
                        ## campaign override
                        if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']:
                            to_DDM = True
                        ## by typical enabling
                        if tier in UC.get("tiers_to_DDM"):
                            to_DDM = True
                        ## check for unitarity
                        if not tier in UC.get("tiers_no_DDM") + UC.get("tiers_to_DDM"):
                            print "tier", tier, "neither TO nor NO DDM for", out
                            results.append('Not recognized tier %s' % tier)
                            sendEmail("failed DDM injection", "could not recognize %s for injecting in DDM" % out)
                            continue

                        n_copies = 2
                        destinations = []
                        if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]:
                            ddm_instructions = CI.campaigns[campaign]['DDMcopies']
                            if type(ddm_instructions) == int:
                                n_copies = CI.campaigns[campaign]['DDMcopies']
                            elif type(ddm_instructions) == dict:
                                ## a more fancy configuration
                                for ddmtier, indication in ddm_instructions.items():
                                    if ddmtier == tier or ddmtier in ['*', 'all']:
                                        ## this is for us
                                        if 'N' in indication:
                                            n_copies = indication['N']
                                        if 'host' in indication:
                                            destinations = indication['host']

                        destination_spec = ""
                        if destinations:
                            destination_spec = "--destination=" + ",".join(destinations)

                        ## inject to DDM when necessary
                        if to_DDM:
                            #print "Sending",out," to DDM"
                            p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s --exec' % (n_copies, out, destination_spec))
                            print p.read()
                            status = p.close()
                            if status != None:
                                print "Failed DDM, retrying a second time"
                                p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s --exec' % (n_copies, out, destination_spec))
                                print p.read()
                                status = p.close()
                                if status != None:
                                    sendEmail("failed DDM injection", "could not add " + out + " to DDM pool. check closor logs.")
                            results.append(status)
                            if status == None:
                                ## argument order fixed: the dataset comes first in the message
                                wfi.sendLog('closor', '%s is sent to AnalysisOps DDM pool in %s copies %s' % (out, n_copies, destination_spec))
                    else:
                        print wfo.name, "no stats for announcing", out
                        results.append('No Stats')

                if all(map(lambda result: result in ['None', None, True], results)):
                    ## only announce if all previous are fine
                    res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                    if not res in ['None', None]:
                        ## check the status again, it might well have toggled
                        wl_bis = workflowInfo(url, wfo.name)
                        wfo.wm_status = wl_bis.request['RequestStatus']
                        session.commit()
                        if wl_bis.request['RequestStatus'] in ['announced', 'normal-archived']:
                            res = None
                        else:
                            ## retry ?
                            res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                    results.append(res)

            #print results
            if all(map(lambda result: result in ['None', None, True], results)):
                wfo.status = 'done'
                session.commit()
                wfi.sendLog('closor', "workflow is announced")
            else:
                print "ERROR with ", wfo.name, "to be announced", json.dumps(results)
        else:
            print wfo.name, "not good for announcing:", wfi.request['RequestStatus']
            wfi.sendLog('closor', "cannot be announced")
            held.add(wfo.name)

    days_late = 0.
    retries_late = 10

    really_late_files = [info for info in all_late_files if info['retries'] >= retries_late]
    really_late_files = [info for info in really_late_files if info['delay'] / (60 * 60 * 24.) >= days_late]

    if really_late_files:
        subject = 'These %d files are lagging for %d days and %d retries announcing dataset \n%s' % (len(really_late_files), days_late, retries_late, json.dumps(really_late_files, indent=2))
        sendEmail('waiting for files to announce', subject)
        sendLog('closor', subject)
        print subject
        open('%s/stuck_files.json' % monitor_dir, 'w').write(json.dumps(really_late_files, indent=2))

    if held:
        sendEmail("held from announcing", "the workflows below are held up, please check the logs https://cmst2.web.cern.ch/cmst2/unified/logs/closor/last.log \n%s" % ("\n".join(held)))
def actor(url, options=None):
    if moduleLock(wait=False, silent=True)(): return
    if userLock('actor'): return
    up = componentInfo(soft=['mcm'])
    if not up.check(): return
    # CI = campaignInfo()
    SI = siteInfo()
    UC = unifiedConfiguration()
    WC = wtcClient()

    action_list = WC.get_actions()
    if action_list is None:
        print "Not able to load action list"
        sendLog('actor', 'Not able to load action list', level='critical')
        return

    if options.actions:
        action_list = json.loads(open(options.actions).read())

    print json.dumps(action_list, indent=2)
    if not action_list:
        print "EMPTY!"
        return

    wf_list = action_list.keys()
    print json.dumps(sorted(wf_list), indent=2)
    if options.spec:
        wf_list = [wf for wf in wf_list if options.spec in wf]

    max_per_round = UC.get('max_per_round').get('actor', None)
    if max_per_round:
        random.shuffle(wf_list)
        wf_list = wf_list[:max_per_round]

    for wfname in wf_list:
        print '-' * 100
        print "Looking at", wfname, "for recovery options"

        to_clone = False
        to_acdc = False
        to_force = False
        to_hold = False
        something_to_do = False

        tasks = action_list[wfname].get('Parameters', None)
        to_acdc = action_list[wfname].get('Action', None) == 'acdc'
        to_clone = action_list[wfname].get('Action', None) == 'clone'
        to_force = action_list[wfname].get('Action', None) == 'special' and action_list[wfname].get('Parameters', {}).get('action', None) in ['by-pass', 'bypass']
        to_hold = action_list[wfname].get('Action', None) == 'special' and action_list[wfname].get('Parameters', {}).get('action', None) in ['onhold', 'on-hold']

        if not to_acdc and not to_clone and not to_force and not to_hold:
            sendLog('actor', 'Action submitted for something other than acdc, clone, bypass or hold for workflow %s' % wfname, level='critical')
            print json.dumps(action_list[wfname], indent=2)
            continue

        if not tasks and to_acdc:
            sendLog('actor', 'Empty action submitted for workflow %s' % wfname, level='critical')
            print "Moving on. Parameters is blank for " + wfname
            continue

        wfi = workflowInfo(url, wfname)

        recover = True
        message_to_ops = ""
        message_to_user = ""

        #===========================================================
        if to_clone and options.do:
            print "Let's try kill and clone: "
            wfi.sendLog('actor', 'Going to clone %s' % wfname)
            results = []
            datasets = set(wfi.request['OutputDatasets'])

            comment = ""
            if 'comment' in tasks:
                comment = ", reason: " + tasks['comment']
            wfi.sendLog('actor', "invalidating the workflow by traffic controller %s" % comment)

            #Reject all workflows in the family,
            #first the original workflow itself.
            reqMgrClient.invalidateWorkflow(url, wfi.request['RequestName'], current_status=wfi.request['RequestStatus'], cascade=False)
            #Then reject any ACDCs associated with that workflow
            family = getWorkflowById(url, wfi.request['PrepID'], details=True)
            for fwl in family:
                print "rejecting", fwl['RequestName'], fwl['RequestStatus']
                wfi.sendLog('actor', "rejecting %s, previous status %s" % (fwl['RequestName'], fwl['RequestStatus']))
                reqMgrClient.invalidateWorkflow(url, fwl['RequestName'], current_status=fwl['RequestStatus'], cascade=False)
                datasets.update(fwl['OutputDatasets'])

            #Invalidate all associated output datasets
            for dataset in datasets:
                results.append(setDatasetStatus(dataset, 'INVALID'))

            if all(map(lambda result: result in ['None', None, True], results)):
                wfi.sendLog('actor', "%s and children are rejected" % wfname)

            cloned = None
            try:
                cloned = singleClone(url, wfname, tasks, comment, options.do)
            except Exception as e:
                sendLog('actor', 'Failed to create clone for %s! Check logs for more information. Action will need to be resubmitted.' % wfname, level='critical')
                wfi.sendLog('actor', 'Failed to create clone for %s!' % wfname)
                print str(e)
                ## let's not remove the action, otherwise the workflow goes to "trouble" and the WTC cannot set the action again
            if not cloned:
                recover = False
                wfi.sendLog('actor', 'Failed to create clone for %s!' % wfname)
                sendLog('actor', 'Failed to create clone for %s!' % wfname, level='critical')
            else:
                wfi.sendLog('actor', "Workflow %s cloned" % wfname)

        #===========================================================
        elif to_force:
            wfi.sendLog('actor', 'Bypassing from workflow traffic controller request')
            forcing = json.loads(open('/afs/cern.ch/user/v/vlimant/public/ops/forcecomplete.json').read())
            forcing.append(wfname)
            open('/afs/cern.ch/user/v/vlimant/public/ops/forcecomplete.json', 'w').write(json.dumps(sorted(set(forcing))))
        elif to_hold:
            wfi.sendLog('actor', 'Holding on workflow traffic controller request')
            holding = json.loads(open('/afs/cern.ch/user/v/vlimant/public/ops/onhold.json').read())
            holding.append(wfname)
            open('/afs/cern.ch/user/v/vlimant/public/ops/onhold.json', 'w').write(json.dumps(sorted(set(holding))))

        #===========================================================
        elif to_acdc:
            if 'AllSteps' in tasks:
                allTasksDefaults = tasks['AllSteps']
                tasks.pop('AllSteps')
                for setting in allTasksDefaults:
                    for task in tasks:
                        ## tasks[task] is a dict: assign the default directly.
                        ## (the original 'else' branch called list.append on a dict, which would raise)
                        tasks[task][setting] = allTasksDefaults[setting]

            print "Tasks is "
            print json.dumps(tasks, indent=2)

            all_tasks = wfi.getAllTasks()

            ## need a way to verify that this is the first round of ACDC, since the second round will have to be on the ACDC themselves
            try:
                WMErr = wfi.getWMErrors()
                # print WMErr
            except:
                sendLog('actor', 'Cannot create ACDCS for %s because WMErr cannot be reached.' % wfname, level='critical')
                continue
            if not WMErr:
                wfi.sendLog('actor', 'WMErrors is blank for %s.' % wfname)
                print "FYI getWMErrors is blank. Presumably there are only unreported errors"
                # continue

            try:
                where_to_run, missing_to_run, missing_to_run_at = wfi.getRecoveryInfo()
                print "Where to run = "
                print where_to_run
            except:
                sendLog('actor', 'Cannot create ACDCS for %s because recovery info cannot be found.' % wfname, level='critical')
                print "Moving on. Cannot access recovery info for " + wfname
                continue
            if not where_to_run:
                sendLog('actor', 'Cannot create ACDCS for %s because site list cannot be found.' % wfname, level='critical')
                print "Moving on. where to run is blank"
                continue

            message_to_ops = ""
            message_to_user = ""

            num_tasks_to_recover = 0

            if WMErr:
                for task in WMErr:
                    if 'LogCollect' in task: continue
                    if 'Cleanup' in task: continue
                    if not 'jobfailed' in WMErr[task]:
                        continue
                    else:
                        num_tasks_to_recover += 1
                        # print "Task to recover: " + task

            if not num_tasks_to_recover:
                print "\tno error for", wfname
                # recover = False

            if 'LheInputFiles' in wfi.request and wfi.request['LheInputFiles']:
                ## we do not try to recover pLHE
                sendLog('actor', 'Cannot create ACDCS for %s because it is a pLHE workflow.' % wfname, level='critical')
                print "We don't try to recover pLHE. Moving on."
                recover = False
                # sendEmail('cannot submit action', '%s is a pLHE workflow. We do not try to recover pLHE'%wfname)

            # if wfi.request['RequestType'] in ['ReReco']:
            #     recover= False
            #     print 'cannot submit action. ReReco'
            #     sendEmail('cannot submit action', '%s is request type ReReco'%wfname)

            recovering = set()
            for task in tasks:
                assign_to_sites = set()
                print "Task name is " + task
                fulltaskname = '/' + wfname + '/' + task
                # print "Full task name is " + fulltaskname
                wrong_task = False
                for task_info in all_tasks:
                    if fulltaskname == task_info.pathName:
                        if task_info.taskType not in ['Processing', 'Production', 'Merge']:
                            wrong_task = True
                            wfi.sendLog('actor', "Skipping task %s because the taskType is %s. Can only ACDC Processing, Production, or Merge tasks" % (fulltaskname, task_info.taskType))
                if wrong_task:
                    continue
                print tasks[task]
                actions = tasks[task]
                for action in actions:
                    if action.startswith('sites'):
                        if type(actions[action]) != list:
                            assign_to_sites = [SI.SE_to_CE(actions[action])]
                        else:
                            assign_to_sites = list(set([SI.SE_to_CE(site) for site in actions[action]]))
                    # if action.startswith('mem') and actions[action] != "" and actions[action] != 'Same' and wfi.request['RequestType'] in ['TaskChain']:
                    #     recover = False;
                    #     print "Skipping %s for now until Allie fixes memory parameter for TaskChain ACDCs."%wfname
                    #     wfi.sendLog('actor',"Skipping %s for now until Allie fixes memory parameter for TaskChain ACDCs."%wfname)
                if not 'sites' in actions:
                    assign_to_sites = list(set([SI.SE_to_CE(site) for site in where_to_run[task]]))
                    print "Found", sorted(assign_to_sites), "as sites where to run the ACDC at, from the acdc doc of ", wfname
                print "Going to run at", sorted(assign_to_sites)

                if recover:
                    print "Initiating recovery"
                    acdc = singleRecovery(url, fulltaskname, wfi.request, actions, do=options.do)
                    if not acdc:
                        if options.do:
                            if recovering:
                                print wfname + " has been partially ACDC'ed. Needs manual attention."
                                sendLog('actor', "%s has had %s/%s recoveries %s only" % (wfname, len(recovering), num_tasks_to_recover, list(recovering)), level='critical')
                                wfi.sendLog('actor', "%s has had %s/%s recoveries %s only" % (wfname, len(recovering), num_tasks_to_recover, list(recovering)))
                                break
                            else:
                                print wfname + " failed recovery once"
                                recover = False
                                break
                        else:
                            print "no action to take further"
                            # sendLog('recoveror', "ACDC for %s can be done automatically"% wfname, level='critical')
                            continue
                    else:
                        #ACDC was made correctly. Now we have to assign it.
                        wfi.sendLog('actor', 'ACDC created for task %s. Actions taken \n%s' % (fulltaskname, json.dumps(actions)))
                        #team = wfi.request['Teams'][0]
                        team = 'production'
                        parameters = {
                            'SiteWhitelist': sorted(assign_to_sites),
                            'AcquisitionEra': wfi.acquisitionEra(),
                            'ProcessingString': wfi.processingString(),
                            'MergedLFNBase': wfi.request['MergedLFNBase'],
                            'ProcessingVersion': wfi.request['ProcessingVersion'],
                        }
                        ## hackery for ACDC merge assignment
                        if wfi.request['RequestType'] == 'TaskChain' and 'Merge' in task.split('/')[-1]:
                            parameters['AcquisitionEra'] = None
                            parameters['ProcessingString'] = None

                        ## xrootd settings on primary and secondary
                        if 'xrootd' in actions:
                            if actions['xrootd'] == 'enabled':
                                print "Going to assign via xrootd"
                                parameters['TrustSitelists'] = True
                            elif actions['xrootd'] == 'disabled':
                                parameters['TrustSitelists'] = False
                        elif ('TrustSitelists' in wfi.request and wfi.request['TrustSitelists'] == 'true'):
                            parameters['TrustSitelists'] = True
                        else:
                            parameters['TrustSitelists'] = False

                        if 'secondary' in actions:
                            if actions['secondary'] == 'enabled':
                                print 'Enabling reading the secondary input via xrootd'
                                parameters['TrustPUSitelists'] = True
                            elif actions['secondary'] == 'disabled':
                                parameters['TrustPUSitelists'] = False
                            #in case secondary is blank or not set to enabled or disabled
                            elif 'TrustPUSitelists' in wfi.request and wfi.request['TrustPUSitelists']:
                                parameters['TrustPUSitelists'] = True
                        elif 'TrustPUSitelists' in wfi.request and wfi.request['TrustPUSitelists']:
                            parameters['TrustPUSitelists'] = True

                        if options.ass:
                            print "really doing the assignment of the ACDC", acdc
                            parameters['execute'] = True
                            wfi.sendLog('actor', "%s was assigned for recovery" % acdc)
                        else:
                            print "no assignment done with this ACDC", acdc
                            sendLog('actor', "%s needs to be assigned" % (acdc), level='critical')
                            continue

                        # print parameters
                        result = reqMgrClient.assignWorkflow(url, acdc, team, parameters)
                        if not result:
                            print acdc, "was not assigned"
                            sendLog('actor', "%s needs to be assigned" % (acdc), level='critical')
                        else:
                            recovering.add(acdc)

            wfi.sendLog('actor', "ACDCs created for %s" % wfname)

        #===========================================================
        if recover and options.do:
            r = WC.remove_action(wfname)
            if not r:
                sendLog('actor', 'not able to remove the action, interlocking the module', level='critical')
                os.system('touch %s/actor.failed-%s.lock' % (base_eos_dir, os.getpid()))
                sys.exit(-1)

        if message_to_user:
            print wfname, "to be notified to user(DUMMY)", message_to_user

        if message_to_ops:
            print 'message'
            #sendEmail( "notification in recoveror" , message_to_ops, destination=['*****@*****.**'])
            # sendLog('recoveror',message_to_ops,level='warning')

    return
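## --- Illustrative action_list shape consumed by actor() (hypothetical values;
## --- the keys 'Action', 'Parameters', 'AllSteps' and the per-task settings
## --- 'sites', 'xrootd', 'secondary' are the ones read above) ---
_example_action_list = {
    "pdmvserv_task_EXO-Example-00001_v0": {
        "Action": "acdc",
        "Parameters": {
            "AllSteps": {"memory": "4000"},
            "EXO-Example-00001_0": {
                "sites": ["T2_CH_CERN", "T1_US_FNAL"],
                "xrootd": "enabled",
                "secondary": "disabled"
            }
        }
    },
    "pdmvserv_task_SUS-Example-00002_v0": {
        "Action": "special",
        "Parameters": {"action": "bypass"}
    }
}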
def invalidator(url, invalid_status='INVALID'):
    use_mcm = True
    up = componentInfo(soft=['wtc', 'jira'])
    if not up.check(): return
    mcm = McMClient(dev=False)
    invalids = mcm.getA('invalidations', query='status=announced')
    if not invalids: return

    print len(invalids), "Object to be invalidated"
    text_to_batch = defaultdict(str)
    text_to_request = defaultdict(str)
    for invalid in invalids:
        acknowledge = False
        pid = invalid['prepid']
        batch_lookup = invalid['prepid']
        text = ""
        if invalid['type'] == 'request':
            wfn = invalid['object']
            print "need to invalidate the workflow", wfn
            wfo = session.query(Workflow).filter(Workflow.name == wfn).first()
            if wfo:
                ## set forget of that thing (although checkor will recover from it)
                print "setting the status of", wfo.status, "to forget"
                wfo.status = 'forget'
                session.commit()
            else:
                ## do not go on like this, do not acknowledge it
                print wfn, "is set to be rejected, but we do not know about it yet"
                #continue
            wfi = workflowInfo(url, wfn)
            success = "not rejected"
            ## to do, we should find a way to reject the workflow and any related acdc
            successes = invalidate(url, wfi, only_resub=True, with_output=False)
            wfi.sendLog('invalidator', "rejection is performed from McM invalidations request")
            acknowledge = all(successes)
            text = "The workflow %s (%s) was rejected due to invalidation in McM" % (wfn, pid)
            batch_lookup = wfn  ## so that the batch id is taken as the one containing the workflow name
        elif invalid['type'] == 'dataset':
            dataset = invalid['object']
            if '?' in dataset: continue
            if 'None' in dataset: continue
            if 'None-' in dataset: continue
            if 'FAKE-' in dataset: continue
            print "setting", dataset, "to", invalid_status
            success = setDatasetStatus(dataset, invalid_status)
            if success:
                acknowledge = True
                text = "The dataset %s (%s) was set INVALID due to invalidation in McM" % (dataset, pid)
            else:
                msg = "Could not invalidate {}. Please consider contacting the data management team for manual intervention.".format(dataset)
                print(msg)
                sendLog('invalidator', msg, level='critical')
        else:
            print "\t\t", invalid['type'], " type not recognized"

        if acknowledge:
            ## acknowledge invalidation in mcm, provided we can have the api
            print "acknowledgment to mcm"
            ackno_url = '/restapi/invalidations/acknowledge/%s' % (invalid['_id'])
            print "at", ackno_url
            mcm.get(ackno_url)

            # prepare the text for batches
            batches = []
            batches.extend(mcm.getA('batches', query='contains=%s' % batch_lookup))
            batches = filter(lambda b: b['status'] in ['announced', 'done', 'reset'], batches)
            if len(batches):
                bid = batches[-1]['prepid']
                print "batch notification to", bid
                text_to_batch[bid] += text + "\n\n"
            # prepare the text for requests
            text_to_request[pid] += text + "\n\n"

    for bid, text in text_to_batch.items():
        if not text: continue
        text += '\n This is an automated message'
        mcm.put('/restapi/batches/notify', {"notes": text, "prepid": bid})

    for pid, text in text_to_request.items():
        if not text: continue
        text += '\n This is an automated message'
        mcm.put('/restapi/requests/notify', {"message": text, "prepids": [pid]})
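## --- Illustrative McM invalidation records (hypothetical values; the fields
## --- 'prepid', 'object', 'type' and '_id' are the ones read above) ---
_example_invalids = [
    {"_id": "abc123", "prepid": "EXO-Example-00001",
     "type": "request", "object": "pdmvserv_task_EXO-Example-00001_v0"},
    {"_id": "def456", "prepid": "EXO-Example-00002",
     "type": "dataset", "object": "/Example/Dataset-v1/AODSIM"},
]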
    def run(self):
        site = self.site
        print "checking on site", site
        si = self.SI
        UC = self.UC
        RDI = self.RDI
        options = self.options
        locks = self.locks
        waiting = self.waiting
        stuck = self.stuck
        missing = self.missing
        remainings = {}

        ds = si.getRemainingDatasets(si.CE_to_SE(site))
        #print len(ds)
        taken_size = 0.
        sum_waiting = 0.
        sum_stuck = 0.
        sum_missing = 0.
        sum_unlocked = 0.
        n_ds = options.ndatasets
        i_ds = 0
        ds_threads = []
        for i_ds, (size, dataset) in enumerate(ds):
            if n_ds and i_ds >= n_ds: break
            remainings[dataset] = {"size": size, "reasons": []}
            #print "-"*10
            if not dataset in locks:
                #print dataset,"is not locked"
                sum_unlocked += size
                remainings[dataset]["reasons"].append('unlock')
            else:
                remainings[dataset]["reasons"].append('lock')
            if dataset in waiting:
                #print dataset,"is waiting for custodial"
                sum_waiting += size
                remainings[dataset]["reasons"].append('tape')
            if dataset in stuck:
                sum_stuck += size
                remainings[dataset]["reasons"].append('stuck-tape')
            if dataset in missing:
                sum_missing += size
                remainings[dataset]["reasons"].append('missing-tape')
            ds_threads.append(DatasetCheckBuster(dataset=dataset, url=url))

        run_threads = ThreadHandler(threads=ds_threads,
                                    label='%s Dataset Threads' % site,
                                    n_threads=10,
                                    start_wait=0,
                                    timeout=None,
                                    verbose=True)
        ## start and sync
        run_threads.run()
        #run_threads.start()
        #while run_threads.is_alive():
        #    time.sleep(10)

        for t in run_threads.threads:
            remainings[t.dataset]["reasons"].extend(t.reasons)
            remainings[t.dataset]["reasons"].sort()
            print t.dataset, remainings[t.dataset]["reasons"]

        #print "\t",sum_waiting,"[GB] could be freed by custodial"
        print "\t", sum_unlocked, "[GB] is not locked by unified"

        print "updating database with remaining datasets"
        RDI.set(site, remainings)
        try:
            eosFile('%s/remaining_%s.json' % (monitor_dir, site), 'w').write(json.dumps(remainings, indent=2)).close()
        except:
            pass

        ld = remainings.items()
        ld.sort(key=lambda i: i[1]['size'], reverse=True)

        table = "<html>Updated %s GMT, <a href=remaining_%s.json>json data</a><br>" % (time.asctime(time.gmtime()), site)

        accumulate = defaultdict(lambda: defaultdict(float))
        for item in remainings:
            tier = item.split('/')[-1]
            for reason in remainings[item]['reasons']:
                accumulate[reason][tier] += remainings[item]['size']
        ## note: the original string had a stray closing </thead> before the header row
        table += "<table border=1><thead><tr><th>Reason</th><th>size [TB]</th></tr></thead>"
        for reason in accumulate:
            s = 0
            table += "<tr><td>%s</td><td><ul>" % reason
            subitems = accumulate[reason].items()
            subitems.sort(key=lambda i: i[1], reverse=True)
            for tier, ss in subitems:
                table += "<li> %s : %10.3f</li>" % (tier, ss / 1024.)
                s += ss / 1024.
            table += "</ul>total : %.3f</td>" % s
        table += "</table>\n"

        table += "<table border=1><thead><tr><th>Dataset</th><th>Size [GB]</th><th>Label</th></tr></thead>\n"
        only_unlock = set()
        for item in ld:
            ds_name = item[0]
            reasons = item[1]['reasons']
            sub_url = '<a href="https://cmsweb.cern.ch/das/request?input=%s">%s</a>' % (ds_name, ds_name)
            if 'unlock' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/phedex/datasvc/xml/prod/subscriptions?block=%s%%23*&node=%s">block</a>' % (ds_name, site)
            if 'unlock' in reasons or 'input' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?inputdataset=%s&mask=RequestName&mask=RequestStatus">input</a>' % (ds_name)
            if 'unlock' in reasons or 'output' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?outputdataset=%s&mask=RequestName&mask=RequestStatus">output</a>' % (ds_name)
            if 'pilup' in reasons:  ## (sic) reason key as produced upstream
                sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?mc_pileup=%s&mask=RequestName&mask=RequestStatus">secondary</a>' % (ds_name)
            table += "<tr><td>%s</td><td>%d</td><td><ul>%s</ul></td></tr>\n" % (sub_url, item[1]['size'], "<li>".join([""] + reasons))
            if reasons == ['unlock']:
                only_unlock.add(item[0])
        table += "</table></html>"
        eosFile('%s/remaining_%s.html' % (monitor_dir, site), 'w').write(table).close()

        print "checking on unlock only datasets"
        to_ddm = UC.get('tiers_to_DDM')
        #look_at = list(only_unlock)
        look_at = list(only_unlock)[:20]
        #look_at = list([ds for ds in only_unlock if not ds.endswith('NANOAODSIM')])
        for item in look_at:
            tier = item.split('/')[-1]
            ds_status = getDatasetStatus(item)
            print item, ds_status
            if ds_status == 'PRODUCTION':
                print item, "is found", ds_status, "and unlocked on", site
                if options.invalidate_anything_left_production_once_unlocked:
                    print "Setting status to invalid for", item
                    setDatasetStatus(item, 'INVALID')
            if tier in to_ddm:
                print item, "looks like analysis and still dataops on", site
                if options.change_dataops_subs_to_anaops_once_unlocked:
                    print "Sending", item, "to anaops"
                    allCompleteToAnaOps(url, item)
def invalidator(url, invalid_status='INVALID'):
    use_mcm = True
    up = componentInfo(mcm=use_mcm)
    if not up.check(): return
    mcm = McMClient(dev=False)
    invalids = mcm.getA('invalidations', query='status=announced')

    print len(invalids), "Object to be invalidated"
    text_to_batch = defaultdict(str)
    text_to_request = defaultdict(str)
    for invalid in invalids:
        acknowledge = False
        pid = invalid['prepid']
        batch_lookup = invalid['prepid']
        text = ""
        if invalid['type'] == 'request':
            wfn = invalid['object']
            print "need to invalidate the workflow", wfn
            wfo = session.query(Workflow).filter(Workflow.name == wfn).first()
            if wfo:
                ## set forget of that thing (although checkor will recover from it)
                print "setting the status of", wfo.status, "to forget"
                wfo.status = 'forget'
                session.commit()
            else:
                ## do not go on like this, do not acknowledge it
                print wfn, "is set to be rejected, but we do not know about it yet"
                #continue
            wfi = workflowInfo(url, wfn)
            success = "not rejected"
            ## to do, we should find a way to reject the workflow and any related acdc
            success = reqMgrClient.invalidateWorkflow(url, wfn, current_status=wfi.request['RequestStatus'])
            ## need to find the whole family and reject the whole gang
            family = getWorkflowById(url, wfi.request['PrepID'], details=True)
            for fwl in family:
                ## take out all acdc
                if fwl['RequestDate'] < wfi.request['RequestDate']: continue
                if fwl['RequestType'] != 'Resubmission': continue
                print "rejecting", fwl['RequestName']
                success = reqMgrClient.invalidateWorkflow(url, fwl['RequestName'], current_status=fwl['RequestStatus'])
                print success
            wfi.sendLog('invalidator', "rejection is performed from McM invalidations request")
            acknowledge = True
            text = "The workflow %s (%s) was rejected due to invalidation in McM" % (wfn, pid)
            batch_lookup = wfn  ## so that the batch id is taken as the one containing the workflow name
        elif invalid['type'] == 'dataset':
            dataset = invalid['object']
            if '?' in dataset: continue
            if 'None' in dataset: continue
            if 'None-' in dataset: continue
            if 'FAKE-' in dataset: continue
            print "setting", dataset, "to", invalid_status
            success = setDatasetStatus(dataset, invalid_status)
            if success:
                acknowledge = True
                text = "The dataset %s (%s) was set INVALID due to invalidation in McM" % (dataset, pid)
            else:
                print "invalidation of", dataset, "did not go so well"
        else:
            print "\t\t", invalid['type'], " type not recognized"

        if acknowledge:
            ## acknowledge invalidation in mcm, provided we can have the api
            print "acknowledgment to mcm"
            mcm.get('/restapi/invalidations/acknowledge/%s' % (invalid['_id']))
            # prepare the text for batches
            batches = []
            batches.extend(mcm.getA('batches', query='contains=%s' % batch_lookup))
            batches = filter(lambda b: b['status'] in ['announced', 'done', 'reset'], batches)
            if len(batches):
                bid = batches[-1]['prepid']
                print "batch notification to", bid
                text_to_batch[bid] += text + "\n\n"
            # prepare the text for requests
            text_to_request[pid] += text + "\n\n"

    for bid, text in text_to_batch.items():
        if not text: continue
        text += '\n This is an automated message'
        mcm.put('/restapi/batches/notify', {"notes": text, "prepid": bid})

    for pid, text in text_to_request.items():
        if not text: continue
        text += '\n This is an automated message'
        mcm.put('/restapi/requests/notify', {"message": text, "prepids": [pid]})
def rejector(url, specific, options=None):
    #use_mcm = True
    #up = componentInfo(mcm=use_mcm, soft=['mcm'])
    up = componentInfo()
    if not up.check(): return
    #use_mcm = up.status['mcm']
    #mcm = McMClient(dev=False) if use_mcm else None

    if specific and specific.startswith('/'):
        ## this is for a dataset
        print setDatasetStatus(specific, 'INVALID')
        return

    if options.filelist:
        wfs = []
        for line in filter(None, open(options.filelist).read().split('\n')):
            print line
            wfs.extend(session.query(Workflow).filter(Workflow.name.contains(line)).all())
    elif specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
    else:
        wfs = session.query(Workflow).filter(Workflow.status == 'assistance-clone').all()
        #wfs.extend( session.query(Workflow).filter(Workflow.status == 'assistance-reject').all())
        ## be careful then on clone case by case
        options.clone = True
        print "not supposed to function yet"
        return

    print len(wfs), "to reject"
    if len(wfs) > 1:
        print "\n".join([wfo.name for wfo in wfs])
        answer = raw_input('Reject these')
        if not answer.lower() in ['y', 'yes']: return

    for wfo in wfs:
        #wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject", specific
            return
        results = []
        wfi = workflowInfo(url, wfo.name)
        datasets = set(wfi.request['OutputDatasets'])
        reqMgrClient.invalidateWorkflow(url, wfo.name, current_status=wfi.request['RequestStatus'])
        comment = ""
        if options.comments:
            comment = ", reason: " + options.comments
        wfi.sendLog('rejector', 'invalidating the workflow by unified operator%s' % comment)
        ## need to find the whole family and reject the whole gang
        family = getWorkflowById(url, wfi.request['PrepID'], details=True)
        for fwl in family:
            if fwl['RequestDate'] < wfi.request['RequestDate']: continue
            if fwl['RequestType'] != 'Resubmission': continue
            ## does not work on second-order acdc
            if 'OriginalRequestName' in fwl and fwl['OriginalRequestName'] != wfi.request['RequestName']: continue
            print "rejecting", fwl['RequestName']
            reqMgrClient.invalidateWorkflow(url, fwl['RequestName'], current_status=fwl['RequestStatus'], cascade=False)
            datasets.update(fwl['OutputDatasets'])

        for dataset in datasets:
            if options.keep:
                print "keeping", dataset, "in its current status"
            else:
                results.append(setDatasetStatus(dataset, 'INVALID'))

        if all(map(lambda result: result in ['None', None, True], results)):
            print wfo.name, "and", datasets, "are rejected"
            if options and options.clone:
                wfo.status = 'trouble'
                session.commit()
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    schema['ProcessingVersion'] = int(schema['ProcessingVersion']) + 1  ## dubious str->int conversion
                else:
                    schema['ProcessingVersion'] = 2

                for k in schema.keys():
                    if k.startswith('Team'):
                        schema.pop(k)
                    if k.startswith('checkbox'):
                        schema.pop(k)

                ## a few tweaks to the original request
                if options.Memory:
                    if schema['RequestType'] == 'TaskChain':
                        it = 1
                        while True:
                            t = 'Task%d' % it
                            it += 1
                            if t in schema:
                                schema[t]['Memory'] = options.Memory
                            else:
                                break
                    else:
                        schema['Memory'] = options.Memory

                if options.Multicore:
                    ## to do : set it properly in taskchains
                    schema['Multicore'] = options.Multicore

                if options.deterministic:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['DeterministicPileup'] = True

                if options.EventsPerJob:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['EventsPerJob'] = options.EventsPerJob
                    else:
                        schema['EventsPerJob'] = options.EventsPerJob

                if options.EventAwareLumiBased:
                    schema['SplittingAlgo'] = 'EventAwareLumiBased'
                if options.TimePerEvent:
                    schema['TimePerEvent'] = options.TimePerEvent
                if options.ProcessingString:
                    schema['ProcessingString'] = options.ProcessingString
                if options.AcquisitionEra:
                    schema['AcquisitionEra'] = options.AcquisitionEra
                if options.runs:
                    schema['RunWhitelist'] = map(int, options.runs.split(','))
                if options.PrepID:
                    schema['PrepID'] = options.PrepID

                if schema['RequestType'] == 'TaskChain' and options.no_output:
                    ntask = schema['TaskChain']
                    for it in range(1, ntask - 1):
                        schema['Task%d' % it]['KeepOutput'] = False
                    schema['TaskChain'] = ntask - 1
                    schema.pop('Task%d' % ntask)

                ## update to the current priority
                schema['RequestPriority'] = wfi.request['RequestPriority']

                ## drop parameters that reqmgr2 will not accept back
                paramBlacklist = ['BlockCloseMaxEvents', 'BlockCloseMaxFiles', 'BlockCloseMaxSize',
                                  'BlockCloseMaxWaitTime', 'CouchWorkloadDBName', 'CustodialGroup',
                                  'CustodialSubType', 'Dashboard', 'GracePeriod', 'HardTimeout',
                                  'InitialPriority', 'inputMode', 'MaxMergeEvents', 'MaxMergeSize',
                                  'MaxRSS', 'MaxVSize', 'MinMergeSize', 'NonCustodialGroup',
                                  'NonCustodialSubType', 'OutputDatasets', 'ReqMgr2Only',
                                  ## a missing comma in the original fused 'RequestDate' and
                                  ## 'RequestorDN' into one string, so neither was dropped
                                  'RequestDate', 'RequestorDN',
                                  'RequestName', 'RequestStatus', 'RequestTransition',
                                  'RequestWorkflow', 'SiteWhitelist', 'SoftTimeout', 'SoftwareVersions',
                                  'SubscriptionPriority', 'Team', 'timeStamp', 'TrustSitelists',
                                  'TrustPUSitelists', 'TotalEstimatedJobs', 'TotalInputEvents',
                                  'TotalInputLumis', 'TotalInputFiles']
                for p in paramBlacklist:
                    if p in schema:
                        schema.pop(p)

                print "submitting"
                if (options.to_stepchain and (schema['RequestType'] == 'TaskChain')):
                    ## transform the schema into a StepChain schema
                    print "Transforming a TaskChain into a StepChain"
                    schema['RequestType'] = 'StepChain'
                    schema['StepChain'] = schema.pop('TaskChain')
                    step = 1
                    while True:
                        if 'Task%d' % step in schema:
                            schema['Step%d' % step] = schema.pop('Task%d' % step)
                            schema['Step%d' % step]['StepName'] = schema['Step%d' % step].pop('TaskName')
                            if 'InputTask' in schema['Step%d' % step]:
                                schema['Step%d' % step]['InputStep'] = schema['Step%d' % step].pop('InputTask')
                            if not 'KeepOutput' in schema['Step%d' % step]:
                                ## this is a weird translation quirk: in a StepChain, absence of KeepOutput means keep the output, while in a TaskChain absence means drop it
                                schema['Step%d' % step]['KeepOutput'] = False
                            step += 1
                        else:
                            break

                print json.dumps(schema, indent=2)
                newWorkflow = reqMgrClient.submitWorkflow(url, schema)
                if not newWorkflow:
                    print "error in cloning", wfo.name
                    print json.dumps(schema, indent=2)
                    return
                print newWorkflow

                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfi.sendLog('rejector', 'Cloned into %s by unified operator %s' % (newWorkflow, comment))
                wfi.notifyRequestor('Cloned into %s by unified operator %s' % (newWorkflow, comment), do_batch=False)
            else:
                wfo.status = 'forget'
                wfi.notifyRequestor('Rejected by unified operator %s' % (comment), do_batch=False)
                session.commit()
        else:
            print "error in rejecting", wfo.name, results
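## --- Toy illustration of the TaskChain -> StepChain key translation above ---
## (hypothetical minimal schema; only the renamed keys are shown)
_taskchain_in = {
    'RequestType': 'TaskChain', 'TaskChain': 2,
    'Task1': {'TaskName': 'Digi'},
    'Task2': {'TaskName': 'Reco', 'InputTask': 'Digi', 'KeepOutput': True},
}
## after the loop: RequestType='StepChain', StepChain=2,
## Step1={'StepName': 'Digi', 'KeepOutput': False},   # absent KeepOutput flips to False
## Step2={'StepName': 'Reco', 'InputStep': 'Digi', 'KeepOutput': True}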
def rejector(url, specific, options=None):
    up = componentInfo()

    if specific and specific.startswith('/'):
        return

    if options.filelist:
        wfs = []
        for line in filter(None, open(options.filelist).read().split('\n')):
            print line
            wfs.extend(session.query(Workflow).filter(Workflow.name.contains(line)).all())
    elif specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
    else:
        return

    print len(wfs), "to reject"
    if len(wfs) > 1:
        print "\n".join([wfo.name for wfo in wfs])
        answer = raw_input('Reject these')
        if not answer.lower() in ['y', 'yes']: return

    for wfo in wfs:
        #wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject", specific
            return
        results = []
        wfi = workflowInfo(url, wfo.name)
        datasets = set(wfi.request['OutputDatasets'])
        reqMgrClient.invalidateWorkflow(url, wfo.name, current_status=wfi.request['RequestStatus'])
        wfi.sendLog('rejector', 'invalidating the workflow by unified operator')
        ## need to find the whole family and reject the whole gang
        family = getWorkflowById(url, wfi.request['PrepID'], details=True)
        for fwl in family:
            if fwl['RequestDate'] < wfi.request['RequestDate']: continue
            if fwl['RequestType'] != 'Resubmission': continue
            if 'OriginalRequestName' in fwl and fwl['OriginalRequestName'] != wfi.request['RequestName']: continue
            print "rejecting", fwl['RequestName']
            reqMgrClient.invalidateWorkflow(url, fwl['RequestName'], current_status=fwl['RequestStatus'])
            datasets.update(fwl['OutputDatasets'])

        for dataset in datasets:
            if options.keep:
                print "keeping", dataset, "in its current status"
            else:
                results.append(setDatasetStatus(dataset, 'INVALID'))

        #if wfi.request['RequestStatus'] in ['rejected','rejected-archived','aborted','aborted-archived']:
        #    print 'already',wfi.request['RequestStatus']
        #    if not options.clone:
        #        wfo.status = 'forget'
        #        session.commit()
        #    continue

        if all(map(lambda result: result in ['None', None, True], results)):
            wfo.status = 'forget'
            session.commit()
            print wfo.name, "and", datasets, "are rejected"
            if options and options.clone:
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    schema['ProcessingVersion'] = int(schema['ProcessingVersion']) + 1  ## dubious str->int conversion
                else:
                    schema['ProcessingVersion'] = 2
                for k in schema.keys():
                    if k.startswith('Team'):
                        schema.pop(k)
                    if k.startswith('checkbox'):
                        schema.pop(k)

                ## a few tweaks to the original request
                if options.Memory:
                    schema['Memory'] = options.Memory
                if options.deterministic:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['DeterministicPileup'] = True
                if options.EventsPerJob:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['EventsPerJob'] = options.EventsPerJob
                    else:
                        schema['EventsPerJob'] = options.EventsPerJob
                if options.EventAwareLumiBased:
                    schema['SplittingAlgo'] = 'EventAwareLumiBased'
                if options.TimePerEvent:
                    schema['TimePerEvent'] = options.TimePerEvent
                if options.ProcessingString:
                    schema['ProcessingString'] = options.ProcessingString
                if options.AcquisitionEra:
                    schema['AcquisitionEra'] = options.AcquisitionEra
                if options.runs:
                    schema['RunWhitelist'] = map(int, options.runs.split(','))
                if options.PrepID:
                    schema['PrepID'] = options.PrepID

                if schema['RequestType'] == 'TaskChain' and options.no_output:
                    ntask = schema['TaskChain']
                    for it in range(1, ntask - 1):
                        schema['Task%d' % it]['KeepOutput'] = False
                    schema['TaskChain'] = ntask - 1
                    schema.pop('Task%d' % ntask)

                ## update to the current priority
                schema['RequestPriority'] = wfi.request['RequestPriority']

                ## drop parameters that reqmgr2 will not accept back
                for p in ['RequestStatus',
                          'RequestTransition',
                          'RequestorDN',
                          'RequestWorkflow',
                          'OutputDatasets',
                          'ReqMgr2Only',
                          #'Group',
                          'RequestDate',
                          #'ConfigCacheUrl',
                          'RequestName',
                          'timeStamp',
                          'SoftwareVersions',
                          'CouchURL'
                          ]:
                    if p in schema:
                        schema.pop(p)

                print "submitting"
                print json.dumps(schema, indent=2)
                newWorkflow = reqMgrClient.submitWorkflow(url, schema)
                if not newWorkflow:
                    print "error in cloning", wfo.name
                    print json.dumps(schema, indent=2)
                    return
                print newWorkflow
                #m = re.search("details\/(.*)\'",response)
                #if not m:
                #    print "error in cloning",wfo.name
                #    print response
                #    print json.dumps( schema, indent=2 )
                #    return
                #newWorkflow = m.group(1)

                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfo.status = 'trouble'
                session.commit()
                wfi.sendLog('rejector', 'Cloned into %s by unified operator' % (newWorkflow))
        else:
            print "error in rejecting", wfo.name, results
def closor(url, specific=None, options=None): if userLock(): return if duplicateLock(): return if not componentInfo().check(): return UC = unifiedConfiguration() CI = campaignInfo() all_late_files = [] check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce') jump_the_line = options.announce if options else False if jump_the_line: print "announce option is on. Checking on things on-going ready to be announced" wfs = session.query(Workflow).filter( Workflow.status.contains('announce')).filter( sqlalchemy.not_(Workflow.status.contains('announced'))).all() else: print "regular option. Checking on things done and to be announced" wfs = session.query(Workflow).filter(Workflow.status == 'close').all() wfs_n = [w.name for w in wfs] print "unique names?" print len(set(wfs_n)) == len(wfs_n) held = set() print len(wfs), "closing" random.shuffle(wfs) max_per_round = UC.get('max_per_round').get('closor', None) if options.limit: max_per_round = options.limit if max_per_round: ## order them by priority all_closedout = sorted(getWorkflows(url, 'closed-out', details=True), key=lambda r: r['RequestPriority']) all_closedout = [r['RequestName'] for r in all_closedout] def rank(wfn): return all_closedout.index(wfn) if wfn in all_closedout else 0 wfs = sorted(wfs, key=lambda wfo: rank(wfo.name), reverse=True) wfs = wfs[:max_per_round] batch_go = {} batch_warnings = defaultdict(set) batch_goodness = UC.get("batch_goodness") for iwfo, wfo in enumerate(wfs): if specific and not specific in wfo.name: continue print "Progress [%d/%d]" % (iwfo, len(wfs)) ## what is the expected #lumis wfi = workflowInfo(url, wfo.name) wfo.wm_status = wfi.request['RequestStatus'] if wfi.isRelval(): has_batch_go = False batch_name = wfi.getCampaign() if not batch_name in batch_go: ## do the esimatation whethere this can be announced : only once per batch in_batches = getWorkflowByCampaign(url, batch_name, details=True) batch_go[batch_name] = all( map( lambda s: not s in [ 'completed', 'running-open', 'running-closed', 'acquired', 'assigned', 'assignment-approved' ], [r['RequestStatus'] for r in in_batches])) ## already verified has_batch_go = batch_go[batch_name] if not has_batch_go: wfi.sendLog( 'closor', 'Cannot close for now because the batch <a href=https://dmytro.web.cern.ch/dmytro/cmsprodmon/workflows.php?campaign=%s>%s</a> is not all close' % (batch_name, batch_name)) continue if wfi.request['RequestStatus'] in ['announced', 'normal-archived' ] and not options.force: ## manually announced ?? 
wfo.status = 'done' wfo.wm_status = wfi.request['RequestStatus'] wfi.sendLog( 'closor', '%s is announced already : %s' % (wfo.name, wfo.wm_status)) session.commit() if jump_the_line: wfi.sendLog('closor', 'Announcing while completing') expected_lumis = 1 if not 'TotalInputLumis' in wfi.request: print wfo.name, "has not been assigned yet, or the database is corrupted" elif wfi.request['TotalInputLumis'] == 0: print wfo.name, "is corrupted with 0 expected lumis" else: expected_lumis = wfi.request['TotalInputLumis'] ## what are the outputs outputs = wfi.request['OutputDatasets'] ## check whether the number of lumis is as expected for each all_OK = defaultdict(lambda: False) stats = defaultdict(int) #print outputs if len(outputs): print wfo.name, wfi.request['RequestStatus'] for out in outputs: event_count, lumi_count = getDatasetEventsAndLumis(dataset=out) odb = session.query(Output).filter( Output.datasetname == out).first() if not odb: print "adding an output object", out odb = Output(datasetname=out) odb.workflow = wfo session.add(odb) odb.nlumis = lumi_count odb.nevents = event_count odb.workfow_id = wfo.id if odb.expectedlumis < expected_lumis: odb.expectedlumis = expected_lumis else: expected_lumis = odb.expectedlumis odb.date = time.mktime(time.gmtime()) session.commit() fraction = lumi_count / float(expected_lumis) * 100. completion_line = "%60s %d/%d = %3.2f%%" % ( out, lumi_count, expected_lumis, fraction) wfi.sendLog('closor', "\t%s" % completion_line) if wfi.isRelval() and fraction < batch_goodness: batch_warnings[wfi.getCampaign()].add(completion_line) stats[out] = lumi_count all_OK[out] = True ## check for at least one full copy prior to moving on in_full = {} for out in outputs: in_full[out] = [] presence = getDatasetPresence(url, out) where = [site for site, info in presence.items() if info[0]] if where: all_OK[out] = True print out, "is in full at", ",".join(where) in_full[out] = copy.deepcopy(where) else: going_to = wfi.request['NonCustodialSites'] + wfi.request[ 'CustodialSites'] wfi.sendLog( 'closor', "%s is not in full anywhere. send to %s" % (out, ",".join(sorted(going_to)))) at_destination = dict([(k, v) for (k, v) in presence.items() if k in going_to]) else_where = dict([(k, v) for (k, v) in presence.items() if not k in going_to]) print json.dumps(at_destination) print json.dumps(else_where, indent=2) ## do the full stuck transfer study, missing files and shit ! 
for there in going_to: late_info = findLateFiles(url, out, going_to=there) for l in late_info: l.update({"workflow": wfo.name, "dataset": out}) all_late_files.extend(late_info) if check_fullcopy_to_announce: ## only set this false if the check is relevant all_OK[out] = False ## verify if we have to do harvesting if not options.no_harvest and not jump_the_line: (OK, requests) = spawn_harvesting(url, wfi, in_full) all_OK.update(OK) ## only that status can let me go into announced if all(all_OK.values()) and ( (wfi.request['RequestStatus'] in ['closed-out']) or options.force or jump_the_line): print wfo.name, "to be announced" results = [] if not results: for out in outputs: if out in stats and not stats[out]: continue _, dsn, process_string, tier = out.split('/') if all_OK[out]: results.append(setDatasetStatus(out, 'VALID')) if all_OK[out] and wfi.isRelval(): ## make the specific relval rules and the replicas ## figure the destination(s) out destinations = set() if tier != "RECO" and tier != "ALCARECO": destinations.add('T2_CH_CERN') if tier == "GEN-SIM": destinations.add('T1_US_FNAL_Disk') if tier == "GEN-SIM-DIGI-RAW": destinations.add('T1_US_FNAL_Disk') if tier == "GEN-SIM-RECO": destinations.add('T1_US_FNAL_Disk') if "RelValTTBar" in dsn and "TkAlMinBias" in process_string and tier != "ALCARECO": destinations.add('T2_CH_CERN') if "MinimumBias" in dsn and "SiStripCalMinBias" in process_string and tier != "ALCARECO": destinations.add('T2_CH_CERN') if destinations: wfi.sendLog( 'closor', '%s to go to %s' % (out, ', '.join(sorted(destinations)))) ## call to makereplicarequest under relval => done for site in destinations: result = makeReplicaRequest( url, site, [out], 'Copy for release validation consumption', priority='normal', approve=True, mail=False, group='RelVal') try: request_id = result['phedex'][ 'request_created'][0]['id'] results.append(True) except: results.append('Failed relval transfer') elif all_OK[out]: campaign = None try: campaign = out.split('/')[2].split('-')[0] except: if 'Campaign' in wfi.request and wfi.request[ 'Campaign']: campaign = wfi.request['Campaign'] to_DDM = False ## campaign override if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[ campaign] and tier in CI.campaigns[campaign][ 'toDDM']: to_DDM = True ## by typical enabling if tier in UC.get("tiers_to_DDM"): to_DDM = True ## check for unitarity if not tier in UC.get("tiers_no_DDM") + UC.get( "tiers_to_DDM"): print "tier", tier, "neither TO or NO DDM for", out results.append('Not recognitized tier %s' % tier) #sendEmail("failed DDM injection","could not recognize %s for injecting in DDM"% out) sendLog( 'closor', "could not recognize %s for injecting in DDM" % out, level='critical') continue n_copies = 1 destinations = [] if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[ campaign]: ddm_instructions = CI.campaigns[campaign][ 'DDMcopies'] if type(ddm_instructions) == int: n_copies = CI.campaigns[campaign]['DDMcopies'] elif type(ddm_instructions) == dict: ## a more fancy configuration for ddmtier, indication in ddm_instructions.items( ): if ddmtier == tier or ddmtier in [ '*', 'all' ]: ## this is for us if 'N' in indication: n_copies = indication['N'] if 'host' in indication: destinations = indication['host'] destination_spec = "" if destinations: destination_spec = "--destination=" + ",".join( destinations) group_spec = "" ## not used yet ### should make this a campaign configuration ## inject to DDM when necessary if to_DDM: print "Sending", out, " to DDM" 
status = pass_to_dynamo( [out], N=n_copies, sites=destinations if destinations else None, group=group_spec if group_spec else None) results.append(status) if status == True: wfi.sendLog( 'closor', '%s is send to dynamo in %s copies %s %s' % (out, n_copies, sorted(destinations), group_spec)) else: sendLog('closor', "could not add " + out + " to dynamo pool. check closor logs.", level='critical') wfi.sendLog( 'closor', "could not add " + out + " to dynamo pool. check closor logs.") else: print wfo.name, "no stats for announcing", out results.append('No Stats') if all( map(lambda result: result in ['None', None, True], results)): if not jump_the_line: ## only announce if all previous are fine res = reqMgrClient.announceWorkflowCascade( url, wfo.name) if not res in ['None', None]: ## check the status again, it might well have toggled wl_bis = workflowInfo(url, wfo.name) wfo.wm_status = wl_bis.request['RequestStatus'] session.commit() if wl_bis.request['RequestStatus'] in [ 'announced', 'normal-archived' ]: res = None else: ## retry ? res = reqMgrClient.announceWorkflowCascade( url, wfo.name) results.append(res) #print results if all(map(lambda result: result in ['None', None, True], results)): if jump_the_line: if not 'announced' in wfo.status: wfo.status = wfo.status.replace( 'announce', 'announced') else: wfo.status = 'done' session.commit() wfi.sendLog('closor', "workflow outputs are announced") else: wfi.sendLog( 'closor', "Error with %s to be announced \n%s" % (wfo.name, json.dumps(results))) elif wfi.request['RequestStatus'] in [ 'failed', 'aborted', 'aborted-archived', 'rejected', 'rejected-archived', 'aborted-completed' ]: if wfi.isRelval(): wfo.status = 'forget' wfo.wm_status = wfi.request['RequestStatus'] wfi.sendLog( 'closor', "%s is %s, but will not be set in trouble to find a replacement." % (wfo.name, wfo.wm_status)) else: wfo.status = 'trouble' wfo.wm_status = wfi.request['RequestStatus'] session.commit() else: print wfo.name, "not good for announcing:", wfi.request[ 'RequestStatus'] wfi.sendLog('closor', "cannot be announced") held.add(wfo.name) days_late = 0. retries_late = 10 really_late_files = [ info for info in all_late_files if info['retries'] >= retries_late ] really_late_files = [ info for info in really_late_files if info['delay'] / (60 * 60 * 24.) >= days_late ] if really_late_files: subject = 'These %d files are lagging for %d days and %d retries announcing dataset \n%s' % ( len(really_late_files), days_late, retries_late, json.dumps(really_late_files, indent=2)) #sendEmail('waiting for files to announce', subject) sendLog('closor', subject, level='warning') sendLog('closor', subject) print subject open('%s/stuck_files.json' % monitor_dir, 'w').write(json.dumps(really_late_files, indent=2)) if held: sendLog('closor', "the workflows below are held up \n%s" % ("\n".join(sorted(held))), level='critical') #batches = json.loads(open('batches.json').read()) for bname, go in batch_go.items(): if go: subject = "Release Validation Samples Batch %s" % bname issues = "" if batch_warnings[bname]: issues = "The following datasets have outstanding completion (<%d%%) issues:\n\n" % batch_goodness issues += "\n".join(sorted(batch_warnings[bname])) issues += "\n\n" text = """ Dear all, a batch of release validation workflows has finished. Batch ID: %s Detail of the workflows https://dmytro.web.cern.ch/dmytro/cmsprodmon/requests.php?campaign=%s %s This is an automated message. """ % (bname, bname, issues) to = ['*****@*****.**'] sendEmail(subject, text, destination=to)
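## The announce gate above treats a result as success when it is True, None,
## or the string 'None'. A minimal standalone sketch of that predicate
## (the helper name is hypothetical, not part of the code above):
def _all_results_ok(results):
    ## an empty list counts as success, matching the all(map(...)) idiom above
    return all(r in ['None', None, True] for r in results)

assert _all_results_ok([])
assert _all_results_ok([True, None, 'None'])
assert not _all_results_ok([True, 'No Stats'])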
def closor(url, specific=None, options=None): if userLock(): return if duplicateLock(): return if not componentInfo().check(): return UC = unifiedConfiguration() CI = campaignInfo() all_late_files = [] check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce') jump_the_line = options.announce if options else False if jump_the_line: wfs = session.query(Workflow).filter(Workflow.status.contains('announce')).filter(sqlalchemy.not_(Workflow.status.contains('announced'))).all() else: wfs = session.query(Workflow).filter(Workflow.status=='close').all() held = set() print len(wfs),"closing" max_per_round = UC.get('max_per_round').get('closor',None) if options.limit: max_per_round = options.limit random.shuffle( wfs ) if max_per_round: wfs = wfs[:max_per_round] batch_go = {} batch_warnings = defaultdict(set) batch_goodness = UC.get("batch_goodness") for wfo in wfs: if specific and not specific in wfo.name: continue ## what is the expected #lumis wfi = workflowInfo(url, wfo.name ) wfo.wm_status = wfi.request['RequestStatus'] if wfi.isRelval(): has_batch_go = False batch_name = wfi.getCampaign() if not batch_name in batch_go: ## do the esimatation whethere this can be announced : only once per batch in_batches = getWorkflowByCampaign(url , batch_name, details=True) batch_go[ batch_name ] = all(map(lambda s : not s in ['completed','running-open','running-closed','acquired','assigned','assignment-approved'], [r['RequestStatus'] for r in in_batches])) ## already verified has_batch_go = batch_go[batch_name] if not has_batch_go: wfi.sendLog('closor', 'Cannot close for now because the batch %s is not all close'% batch_name) continue if wfi.request['RequestStatus'] in ['announced','normal-archived'] and not options.force: ## manually announced ?? wfo.status = 'done' wfo.wm_status = wfi.request['RequestStatus'] wfi.sendLog('closor','%s is announced already : %s'%( wfo.name,wfo.wm_status)) session.commit() if jump_the_line: wfi.sendLog('closor','Announcing while completing') expected_lumis = 1 if not 'TotalInputLumis' in wfi.request: print wfo.name,"has not been assigned yet, or the database is corrupted" elif wfi.request['TotalInputLumis']==0: print wfo.name,"is corrupted with 0 expected lumis" else: expected_lumis = wfi.request['TotalInputLumis'] ## what are the outputs outputs = wfi.request['OutputDatasets'] ## check whether the number of lumis is as expected for each all_OK = defaultdict(lambda : False) stats = defaultdict(int) #print outputs if len(outputs): print wfo.name,wfi.request['RequestStatus'] for out in outputs: event_count,lumi_count = getDatasetEventsAndLumis(dataset=out) odb = session.query(Output).filter(Output.datasetname==out).first() if not odb: print "adding an output object",out odb = Output( datasetname = out ) odb.workflow = wfo session.add( odb ) odb.nlumis = lumi_count odb.nevents = event_count odb.workfow_id = wfo.id if odb.expectedlumis < expected_lumis: odb.expectedlumis = expected_lumis else: expected_lumis = odb.expectedlumis odb.date = time.mktime(time.gmtime()) session.commit() fraction = lumi_count/float(expected_lumis)*100. 
completion_line = "%60s %d/%d = %3.2f%%"%(out,lumi_count,expected_lumis,fraction) wfi.sendLog('closor',"\t%s"% completion_line) if wfi.isRelval() and fraction < batch_goodness: batch_warnings[ wfi.getCampaign()].add( completion_line ) stats[out] = lumi_count all_OK[out] = True ## check for at least one full copy prior to moving on in_full = {} for out in outputs: in_full[out] = [] presence = getDatasetPresence( url, out ) where = [site for site,info in presence.items() if info[0]] if where: all_OK[out] = True print out,"is in full at",",".join(where) in_full[out] = copy.deepcopy(where) else: going_to = wfi.request['NonCustodialSites']+wfi.request['CustodialSites'] wfi.sendLog('closor',"%s is not in full anywhere. send to %s"%(out, ",".join(sorted(going_to)))) at_destination = dict([(k,v) for (k,v) in presence.items() if k in going_to]) else_where = dict([(k,v) for (k,v) in presence.items() if not k in going_to]) print json.dumps( at_destination ) print json.dumps( else_where, indent=2 ) ## do the full stuck transfer study, missing files and shit ! for there in going_to: late_info = findLateFiles(url, out, going_to = there ) for l in late_info: l.update({"workflow":wfo.name,"dataset":out}) all_late_files.extend( late_info ) if check_fullcopy_to_announce: ## only set this false if the check is relevant all_OK[out] = False ## verify if we have to do harvesting if not options.no_harvest and not jump_the_line: (OK, requests) = spawn_harvesting(url, wfi, in_full) all_OK.update( OK ) ## only that status can let me go into announced if all(all_OK.values()) and ((wfi.request['RequestStatus'] in ['closed-out']) or options.force or jump_the_line): print wfo.name,"to be announced" results=[] if not results: for out in outputs: if out in stats and not stats[out]: continue _,dsn,process_string,tier = out.split('/') if all_OK[out]: results.append(setDatasetStatus(out, 'VALID')) if all_OK[out] and wfi.isRelval(): ## make the specific relval rules and the replicas ## figure the destination(s) out destinations = set() if tier != "RECO" and tier != "ALCARECO": destinations.add('T2_CH_CERN') if tier == "GEN-SIM": destinations.add('T1_US_FNAL_Disk') if tier == "GEN-SIM-DIGI-RAW": destinations.add('T1_US_FNAL_Disk') if tier == "GEN-SIM-RECO": destinations.add('T1_US_FNAL_Disk') if "RelValTTBar" in dsn and "TkAlMinBias" in process_string and tier != "ALCARECO": destinations.add('T2_CH_CERN') if "MinimumBias" in dsn and "SiStripCalMinBias" in process_string and tier != "ALCARECO": destinations.add('T2_CH_CERN') if destinations: wfi.sendLog('closor', '%s to go to %s'%(out, ', '.join( sorted( destinations )))) ## call to makereplicarequest under relval => done for site in destinations: result = makeReplicaRequest(url, site, [out], 'Copy for release validation consumption', priority='normal', approve=True, mail=False, group='RelVal') try: request_id = result['phedex']['request_created'][0]['id'] results.append( True ) except: results.append( 'Failed relval transfer' ) elif all_OK[out]: campaign = None try: campaign = out.split('/')[2].split('-')[0] except: if 'Campaign' in wfi.request and wfi.request['Campaign']: campaign = wfi.request['Campaign'] to_DDM = False ## campaign override if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']: to_DDM = True ## by typical enabling if tier in UC.get("tiers_to_DDM"): to_DDM = True ## check for unitarity if not tier in UC.get("tiers_no_DDM")+UC.get("tiers_to_DDM"): print "tier",tier,"neither TO or NO DDM 
for",out results.append('Not recognitized tier %s'%tier) #sendEmail("failed DDM injection","could not recognize %s for injecting in DDM"% out) sendLog('closor', "could not recognize %s for injecting in DDM"% out, level='critical') continue n_copies = 2 destinations=[] if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]: ddm_instructions = CI.campaigns[campaign]['DDMcopies'] if type(ddm_instructions) == int: n_copies = CI.campaigns[campaign]['DDMcopies'] elif type(ddm_instructions) == dict: ## a more fancy configuration for ddmtier,indication in ddm_instructions.items(): if ddmtier==tier or ddmtier in ['*','all']: ## this is for us if 'N' in indication: n_copies = indication['N'] if 'host' in indication: destinations = indication['host'] destination_spec = "" if destinations: destination_spec = "--destination="+",".join( destinations ) group_spec = "" ## not used yet ### should make this a campaign configuration ## inject to DDM when necessary if to_DDM: print "Sending",out," to DDM" p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s %s --debug 0 --exec'%(n_copies, out,destination_spec, group_spec)) ddm_text = p.read() print ddm_text status = p.close() if status!=None: print "Failed DDM, retrying to send",out,"a second time" p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s %s --debug 1 --exec'%(n_copies, out,destination_spec, group_spec)) ddm_text = p.read() print ddm_text status = p.close() if status!=None: #sendEmail("failed DDM injection","could not add "+out+" to DDM pool. check closor logs.") sendLog('closor',"could not add "+out+" to DDM pool. check closor logs.", level='critical') if options.force: status = True results.append( status ) if status == None: wfi.sendLog('closor',ddm_text) wfi.sendLog('closor','%s is send to AnalysisOps DDM pool in %s copies %s'%( out, n_copies, destination_spec)) else: print wfo.name,"no stats for announcing",out results.append('No Stats') if all(map(lambda result : result in ['None',None,True],results)): if not jump_the_line: ## only announce if all previous are fine res = reqMgrClient.announceWorkflowCascade(url, wfo.name) if not res in ['None',None]: ## check the status again, it might well have toggled wl_bis = workflowInfo(url, wfo.name) wfo.wm_status = wl_bis.request['RequestStatus'] session.commit() if wl_bis.request['RequestStatus'] in ['announced','normal-archived']: res = None else: ## retry ? res = reqMgrClient.announceWorkflowCascade(url, wfo.name) results.append( res ) #print results if all(map(lambda result : result in ['None',None,True],results)): if jump_the_line: if not 'announced' in wfo.status: wfo.status = wfo.status.replace('announce','announced') else: wfo.status = 'done' session.commit() wfi.sendLog('closor',"workflow outputs are announced") else: wfi.sendLog('closor',"Error with %s to be announced \n%s"%( wfo.name, json.dumps( results ))) elif wfi.request['RequestStatus'] in ['failed','aborted','aborted-archived','rejected','rejected-archived','aborted-completed']: if wfi.isRelval(): wfo.status = 'forget' wfo.wm_status = wfi.request['RequestStatus'] wfi.sendLog('closor',"%s is %s, but will not be set in trouble to find a replacement."%( wfo.name, wfo.wm_status)) else: wfo.status = 'trouble' wfo.wm_status = wfi.request['RequestStatus'] session.commit() else: print wfo.name,"not good for announcing:",wfi.request['RequestStatus'] wfi.sendLog('closor',"cannot be announced") held.add( wfo.name ) days_late = 0. 
    retries_late = 10
    really_late_files = [info for info in all_late_files if info['retries'] >= retries_late]
    really_late_files = [info for info in really_late_files if info['delay'] / (60 * 60 * 24.) >= days_late]
    if really_late_files:
        subject = 'These %d files are lagging for %d days and %d retries announcing dataset \n%s' % (len(really_late_files), days_late, retries_late, json.dumps(really_late_files, indent=2))
        #sendEmail('waiting for files to announce', subject)
        sendLog('closor', subject, level='warning')
        print subject
        open('%s/stuck_files.json' % monitor_dir, 'w').write(json.dumps(really_late_files, indent=2))
    if held:
        sendLog('closor', "the workflows below are held up \n%s" % ("\n".join(sorted(held))), level='critical')
    #batches = json.loads(open('batches.json').read())
    for bname, go in batch_go.items():
        if go:
            subject = "Release Validation Samples Batch %s" % bname
            issues = ""
            if batch_warnings[bname]:
                issues = "The following datasets have outstanding completion (<%d%%) issues:\n\n" % batch_goodness
                issues += "\n".join(sorted(batch_warnings[bname]))
                issues += "\n\n"
            text = """
Dear all,

a batch of release validation workflows has finished.

Batch ID: %s

Detail of the workflows

https://dmytro.web.cern.ch/dmytro/cmsprodmon/requests.php?campaign=%s

%s
This is an automated message.
""" % (bname, bname, issues)
            to = ['*****@*****.**']
            sendEmail(subject, text, destination=to)
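## A small sketch of the late-file filter used above: keep entries with at
## least retries_late retries and a delay of at least days_late days
## (field names follow the dicts built by findLateFiles; the sample data is made up):
def _really_late(all_late_files, days_late=0., retries_late=10):
    late = [info for info in all_late_files if info['retries'] >= retries_late]
    return [info for info in late if info['delay'] / (60 * 60 * 24.) >= days_late]

_sample = [{'retries': 12, 'delay': 3 * 86400., 'workflow': 'wf_A', 'dataset': '/A/B/TIER'},
           {'retries': 2, 'delay': 10 * 86400., 'workflow': 'wf_B', 'dataset': '/C/D/TIER'}]
print _really_late(_sample)  ## only the first entry survives the retry cut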
def close(self): if os.path.isfile('.closor_stop'): print "The closing of workflows is shortened" return url = self.url batch_go = self.batch_go CI = self.CI UC = self.UC wfo = self.wfo jump_the_line = self.jump_the_line batch_goodness = self.batch_goodness check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce') ## what is the expected #lumis self.wfi = workflowInfo(url, wfo.name ) wfi = self.wfi wfo.wm_status = wfi.request['RequestStatus'] if wfi.isRelval(): has_batch_go = False batch_name = wfi.getCampaign() if not batch_name in batch_go: ## do the esimatation whethere this can be announced : only once per batch in_batches = getWorkflowByCampaign(url , batch_name, details=True) batch_go[ batch_name ] = all(map(lambda s : not s in ['completed','running-open','running-closed','acquired','assigned','assignment-approved'], [r['RequestStatus'] for r in in_batches])) ## already verified has_batch_go = batch_go[batch_name] if not has_batch_go: wfi.sendLog('closor', 'Cannot close for now because the batch <a href=https://dmytro.web.cern.ch/dmytro/cmsprodmon/workflows.php?campaign=%s>%s</a> is not all close'%( batch_name, batch_name)) return if wfi.request['RequestStatus'] in ['announced','normal-archived'] and not options.force: ## manually announced ?? self.to_status = 'done' self.to_wm_status = wfi.request['RequestStatus'] wfi.sendLog('closor','%s is announced already : %s'%( wfo.name,self.to_wm_status)) return if jump_the_line: wfi.sendLog('closor','Announcing while completing') expected_lumis = 1 if not 'TotalInputLumis' in wfi.request: print wfo.name,"has not been assigned yet, or the database is corrupted" elif wfi.request['TotalInputLumis']==0: print wfo.name,"is corrupted with 0 expected lumis" else: expected_lumis = wfi.request['TotalInputLumis'] ## what are the outputs outputs = wfi.request['OutputDatasets'] ## check whether the number of lumis is as expected for each all_OK = defaultdict(lambda : False) stats = defaultdict(int) #print outputs if len(outputs): print wfo.name,wfi.request['RequestStatus'] for out in outputs: event_count,lumi_count = getDatasetEventsAndLumis(dataset=out) self.outs.append( Output( datasetname = out )) odb = self.outs[-1] odb.workflow = wfo odb.nlumis = lumi_count odb.nevents = event_count odb.workfow_id = wfo.id if odb.expectedlumis < expected_lumis: odb.expectedlumis = expected_lumis else: expected_lumis = odb.expectedlumis odb.date = time.mktime(time.gmtime()) fraction = lumi_count/float(expected_lumis)*100. completion_line = "%60s %d/%d = %3.2f%%"%(out,lumi_count,expected_lumis,fraction) wfi.sendLog('closor',"\t%s"% completion_line) if wfi.isRelval() and fraction < batch_goodness: self.batch_warnings[ wfi.getCampaign()].add( completion_line ) stats[out] = lumi_count all_OK[out] = True ## check for at least one full copy prior to moving on in_full = {} for out in outputs: in_full[out] = [] presence = getDatasetPresence( url, out ) where = [site for site,info in presence.items() if info[0]] if where: all_OK[out] = True print out,"is in full at",",".join(where) in_full[out] = copy.deepcopy(where) else: going_to = wfi.request['NonCustodialSites']+wfi.request['CustodialSites'] wfi.sendLog('closor',"%s is not in full anywhere. 
send to %s"%(out, ",".join(sorted(going_to)))) at_destination = dict([(k,v) for (k,v) in presence.items() if k in going_to]) else_where = dict([(k,v) for (k,v) in presence.items() if not k in going_to]) print json.dumps( at_destination ) print json.dumps( else_where, indent=2 ) ## do the full stuck transfer study, missing files and shit ! for there in going_to: late_info = findLateFiles(url, out, going_to = there ) for l in late_info: l.update({"workflow":wfo.name,"dataset":out}) self.all_late_files.extend( late_info ) if check_fullcopy_to_announce: ## only set this false if the check is relevant all_OK[out] = False ## verify if we have to do harvesting if not options.no_harvest and not jump_the_line: (OK, requests) = spawn_harvesting(url, wfi, in_full) all_OK.update( OK ) ## only that status can let me go into announced if all(all_OK.values()) and ((wfi.request['RequestStatus'] in ['closed-out']) or options.force or jump_the_line): print wfo.name,"to be announced" results=[] if not results: for out in outputs: print "dealing with",out if out in stats and not stats[out]: continue _,dsn,process_string,tier = out.split('/') if all_OK[out]: print "setting valid" results.append(setDatasetStatus(out, 'VALID', withFiles=False)) if all_OK[out] and wfi.isRelval(): ## make the specific relval rules and the replicas ## figure the destination(s) out destinations = set() if tier != "RECO" and tier != "ALCARECO": destinations.add('T2_CH_CERN') if tier == "GEN-SIM": destinations.add('T1_US_FNAL_Disk') if tier == "GEN-SIM-DIGI-RAW": destinations.add('T1_US_FNAL_Disk') if tier == "GEN-SIM-RECO": destinations.add('T1_US_FNAL_Disk') if "RelValTTBar" in dsn and "TkAlMinBias" in process_string and tier != "ALCARECO": destinations.add('T2_CH_CERN') if "MinimumBias" in dsn and "SiStripCalMinBias" in process_string and tier != "ALCARECO": destinations.add('T2_CH_CERN') if destinations: wfi.sendLog('closor', '%s to go to %s'%(out, ', '.join( sorted( destinations )))) ## call to makereplicarequest under relval => done for site in destinations: result = makeReplicaRequest(url, site, [out], 'Copy for release validation consumption', priority='normal', approve=True, mail=False, group='RelVal') try: request_id = result['phedex']['request_created'][0]['id'] results.append( True ) except: results.append( 'Failed relval transfer' ) elif all_OK[out]: campaign = None try: campaign = out.split('/')[2].split('-')[0] except: if 'Campaign' in wfi.request and wfi.request['Campaign']: campaign = wfi.request['Campaign'] to_DDM = False ## campaign override if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']: to_DDM = True ## by typical enabling if tier in UC.get("tiers_to_DDM"): to_DDM = True ## check for unitarity if not tier in UC.get("tiers_no_DDM")+UC.get("tiers_to_DDM"): print "tier",tier,"neither TO or NO DDM for",out results.append('Not recognitized tier %s'%tier) #sendEmail("failed DDM injection","could not recognize %s for injecting in DDM"% out) sendLog('closor', "could not recognize %s for injecting in DDM"% out, level='critical') continue n_copies = 1 destinations=[] if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]: ddm_instructions = CI.campaigns[campaign]['DDMcopies'] if type(ddm_instructions) == int: n_copies = CI.campaigns[campaign]['DDMcopies'] elif type(ddm_instructions) == dict: ## a more fancy configuration for ddmtier,indication in ddm_instructions.items(): if ddmtier==tier or ddmtier in 
['*','all']: ## this is for us if 'N' in indication: n_copies = indication['N'] if 'host' in indication: destinations = indication['host'] destination_spec = "" if destinations: destination_spec = "--destination="+",".join( destinations ) group_spec = "" ## not used yet ### should make this a campaign configuration ## inject to DDM when necessary if to_DDM: print "Sending",out," to DDM" status = pass_to_dynamo( [out], N = n_copies, sites=destinations if destinations else None, group = group_spec if group_spec else None) results.append( status ) if status == True: wfi.sendLog('closor','%s is send to dynamo in %s copies %s %s'%( out, n_copies, sorted(destinations), group_spec)) else: sendLog('closor',"could not add "+out+" to dynamo pool. check closor logs.", level='critical') wfi.sendLog('closor',"could not add "+out+" to dynamo pool. check closor logs.") else: print wfo.name,"no stats for announcing",out results.append('No Stats') if all(map(lambda result : result in ['None',None,True],results)): if not jump_the_line: ## only announce if all previous are fine res = reqMgrClient.announceWorkflowCascade(url, wfo.name) if not res in ['None',None]: ## check the status again, it might well have toggled wl_bis = workflowInfo(url, wfo.name) self.to_wm_status = wl_bis.request['RequestStatus'] if wl_bis.request['RequestStatus'] in ['announced','normal-archived']: res = None else: res = reqMgrClient.announceWorkflowCascade(url, wfo.name) results.append( res ) print results if all(map(lambda result : result in ['None',None,True],results)): if jump_the_line: if not 'announced' in wfo.status: self.to_status = wfo.status.replace('announce','announced') else: self.to_status = 'done' self.closing = True wfi.sendLog('closor',"workflow outputs are announced") else: wfi.sendLog('closor',"Error with %s to be announced \n%s"%( wfo.name, json.dumps( results ))) elif wfi.request['RequestStatus'] in ['failed','aborted','aborted-archived','rejected','rejected-archived','aborted-completed']: if wfi.isRelval(): self.to_status = 'forget' self.to_wm_status = wfi.request['RequestStatus'] wfi.sendLog('closor',"%s is %s, but will not be set in trouble to find a replacement."%( wfo.name, self.to_wm_status)) else: self.to_status = 'trouble' self.to_wm_status = wfi.request['RequestStatus'] else: print wfo.name,"not good for announcing:",wfi.request['RequestStatus'] wfi.sendLog('closor',"cannot be announced") self.held.add( wfo.name )
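## The relval batch gate in close() above is computed once per campaign and
## cached in batch_go. A standalone sketch of that memoization, with the
## remote status lookup stubbed out (getWorkflowByCampaign is assumed, so a
## fake lookup stands in for it here):
_ACTIVE = ['completed', 'running-open', 'running-closed', 'acquired',
           'assigned', 'assignment-approved']

def _batch_can_announce(batch_name, batch_go, status_lookup):
    if batch_name not in batch_go:
        statuses = status_lookup(batch_name)  ## one remote call per batch only
        batch_go[batch_name] = all(s not in _ACTIVE for s in statuses)
    return batch_go[batch_name]

cache = {}
fake_lookup = lambda b: ['announced', 'normal-archived']
print _batch_can_announce('Batch1', cache, fake_lookup)  ## True, and now cached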
def rejector(url, specific, options=None):
    up = componentInfo(soft=['wtc','jira'])
    if not up.check(): return
    if specific and specific.startswith('/'):
        ## this is for a dataset
        print setDatasetStatus(specific, 'INVALID')
        return
    if options.filelist:
        wfs = []
        for line in filter(None, open(options.filelist).read().split('\n')):
            print line
            wfs.extend( session.query(Workflow).filter(Workflow.name.contains(line)).all())
    elif specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
        if not wfs:
            batches = batchInfo().content()
            for bname in batches:
                if specific == bname:
                    for pid in batches[bname]:
                        b_wfs = getWorkflowById(url, pid)
                        for wf in b_wfs:
                            wfs.append( session.query(Workflow).filter(Workflow.name == wf).first())
                    break
    else:
        wfs = session.query(Workflow).filter(Workflow.status == 'assistance-clone').all()
        #wfs.extend( session.query(Workflow).filter(Workflow.status == 'assistance-reject').all())
        ## be careful then on clone case by case
        options.clone = True
        print "not supposed to function yet"
        return
    print len(wfs),"to reject"
    if len(wfs)>1:
        print "\n".join( [wfo.name for wfo in wfs] )
        answer = raw_input('Reject these')
        if not answer.lower() in ['y','yes']: return
    for wfo in wfs:
        #wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject",specific
            return
        wfi = workflowInfo(url, wfo.name)
        comment = ""
        if options.comments: comment = ", reason: "+options.comments
        if options.keep:
            wfi.sendLog('rejector','invalidating the workflow by unified operator%s'%comment)
        else:
            wfi.sendLog('rejector','invalidating the workflow and outputs by unified operator%s'%comment)
        results = invalidate(url, wfi, only_resub=True, with_output= (not options.keep))
        if all(results):
            print wfo.name,"rejected"
            if options and options.clone:
                wfo.status = 'trouble'
                session.commit()
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    schema['ProcessingVersion'] = int(schema['ProcessingVersion'])+1 ## dubious str->int conversion
                else:
                    schema['ProcessingVersion'] = 2
                for k in schema.keys():
                    if k.startswith('Team'):
                        schema.pop(k)
                    if k.startswith('checkbox'):
                        schema.pop(k)
                ## a few tweaks to the original request
                if options.Memory:
                    if schema['RequestType'] == 'TaskChain':
                        it = 1
                        while True:
                            t = 'Task%d'%it
                            it += 1
                            if t in schema:
                                schema[t]['Memory'] = options.Memory
                            else:
                                break
                    else:
                        schema['Memory'] = options.Memory
                if options.Multicore:
                    ## to do : set it properly in taskchains
                    if schema['RequestType'] == 'TaskChain':
                        tasks,set_to = options.Multicore.split(':') if ':' in options.Multicore else ("",options.Multicore)
                        set_to = int(set_to)
                        tasks = tasks.split(',') if tasks else ['Task1']
                        it = 1
                        while True:
                            tt = 'Task%d'% it
                            it += 1
                            if tt in schema:
                                tname = schema[tt]['TaskName']
                                if tname in tasks or tt in tasks:
                                    mem = schema[tt]['Memory']
                                    mcore = schema[tt].get('Multicore',1)
                                    factor = (set_to / float(mcore))
                                    fraction_constant = 0.4
                                    mem_per_core_c = int((1-fraction_constant) * mem / float(mcore))
                                    print "mem per core", mem_per_core_c
                                    print "base mem", mem
                                    ## adjusting the parameter in the clone
                                    schema[tt]['Memory'] += (set_to-mcore)*mem_per_core_c
                                    schema[tt]['Multicore'] = set_to
                                    schema[tt]['TimePerEvent'] /= factor
                            else:
                                break
                    else:
                        schema['Multicore'] = options.Multicore
                if options.deterministic:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['DeterministicPileup'] = True
                if options.EventsPerJob:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['EventsPerJob'] = options.EventsPerJob
                    else:
                        schema['EventsPerJob'] = options.EventsPerJob
                if options.EventAwareLumiBased:
                    schema['SplittingAlgo'] = 'EventAwareLumiBased'
                if options.TimePerEvent:
                    schema['TimePerEvent'] = options.TimePerEvent
                if options.ProcessingString:
                    schema['ProcessingString'] = options.ProcessingString
                if options.AcquisitionEra:
                    schema['AcquisitionEra'] = options.AcquisitionEra
                if options.runs:
                    schema['RunWhitelist'] = map(int,options.runs.split(','))
                if options.PrepID:
                    schema['PrepID'] = options.PrepID
                if schema['RequestType'] == 'TaskChain' and options.no_output:
                    ntask = schema['TaskChain']
                    for it in range(1,ntask-1):
                        schema['Task%d'%it]['KeepOutput'] = False
                    schema['TaskChain'] = ntask-1
                    schema.pop('Task%d'%ntask)
                if options.priority:
                    schema['RequestPriority'] = options.priority
                ## update to the current priority
                schema['RequestPriority'] = wfi.request['RequestPriority']
                ## drop shit on the way to reqmgr2
                schema = reqMgrClient.purgeClonedSchema( schema )
                print "submitting"
                if (options.to_stepchain and (schema['RequestType']=='TaskChain')):
                    ## transform the schema into StepChain schema
                    print "Transforming a TaskChain into a StepChain"
                    mcore = 0
                    mem = 0
                    schema['RequestType'] = 'StepChain'
                    schema['StepChain'] = schema.pop('TaskChain')
                    schema['SizePerEvent'] = 0
                    schema['TimePerEvent'] = 0
                    step = 1
                    s_n = {}
                    while True:
                        if 'Task%d'%step in schema:
                            sname = 'Step%d'%step
                            schema[sname] = schema.pop('Task%d'%step)
                            tmcore = schema[sname].pop('Multicore')
                            tmem = schema[sname].pop('Memory')
                            if mcore and tmcore != mcore:
                                wfi.sendLog('rejector','the conversion to stepchain encountered different values of Multicore %d != %d'%( tmcore, mcore))
                                sendLog('rejector','the conversion of %s to stepchain encountered different values of Multicore %d != %d'%( wfo.name, tmcore, mcore), level='critical')
                            mcore = max(mcore, tmcore)
                            mem = max(mem, tmem)
                            schema[sname]['StepName'] = schema[sname].pop('TaskName')
                            s_n[ schema[sname]['StepName'] ] = sname
                            if 'InputTask' in schema[sname]:
                                schema[sname]['InputStep'] = schema[sname].pop('InputTask')
                            eff = 1.
                            up_s = sname
                            while True:
                                ## climb up a step. supposedly already all converted
                                up_s = s_n.get(schema[up_s].get('InputStep',None),None)
                                if up_s:
                                    ## multiply with the efficiency
                                    eff *= schema[up_s].get('FilterEfficiency',1.)
                                else:
                                    ## or stop there
                                    break
                            if not 'KeepOutput' in schema[sname]:
                                ## this is a weird translation capability. Absence of KeepOutput in a step means: keep the output,
                                ## while in a TaskChain absence means: drop
                                schema[sname]['KeepOutput'] = False
                            schema['TimePerEvent'] += eff*schema[sname].pop('TimePerEvent')
                            schema['SizePerEvent'] += eff*schema[sname].pop('SizePerEvent')
                            step += 1
                        else:
                            break
                    schema['Multicore'] = mcore
                    schema['Memory'] = mem
                print json.dumps( schema, indent=2 )
                newWorkflow = reqMgrClient.submitWorkflow(url, schema)
                if not newWorkflow:
                    msg = "Error in cloning {}".format(wfo.name)
                    print(msg)
                    wfi.sendLog('rejector',msg)
                    # Get the error message
                    time.sleep(5)
                    data = reqMgrClient.requestManagerPost(url, "/reqmgr2/data/request", schema)
                    wfi.sendLog('rejector',data)
                    print json.dumps( schema, indent=2 )
                    return
                print newWorkflow
                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfi.sendLog('rejector','Cloned into %s by unified operator %s'%( newWorkflow, comment ))
                wfi.notifyRequestor('Cloned into %s by unified operator %s'%( newWorkflow, comment ),do_batch=False)
            else:
                wfo.status = 'trouble' if options.set_trouble else 'forget'
                wfi.notifyRequestor('Rejected by unified operator %s'%( comment ),do_batch=False)
                session.commit()
        else:
            msg = "Error in rejecting {}: {}".format(wfo.name,results)
            print(msg)
            wfi.sendLog('rejector',msg)
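## In the TaskChain -> StepChain conversion above, TimePerEvent and
## SizePerEvent of each step are weighted by the product of the
## FilterEfficiency of all upstream steps. A toy illustration of that
## accumulation, assuming a linear chain (real ReqMgr schemas carry many
## more keys than this):
def _chain_time_per_event(steps):
    ## steps: list of dicts ordered from first to last step
    total, eff = 0., 1.
    for s in steps:
        total += eff * s['TimePerEvent']
        eff *= s.get('FilterEfficiency', 1.)  ## applies to downstream steps only
    return total

print _chain_time_per_event([
    {'TimePerEvent': 10., 'FilterEfficiency': 0.1},
    {'TimePerEvent': 50.},
])  ## 10 + 0.1*50 = 15.0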
def rejector(url, specific, options=None): up = componentInfo(soft=['wtc', 'jira']) #if not up.check(): return if specific and specific.startswith('/'): ## this is for a dataset print setDatasetStatus(specific, 'INVALID') return if options.filelist: wfs = [] for line in filter(None, open(options.filelist).read().split('\n')): print line wfs.extend( session.query(Workflow).filter( Workflow.name.contains(line)).all()) elif specific: wfs = session.query(Workflow).filter( Workflow.name.contains(specific)).all() if not wfs: batches = batchInfo().content() for bname in batches: if specific == bname: for pid in batches[bname]: b_wfs = getWorkflowById(url, pid) for wf in b_wfs: wfs.append( session.query(Workflow).filter( Workflow.name == wf).first()) break else: wfs = session.query(Workflow).filter( Workflow.status == 'assistance-clone').all() #wfs.extend( session.query(Workflow).filter(Workflow.status == 'assistance-reject').all()) ## be careful then on clone case by case options.clone = True print "not supposed to function yet" return print len(wfs), "to reject" if len(wfs) > 1: print "\n".join([wfo.name for wfo in wfs]) answer = raw_input('Reject these') if not answer.lower() in ['y', 'yes']: return for wfo in wfs: #wfo = session.query(Workflow).filter(Workflow.name == specific).first() if not wfo: print "cannot reject", spec return wfi = workflowInfo(url, wfo.name) comment = "" if options.comments: comment = ", reason: " + options.comments if options.keep: wfi.sendLog( 'rejector', 'invalidating the workflow by unified operator%s' % comment) else: wfi.sendLog( 'rejector', 'invalidating the workflow and outputs by unified operator%s' % comment) results = invalidate(url, wfi, only_resub=True, with_output=(not options.keep)) if all(results): print wfo.name, "rejected" if options and options.clone: wfo.status = 'trouble' session.commit() schema = wfi.getSchema() schema['Requestor'] = os.getenv('USER') schema['Group'] = 'DATAOPS' schema['OriginalRequestName'] = wfo.name if 'ProcessingVersion' in schema: schema['ProcessingVersion'] = int( schema['ProcessingVersion'] ) + 1 ## dubious str->int conversion else: schema['ProcessingVersion'] = 2 for k in schema.keys(): if k.startswith('Team'): schema.pop(k) if k.startswith('checkbox'): schema.pop(k) ## a few tampering of the original request if options.Memory: if schema['RequestType'] == 'TaskChain': it = 1 while True: t = 'Task%d' % it it += 1 if t in schema: schema[t]['Memory'] = options.Memory else: break else: schema['Memory'] = options.Memory if options.short_task and schema['RequestType'] == 'TaskChain': translate = {} it = 1 while True: tt = 'Task%d' % it if tt in schema: tname = schema[tt]['TaskName'] ntname = 'T%d' % it translate[tname] = ntname it += 1 schema[tt]['TaskName'] = ntname if 'InputTask' in schema[tt]: itname = schema[tt]['InputTask'] schema[tt]['InputTask'] = translate[itname] else: break for k in schema.get('ProcessingString', {}).keys(): schema['ProcessingString'][ translate[k]] = schema['ProcessingString'].pop(k) for k in schema.get('AcquisitionEra', {}).keys(): schema['AcquisitionEra'][ translate[k]] = schema['AcquisitionEra'].pop(k) if options.Multicore: ## to do : set it properly in taskchains if schema['RequestType'] == 'TaskChain': tasks, set_to = options.Multicore.split( ':') if ':' in options.Multicore else ( "", options.Multicore) set_to = int(set_to) tasks = tasks.split(',') if tasks else ['Task1'] it = 1 while True: tt = 'Task%d' % it it += 1 if tt in schema: tname = schema[tt]['TaskName'] if tname in tasks or tt in 
tasks: mem = schema[tt]['Memory'] mcore = schema[tt].get('Multicore', 1) factor = (set_to / float(mcore)) fraction_constant = 0.4 mem_per_core_c = int( (1 - fraction_constant) * mem / float(mcore)) print "mem per core", mem_per_core_c print "base mem", mem ## adjusting the parameter in the clone schema[tt]['Memory'] += ( set_to - mcore) * mem_per_core_c schema[tt]['Multicore'] = set_to schema[tt]['TimePerEvent'] /= factor else: break else: schema['Multicore'] = options.Multicore if options.deterministic: if schema['RequestType'] == 'TaskChain': schema['Task1']['DeterministicPileup'] = True if options.EventsPerJob: if schema['RequestType'] == 'TaskChain': schema['Task1']['EventsPerJob'] = options.EventsPerJob else: schema['EventsPerJob'] = options.EventsPerJob if options.EventAwareLumiBased: schema['SplittingAlgo'] = 'EventAwareLumiBased' if options.TimePerEvent: schema['TimePerEvent'] = options.TimePerEvent if options.ProcessingString: schema['ProcessingString'] = options.ProcessingString if options.AcquisitionEra: schema['AcquisitionEra'] = options.AcquisitionEra if options.runs: schema['RunWhitelist'] = map(int, options.runs.split(',')) if options.PrepID: schema['PrepID'] = options.PrepID if schema['RequestType'] == 'TaskChain' and options.no_output: ntask = schema['TaskChain'] for it in range(1, ntask - 1): schema['Task%d' % it]['KeepOutput'] = False schema['TaskChain'] = ntask - 1 schema.pop('Task%d' % ntask) if options.priority: schema['RequestPriority'] = options.priority ## update to the current priority schema['RequestPriority'] = wfi.request['RequestPriority'] ## drop shit on the way to reqmgr2 schema = reqMgrClient.purgeClonedSchema(schema) print "submitting" if (options.to_stepchain and (schema['RequestType'] == 'TaskChain')): ## transform the schema into StepChain schema print "Transforming a TaskChain into a StepChain" mcore = 0 mem = 0 schema['RequestType'] = 'StepChain' schema['StepChain'] = schema.pop('TaskChain') schema['SizePerEvent'] = 0 schema['TimePerEvent'] = 0 step = 1 s_n = {} while True: if 'Task%d' % step in schema: sname = 'Step%d' % step schema[sname] = schema.pop('Task%d' % step) tmcore = schema[sname].pop('Multicore') tmem = schema[sname].pop('Memory') if mcore and tmcore != mcore: wfi.sendLog( 'rejector', 'the conversion to stepchain encoutered different value of Multicore %d != %d' % (tmcore, mcore)) sendLog( 'rejector', 'the conversion of %s to stepchain encoutered different value of Multicore %d != %d' % (wfo.name, tmcore, mcore), level='critical') mcore = max(mcore, tmcore) mem = max(mem, tmem) schema[sname]['StepName'] = schema[sname].pop( 'TaskName') s_n[schema[sname]['StepName']] = sname if 'InputTask' in schema[sname]: schema[sname]['InputStep'] = schema[sname].pop( 'InputTask') eff = 1. up_s = sname while True: ## climb up a step. supposedely already all converted up_s = s_n.get( schema[up_s].get('InputStep', None), None) if up_s: ## multiply with the efficiency eff *= schema[up_s].get( 'FilterEfficiency', 1.) else: ## or stop there break if not 'KeepOutput' in schema[sname]: ## this is a weird translation capability. Absence of keepoutput in step means : keep the output. 
while in TaskChain absence means : drop schema[sname]['KeepOutput'] = False schema['TimePerEvent'] += eff * schema[sname].pop( 'TimePerEvent') schema['SizePerEvent'] += eff * schema[sname].pop( 'SizePerEvent') step += 1 else: break schema['Multicore'] = mcore schema['Memory'] = mem print json.dumps(schema, indent=2) newWorkflow = reqMgrClient.submitWorkflow(url, schema) if not newWorkflow: msg = "Error in cloning {}".format(wfo.name) print(msg) wfi.sendLog('rejector', msg) # Get the error message time.sleep(5) data = reqMgrClient.requestManagerPost( url, "/reqmgr2/data/request", schema) wfi.sendLog('rejector', data) print json.dumps(schema, indent=2) return print newWorkflow data = reqMgrClient.setWorkflowApproved(url, newWorkflow) print data wfi.sendLog( 'rejector', 'Cloned into %s by unified operator %s' % (newWorkflow, comment)) #wfi.notifyRequestor('Cloned into %s by unified operator %s'%( newWorkflow, comment ),do_batch=False) else: wfo.status = 'trouble' if options.set_trouble else 'forget' wfi.notifyRequestor('Rejected by unified operator %s' % (comment), do_batch=False) session.commit() else: msg = "Error in rejecting {}: {}".format(wfo.name, results) print(msg) wfi.sendLog('rejector', msg)
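## The Multicore override above rescales task memory assuming ~40% of the
## footprint is constant and the rest scales with the core count. A worked
## sketch of that arithmetic (the numbers are illustrative):
def _scale_memory(mem, mcore, set_to, fraction_constant=0.4):
    mem_per_core = int((1 - fraction_constant) * mem / float(mcore))
    return mem + (set_to - mcore) * mem_per_core

## e.g. 2000 MB at 1 core -> 1200 MB per extra core -> 10400 MB at 8 cores
print _scale_memory(2000, 1, 8)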
def closor(url, specific=None):
    if not componentInfo().check(): return
    CI = campaignInfo()
    LI = lockInfo()
    ## manually closed-out workflows should get to close with checkor
    for wfo in session.query(Workflow).filter(Workflow.status=='close').all():
        if specific and not specific in wfo.name: continue
        ## what is the expected #lumis
        wl = getWorkLoad(url, wfo.name)
        wfo.wm_status = wl['RequestStatus']
        if wl['RequestStatus'] in ['announced','normal-archived']:
            ## manually announced ??
            wfo.status = 'done'
            wfo.wm_status = wl['RequestStatus']
            print wfo.name,"is announced already",wfo.wm_status
            session.commit()
        expected_lumis = 1
        if not 'TotalInputLumis' in wl:
            print wfo.name,"has not been assigned yet, or the database is corrupted"
        else:
            expected_lumis = wl['TotalInputLumis']
        ## what are the outputs
        outputs = wl['OutputDatasets']
        ## check whether the number of lumis is as expected for each
        all_OK = []
        #print outputs
        if len(outputs):
            print wfo.name,wl['RequestStatus']
        for out in outputs:
            event_count,lumi_count = getDatasetEventsAndLumis(dataset=out)
            odb = session.query(Output).filter(Output.datasetname==out).first()
            if not odb:
                print "adding an output object",out
                odb = Output( datasetname = out )
                odb.workflow = wfo
                session.add( odb )
            odb.nlumis = lumi_count
            odb.nevents = event_count
            odb.workfow_id = wfo.id
            if odb.expectedlumis < expected_lumis:
                odb.expectedlumis = expected_lumis
            else:
                expected_lumis = odb.expectedlumis
            odb.date = time.mktime(time.gmtime())
            session.commit()
            print "\t%60s %d/%d = %3.2f%%"%(out,lumi_count,expected_lumis,lumi_count/float(expected_lumis)*100.)
            #print wfo.fraction_for_closing, lumi_count, expected_lumis
            fraction = wfo.fraction_for_closing
            fraction = 0.0
            all_OK.append((float(lumi_count) > float(expected_lumis*fraction)))
        ## only that status can let me go into announced
        if wl['RequestStatus'] in ['closed-out']:
            print wfo.name,"to be announced"
            results = [] #'dummy']
            if not results:
                for (io,out) in enumerate(outputs):
                    if all_OK[io]:
                        results.append(setDatasetStatus(out, 'VALID'))
                        tier = out.split('/')[-1]
                        to_DDM = (wl['RequestType'] == 'ReDigi' and not ('DQM' in tier))
                        campaign = None
                        try:
                            campaign = out.split('/')[2].split('-')[0]
                        except:
                            if 'Campaign' in wl and wl['Campaign']:
                                campaign = wl['Campaign']
                        if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']:
                            to_DDM = True
                        ## inject to DDM when necessary
                        passed_to_DDM = True
                        if to_DDM:
                            #print "Sending",out," to DDM"
                            status = subprocess.call(['python','assignDatasetToSite.py','--nCopies=2','--dataset='+out,'--exec'])
                            if status!=0:
                                print "Failed DDM, retrying a second time"
                                status = subprocess.call(['python','assignDatasetToSite.py','--nCopies=2','--dataset='+out,'--exec'])
                                if status!=0:
                                    results.append("Failed DDM for %s"% out)
                                    sendEmail("failed DDM injection","could not add "+out+" to DDM pool. check closor logs.")
                                    passed_to_DDM = False
                            if passed_to_DDM:
                                ## make a lock release
                                LI.release_everywhere( out, reason = 'global unlock after passing to DDM')
                                pass
                    else:
                        print wfo.name,"no stats for announcing",out
                        results.append('No Stats')
                if all(map(lambda result : result in ['None',None,True],results)):
                    ## only announce if all previous are fine
                    results.append(reqMgrClient.announceWorkflowCascade(url, wfo.name))
            #print results
            if all(map(lambda result : result in ['None',None,True],results)):
                wfo.status = 'done'
                session.commit()
                print wfo.name,"is announced"
            else:
                print "ERROR with ",wfo.name,"to be announced",json.dumps( results )
        else:
            print wfo.name,"not good for announcing:",wl['RequestStatus']
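## The DDM injection above shells out and retries once on a non-zero exit
## code. A generic sketch of that retry-once pattern with subprocess (the
## command here is a harmless placeholder, not assignDatasetToSite.py):
import subprocess

def _call_with_one_retry(cmd):
    status = subprocess.call(cmd)
    if status != 0:
        print "first attempt failed, retrying once"
        status = subprocess.call(cmd)
    return status

print _call_with_one_retry(['python', '-c', 'print "ok"'])  ## 0 on success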
def actor(url,options=None): if userLock('actor'): return up = componentInfo(mcm=False, soft=['mcm']) if not up.check(): return # CI = campaignInfo() SI = siteInfo() UC = unifiedConfiguration() # Need to look at the actions page https://vocms0113.cern.ch:80/getaction (can add ?days=20) and perform any actions listed try: action_list = json.loads(os.popen('curl -s -k https://vocms0113.cern.ch:80/getaction?days=15').read()) ## now we have a list of things that we can take action on except: print "Not able to load action list :(" sendLog('actor','Not able to load action list', level='critical') return print action_list if not action_list: print "EMPTY!" return for wfname in action_list: print '-'*100 print "Looking at",wfname,"for recovery options" to_clone = False to_acdc = False for key in action_list[wfname]: if key == 'Parameters': tasks = action_list[wfname][key] elif key == 'Action' and action_list[wfname][key] == 'acdc': print "Going to create ACDCs for ", wfname to_acdc = True elif key == 'Action' and action_list[wfname][key] == 'clone': print "Going to clone ", wfname to_clone = True if not to_acdc and not to_clone: sendLog('actor','Action submitted for something other than acdc and clone for workflow %s'%wfname,level='critical') print "Can only do acdcs and clones! Skipping workflow ",wfname continue if not tasks: sendLog('actor','Empty action submitted for workflow %s'%wfname,level='critical') print "Moving on. Parameters is blank for " + wfname continue wfi = workflowInfo(url, wfname) recover = True message_to_ops = "" message_to_user = "" #=========================================================== if to_clone and options.do: print "Let's try kill and clone: " wfi.sendLog('actor','Going to clone %s'%wfname) results=[] datasets = set(wfi.request['OutputDatasets']) comment="" if 'comment' in tasks: comment = ", reason: "+ tasks['comment'] wfi.sendLog('actor',"invalidating the workflow by traffic controller %s"%comment) #Reject all workflows in the family #first reject the original workflow. reqMgrClient.invalidateWorkflow(url, wfi.request['RequestName'], current_status=wfi.request['RequestStatus'], cascade=False) #Then reject any ACDCs associated with that workflow if 'ACDCs' in action_list[wfname]: children = action_list[wfname]['ACDCs'] for child in children: wfi.sendLog('actor',"rejecting %s"%child) wfi_acdc = workflowInfo(url, child) reqMgrClient.invalidateWorkflow(url, wfi_acdc.request['RequestName'], current_status=wfi_acdc.request['RequestStatus'], cascade=False) datasets.update( wfi_acdc.request['OutputDatasets'] ) #Invalidate all associated output datasets for dataset in datasets: results.append( setDatasetStatus(dataset, 'INVALID') ) if all(map(lambda result : result in ['None',None,True],results)): wfi.sendLog('actor',"%s and children are rejected"%wfname) cloned = None try: cloned = singleClone(url, wfname, tasks, comment, options.do) except: sendLog('actor','Failed to create clone for %s! Check logs for more information. 
Action will need to be resubmitted.'%wfname,level='critical') wfi.sendLog('actor','Failed to create clone for %s!'%wfname) remove_action(wfname) if not cloned: recover = False wfi.sendLog('actor','Failed to create clone for %s!'%wfname) sendLog('actor','Failed to create clone for %s!'%wfname,level='critical') else: wfi.sendLog('actor',"Workflow %s cloned"%wfname) #=========================================================== elif to_acdc: if 'AllSteps' in tasks: allTasksDefaults = tasks['AllSteps'] tasks.pop('AllSteps') for setting in allTasksDefaults: for task in tasks: if setting in tasks[task]: tasks[task][setting] = allTasksDefaults[setting] else: tasks[task].append({setting:allTasksDefaults[setting]}) print "Tasks is " print tasks all_tasks = wfi.getAllTasks() ## need a way to verify that this is the first round of ACDC, since the second round will have to be on the ACDC themselves try: WMErr = wfi.getWMErrors() # print WMErr except: sendLog('actor','Cannot create ACDCS for %s because WMErr cannot be reached.'%wfname,level='critical') continue if not WMErr: sendLog('actor','Cannot create ACDCS for %s because WMErr is blank.'%wfname,level='critical') print "Moving on. WMErr is blank" continue try: where_to_run, missing_to_run,missing_to_run_at = wfi.getRecoveryInfo() print "Where to run = " print where_to_run except: sendLog('actor','Cannot create ACDCS for %s because recovery info cannot be found.'%wfname,level='critical') print "Moving on. Cannot access recovery info for " + wfname continue if not where_to_run: sendLog('actor','Cannot create ACDCS for %s because site list cannot be found.'%wfname,level='critical') print "Moving on. where to run is blank" continue message_to_ops = "" message_to_user = "" num_tasks_to_recover = 0 for task in WMErr: if 'LogCollect' in task: continue if 'Cleanup' in task: continue if not 'jobfailed' in WMErr[task]: continue else: num_tasks_to_recover += 1 # print "Task to recover: " + task if not num_tasks_to_recover: print "\tno error for",wfname # recover = False if 'LheInputFiles' in wfi.request and wfi.request['LheInputFiles']: ## we do not try to recover pLHE sendLog('actor','Cannot create ACDCS for %s because it is a pLHE workflow.'%wfname,level='critical') print "We don't try to recover pLHE. Moving on." recover = False # sendEmail('cannot submit action', '%s is a pLHE workflow. We do not try to recover pLHE'%wfname) # if wfi.request['RequestType'] in ['ReReco']: # recover= False # print 'cannot submit action. ReReco' # sendEmail('cannot submit action', '%s is request type ReReco'%wfname) recovering = set() for task in tasks: assign_to_sites = set() print "Task names is " + task fulltaskname = '/' + wfname + '/' + task # print "Full task name is " + fulltaskname wrong_task = False for task_info in all_tasks: if fulltaskname == task_info.pathName: if task_info.taskType not in ['Processing','Production','Merge']: wrong_task=True wfi.sendLog('actor', "Skipping task %s because the taskType is %s. 
Can only ACDC Processing, Production, or Merge tasks"%( fulltaskname, task_info.taskType)) if wrong_task: continue print tasks[task] actions = tasks[task] for action in actions: if action.startswith('sites'): if type(actions[action]) != list: assign_to_sites=[SI.SE_to_CE(actions[action])] else: assign_to_sites=list(set([SI.SE_to_CE(site) for site in actions[action]])) # if action.startswith('mem') and actions[action] != "" and actions[action] != 'Same' and wfi.request['RequestType'] in ['TaskChain']: # recover = False; # print "Skipping %s for now until Allie fixes memory parameter for TaskChain ACDCs."%wfname # wfi.sendLog('actor',"Skipping %s for now until Allie fixes memory parameter for TaskChain ACDCs."%wfname) if not 'sites' in actions: assign_to_sites = list(set([SI.SE_to_CE(site) for site in where_to_run[task]])) print "Found",sorted(assign_to_sites),"as sites where to run the ACDC at, from the acdc doc of ",wfname print "Going to run at",sorted(assign_to_sites) if recover: print "Initiating recovery" acdc = singleRecovery(url, fulltaskname, wfi.request, actions, do = options.do) if not acdc: if options.do: if recovering: print wfname + " has been partially ACDC'ed. Needs manual attention." sendLog('actor', "%s has had %s/%s recoveries %s only"%( wfname, len(recovering), num_tasks_to_recover, list(recovering)), level='critical') wfi.sendLog('actor', "%s has had %s/%s recoveries %s only"%( wfname, len(recovering), num_tasks_to_recover, list(recovering))) break else: print wfname + " failed recovery once" recover = False break else: print "no action to take further" # sendLog('recoveror', "ACDC for %s can be done automatically"% wfname, level='critical') continue else: #ACDC was made correctly. Now we have to assign it. wfi.sendLog('actor','ACDC created for task %s. 
Actions taken \n%s'%(fulltaskname,list(actions))) team = wfi.request['Teams'][0] parameters={ 'SiteWhitelist' : sorted(assign_to_sites), 'AcquisitionEra' : wfi.acquisitionEra(), 'ProcessingString' : wfi.processingString(), 'MergedLFNBase' : wfi.request['MergedLFNBase'], 'ProcessingVersion' : wfi.request['ProcessingVersion'], } ## hackery for ACDC merge assignment if wfi.request['RequestType'] == 'TaskChain' and 'Merge' in task.split('/')[-1]: parameters['AcquisitionEra'] = None parameters['ProcessingString'] = None ## xrootd setttings on primary and secondary if 'xrootd' in actions: if actions['xrootd'] == 'enabled': print "Going to assign via xrootd" parameters['TrustSitelists'] = True elif actions['xrootd'] == 'disabled': parameters['TrustSitelists'] = False elif ('TrustSitelists' in wfi.request and wfi.request['TrustSitelists']=='true'): parameters['TrustSitelists'] = True else: parameters['TrustSitelists'] = False if 'TrustPUSitelists' in wfi.request and wfi.request['TrustPUSitelists']: parameters['TrustPUSitelists'] = True if options.ass: print "really doing the assignment of the ACDC",acdc parameters['execute']=True wfi.sendLog('actor',"%s was assigned for recovery"% acdc) else: print "no assignment done with this ACDC",acdc sendLog('actor',"%s needs to be assigned"%(acdc), level='critical') continue # print parameters result = reqMgrClient.assignWorkflow(url, acdc, team, parameters) if not result: print acdc,"was not assigned" sendLog('actor',"%s needs to be assigned"%(acdc), level='critical') else: recovering.add( acdc ) wfi.sendLog('actor',"ACDCs created for %s"%wfname) #=========================================================== if recover and options.do: remove_action(wfname) if message_to_user: print wfname,"to be notified to user(DUMMY)",message_to_user if message_to_ops: print 'message' #sendEmail( "notification in recoveror" , message_to_ops, destination=['*****@*****.**']) # sendLog('recoveror',message_to_ops,level='warning') return
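## The 'AllSteps' handling in actor() above copies batch-wide defaults into
## every task; note that tasks[task] is a dict, so the .append() branch above
## would raise AttributeError, and plain assignment covers both cases. A
## minimal sketch of that merge, keeping the original "defaults win" semantics:
def _apply_all_steps(tasks):
    defaults = tasks.pop('AllSteps', {})
    for task, settings in tasks.items():
        for key, value in defaults.items():
            settings[key] = value  ## AllSteps wins over per-task values
    return tasks

print _apply_all_steps({'AllSteps': {'memory': 4000},
                        'Task1': {'sites': ['T1_US_FNAL']},
                        'Task2': {'memory': 8000}})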
def rejector(url, specific, options=None):
    up = componentInfo()
    if specific and specific.startswith('/'):
        return
    if options.filelist:
        wfs = []
        for line in filter(None, open(options.filelist).read().split('\n')):
            print line
            wfs.extend(session.query(Workflow).filter(Workflow.name.contains(line)).all())
    elif specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
    else:
        return
    print len(wfs), "to reject"
    if len(wfs) > 1:
        print "\n".join([wfo.name for wfo in wfs])
        answer = raw_input('Reject these')
        if not answer.lower() in ['y', 'yes']: return
    for wfo in wfs:
        #wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject", specific
            return
        results = []
        wfi = workflowInfo(url, wfo.name)
        datasets = set(wfi.request['OutputDatasets'])
        reqMgrClient.invalidateWorkflow(url, wfo.name, current_status=wfi.request['RequestStatus'])
        wfi.sendLog('rejector', 'invalidating the workflow by unified operator')
        ## need to find the whole family and reject the whole gang
        familly = getWorkflowById(url, wfi.request['PrepID'], details=True)
        for fwl in familly:
            if fwl['RequestDate'] < wfi.request['RequestDate']: continue
            if fwl['RequestType'] != 'Resubmission': continue
            if 'OriginalRequestName' in fwl and fwl['OriginalRequestName'] != wfi.request['RequestName']: continue
            print "rejecting", fwl['RequestName']
            reqMgrClient.invalidateWorkflow(url, fwl['RequestName'], current_status=fwl['RequestStatus'])
            datasets.update(fwl['OutputDatasets'])
        for dataset in datasets:
            if options.keep:
                print "keeping", dataset, "in its current status"
            else:
                results.append(setDatasetStatus(dataset, 'INVALID'))
        #if wfi.request['RequestStatus'] in ['rejected','rejected-archived','aborted','aborted-archived']:
        #    print 'already',wfi.request['RequestStatus']
        #    if not options.clone:
        #        wfo.status = 'forget'
        #        session.commit()
        #        continue
        if all(map(lambda result: result in ['None', None, True], results)):
            wfo.status = 'forget'
            session.commit()
            print wfo.name, "and", datasets, "are rejected"
            if options and options.clone:
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    schema['ProcessingVersion'] = int(schema['ProcessingVersion']) + 1 ## dubious str->int conversion
                else:
                    schema['ProcessingVersion'] = 2
                for k in schema.keys():
                    if k.startswith('Team'):
                        schema.pop(k)
                    if k.startswith('checkbox'):
                        schema.pop(k)
                ## a few tweaks to the original request
                if options.Memory:
                    schema['Memory'] = options.Memory
                if options.deterministic:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['DeterministicPileup'] = True
                if options.EventsPerJob:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['EventsPerJob'] = options.EventsPerJob
                    else:
                        schema['EventsPerJob'] = options.EventsPerJob
                if options.EventAwareLumiBased:
                    schema['SplittingAlgo'] = 'EventAwareLumiBased'
                if options.TimePerEvent:
                    schema['TimePerEvent'] = options.TimePerEvent
                if options.ProcessingString:
                    schema['ProcessingString'] = options.ProcessingString
                if options.AcquisitionEra:
                    schema['AcquisitionEra'] = options.AcquisitionEra
                if options.runs:
                    schema['RunWhitelist'] = map(int, options.runs.split(','))
                if options.PrepID:
                    schema['PrepID'] = options.PrepID
                if schema['RequestType'] == 'TaskChain' and options.no_output:
                    ntask = schema['TaskChain']
                    for it in range(1, ntask - 1):
                        schema['Task%d' % it]['KeepOutput'] = False
                    schema['TaskChain'] = ntask - 1
                    schema.pop('Task%d' % ntask)
                ## update to the current priority
                schema['RequestPriority'] = wfi.request['RequestPriority']
                ## drop shit on the way to reqmgr2
                for p in [
                        'RequestStatus',
                        'RequestTransition',
                        'RequestorDN',
                        'RequestWorkflow',
                        'OutputDatasets',
                        'ReqMgr2Only',
                        #'Group',
                        'RequestDate',
                        #'ConfigCacheUrl',
                        'RequestName',
                        'timeStamp',
                        'SoftwareVersions',
                        'CouchURL'
                ]:
                    if p in schema:
                        schema.pop(p)
                #pass
                print "submitting"
                print json.dumps(schema, indent=2)
                newWorkflow = reqMgrClient.submitWorkflow(url, schema)
                if not newWorkflow:
                    print "error in cloning", wfo.name
                    print json.dumps(schema, indent=2)
                    return
                print newWorkflow
                #m = re.search("details\/(.*)\'",response)
                #if not m:
                #    print "error in cloning",wfo.name
                #    print response
                #    print json.dumps( schema, indent=2 )
                #    return
                #newWorkflow = m.group(1)
                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfo.status = 'trouble'
                session.commit()
                wfi.sendLog('rejector', 'Cloned into %s by unified operator' % (newWorkflow))
        else:
            print "error in rejecting", wfo.name, results
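## Cloning above strips ReqMgr2 output/bookkeeping fields before resubmission.
## A standalone sketch of that purge on a toy schema (the key list mirrors the
## one above; the schema content is made up):
_DROP_ON_CLONE = ['RequestStatus', 'RequestTransition', 'RequestorDN',
                  'RequestWorkflow', 'OutputDatasets', 'ReqMgr2Only',
                  'RequestDate', 'RequestName', 'timeStamp',
                  'SoftwareVersions', 'CouchURL']

def _purge_for_clone(schema):
    for p in _DROP_ON_CLONE:
        schema.pop(p, None)  ## tolerate absent keys
    return schema

print _purge_for_clone({'RequestName': 'old', 'Memory': 2000})  ## {'Memory': 2000}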
def run(self):
    site = self.site
    print "checking on site", site
    si = self.SI
    UC = self.UC
    RDI = self.RDI
    options = self.options
    locks = self.locks
    waiting = self.waiting
    stuck = self.stuck
    missing = self.missing
    remainings = {}

    ds = si.getRemainingDatasets(si.CE_to_SE(site))
    #print len(ds)
    taken_size = 0.
    sum_waiting = 0.
    sum_stuck = 0.
    sum_missing = 0.
    sum_unlocked = 0.
    n_ds = options.ndatasets
    i_ds = 0
    ds_threads = []
    for i_ds, (size, dataset) in enumerate(ds):
        if n_ds and i_ds >= n_ds:
            break
        remainings[dataset] = {"size": size, "reasons": []}
        if not dataset in locks:
            #print dataset, "is not locked"
            sum_unlocked += size
            remainings[dataset]["reasons"].append('unlock')
        else:
            remainings[dataset]["reasons"].append('lock')
        if dataset in waiting:
            #print dataset, "is waiting for custodial"
            sum_waiting += size
            remainings[dataset]["reasons"].append('tape')
        if dataset in stuck:
            sum_stuck += size
            remainings[dataset]["reasons"].append('stuck-tape')
        if dataset in missing:
            sum_missing += size
            remainings[dataset]["reasons"].append('missing-tape')
        ds_threads.append(DatasetCheckBuster(dataset=dataset, url=url))

    run_threads = ThreadHandler(threads=ds_threads,
                                label='%s Dataset Threads' % site,
                                n_threads=10,
                                start_wait=0,
                                timeout=None,
                                verbose=True)
    ## start and sync
    run_threads.run()
    #run_threads.start()
    #while run_threads.is_alive():
    #    time.sleep(10)

    for t in run_threads.threads:
        remainings[t.dataset]["reasons"].extend(t.reasons)
        remainings[t.dataset]["reasons"].sort()
        print t.dataset, remainings[t.dataset]["reasons"]

    #print "\t", sum_waiting, "[GB] could be freed by custodial"
    print "\t", sum_unlocked, "[GB] is not locked by unified"

    print "updating database with remaining datasets"
    RDI.set(site, remainings)
    try:
        eosFile('%s/remaining_%s.json' % (monitor_dir, site), 'w').write(json.dumps(remainings, indent=2)).close()
    except:
        pass

    ld = remainings.items()
    ld.sort(key=lambda i: i[1]['size'], reverse=True)
    table = "<html>Updated %s GMT, <a href=remaining_%s.json>json data</a><br>" % (time.asctime(time.gmtime()), site)

    accumulate = defaultdict(lambda: defaultdict(float))
    for item in remainings:
        tier = item.split('/')[-1]
        for reason in remainings[item]['reasons']:
            accumulate[reason][tier] += remainings[item]['size']

    table += "<table border=1><thead><tr><th>Reason</th><th>size [TB]</th></tr></thead>"
    for reason in accumulate:
        s = 0
        table += "<tr><td>%s</td><td><ul>" % reason
        subitems = accumulate[reason].items()
        subitems.sort(key=lambda i: i[1], reverse=True)
        for tier, ss in subitems:
            table += "<li> %s : %10.3f</li>" % (tier, ss / 1024.)
            s += ss / 1024.
        table += "</ul>total : %.3f</td>" % s
    table += "</table>\n"

    table += "<table border=1><thead><tr><th>Dataset</th><th>Size [GB]</th><th>Label</th></tr></thead>\n"
    only_unlock = set()
    for item in ld:
        ds_name = item[0]
        reasons = item[1]['reasons']
        sub_url = '<a href="https://cmsweb.cern.ch/das/request?input=%s">%s</a>' % (ds_name, ds_name)
        if 'unlock' in reasons:
            sub_url += ', <a href="https://cmsweb.cern.ch/phedex/datasvc/xml/prod/subscriptions?block=%s%%23*&node=%s">block</a>' % (ds_name, site)
        if 'unlock' in reasons or 'input' in reasons:
            sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?inputdataset=%s&mask=RequestName&mask=RequestStatus">input</a>' % (ds_name)
        if 'unlock' in reasons or 'output' in reasons:
            sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?outputdataset=%s&mask=RequestName&mask=RequestStatus">output</a>' % (ds_name)
        if 'pilup' in reasons:  ## 'pilup' spelling must match the reason label set by DatasetCheckBuster
            sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?mc_pileup=%s&mask=RequestName&mask=RequestStatus">secondary</a>' % (ds_name)
        table += "<tr><td>%s</td><td>%d</td><td><ul>%s</ul></td></tr>\n" % (sub_url, item[1]['size'], "<li>".join([""] + reasons))
        if reasons == ['unlock']:
            only_unlock.add(item[0])
    table += "</table></html>"
    eosFile('%s/remaining_%s.html' % (monitor_dir, site), 'w').write(table).close()

    print "checking on unlock-only datasets"
    to_ddm = UC.get('tiers_to_DDM')
    #look_at = list(only_unlock)
    look_at = list(only_unlock)[:20]
    #look_at = list([ds for ds in only_unlock if not ds.endswith('NANOAODSIM')])
    for item in look_at:
        tier = item.split('/')[-1]
        ds_status = getDatasetStatus(item)
        print item, ds_status
        if ds_status == 'PRODUCTION':
            print item, "is found", ds_status, "and unlocked on", site
            if options.invalidate_anything_left_production_once_unlocked:
                print "Setting status to invalid for", item
                setDatasetStatus(item, 'INVALID')
        if tier in to_ddm:
            print item, "looks like analysis and still dataops on", site
            if options.change_dataops_subs_to_anaops_once_unlocked:
                print "Sending", item, "to anaops"
                allCompleteToAnaOps(url, item)
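
## A toy illustration (invented dataset names and sizes) of the per-reason,
## per-tier accumulation performed above before rendering the HTML summary;
## sizes are in GB and divided by 1024 for the TB column.
from collections import defaultdict

toy_remainings = {
    '/ToyA/Era-v1/AODSIM':     {'size': 2048., 'reasons': ['unlock']},
    '/ToyB/Era-v1/MINIAODSIM': {'size': 512.,  'reasons': ['lock', 'tape']},
}
toy_accumulate = defaultdict(lambda: defaultdict(float))
for item in toy_remainings:
    tier = item.split('/')[-1]  ## the datatier is the last path component
    for reason in toy_remainings[item]['reasons']:
        toy_accumulate[reason][tier] += toy_remainings[item]['size']
for reason in toy_accumulate:
    for tier, ss in toy_accumulate[reason].items():
        print reason, tier, "%.3f TB" % (ss / 1024.)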
def invalidator(url, invalid_status='INVALID'):
    use_mcm = True
    up = componentInfo(soft=['wtc'])
    if not up.check():
        return
    mcm = McMClient(dev=False)
    invalids = mcm.getA('invalidations', query='status=announced')
    if not invalids:
        return
    print len(invalids), "objects to be invalidated"
    text_to_batch = defaultdict(str)
    text_to_request = defaultdict(str)
    for invalid in invalids:
        acknowledge = False
        pid = invalid['prepid']
        batch_lookup = invalid['prepid']
        text = ""
        if invalid['type'] == 'request':
            wfn = invalid['object']
            print "need to invalidate the workflow", wfn
            wfo = session.query(Workflow).filter(Workflow.name == wfn).first()
            if wfo:
                ## set it to forget (although checkor will recover from it)
                print "setting the status of", wfo.status, "to forget"
                wfo.status = 'forget'
                session.commit()
            else:
                ## do not go on like this; do not acknowledge it
                print wfn, "is set to be rejected, but we do not know about it yet"
                #continue
            wfi = workflowInfo(url, wfn)
            success = "not rejected"
            ## to do: we should find a way to reject the workflow and any related acdc
            successes = invalidate(url, wfi, only_resub=True, with_output=False)
            wfi.sendLog('invalidator', "rejection is performed from McM invalidations request")
            acknowledge = all(successes)
            text = "The workflow %s (%s) was rejected due to invalidation in McM" % (wfn, pid)
            batch_lookup = wfn  ## so that the batch id is taken as the one containing the workflow name
        elif invalid['type'] == 'dataset':
            dataset = invalid['object']
            if '?' in dataset: continue
            if 'None' in dataset: continue
            if 'None-' in dataset: continue
            if 'FAKE-' in dataset: continue
            print "setting", dataset, "to", invalid_status
            success = setDatasetStatus(dataset, invalid_status)
            if success:
                acknowledge = True
                text = "The dataset %s (%s) was set INVALID due to invalidation in McM" % (dataset, pid)
            else:
                print "invalidation of", dataset, "did not go so well"
        else:
            print "\t\t", invalid['type'], "type not recognized"

        if acknowledge:
            ## acknowledge the invalidation in McM, provided we have the api
            print "acknowledgment to mcm"
            ackno_url = '/restapi/invalidations/acknowledge/%s' % (invalid['_id'])
            print "at", ackno_url
            mcm.get(ackno_url)

            ## prepare the text for batches
            batches = []
            batches.extend(mcm.getA('batches', query='contains=%s' % batch_lookup))
            batches = filter(lambda b: b['status'] in ['announced', 'done', 'reset'], batches)
            if len(batches):
                bid = batches[-1]['prepid']
                print "batch notification to", bid
                text_to_batch[bid] += text + "\n\n"
            ## prepare the text for requests
            text_to_request[pid] += text + "\n\n"

    for bid, text in text_to_batch.items():
        if not text: continue
        text += '\n This is an automated message'
        mcm.put('/restapi/batches/notify', {"notes": text, "prepid": bid})

    for pid, text in text_to_request.items():
        if not text: continue
        text += '\n This is an automated message'
        mcm.put('/restapi/requests/notify', {"message": text, "prepids": [pid]})
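
## A sketch of the notification fan-out used above: texts are accumulated per
## batch prepid and per request prepid so that McM is notified once per target
## rather than once per invalidation. The triplets below are invented; the
## real texts come from the invalidation loop.
from collections import defaultdict

toy_text_to_batch = defaultdict(str)
toy_text_to_request = defaultdict(str)
for (bid, pid, text) in [('B-0001', 'P-0001', 'workflow w1 rejected'),
                         ('B-0001', 'P-0002', 'dataset d2 set INVALID')]:
    toy_text_to_batch[bid] += text + "\n\n"
    toy_text_to_request[pid] += text + "\n\n"
for bid, text in toy_text_to_batch.items():
    ## in production this becomes a single mcm.put('/restapi/batches/notify', ...)
    print "would notify batch", bid, "with", repr(text)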
def close(self):
    if os.path.isfile('.closor_stop'):
        print "The closing of workflows is shortened"
        return

    url = self.url
    batch_go = self.batch_go
    CI = self.CI
    UC = self.UC
    wfo = self.wfo
    jump_the_line = self.jump_the_line
    batch_goodness = self.batch_goodness
    check_parentage_to_announce = UC.get('check_parentage_to_announce')
    check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce')

    ## what is the expected number of lumis
    self.wfi = workflowInfo(url, wfo.name)
    wfi = self.wfi
    wfo.wm_status = wfi.request['RequestStatus']

    if wfi.isRelval():
        has_batch_go = False
        batch_name = wfi.getCampaign()
        if not batch_name in batch_go:
            ## estimate whether this can be announced: only once per batch
            in_batches = getWorkflowByCampaign(url, batch_name, details=True)
            batch_go[batch_name] = all(map(lambda s: not s in ['completed', 'running-open', 'running-closed', 'acquired',
                                                               'staged', 'staging', 'assigned', 'assignment-approved'],
                                           [r['RequestStatus'] for r in in_batches]))
        ## already verified
        has_batch_go = batch_go[batch_name]
        if not has_batch_go:
            wfi.sendLog('closor',
                        'Cannot close for now because the batch <a href=https://dmytro.web.cern.ch/dmytro/cmsprodmon/workflows.php?campaign=%s>%s</a> is not all closed' % (batch_name, batch_name))
            return

    if wfi.request['RequestStatus'] in ['announced', 'normal-archived'] and not options.force:
        ## manually announced ??
        self.to_status = 'done'
        self.to_wm_status = wfi.request['RequestStatus']
        wfi.sendLog('closor', '%s is announced already : %s' % (wfo.name, self.to_wm_status))
        return

    if jump_the_line:
        wfi.sendLog('closor', 'Announcing while completing')

    expected_lumis = 1
    if not 'TotalInputLumis' in wfi.request:
        print wfo.name, "has not been assigned yet, or the database is corrupted"
    elif wfi.request['TotalInputLumis'] == 0:
        print wfo.name, "is corrupted with 0 expected lumis"
    else:
        expected_lumis = wfi.request['TotalInputLumis']

    ## what are the outputs
    outputs = wfi.request['OutputDatasets']
    ## check whether the number of lumis is as expected for each
    all_OK = defaultdict(lambda: False)
    stats = defaultdict(int)
    if len(outputs):
        print wfo.name, wfi.request['RequestStatus']
    for out in outputs:
        event_count, lumi_count = getDatasetEventsAndLumis(dataset=out)
        self.outs.append(Output(datasetname=out))
        odb = self.outs[-1]
        odb.workflow = wfo
        odb.nlumis = lumi_count
        odb.nevents = event_count
        odb.workfow_id = wfo.id  ## spelling follows the Output model's column name
        if odb.expectedlumis < expected_lumis:
            odb.expectedlumis = expected_lumis
        else:
            expected_lumis = odb.expectedlumis
        odb.date = time.mktime(time.gmtime())

        fraction = lumi_count / float(expected_lumis) * 100.
        completion_line = "%60s %d/%d = %3.2f%%" % (out, lumi_count, expected_lumis, fraction)
        wfi.sendLog('closor', "\t%s" % completion_line)
        if wfi.isRelval() and fraction < batch_goodness:
            self.batch_warnings[wfi.getCampaign()].add(completion_line)
            if fraction < 50:
                self.batch_extreme_warnings[wfi.getCampaign()].add(completion_line)
        stats[out] = lumi_count
        all_OK[out] = True

    ## check for at least one full copy prior to moving on
    #in_full = {}
    for out in outputs:
        all_OK[out] = True
        ## the full-copy check below is disabled for now
        #in_full[out] = []
        #presence = getDatasetPresence(url, out)
        #where = [site for site, info in presence.items() if info[0]]
        #if where:
        #    all_OK[out] = True
        #    print out, "is in full at", ",".join(where)
        #    in_full[out] = copy.deepcopy(where)
        #else:
        #    going_to = wfi.request['NonCustodialSites'] + wfi.request['CustodialSites']
        #    wfi.sendLog('closor', "%s is not in full anywhere. send to %s" % (out, ",".join(sorted(going_to))))
        #    at_destination = dict([(k, v) for (k, v) in presence.items() if k in going_to])
        #    else_where = dict([(k, v) for (k, v) in presence.items() if not k in going_to])
        #    print json.dumps(at_destination)
        #    print json.dumps(else_where, indent=2)
        #    ## do the full stuck-transfer study, missing files, etc.
        #    for there in going_to:
        #        late_info = findLateFiles(url, out, going_to=there)
        #        for l in late_info:
        #            l.update({"workflow": wfo.name, "dataset": out})
        #        self.all_late_files.extend(late_info)
        #        if check_fullcopy_to_announce:
        #            ## only set this false if the check is relevant
        #            all_OK[out] = False

    ## verify if we have to do harvesting
    #if not options.no_harvest and not jump_the_line:
    #    #(OK, requests) = spawn_harvesting(url, wfi, in_full)
    #    sites_for_DQMHarvest = UC.get("sites_for_DQMHarvest")
    #    (OK, requests) = spawn_harvesting(url, wfi, sites_for_DQMHarvest)
    #    print "Harvesting workflow has been created and assigned to: "
    #    print sites_for_DQMHarvest
    #    all_OK.update(OK)

    ## only that status can let me go into announced
    if all(all_OK.values()) and ((wfi.request['RequestStatus'] in ['closed-out']) or options.force or jump_the_line):
        print wfo.name, "to be announced"
        results = []
        if not results:
            for out in outputs:
                print "dealing with", out
                if out in stats and not stats[out]:
                    continue
                _, dsn, process_string, tier = out.split('/')
                if all_OK[out]:
                    print "setting valid"
                    results.append(setDatasetStatus(out, 'VALID', withFiles=False))
                if all_OK[out] and wfi.isRelval():
                    ## make the specific relval rules and the replicas
                    ## figure the destination(s) out
                    destinations = set()
                    if tier in UC.get("tiers_to_rucio_relval"):
                        wfi.sendLog('closor', "Data Tier: %s is blacklisted, so skipping dataset placement for: %s" % (tier, out))
                        continue
                    if tier != "RECO" and tier != "ALCARECO":
                        destinations.add('T2_CH_CERN')
                    if tier == "GEN-SIM":
                        destinations.add('T1_US_FNAL_Disk')
                    if tier == "GEN-SIM-DIGI-RAW":
                        destinations.add('T1_US_FNAL_Disk')
                    if tier == "GEN-SIM-RECO":
                        destinations.add('T1_US_FNAL_Disk')
                    if "RelValTTBar" in dsn and "TkAlMinBias" in process_string and tier != "ALCARECO":
                        destinations.add('T2_CH_CERN')
                    if "MinimumBias" in dsn and "SiStripCalMinBias" in process_string and tier != "ALCARECO":
                        destinations.add('T2_CH_CERN')
                    if destinations:
                        wfi.sendLog('closor', '%s to go to %s' % (out, ', '.join(sorted(destinations))))
                elif all_OK[out]:
                    campaign = None
                    try:
                        campaign = out.split('/')[2].split('-')[0]
                    except:
                        if 'Campaign' in wfi.request and wfi.request['Campaign']:
                            campaign = wfi.request['Campaign']
                    to_DDM = False
                    ## campaign override
                    if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']:
                        to_DDM = True
                    ## by typical enabling
                    if tier in UC.get("tiers_to_rucio_nonrelval"):
                        wfi.sendLog('closor', "Data Tier: %s is blacklisted, so skipping dataset placement for: %s" % (tier, out))
                        continue
                    if tier in UC.get("tiers_to_DDM"):
                        to_DDM = True
                    ## check for unitarity
                    if not tier in UC.get("tiers_no_DDM") + UC.get("tiers_to_DDM"):
                        print "tier", tier, "neither TO nor NO DDM for", out
                        results.append('Unrecognized tier %s' % tier)
                        #sendEmail("failed DDM injection", "could not recognize %s for injecting in DDM" % out)
                        sendLog('closor', "could not recognize %s for injecting in DDM" % out, level='critical')
                        continue
                    n_copies = 1
                    destinations = []
                    if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]:
                        ddm_instructions = CI.campaigns[campaign]['DDMcopies']
                        if type(ddm_instructions) == int:
                            n_copies = CI.campaigns[campaign]['DDMcopies']
                        elif type(ddm_instructions) == dict:
                            ## a more fancy configuration
                            for ddmtier, indication in ddm_instructions.items():
                                if ddmtier == tier or ddmtier in ['*', 'all']:
                                    ## this entry applies here
                                    if 'N' in indication:
                                        n_copies = indication['N']
                                    if 'host' in indication:
                                        destinations = indication['host']
                    destination_spec = ""
                    if destinations:
                        destination_spec = "--destination=" + ",".join(destinations)
                    group_spec = ""  ## not used yet
                else:
                    print wfo.name, "no stats for announcing", out
                    results.append('No Stats')

            ## check the ParentageResolved flag from ReqMgr
            if wfi.request['RequestType'] == 'StepChain' and check_parentage_to_announce:
                if wfi.request['ParentageResolved']:
                    results.append(True)
                else:
                    wfi.sendLog('closor', "Delayed announcement of %s due to unresolved parentage dependencies" % wfi.request['RequestName'])
                    results.append('No ParentageResolved')

            if all(map(lambda result: result in ['None', None, True], results)):
                if not jump_the_line:
                    ## only announce if all previous are fine
                    res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                    if not res in ['None', None]:
                        ## check the status again, it might well have toggled
                        wl_bis = workflowInfo(url, wfo.name)
                        self.to_wm_status = wl_bis.request['RequestStatus']
                        if wl_bis.request['RequestStatus'] in ['announced', 'normal-archived']:
                            res = None
                        else:
                            res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                    results.append(res)

        print results
        if all(map(lambda result: result in ['None', None, True], results)):
            if jump_the_line:
                if not 'announced' in wfo.status:
                    self.to_status = wfo.status.replace('announce', 'announced')
            else:
                self.to_status = 'done'
                self.closing = True
            wfi.sendLog('closor', "workflow outputs are announced")
        else:
            wfi.sendLog('closor', "Error with %s to be announced \n%s" % (wfo.name, json.dumps(results)))
    elif wfi.request['RequestStatus'] in ['failed', 'aborted', 'aborted-archived',
                                          'rejected', 'rejected-archived', 'aborted-completed']:
        if wfi.isRelval():
            self.to_status = 'forget'
            self.to_wm_status = wfi.request['RequestStatus']
            wfi.sendLog('closor', "%s is %s, but will not be set in trouble to find a replacement." % (wfo.name, self.to_wm_status))
        else:
            self.to_status = 'trouble'
            self.to_wm_status = wfi.request['RequestStatus']
    else:
        print wfo.name, "not good for announcing:", wfi.request['RequestStatus']
        wfi.sendLog('closor', "cannot be announced")
        self.held.add(wfo.name)
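
## The relval placement branch above reads more easily as a pure function.
## This sketch (illustration only, not the production helper) mirrors the
## rules exactly as written: dsn, process_string and tier come from
## out.split('/'), and the tiers_to_rucio_relval blacklist is assumed to have
## been checked beforehand.
def _relval_destinations(dsn, process_string, tier):
    destinations = set()
    if tier != "RECO" and tier != "ALCARECO":
        destinations.add('T2_CH_CERN')
    if tier in ("GEN-SIM", "GEN-SIM-DIGI-RAW", "GEN-SIM-RECO"):
        destinations.add('T1_US_FNAL_Disk')
    if "RelValTTBar" in dsn and "TkAlMinBias" in process_string and tier != "ALCARECO":
        destinations.add('T2_CH_CERN')
    if "MinimumBias" in dsn and "SiStripCalMinBias" in process_string and tier != "ALCARECO":
        destinations.add('T2_CH_CERN')
    return destinations

print sorted(_relval_destinations('RelValTTBar_13', 'TkAlMinBias-RECO-v1', 'GEN-SIM-RECO'))
## -> ['T1_US_FNAL_Disk', 'T2_CH_CERN']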
def rejector(url, specific, options=None):
    up = componentInfo(soft=['wtc'])
    if not up.check():
        return

    if specific and specific.startswith('/'):
        ## this is for a dataset
        print setDatasetStatus(specific, 'INVALID')
        return

    if options.filelist:
        wfs = []
        for line in filter(None, open(options.filelist).read().split('\n')):
            print line
            wfs.extend(session.query(Workflow).filter(Workflow.name.contains(line)).all())
    elif specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
        if not wfs:
            batches = json.loads(open('batches.json').read())
            for bname in batches:
                if specific == bname:
                    for wf in batches[bname]:
                        wfs.append(session.query(Workflow).filter(Workflow.name == wf).first())
    else:
        wfs = session.query(Workflow).filter(Workflow.status == 'assistance-clone').all()
        #wfs.extend( session.query(Workflow).filter(Workflow.status == 'assistance-reject').all())
        ## be careful then: on clone, treat case by case
        options.clone = True
        print "not supposed to function yet"
        return

    print len(wfs), "to reject"
    if len(wfs) > 1:
        print "\n".join([wfo.name for wfo in wfs])
        answer = raw_input('Reject these')
        if not answer.lower() in ['y', 'yes']:
            return

    for wfo in wfs:
        #wfo = session.query(Workflow).filter(Workflow.name == specific).first()
        if not wfo:
            print "cannot reject", specific
            return
        results = []
        wfi = workflowInfo(url, wfo.name)
        datasets = set(wfi.request['OutputDatasets'])
        reqMgrClient.invalidateWorkflow(url, wfo.name,
                                        current_status=wfi.request['RequestStatus'])
        comment = ""
        if options.comments:
            comment = ", reason: " + options.comments
        wfi.sendLog('rejector', 'invalidating the workflow by unified operator%s' % comment)

        ## find the whole resubmission family and reject it as well
        familly = getWorkflowById(url, wfi.request['PrepID'], details=True)
        for fwl in familly:
            if fwl['RequestDate'] < wfi.request['RequestDate']: continue
            if fwl['RequestType'] != 'Resubmission': continue
            ## does not work on second-order acdc
            #if 'OriginalRequestName' in fwl and fwl['OriginalRequestName'] != wfi.request['RequestName']: continue
            print "rejecting", fwl['RequestName']
            reqMgrClient.invalidateWorkflow(url, fwl['RequestName'],
                                            current_status=fwl['RequestStatus'],
                                            cascade=False)
            datasets.update(fwl['OutputDatasets'])

        for dataset in datasets:
            if options.keep:
                print "keeping", dataset, "in its current status"
            else:
                results.append(setDatasetStatus(dataset, 'INVALID'))

        if all(map(lambda result: result in ['None', None, True], results)):
            print wfo.name, "and", datasets, "are rejected"
            if options and options.clone:
                wfo.status = 'trouble'
                session.commit()
                schema = wfi.getSchema()
                schema['Requestor'] = os.getenv('USER')
                schema['Group'] = 'DATAOPS'
                schema['OriginalRequestName'] = wfo.name
                if 'ProcessingVersion' in schema:
                    ## the value may come back from ReqMgr as a string, hence the int() conversion
                    schema['ProcessingVersion'] = int(schema['ProcessingVersion']) + 1
                else:
                    schema['ProcessingVersion'] = 2
                for k in schema.keys():
                    if k.startswith('Team'):
                        schema.pop(k)
                    if k.startswith('checkbox'):
                        schema.pop(k)

                ## a few tamperings with the original request
                if options.Memory:
                    if schema['RequestType'] == 'TaskChain':
                        it = 1
                        while True:
                            t = 'Task%d' % it
                            it += 1
                            if t in schema:
                                schema[t]['Memory'] = options.Memory
                            else:
                                break
                    else:
                        schema['Memory'] = options.Memory

                if options.Multicore:
                    ## to do : set it properly in taskchains
                    if schema['RequestType'] == 'TaskChain':
                        tasks, set_to = options.Multicore.split(':') if ':' in options.Multicore else ("", options.Multicore)
                        set_to = int(set_to)
                        tasks = tasks.split(',') if tasks else ['Task1']
                        it = 1
                        while True:
                            tt = 'Task%d' % it
                            it += 1
                            if tt in schema:
                                tname = schema[tt]['TaskName']
                                if tname in tasks or tt in tasks:
                                    mem = schema[tt]['Memory']
                                    mcore = schema[tt].get('Multicore', 1)
                                    factor = (set_to / float(mcore))
                                    fraction_constant = 0.4
                                    mem_per_core_c = int((1 - fraction_constant) * mem / float(mcore))
                                    print "mem per core", mem_per_core_c
                                    print "base mem", mem
                                    ## adjust the parameters in the clone
                                    schema[tt]['Memory'] += (set_to - mcore) * mem_per_core_c
                                    schema[tt]['Multicore'] = set_to
                                    schema[tt]['TimePerEvent'] /= factor
                            else:
                                break
                    else:
                        schema['Multicore'] = options.Multicore

                if options.deterministic:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['DeterministicPileup'] = True
                if options.EventsPerJob:
                    if schema['RequestType'] == 'TaskChain':
                        schema['Task1']['EventsPerJob'] = options.EventsPerJob
                    else:
                        schema['EventsPerJob'] = options.EventsPerJob
                if options.EventAwareLumiBased:
                    schema['SplittingAlgo'] = 'EventAwareLumiBased'
                if options.TimePerEvent:
                    schema['TimePerEvent'] = options.TimePerEvent
                if options.ProcessingString:
                    schema['ProcessingString'] = options.ProcessingString
                if options.AcquisitionEra:
                    schema['AcquisitionEra'] = options.AcquisitionEra
                if options.runs:
                    schema['RunWhitelist'] = map(int, options.runs.split(','))
                if options.PrepID:
                    schema['PrepID'] = options.PrepID
                if schema['RequestType'] == 'TaskChain' and options.no_output:
                    ntask = schema['TaskChain']
                    for it in range(1, ntask - 1):
                        schema['Task%d' % it]['KeepOutput'] = False
                    schema['TaskChain'] = ntask - 1
                    schema.pop('Task%d' % ntask)

                ## update to the current priority, unless an explicit override is given
                schema['RequestPriority'] = wfi.request['RequestPriority']
                if options.priority:
                    schema['RequestPriority'] = options.priority

                ## drop fields that cannot be fed back to reqmgr2
                schema = reqMgrClient.purgeClonedSchema(schema)

                print "submitting"
                if (options.to_stepchain and (schema['RequestType'] == 'TaskChain')):
                    ## transform the schema into a StepChain schema
                    print "Transforming a TaskChain into a StepChain"
                    schema['RequestType'] = 'StepChain'
                    schema['StepChain'] = schema.pop('TaskChain')
                    schema['SizePerEvent'] = 0
                    schema['TimePerEvent'] = 0
                    step = 1
                    s_n = {}
                    while True:
                        if 'Task%d' % step in schema:
                            schema['Step%d' % step] = schema.pop('Task%d' % step)
                            schema['Step%d' % step]['StepName'] = schema['Step%d' % step].pop('TaskName')
                            s_n[schema['Step%d' % step]['StepName']] = 'Step%d' % step
                            if 'InputTask' in schema['Step%d' % step]:
                                schema['Step%d' % step]['InputStep'] = schema['Step%d' % step].pop('InputTask')
                            eff = 1.
                            up_s = 'Step%d' % step
                            while True:
                                ## climb up a step; supposedly all converted already
                                up_s = s_n.get(schema[up_s].get('InputStep', None), None)
                                if up_s:
                                    ## multiply by the efficiency
                                    eff *= schema[up_s].get('FilterEfficiency', 1.)
                                else:
                                    ## or stop there
                                    break
                            if not 'KeepOutput' in schema['Step%d' % step]:
                                ## this is a weird translation capability: in a step, absence of
                                ## KeepOutput means keep the output, while in a TaskChain absence means drop it
                                schema['Step%d' % step]['KeepOutput'] = False
                            schema['TimePerEvent'] += eff * schema['Step%d' % step].pop('TimePerEvent')
                            schema['SizePerEvent'] += eff * schema['Step%d' % step].pop('SizePerEvent')
                            step += 1
                        else:
                            break

                print json.dumps(schema, indent=2)
                newWorkflow = reqMgrClient.submitWorkflow(url, schema)
                if not newWorkflow:
                    print "error in cloning", wfo.name
                    print json.dumps(schema, indent=2)
                    return
                print newWorkflow

                data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
                print data
                wfi.sendLog('rejector', 'Cloned into %s by unified operator %s' % (newWorkflow, comment))
                wfi.notifyRequestor('Cloned into %s by unified operator %s' % (newWorkflow, comment), do_batch=False)
            else:
                wfo.status = 'trouble' if options.set_trouble else 'forget'
                wfi.notifyRequestor('Rejected by unified operator %s' % (comment), do_batch=False)
                session.commit()
        else:
            print "error in rejecting", wfo.name, results
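
## The TaskChain -> StepChain translation above weights each step's
## TimePerEvent and SizePerEvent by the product of the FilterEfficiency of
## all upstream steps. A minimal sketch of just that aggregation, on an
## invented two-step chain (numbers chosen for easy checking):
toy_steps = {
    'Step1': {'StepName': 'gen', 'FilterEfficiency': 0.1,
              'TimePerEvent': 10., 'SizePerEvent': 100.},
    'Step2': {'StepName': 'reco', 'InputStep': 'gen',
              'TimePerEvent': 50., 'SizePerEvent': 500.},
}
s_n = dict((v['StepName'], k) for k, v in toy_steps.items())
total_tpe = total_spe = 0.
for step in sorted(toy_steps):
    eff, up_s = 1., step
    while True:
        ## climb to the parent step and pick up its filter efficiency
        up_s = s_n.get(toy_steps[up_s].get('InputStep', None), None)
        if not up_s:
            break
        eff *= toy_steps[up_s].get('FilterEfficiency', 1.)
    total_tpe += eff * toy_steps[step]['TimePerEvent']
    total_spe += eff * toy_steps[step]['SizePerEvent']
print total_tpe, total_spe  ## 15.0 150.0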