def close(self):
    if os.path.isfile('.closor_stop'):
        print "The closing of workflows is shortened"
        return

    url = self.url
    batch_go = self.batch_go
    CI = self.CI
    UC = self.UC
    wfo = self.wfo
    jump_the_line = self.jump_the_line
    batch_goodness = self.batch_goodness
    check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce')

    ## what is the expected number of lumis
    self.wfi = workflowInfo(url, wfo.name)
    wfi = self.wfi
    wfo.wm_status = wfi.request['RequestStatus']

    if wfi.isRelval():
        has_batch_go = False
        batch_name = wfi.getCampaign()
        if not batch_name in batch_go:
            ## do the estimation of whether this can be announced: only once per batch
            in_batches = getWorkflowByCampaign(url, batch_name, details=True)
            batch_go[batch_name] = all(map(lambda s: not s in ['completed', 'running-open', 'running-closed', 'acquired', 'staged', 'staging', 'assigned', 'assignment-approved'], [r['RequestStatus'] for r in in_batches]))
        ## already verified
        has_batch_go = batch_go[batch_name]
        if not has_batch_go:
            wfi.sendLog('closor', 'Cannot close for now because the batch <a href=https://dmytro.web.cern.ch/dmytro/cmsprodmon/workflows.php?campaign=%s>%s</a> is not all close' % (batch_name, batch_name))
            return

    if wfi.request['RequestStatus'] in ['announced', 'normal-archived'] and not options.force:
        ## manually announced ??
        self.to_status = 'done'
        self.to_wm_status = wfi.request['RequestStatus']
        wfi.sendLog('closor', '%s is announced already : %s' % (wfo.name, self.to_wm_status))
        return

    if jump_the_line:
        wfi.sendLog('closor', 'Announcing while completing')

    expected_lumis = 1
    if not 'TotalInputLumis' in wfi.request:
        print wfo.name, "has not been assigned yet, or the database is corrupted"
    elif wfi.request['TotalInputLumis'] == 0:
        print wfo.name, "is corrupted with 0 expected lumis"
    else:
        expected_lumis = wfi.request['TotalInputLumis']

    ## what are the outputs
    outputs = wfi.request['OutputDatasets']
    ## check whether the number of lumis is as expected for each
    all_OK = defaultdict(lambda: False)
    stats = defaultdict(int)
    #print outputs
    if len(outputs):
        print wfo.name, wfi.request['RequestStatus']
    for out in outputs:
        event_count, lumi_count = getDatasetEventsAndLumis(dataset=out)
        self.outs.append(Output(datasetname=out))
        odb = self.outs[-1]
        odb.workflow = wfo
        odb.nlumis = lumi_count
        odb.nevents = event_count
        odb.workfow_id = wfo.id  ## (sic) keep the existing attribute name
        if odb.expectedlumis < expected_lumis:
            odb.expectedlumis = expected_lumis
        else:
            expected_lumis = odb.expectedlumis
        odb.date = time.mktime(time.gmtime())

        fraction = lumi_count / float(expected_lumis) * 100.
        completion_line = "%60s %d/%d = %3.2f%%" % (out, lumi_count, expected_lumis, fraction)
        wfi.sendLog('closor', "\t%s" % completion_line)
        if wfi.isRelval() and fraction < batch_goodness:
            self.batch_warnings[wfi.getCampaign()].add(completion_line)
            if fraction < 50:
                self.batch_extreme_warnings[wfi.getCampaign()].add(completion_line)
        stats[out] = lumi_count
        all_OK[out] = True

    ## check for at least one full copy prior to moving on
    in_full = {}
    for out in outputs:
        in_full[out] = []
        presence = getDatasetPresence(url, out)
        where = [site for site, info in presence.items() if info[0]]
        if where:
            all_OK[out] = True
            print out, "is in full at", ",".join(where)
            in_full[out] = copy.deepcopy(where)
        else:
            going_to = wfi.request['NonCustodialSites'] + wfi.request['CustodialSites']
            wfi.sendLog('closor', "%s is not in full anywhere. send to %s" % (out, ",".join(sorted(going_to))))
            at_destination = dict([(k, v) for (k, v) in presence.items() if k in going_to])
            else_where = dict([(k, v) for (k, v) in presence.items() if not k in going_to])
            print json.dumps(at_destination)
            print json.dumps(else_where, indent=2)
            ## do the full stuck-transfer study: missing files and the like
            for there in going_to:
                late_info = findLateFiles(url, out, going_to=there)
                for l in late_info:
                    l.update({"workflow": wfo.name, "dataset": out})
                self.all_late_files.extend(late_info)
            if check_fullcopy_to_announce:
                ## only set this false if the check is relevant
                all_OK[out] = False

    ## verify if we have to do harvesting
    if not options.no_harvest and not jump_the_line:
        (OK, requests) = spawn_harvesting(url, wfi, in_full)
        all_OK.update(OK)

    ## only that status can let me go into announced
    if all(all_OK.values()) and ((wfi.request['RequestStatus'] in ['closed-out']) or options.force or jump_the_line):
        print wfo.name, "to be announced"
        results = []
        if not results:
            for out in outputs:
                print "dealing with", out
                if out in stats and not stats[out]:
                    continue
                _, dsn, process_string, tier = out.split('/')
                if all_OK[out]:
                    print "setting valid"
                    results.append(setDatasetStatus(out, 'VALID', withFiles=False))
                if all_OK[out] and wfi.isRelval():
                    ## make the specific relval rules and the replicas
                    ## figure the destination(s) out
                    destinations = set()
                    if tier != "RECO" and tier != "ALCARECO":
                        destinations.add('T2_CH_CERN')
                    if tier == "GEN-SIM":
                        destinations.add('T1_US_FNAL_Disk')
                    if tier == "GEN-SIM-DIGI-RAW":
                        destinations.add('T1_US_FNAL_Disk')
                    if tier == "GEN-SIM-RECO":
                        destinations.add('T1_US_FNAL_Disk')
                    if "RelValTTBar" in dsn and "TkAlMinBias" in process_string and tier != "ALCARECO":
                        destinations.add('T2_CH_CERN')
                    if "MinimumBias" in dsn and "SiStripCalMinBias" in process_string and tier != "ALCARECO":
                        destinations.add('T2_CH_CERN')
                    if destinations:
                        wfi.sendLog('closor', '%s to go to %s' % (out, ', '.join(sorted(destinations))))
                    ## call to makereplicarequest under relval => done
                    for site in destinations:
                        result = makeReplicaRequest(url, site, [out], 'Copy for release validation consumption', priority='normal', approve=True, mail=False, group='RelVal')
                        try:
                            request_id = result['phedex']['request_created'][0]['id']
                            results.append(True)
                        except:
                            results.append('Failed relval transfer')
                elif all_OK[out]:
                    campaign = None
                    try:
                        campaign = out.split('/')[2].split('-')[0]
                    except:
                        if 'Campaign' in wfi.request and wfi.request['Campaign']:
                            campaign = wfi.request['Campaign']
                    to_DDM = False
                    ## campaign override
                    if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']:
                        to_DDM = True
                    ## by typical enabling
                    if tier in UC.get("tiers_to_DDM"):
                        to_DDM = True
                    ## check for unitarity
                    if not tier in UC.get("tiers_no_DDM") + UC.get("tiers_to_DDM"):
                        print "tier", tier, "neither TO nor NO DDM for", out
                        results.append('Not recognized tier %s' % tier)
                        #sendEmail("failed DDM injection","could not recognize %s for injecting in DDM"% out)
                        sendLog('closor', "could not recognize %s for injecting in DDM" % out, level='critical')
                        continue
                    n_copies = 1
                    destinations = []
                    if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]:
                        ddm_instructions = CI.campaigns[campaign]['DDMcopies']
                        if type(ddm_instructions) == int:
                            n_copies = CI.campaigns[campaign]['DDMcopies']
                        elif type(ddm_instructions) == dict:
                            ## a more fancy configuration
                            for ddmtier, indication in ddm_instructions.items():
                                if ddmtier == tier or ddmtier in ['*', 'all']:
                                    ## this is for us
                                    if 'N' in indication:
                                        n_copies = indication['N']
                                    if 'host' in indication:
                                        destinations = indication['host']
                    destination_spec = ""
                    if destinations:
                        destination_spec = "--destination=" + ",".join(destinations)
                    group_spec = ""  ## not used yet
                    ### should make this a campaign configuration
                    ## inject to DDM when necessary
                    if to_DDM:
                        print "Sending", out, " to DDM"
                        status = pass_to_dynamo([out], N=n_copies, sites=destinations if destinations else None, group=group_spec if group_spec else None)
                        results.append(status)
                        if status == True:
                            wfi.sendLog('closor', '%s is sent to dynamo in %s copies %s %s' % (out, n_copies, sorted(destinations), group_spec))
                        else:
                            sendLog('closor', "could not add " + out + " to dynamo pool. check closor logs.", level='critical')
                            wfi.sendLog('closor', "could not add " + out + " to dynamo pool. check closor logs.")
                else:
                    print wfo.name, "no stats for announcing", out
                    results.append('No Stats')

        if all(map(lambda result: result in ['None', None, True], results)):
            if not jump_the_line:
                ## only announce if all previous are fine
                res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                if not res in ['None', None]:
                    ## check the status again, it might well have toggled
                    wl_bis = workflowInfo(url, wfo.name)
                    self.to_wm_status = wl_bis.request['RequestStatus']
                    if wl_bis.request['RequestStatus'] in ['announced', 'normal-archived']:
                        res = None
                    else:
                        res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                results.append(res)

        print results
        if all(map(lambda result: result in ['None', None, True], results)):
            if jump_the_line:
                if not 'announced' in wfo.status:
                    self.to_status = wfo.status.replace('announce', 'announced')
            else:
                self.to_status = 'done'
            self.closing = True
            wfi.sendLog('closor', "workflow outputs are announced")
        else:
            wfi.sendLog('closor', "Error with %s to be announced \n%s" % (wfo.name, json.dumps(results)))
    elif wfi.request['RequestStatus'] in ['failed', 'aborted', 'aborted-archived', 'rejected', 'rejected-archived', 'aborted-completed']:
        if wfi.isRelval():
            self.to_status = 'forget'
            self.to_wm_status = wfi.request['RequestStatus']
            wfi.sendLog('closor', "%s is %s, but will not be set in trouble to find a replacement." % (wfo.name, self.to_wm_status))
        else:
            self.to_status = 'trouble'
            self.to_wm_status = wfi.request['RequestStatus']
    else:
        print wfo.name, "not good for announcing:", wfi.request['RequestStatus']
        wfi.sendLog('closor', "cannot be announced")
        self.held.add(wfo.name)
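
## Hedged sketch (not part of the original module): the relval batch gate in
## close() above boils down to a small pure predicate over the RequestStatus
## values of all workflows in a batch. The helper name is illustrative; the
## status set is the one used in close() above.
ACTIVE_BATCH_STATUSES = set([
    'completed', 'running-open', 'running-closed', 'acquired',
    'staged', 'staging', 'assigned', 'assignment-approved',
])

def batch_is_all_closed(statuses):
    """True when no workflow of the batch is still in an active state."""
    return all(s not in ACTIVE_BATCH_STATUSES for s in statuses)

## e.g. batch_is_all_closed(['announced', 'normal-archived'])  -> True
##      batch_is_all_closed(['announced', 'running-closed'])   -> False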
def closor(url, specific=None):
    if userLock():
        return
    if duplicateLock():
        return
    if not componentInfo().check():
        return
    UC = unifiedConfiguration()
    CI = campaignInfo()

    all_late_files = []
    check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce')

    ## manually closed-out workflows should get to close with checkor
    if specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
    else:
        wfs = session.query(Workflow).filter(Workflow.status == 'close').all()

    held = set()

    max_per_round = UC.get('max_per_round').get('closor', None)
    random.shuffle(wfs)
    if max_per_round:
        wfs = wfs[:max_per_round]

    for wfo in wfs:
        if specific and not specific in wfo.name:
            continue
        ## what is the expected number of lumis
        wfi = workflowInfo(url, wfo.name)
        wfo.wm_status = wfi.request['RequestStatus']

        if wfi.request['RequestStatus'] in ['announced', 'normal-archived']:
            ## manually announced ??
            wfo.status = 'done'
            wfo.wm_status = wfi.request['RequestStatus']
            wfi.sendLog('closor', '%s is announced already : %s' % (wfo.name, wfo.wm_status))

        session.commit()

        expected_lumis = 1
        if not 'TotalInputLumis' in wfi.request:
            print wfo.name, "has not been assigned yet, or the database is corrupted"
        else:
            expected_lumis = wfi.request['TotalInputLumis']

        ## what are the outputs
        outputs = wfi.request['OutputDatasets']
        ## check whether the number of lumis is as expected for each
        all_OK = defaultdict(lambda: False)
        #print outputs
        if len(outputs):
            print wfo.name, wfi.request['RequestStatus']
        for out in outputs:
            event_count, lumi_count = getDatasetEventsAndLumis(dataset=out)
            odb = session.query(Output).filter(Output.datasetname == out).first()
            if not odb:
                print "adding an output object", out
                odb = Output(datasetname=out)
                odb.workflow = wfo
                session.add(odb)
            odb.nlumis = lumi_count
            odb.nevents = event_count
            odb.workfow_id = wfo.id
            if odb.expectedlumis < expected_lumis:
                odb.expectedlumis = expected_lumis
            else:
                expected_lumis = odb.expectedlumis
            odb.date = time.mktime(time.gmtime())
            session.commit()
            wfi.sendLog('closor', "\t%60s %d/%d = %3.2f%%" % (out, lumi_count, expected_lumis, lumi_count / float(expected_lumis) * 100.))
            #print wfo.fraction_for_closing, lumi_count, expected_lumis
            #fraction = wfo.fraction_for_closing
            #fraction = 0.0
            #all_OK.append((float(lumi_count) > float(expected_lumis*fraction)))
            all_OK[out] = True

        ## check for at least one full copy prior to moving on
        in_full = {}
        for out in outputs:
            in_full[out] = []
            presence = getDatasetPresence(url, out)
            where = [site for site, info in presence.items() if info[0]]
            if where:
                all_OK[out] = True
                print out, "is in full at", ",".join(where)
                in_full[out] = copy.deepcopy(where)
            else:
                going_to = wfi.request['NonCustodialSites'] + wfi.request['CustodialSites']
                wfi.sendLog('closor', "%s is not in full anywhere. send to %s" % (out, ",".join(sorted(going_to))))
                at_destination = dict([(k, v) for (k, v) in presence.items() if k in going_to])
                else_where = dict([(k, v) for (k, v) in presence.items() if not k in going_to])
                print json.dumps(at_destination)
                print json.dumps(else_where, indent=2)
                ## do the full stuck-transfer study: missing files and the like
                for there in going_to:
                    late_info = findLateFiles(url, out, going_to=there)
                    for l in late_info:
                        l.update({"workflow": wfo.name, "dataset": out})
                    all_late_files.extend(late_info)
                if check_fullcopy_to_announce:
                    ## only set this false if the check is relevant
                    all_OK[out] = False

        ## verify if we have to do harvesting
        (OK, requests) = spawn_harvesting(url, wfi, in_full)
        all_OK.update(OK)

        ## only that status can let me go into announced
        if all(all_OK.values()) and wfi.request['RequestStatus'] in ['closed-out']:
            print wfo.name, "to be announced"
            results = []  #'dummy']
            if not results:
                for out in outputs:
                    if all_OK[out]:
                        results.append(setDatasetStatus(out, 'VALID'))
                        tier = out.split('/')[-1]
                        campaign = None
                        try:
                            campaign = out.split('/')[2].split('-')[0]
                        except:
                            if 'Campaign' in wfi.request and wfi.request['Campaign']:
                                campaign = wfi.request['Campaign']
                        to_DDM = False
                        ## campaign override
                        if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']:
                            to_DDM = True
                        ## by typical enabling
                        if tier in UC.get("tiers_to_DDM"):
                            to_DDM = True
                        ## check for unitarity
                        if not tier in UC.get("tiers_no_DDM") + UC.get("tiers_to_DDM"):
                            print "tier", tier, "neither TO nor NO DDM for", out
                            results.append('Not recognized tier %s' % tier)
                            #sendEmail("failed DDM injection","could not recognize %s for injecting in DDM"% out)
                            sendLog('closor', "could not recognize %s for injecting in DDM" % out, level='critical')
                            continue

                        n_copies = 2
                        destinations = []
                        if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]:
                            ddm_instructions = CI.campaigns[campaign]['DDMcopies']
                            if type(ddm_instructions) == int:
                                n_copies = CI.campaigns[campaign]['DDMcopies']
                            elif type(ddm_instructions) == dict:
                                ## a more fancy configuration
                                for ddmtier, indication in ddm_instructions.items():
                                    if ddmtier == tier or ddmtier in ['*', 'all']:
                                        ## this is for us
                                        if 'N' in indication:
                                            n_copies = indication['N']
                                        if 'host' in indication:
                                            destinations = indication['host']

                        destination_spec = ""
                        if destinations:
                            destination_spec = "--destination=" + ",".join(destinations)
                        ## inject to DDM when necessary
                        if to_DDM:
                            #print "Sending",out," to DDM"
                            p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s --exec' % (n_copies, out, destination_spec))
                            print p.read()
                            status = p.close()
                            if status != None:
                                print "Failed DDM, retrying a second time"
                                p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s --exec' % (n_copies, out, destination_spec))
                                print p.read()
                                status = p.close()
                                if status != None:
                                    #sendEmail("failed DDM injection","could not add "+out+" to DDM pool. check closor logs.")
                                    sendLog('closor', "could not add " + out + " to DDM pool. check closor logs.", level='critical')
                            results.append(status)
                            if status == None:
                                wfi.sendLog('closor', '%s is sent to AnalysisOps DDM pool in %s copies %s' % (out, n_copies, destination_spec))
                    else:
                        print wfo.name, "no stats for announcing", out
                        results.append('No Stats')

            if all(map(lambda result: result in ['None', None, True], results)):
                ## only announce if all previous are fine
                res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                if not res in ['None', None]:
                    ## check the status again, it might well have toggled
                    wl_bis = workflowInfo(url, wfo.name)
                    wfo.wm_status = wl_bis.request['RequestStatus']
                    session.commit()
                    if wl_bis.request['RequestStatus'] in ['announced', 'normal-archived']:
                        res = None
                    else:
                        ## retry ?
                        res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                results.append(res)

            #print results
            if all(map(lambda result: result in ['None', None, True], results)):
                wfo.status = 'done'
                session.commit()
                wfi.sendLog('closor', "workflow is announced")
            else:
                print "ERROR with ", wfo.name, "to be announced", json.dumps(results)
        else:
            print wfo.name, "not good for announcing:", wfi.request['RequestStatus']
            wfi.sendLog('closor', "cannot be announced")
            held.add(wfo.name)

    days_late = 0.
    retries_late = 10

    really_late_files = [info for info in all_late_files if info['retries'] >= retries_late]
    really_late_files = [info for info in really_late_files if info['delay'] / (60 * 60 * 24.) >= days_late]

    if really_late_files:
        subject = 'These %d files are lagging for %d days and %d retries announcing dataset \n%s' % (len(really_late_files), days_late, retries_late, json.dumps(really_late_files, indent=2))
        #sendEmail('waiting for files to announce', subject)
        sendLog('closor', subject, level='warning')
        sendLog('closor', subject)
        print subject
        open('%s/stuck_files.json' % monitor_dir, 'w').write(json.dumps(really_late_files, indent=2))

    if held:
        #sendEmail("held from announcing","the workflows below are held up, please check the logs https://cmst2.web.cern.ch/cmst2/unified/logs/closor/last.log \n%s"%("\n".join( held )))
        sendLog('closor', "the workflows below are held up \n%s" % ("\n".join(held)), level='critical')
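
## Hedged sketch (not part of the original module): how the 'DDMcopies'
## campaign knob is interpreted in closor() above. An int is a flat copy
## count; a dict maps a data tier (or '*'/'all') to {'N': <copies>,
## 'host': [<sites>]}. The helper name and the default are illustrative.
def resolve_ddm_copies(ddm_instructions, tier, default_n=2):
    """Return (n_copies, destinations) for a given data tier."""
    n_copies, destinations = default_n, []
    if isinstance(ddm_instructions, int):
        n_copies = ddm_instructions
    elif isinstance(ddm_instructions, dict):
        ## a more fancy configuration: per-tier overrides
        for ddmtier, indication in ddm_instructions.items():
            if ddmtier == tier or ddmtier in ['*', 'all']:
                n_copies = indication.get('N', n_copies)
                destinations = indication.get('host', destinations)
    return n_copies, destinations

## e.g. resolve_ddm_copies({'*': {'N': 3}}, 'AODSIM') -> (3, [])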
def closor(url, specific=None):
    if not componentInfo().check():
        return
    UC = unifiedConfiguration()
    CI = campaignInfo()
    #LI = lockInfo()

    all_late_files = []
    check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce')

    ## manually closed-out workflows should get to close with checkor
    if specific:
        wfs = session.query(Workflow).filter(Workflow.name.contains(specific)).all()
    else:
        wfs = session.query(Workflow).filter(Workflow.status == 'close').all()

    held = set()

    for wfo in wfs:
        if specific and not specific in wfo.name:
            continue
        ## what is the expected number of lumis
        wfi = workflowInfo(url, wfo.name)
        wfo.wm_status = wfi.request['RequestStatus']

        if wfi.request['RequestStatus'] in ['announced', 'normal-archived']:
            ## manually announced ??
            wfo.status = 'done'
            wfo.wm_status = wfi.request['RequestStatus']
            wfi.sendLog('closor', '%s is announced already : %s' % (wfo.name, wfo.wm_status))

        session.commit()

        expected_lumis = 1
        if not 'TotalInputLumis' in wfi.request:
            print wfo.name, "has not been assigned yet, or the database is corrupted"
        else:
            expected_lumis = wfi.request['TotalInputLumis']

        ## what are the outputs
        outputs = wfi.request['OutputDatasets']
        ## check whether the number of lumis is as expected for each
        all_OK = defaultdict(lambda: False)
        #print outputs
        if len(outputs):
            print wfo.name, wfi.request['RequestStatus']
        for out in outputs:
            event_count, lumi_count = getDatasetEventsAndLumis(dataset=out)
            odb = session.query(Output).filter(Output.datasetname == out).first()
            if not odb:
                print "adding an output object", out
                odb = Output(datasetname=out)
                odb.workflow = wfo
                session.add(odb)
            odb.nlumis = lumi_count
            odb.nevents = event_count
            odb.workfow_id = wfo.id
            if odb.expectedlumis < expected_lumis:
                odb.expectedlumis = expected_lumis
            else:
                expected_lumis = odb.expectedlumis
            odb.date = time.mktime(time.gmtime())
            session.commit()
            wfi.sendLog('closor', "\t%60s %d/%d = %3.2f%%" % (out, lumi_count, expected_lumis, lumi_count / float(expected_lumis) * 100.))
            #print wfo.fraction_for_closing, lumi_count, expected_lumis
            #fraction = wfo.fraction_for_closing
            #fraction = 0.0
            #all_OK.append((float(lumi_count) > float(expected_lumis*fraction)))
            all_OK[out] = True

        ## check for at least one full copy prior to moving on
        in_full = {}
        for out in outputs:
            in_full[out] = []
            presence = getDatasetPresence(url, out)
            where = [site for site, info in presence.items() if info[0]]
            if where:
                all_OK[out] = True
                print out, "is in full at", ",".join(where)
                in_full[out] = copy.deepcopy(where)
            else:
                going_to = wfi.request['NonCustodialSites'] + wfi.request['CustodialSites']
                wfi.sendLog('closor', "%s is not in full anywhere. send to %s" % (out, ",".join(sorted(going_to))))
                at_destination = dict([(k, v) for (k, v) in presence.items() if k in going_to])
                else_where = dict([(k, v) for (k, v) in presence.items() if not k in going_to])
                print json.dumps(at_destination)
                print json.dumps(else_where, indent=2)
                ## do the full stuck-transfer study: missing files and the like
                for there in going_to:
                    late_info = findLateFiles(url, out, going_to=there)
                    for l in late_info:
                        l.update({"workflow": wfo.name, "dataset": out})
                    all_late_files.extend(late_info)
                if check_fullcopy_to_announce:
                    ## only set this false if the check is relevant
                    all_OK[out] = False

        ## verify if we have to do harvesting
        (OK, requests) = spawn_harvesting(url, wfi, in_full)
        all_OK.update(OK)

        ## only that status can let me go into announced
        if all(all_OK.values()) and wfi.request['RequestStatus'] in ['closed-out']:
            print wfo.name, "to be announced"
            results = []  #'dummy']
            if not results:
                for out in outputs:
                    if all_OK[out]:
                        results.append(setDatasetStatus(out, 'VALID'))
                        tier = out.split('/')[-1]
                        campaign = None
                        try:
                            campaign = out.split('/')[2].split('-')[0]
                        except:
                            if 'Campaign' in wfi.request and wfi.request['Campaign']:
                                campaign = wfi.request['Campaign']
                        to_DDM = False
                        ## campaign override
                        if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']:
                            to_DDM = True
                        ## by typical enabling
                        if tier in UC.get("tiers_to_DDM"):
                            to_DDM = True
                        ## check for unitarity
                        if not tier in UC.get("tiers_no_DDM") + UC.get("tiers_to_DDM"):
                            print "tier", tier, "neither TO nor NO DDM for", out
                            results.append('Not recognized tier %s' % tier)
                            sendEmail("failed DDM injection", "could not recognize %s for injecting in DDM" % out)
                            continue

                        n_copies = 2
                        destinations = []
                        if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]:
                            ddm_instructions = CI.campaigns[campaign]['DDMcopies']
                            if type(ddm_instructions) == int:
                                n_copies = CI.campaigns[campaign]['DDMcopies']
                            elif type(ddm_instructions) == dict:
                                ## a more fancy configuration
                                for ddmtier, indication in ddm_instructions.items():
                                    if ddmtier == tier or ddmtier in ['*', 'all']:
                                        ## this is for us
                                        if 'N' in indication:
                                            n_copies = indication['N']
                                        if 'host' in indication:
                                            destinations = indication['host']

                        destination_spec = ""
                        if destinations:
                            destination_spec = "--destination=" + ",".join(destinations)
                        ## inject to DDM when necessary
                        if to_DDM:
                            #print "Sending",out," to DDM"
                            p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s --exec' % (n_copies, out, destination_spec))
                            print p.read()
                            status = p.close()
                            if status != None:
                                print "Failed DDM, retrying a second time"
                                p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s --exec' % (n_copies, out, destination_spec))
                                print p.read()
                                status = p.close()
                                if status != None:
                                    sendEmail("failed DDM injection", "could not add " + out + " to DDM pool. check closor logs.")
                            results.append(status)
                            if status == None:
                                wfi.sendLog('closor', '%s is sent to AnalysisOps DDM pool in %s copies %s' % (out, n_copies, destination_spec))
                    else:
                        print wfo.name, "no stats for announcing", out
                        results.append('No Stats')

            if all(map(lambda result: result in ['None', None, True], results)):
                ## only announce if all previous are fine
                res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                if not res in ['None', None]:
                    ## check the status again, it might well have toggled
                    wl_bis = workflowInfo(url, wfo.name)
                    wfo.wm_status = wl_bis.request['RequestStatus']
                    session.commit()
                    if wl_bis.request['RequestStatus'] in ['announced', 'normal-archived']:
                        res = None
                    else:
                        ## retry ?
                        res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                results.append(res)

            #print results
            if all(map(lambda result: result in ['None', None, True], results)):
                wfo.status = 'done'
                session.commit()
                wfi.sendLog('closor', "workflow is announced")
            else:
                print "ERROR with ", wfo.name, "to be announced", json.dumps(results)
        else:
            print wfo.name, "not good for announcing:", wfi.request['RequestStatus']
            wfi.sendLog('closor', "cannot be announced")
            held.add(wfo.name)

    days_late = 0.
    retries_late = 10

    really_late_files = [info for info in all_late_files if info['retries'] >= retries_late]
    really_late_files = [info for info in really_late_files if info['delay'] / (60 * 60 * 24.) >= days_late]

    if really_late_files:
        subject = 'These %d files are lagging for %d days and %d retries announcing dataset \n%s' % (len(really_late_files), days_late, retries_late, json.dumps(really_late_files, indent=2))
        sendEmail('waiting for files to announce', subject)
        sendLog('closor', subject)
        print subject
        open('%s/stuck_files.json' % monitor_dir, 'w').write(json.dumps(really_late_files, indent=2))

    if held:
        sendEmail("held from announcing", "the workflows below are held up, please check the logs https://cmst2.web.cern.ch/cmst2/unified/logs/closor/last.log \n%s" % ("\n".join(held)))
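
## Hedged sketch (not part of the original module): the "really late" file
## selection applied to all_late_files at the end of closor(). 'delay' is
## assumed to be in seconds, as implied by the /(60*60*24.) conversion to
## days in the original; the helper name and defaults are illustrative.
def select_really_late(all_late_files, retries_late=10, days_late=0.):
    """Keep late-file records past both the retry and the age thresholds."""
    return [info for info in all_late_files
            if info['retries'] >= retries_late
            and info['delay'] / (60 * 60 * 24.) >= days_late]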
def closor(url, specific=None, options=None):
    if userLock():
        return
    if duplicateLock():
        return
    if not componentInfo().check():
        return
    UC = unifiedConfiguration()
    CI = campaignInfo()

    all_late_files = []
    check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce')

    jump_the_line = options.announce if options else False
    if jump_the_line:
        print "announce option is on. Checking on things on-going ready to be announced"
        wfs = session.query(Workflow).filter(Workflow.status.contains('announce')).filter(sqlalchemy.not_(Workflow.status.contains('announced'))).all()
    else:
        print "regular option. Checking on things done and to be announced"
        wfs = session.query(Workflow).filter(Workflow.status == 'close').all()

    wfs_n = [w.name for w in wfs]
    print "unique names?"
    print len(set(wfs_n)) == len(wfs_n)

    held = set()
    print len(wfs), "closing"
    random.shuffle(wfs)
    max_per_round = UC.get('max_per_round').get('closor', None)
    if options.limit:
        max_per_round = options.limit
    if max_per_round:
        ## order them by priority
        all_closedout = sorted(getWorkflows(url, 'closed-out', details=True), key=lambda r: r['RequestPriority'])
        all_closedout = [r['RequestName'] for r in all_closedout]

        def rank(wfn):
            return all_closedout.index(wfn) if wfn in all_closedout else 0

        wfs = sorted(wfs, key=lambda wfo: rank(wfo.name), reverse=True)
        wfs = wfs[:max_per_round]

    batch_go = {}
    batch_warnings = defaultdict(set)
    batch_goodness = UC.get("batch_goodness")

    for iwfo, wfo in enumerate(wfs):
        if specific and not specific in wfo.name:
            continue
        print "Progress [%d/%d]" % (iwfo, len(wfs))

        ## what is the expected number of lumis
        wfi = workflowInfo(url, wfo.name)
        wfo.wm_status = wfi.request['RequestStatus']

        if wfi.isRelval():
            has_batch_go = False
            batch_name = wfi.getCampaign()
            if not batch_name in batch_go:
                ## do the estimation of whether this can be announced: only once per batch
                in_batches = getWorkflowByCampaign(url, batch_name, details=True)
                batch_go[batch_name] = all(map(lambda s: not s in ['completed', 'running-open', 'running-closed', 'acquired', 'assigned', 'assignment-approved'], [r['RequestStatus'] for r in in_batches]))
            ## already verified
            has_batch_go = batch_go[batch_name]
            if not has_batch_go:
                wfi.sendLog('closor', 'Cannot close for now because the batch <a href=https://dmytro.web.cern.ch/dmytro/cmsprodmon/workflows.php?campaign=%s>%s</a> is not all close' % (batch_name, batch_name))
                continue

        if wfi.request['RequestStatus'] in ['announced', 'normal-archived'] and not options.force:
            ## manually announced ??
            wfo.status = 'done'
            wfo.wm_status = wfi.request['RequestStatus']
            wfi.sendLog('closor', '%s is announced already : %s' % (wfo.name, wfo.wm_status))

        session.commit()

        if jump_the_line:
            wfi.sendLog('closor', 'Announcing while completing')

        expected_lumis = 1
        if not 'TotalInputLumis' in wfi.request:
            print wfo.name, "has not been assigned yet, or the database is corrupted"
        elif wfi.request['TotalInputLumis'] == 0:
            print wfo.name, "is corrupted with 0 expected lumis"
        else:
            expected_lumis = wfi.request['TotalInputLumis']

        ## what are the outputs
        outputs = wfi.request['OutputDatasets']
        ## check whether the number of lumis is as expected for each
        all_OK = defaultdict(lambda: False)
        stats = defaultdict(int)
        #print outputs
        if len(outputs):
            print wfo.name, wfi.request['RequestStatus']
        for out in outputs:
            event_count, lumi_count = getDatasetEventsAndLumis(dataset=out)
            odb = session.query(Output).filter(Output.datasetname == out).first()
            if not odb:
                print "adding an output object", out
                odb = Output(datasetname=out)
                odb.workflow = wfo
                session.add(odb)
            odb.nlumis = lumi_count
            odb.nevents = event_count
            odb.workfow_id = wfo.id
            if odb.expectedlumis < expected_lumis:
                odb.expectedlumis = expected_lumis
            else:
                expected_lumis = odb.expectedlumis
            odb.date = time.mktime(time.gmtime())
            session.commit()

            fraction = lumi_count / float(expected_lumis) * 100.
            completion_line = "%60s %d/%d = %3.2f%%" % (out, lumi_count, expected_lumis, fraction)
            wfi.sendLog('closor', "\t%s" % completion_line)
            if wfi.isRelval() and fraction < batch_goodness:
                batch_warnings[wfi.getCampaign()].add(completion_line)
            stats[out] = lumi_count
            all_OK[out] = True

        ## check for at least one full copy prior to moving on
        in_full = {}
        for out in outputs:
            in_full[out] = []
            presence = getDatasetPresence(url, out)
            where = [site for site, info in presence.items() if info[0]]
            if where:
                all_OK[out] = True
                print out, "is in full at", ",".join(where)
                in_full[out] = copy.deepcopy(where)
            else:
                going_to = wfi.request['NonCustodialSites'] + wfi.request['CustodialSites']
                wfi.sendLog('closor', "%s is not in full anywhere. send to %s" % (out, ",".join(sorted(going_to))))
                at_destination = dict([(k, v) for (k, v) in presence.items() if k in going_to])
                else_where = dict([(k, v) for (k, v) in presence.items() if not k in going_to])
                print json.dumps(at_destination)
                print json.dumps(else_where, indent=2)
                ## do the full stuck-transfer study: missing files and the like
                for there in going_to:
                    late_info = findLateFiles(url, out, going_to=there)
                    for l in late_info:
                        l.update({"workflow": wfo.name, "dataset": out})
                    all_late_files.extend(late_info)
                if check_fullcopy_to_announce:
                    ## only set this false if the check is relevant
                    all_OK[out] = False

        ## verify if we have to do harvesting
        if not options.no_harvest and not jump_the_line:
            (OK, requests) = spawn_harvesting(url, wfi, in_full)
            all_OK.update(OK)

        ## only that status can let me go into announced
        if all(all_OK.values()) and ((wfi.request['RequestStatus'] in ['closed-out']) or options.force or jump_the_line):
            print wfo.name, "to be announced"
            results = []
            if not results:
                for out in outputs:
                    if out in stats and not stats[out]:
                        continue
                    _, dsn, process_string, tier = out.split('/')
                    if all_OK[out]:
                        results.append(setDatasetStatus(out, 'VALID'))
                    if all_OK[out] and wfi.isRelval():
                        ## make the specific relval rules and the replicas
                        ## figure the destination(s) out
                        destinations = set()
                        if tier != "RECO" and tier != "ALCARECO":
                            destinations.add('T2_CH_CERN')
                        if tier == "GEN-SIM":
                            destinations.add('T1_US_FNAL_Disk')
                        if tier == "GEN-SIM-DIGI-RAW":
                            destinations.add('T1_US_FNAL_Disk')
                        if tier == "GEN-SIM-RECO":
                            destinations.add('T1_US_FNAL_Disk')
                        if "RelValTTBar" in dsn and "TkAlMinBias" in process_string and tier != "ALCARECO":
                            destinations.add('T2_CH_CERN')
                        if "MinimumBias" in dsn and "SiStripCalMinBias" in process_string and tier != "ALCARECO":
                            destinations.add('T2_CH_CERN')
                        if destinations:
                            wfi.sendLog('closor', '%s to go to %s' % (out, ', '.join(sorted(destinations))))
                        ## call to makereplicarequest under relval => done
                        for site in destinations:
                            result = makeReplicaRequest(url, site, [out], 'Copy for release validation consumption', priority='normal', approve=True, mail=False, group='RelVal')
                            try:
                                request_id = result['phedex']['request_created'][0]['id']
                                results.append(True)
                            except:
                                results.append('Failed relval transfer')
                    elif all_OK[out]:
                        campaign = None
                        try:
                            campaign = out.split('/')[2].split('-')[0]
                        except:
                            if 'Campaign' in wfi.request and wfi.request['Campaign']:
                                campaign = wfi.request['Campaign']
                        to_DDM = False
                        ## campaign override
                        if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']:
                            to_DDM = True
                        ## by typical enabling
                        if tier in UC.get("tiers_to_DDM"):
                            to_DDM = True
                        ## check for unitarity
                        if not tier in UC.get("tiers_no_DDM") + UC.get("tiers_to_DDM"):
                            print "tier", tier, "neither TO nor NO DDM for", out
                            results.append('Not recognized tier %s' % tier)
                            #sendEmail("failed DDM injection","could not recognize %s for injecting in DDM"% out)
                            sendLog('closor', "could not recognize %s for injecting in DDM" % out, level='critical')
                            continue

                        n_copies = 1
                        destinations = []
                        if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]:
                            ddm_instructions = CI.campaigns[campaign]['DDMcopies']
                            if type(ddm_instructions) == int:
                                n_copies = CI.campaigns[campaign]['DDMcopies']
                            elif type(ddm_instructions) == dict:
                                ## a more fancy configuration
                                for ddmtier, indication in ddm_instructions.items():
                                    if ddmtier == tier or ddmtier in ['*', 'all']:
                                        ## this is for us
                                        if 'N' in indication:
                                            n_copies = indication['N']
                                        if 'host' in indication:
                                            destinations = indication['host']

                        destination_spec = ""
                        if destinations:
                            destination_spec = "--destination=" + ",".join(destinations)
                        group_spec = ""  ## not used yet
                        ### should make this a campaign configuration
                        ## inject to DDM when necessary
                        if to_DDM:
                            print "Sending", out, " to DDM"
                            status = pass_to_dynamo([out], N=n_copies, sites=destinations if destinations else None, group=group_spec if group_spec else None)
                            results.append(status)
                            if status == True:
                                wfi.sendLog('closor', '%s is sent to dynamo in %s copies %s %s' % (out, n_copies, sorted(destinations), group_spec))
                            else:
                                sendLog('closor', "could not add " + out + " to dynamo pool. check closor logs.", level='critical')
                                wfi.sendLog('closor', "could not add " + out + " to dynamo pool. check closor logs.")
                    else:
                        print wfo.name, "no stats for announcing", out
                        results.append('No Stats')

            if all(map(lambda result: result in ['None', None, True], results)):
                if not jump_the_line:
                    ## only announce if all previous are fine
                    res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                    if not res in ['None', None]:
                        ## check the status again, it might well have toggled
                        wl_bis = workflowInfo(url, wfo.name)
                        wfo.wm_status = wl_bis.request['RequestStatus']
                        session.commit()
                        if wl_bis.request['RequestStatus'] in ['announced', 'normal-archived']:
                            res = None
                        else:
                            ## retry ?
                            res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                    results.append(res)

            #print results
            if all(map(lambda result: result in ['None', None, True], results)):
                if jump_the_line:
                    if not 'announced' in wfo.status:
                        wfo.status = wfo.status.replace('announce', 'announced')
                else:
                    wfo.status = 'done'
                session.commit()
                wfi.sendLog('closor', "workflow outputs are announced")
            else:
                wfi.sendLog('closor', "Error with %s to be announced \n%s" % (wfo.name, json.dumps(results)))
        elif wfi.request['RequestStatus'] in ['failed', 'aborted', 'aborted-archived', 'rejected', 'rejected-archived', 'aborted-completed']:
            if wfi.isRelval():
                wfo.status = 'forget'
                wfo.wm_status = wfi.request['RequestStatus']
                wfi.sendLog('closor', "%s is %s, but will not be set in trouble to find a replacement." % (wfo.name, wfo.wm_status))
            else:
                wfo.status = 'trouble'
                wfo.wm_status = wfi.request['RequestStatus']
            session.commit()
        else:
            print wfo.name, "not good for announcing:", wfi.request['RequestStatus']
            wfi.sendLog('closor', "cannot be announced")
            held.add(wfo.name)

    days_late = 0.
    retries_late = 10

    really_late_files = [info for info in all_late_files if info['retries'] >= retries_late]
    really_late_files = [info for info in really_late_files if info['delay'] / (60 * 60 * 24.) >= days_late]

    if really_late_files:
        subject = 'These %d files are lagging for %d days and %d retries announcing dataset \n%s' % (len(really_late_files), days_late, retries_late, json.dumps(really_late_files, indent=2))
        #sendEmail('waiting for files to announce', subject)
        sendLog('closor', subject, level='warning')
        sendLog('closor', subject)
        print subject
        open('%s/stuck_files.json' % monitor_dir, 'w').write(json.dumps(really_late_files, indent=2))

    if held:
        sendLog('closor', "the workflows below are held up \n%s" % ("\n".join(sorted(held))), level='critical')

    #batches = json.loads(open('batches.json').read())
    for bname, go in batch_go.items():
        if go:
            subject = "Release Validation Samples Batch %s" % bname
            issues = ""
            if batch_warnings[bname]:
                issues = "The following datasets have outstanding completion (<%d%%) issues:\n\n" % batch_goodness
                issues += "\n".join(sorted(batch_warnings[bname]))
                issues += "\n\n"
            text = """
Dear all,

a batch of release validation workflows has finished.

Batch ID: %s

Detail of the workflows
https://dmytro.web.cern.ch/dmytro/cmsprodmon/requests.php?campaign=%s

%s
This is an automated message.
""" % (bname, bname, issues)
            to = ['*****@*****.**']
            sendEmail(subject, text, destination=to)
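
## Hedged sketch (not part of the original module): the relval placement
## rules used in closor() above, isolated as a pure function. Everything
## except RECO/ALCARECO goes to T2_CH_CERN, the GEN-SIM* tiers also get a
## T1_US_FNAL_Disk copy, and two alignment/calibration skims are pinned to
## CERN. The helper name is illustrative.
def relval_destinations(dsn, process_string, tier):
    """Return the set of sites a relval output dataset should be copied to."""
    destinations = set()
    if tier not in ('RECO', 'ALCARECO'):
        destinations.add('T2_CH_CERN')
    if tier in ('GEN-SIM', 'GEN-SIM-DIGI-RAW', 'GEN-SIM-RECO'):
        destinations.add('T1_US_FNAL_Disk')
    if 'RelValTTBar' in dsn and 'TkAlMinBias' in process_string and tier != 'ALCARECO':
        destinations.add('T2_CH_CERN')
    if 'MinimumBias' in dsn and 'SiStripCalMinBias' in process_string and tier != 'ALCARECO':
        destinations.add('T2_CH_CERN')
    return destinations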
def closor(url, specific=None, options=None):
    if userLock():
        return
    if duplicateLock():
        return
    if not componentInfo().check():
        return
    UC = unifiedConfiguration()
    CI = campaignInfo()

    all_late_files = []
    check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce')

    jump_the_line = options.announce if options else False
    if jump_the_line:
        wfs = session.query(Workflow).filter(Workflow.status.contains('announce')).filter(sqlalchemy.not_(Workflow.status.contains('announced'))).all()
    else:
        wfs = session.query(Workflow).filter(Workflow.status == 'close').all()

    held = set()
    print len(wfs), "closing"

    max_per_round = UC.get('max_per_round').get('closor', None)
    if options.limit:
        max_per_round = options.limit
    random.shuffle(wfs)
    if max_per_round:
        wfs = wfs[:max_per_round]

    batch_go = {}
    batch_warnings = defaultdict(set)
    batch_goodness = UC.get("batch_goodness")

    for wfo in wfs:
        if specific and not specific in wfo.name:
            continue

        ## what is the expected number of lumis
        wfi = workflowInfo(url, wfo.name)
        wfo.wm_status = wfi.request['RequestStatus']

        if wfi.isRelval():
            has_batch_go = False
            batch_name = wfi.getCampaign()
            if not batch_name in batch_go:
                ## do the estimation of whether this can be announced: only once per batch
                in_batches = getWorkflowByCampaign(url, batch_name, details=True)
                batch_go[batch_name] = all(map(lambda s: not s in ['completed', 'running-open', 'running-closed', 'acquired', 'assigned', 'assignment-approved'], [r['RequestStatus'] for r in in_batches]))
            ## already verified
            has_batch_go = batch_go[batch_name]
            if not has_batch_go:
                wfi.sendLog('closor', 'Cannot close for now because the batch %s is not all close' % batch_name)
                continue

        if wfi.request['RequestStatus'] in ['announced', 'normal-archived'] and not options.force:
            ## manually announced ??
            wfo.status = 'done'
            wfo.wm_status = wfi.request['RequestStatus']
            wfi.sendLog('closor', '%s is announced already : %s' % (wfo.name, wfo.wm_status))

        session.commit()

        if jump_the_line:
            wfi.sendLog('closor', 'Announcing while completing')

        expected_lumis = 1
        if not 'TotalInputLumis' in wfi.request:
            print wfo.name, "has not been assigned yet, or the database is corrupted"
        elif wfi.request['TotalInputLumis'] == 0:
            print wfo.name, "is corrupted with 0 expected lumis"
        else:
            expected_lumis = wfi.request['TotalInputLumis']

        ## what are the outputs
        outputs = wfi.request['OutputDatasets']
        ## check whether the number of lumis is as expected for each
        all_OK = defaultdict(lambda: False)
        stats = defaultdict(int)
        #print outputs
        if len(outputs):
            print wfo.name, wfi.request['RequestStatus']
        for out in outputs:
            event_count, lumi_count = getDatasetEventsAndLumis(dataset=out)
            odb = session.query(Output).filter(Output.datasetname == out).first()
            if not odb:
                print "adding an output object", out
                odb = Output(datasetname=out)
                odb.workflow = wfo
                session.add(odb)
            odb.nlumis = lumi_count
            odb.nevents = event_count
            odb.workfow_id = wfo.id
            if odb.expectedlumis < expected_lumis:
                odb.expectedlumis = expected_lumis
            else:
                expected_lumis = odb.expectedlumis
            odb.date = time.mktime(time.gmtime())
            session.commit()

            fraction = lumi_count / float(expected_lumis) * 100.
            completion_line = "%60s %d/%d = %3.2f%%" % (out, lumi_count, expected_lumis, fraction)
            wfi.sendLog('closor', "\t%s" % completion_line)
            if wfi.isRelval() and fraction < batch_goodness:
                batch_warnings[wfi.getCampaign()].add(completion_line)
            stats[out] = lumi_count
            all_OK[out] = True

        ## check for at least one full copy prior to moving on
        in_full = {}
        for out in outputs:
            in_full[out] = []
            presence = getDatasetPresence(url, out)
            where = [site for site, info in presence.items() if info[0]]
            if where:
                all_OK[out] = True
                print out, "is in full at", ",".join(where)
                in_full[out] = copy.deepcopy(where)
            else:
                going_to = wfi.request['NonCustodialSites'] + wfi.request['CustodialSites']
                wfi.sendLog('closor', "%s is not in full anywhere. send to %s" % (out, ",".join(sorted(going_to))))
                at_destination = dict([(k, v) for (k, v) in presence.items() if k in going_to])
                else_where = dict([(k, v) for (k, v) in presence.items() if not k in going_to])
                print json.dumps(at_destination)
                print json.dumps(else_where, indent=2)
                ## do the full stuck-transfer study: missing files and the like
                for there in going_to:
                    late_info = findLateFiles(url, out, going_to=there)
                    for l in late_info:
                        l.update({"workflow": wfo.name, "dataset": out})
                    all_late_files.extend(late_info)
                if check_fullcopy_to_announce:
                    ## only set this false if the check is relevant
                    all_OK[out] = False

        ## verify if we have to do harvesting
        if not options.no_harvest and not jump_the_line:
            (OK, requests) = spawn_harvesting(url, wfi, in_full)
            all_OK.update(OK)

        ## only that status can let me go into announced
        if all(all_OK.values()) and ((wfi.request['RequestStatus'] in ['closed-out']) or options.force or jump_the_line):
            print wfo.name, "to be announced"
            results = []
            if not results:
                for out in outputs:
                    if out in stats and not stats[out]:
                        continue
                    _, dsn, process_string, tier = out.split('/')
                    if all_OK[out]:
                        results.append(setDatasetStatus(out, 'VALID'))
                    if all_OK[out] and wfi.isRelval():
                        ## make the specific relval rules and the replicas
                        ## figure the destination(s) out
                        destinations = set()
                        if tier != "RECO" and tier != "ALCARECO":
                            destinations.add('T2_CH_CERN')
                        if tier == "GEN-SIM":
                            destinations.add('T1_US_FNAL_Disk')
                        if tier == "GEN-SIM-DIGI-RAW":
                            destinations.add('T1_US_FNAL_Disk')
                        if tier == "GEN-SIM-RECO":
                            destinations.add('T1_US_FNAL_Disk')
                        if "RelValTTBar" in dsn and "TkAlMinBias" in process_string and tier != "ALCARECO":
                            destinations.add('T2_CH_CERN')
                        if "MinimumBias" in dsn and "SiStripCalMinBias" in process_string and tier != "ALCARECO":
                            destinations.add('T2_CH_CERN')
                        if destinations:
                            wfi.sendLog('closor', '%s to go to %s' % (out, ', '.join(sorted(destinations))))
                        ## call to makereplicarequest under relval => done
                        for site in destinations:
                            result = makeReplicaRequest(url, site, [out], 'Copy for release validation consumption', priority='normal', approve=True, mail=False, group='RelVal')
                            try:
                                request_id = result['phedex']['request_created'][0]['id']
                                results.append(True)
                            except:
                                results.append('Failed relval transfer')
                    elif all_OK[out]:
                        campaign = None
                        try:
                            campaign = out.split('/')[2].split('-')[0]
                        except:
                            if 'Campaign' in wfi.request and wfi.request['Campaign']:
                                campaign = wfi.request['Campaign']
                        to_DDM = False
                        ## campaign override
                        if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']:
                            to_DDM = True
                        ## by typical enabling
                        if tier in UC.get("tiers_to_DDM"):
                            to_DDM = True
                        ## check for unitarity
                        if not tier in UC.get("tiers_no_DDM") + UC.get("tiers_to_DDM"):
                            print "tier", tier, "neither TO nor NO DDM for", out
                            results.append('Not recognized tier %s' % tier)
                            #sendEmail("failed DDM injection","could not recognize %s for injecting in DDM"% out)
                            sendLog('closor', "could not recognize %s for injecting in DDM" % out, level='critical')
                            continue

                        n_copies = 2
                        destinations = []
                        if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]:
                            ddm_instructions = CI.campaigns[campaign]['DDMcopies']
                            if type(ddm_instructions) == int:
                                n_copies = CI.campaigns[campaign]['DDMcopies']
                            elif type(ddm_instructions) == dict:
                                ## a more fancy configuration
                                for ddmtier, indication in ddm_instructions.items():
                                    if ddmtier == tier or ddmtier in ['*', 'all']:
                                        ## this is for us
                                        if 'N' in indication:
                                            n_copies = indication['N']
                                        if 'host' in indication:
                                            destinations = indication['host']

                        destination_spec = ""
                        if destinations:
                            destination_spec = "--destination=" + ",".join(destinations)
                        group_spec = ""  ## not used yet
                        ### should make this a campaign configuration
                        ## inject to DDM when necessary
                        if to_DDM:
                            print "Sending", out, " to DDM"
                            p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s %s --debug 0 --exec' % (n_copies, out, destination_spec, group_spec))
                            ddm_text = p.read()
                            print ddm_text
                            status = p.close()
                            if status != None:
                                print "Failed DDM, retrying to send", out, "a second time"
                                p = os.popen('python assignDatasetToSite.py --nCopies=%d --dataset=%s %s %s --debug 1 --exec' % (n_copies, out, destination_spec, group_spec))
                                ddm_text = p.read()
                                print ddm_text
                                status = p.close()
                                if status != None:
                                    #sendEmail("failed DDM injection","could not add "+out+" to DDM pool. check closor logs.")
                                    sendLog('closor', "could not add " + out + " to DDM pool. check closor logs.", level='critical')
                            if options.force:
                                status = True
                            results.append(status)
                            if status == None:
                                wfi.sendLog('closor', ddm_text)
                                wfi.sendLog('closor', '%s is sent to AnalysisOps DDM pool in %s copies %s' % (out, n_copies, destination_spec))
                    else:
                        print wfo.name, "no stats for announcing", out
                        results.append('No Stats')

            if all(map(lambda result: result in ['None', None, True], results)):
                if not jump_the_line:
                    ## only announce if all previous are fine
                    res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                    if not res in ['None', None]:
                        ## check the status again, it might well have toggled
                        wl_bis = workflowInfo(url, wfo.name)
                        wfo.wm_status = wl_bis.request['RequestStatus']
                        session.commit()
                        if wl_bis.request['RequestStatus'] in ['announced', 'normal-archived']:
                            res = None
                        else:
                            ## retry ?
                            res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                    results.append(res)

            #print results
            if all(map(lambda result: result in ['None', None, True], results)):
                if jump_the_line:
                    if not 'announced' in wfo.status:
                        wfo.status = wfo.status.replace('announce', 'announced')
                else:
                    wfo.status = 'done'
                session.commit()
                wfi.sendLog('closor', "workflow outputs are announced")
            else:
                wfi.sendLog('closor', "Error with %s to be announced \n%s" % (wfo.name, json.dumps(results)))
        elif wfi.request['RequestStatus'] in ['failed', 'aborted', 'aborted-archived', 'rejected', 'rejected-archived', 'aborted-completed']:
            if wfi.isRelval():
                wfo.status = 'forget'
                wfo.wm_status = wfi.request['RequestStatus']
                wfi.sendLog('closor', "%s is %s, but will not be set in trouble to find a replacement." % (wfo.name, wfo.wm_status))
            else:
                wfo.status = 'trouble'
                wfo.wm_status = wfi.request['RequestStatus']
            session.commit()
        else:
            print wfo.name, "not good for announcing:", wfi.request['RequestStatus']
            wfi.sendLog('closor', "cannot be announced")
            held.add(wfo.name)

    days_late = 0.
    retries_late = 10

    really_late_files = [info for info in all_late_files if info['retries'] >= retries_late]
    really_late_files = [info for info in really_late_files if info['delay'] / (60 * 60 * 24.) >= days_late]

    if really_late_files:
        subject = 'These %d files are lagging for %d days and %d retries announcing dataset \n%s' % (len(really_late_files), days_late, retries_late, json.dumps(really_late_files, indent=2))
        #sendEmail('waiting for files to announce', subject)
        sendLog('closor', subject, level='warning')
        sendLog('closor', subject)
        print subject
        open('%s/stuck_files.json' % monitor_dir, 'w').write(json.dumps(really_late_files, indent=2))

    if held:
        sendLog('closor', "the workflows below are held up \n%s" % ("\n".join(sorted(held))), level='critical')

    #batches = json.loads(open('batches.json').read())
    for bname, go in batch_go.items():
        if go:
            subject = "Release Validation Samples Batch %s" % bname
            issues = ""
            if batch_warnings[bname]:
                issues = "The following datasets have outstanding completion (<%d%%) issues:\n\n" % batch_goodness
                issues += "\n".join(sorted(batch_warnings[bname]))
                issues += "\n\n"
            text = """
Dear all,

a batch of release validation workflows has finished.

Batch ID: %s

Detail of the workflows
https://dmytro.web.cern.ch/dmytro/cmsprodmon/requests.php?campaign=%s

%s
This is an automated message.
""" % (bname, bname, issues)
            to = ['*****@*****.**']
            sendEmail(subject, text, destination=to)
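
## Hedged sketch (not part of the original module): the per-dataset
## completion arithmetic used when deciding batch warnings. Upstream code
## defaults expected_lumis to at least 1, which the max() below mirrors
## defensively; the helper name is illustrative.
def completion_percent(lumi_count, expected_lumis):
    """Percentage of expected lumis actually produced."""
    return lumi_count / float(max(expected_lumis, 1)) * 100.

## e.g. with batch_goodness=90, a dataset at 95 of 100 expected lumis passes:
##      completion_percent(95, 100) -> 95.0 >= 90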
def close(self):
    if os.path.isfile('.closor_stop'):
        print "The closing of workflows is shortened"
        return

    url = self.url
    batch_go = self.batch_go
    CI = self.CI
    UC = self.UC
    wfo = self.wfo
    jump_the_line = self.jump_the_line
    batch_goodness = self.batch_goodness
    check_fullcopy_to_announce = UC.get('check_fullcopy_to_announce')

    ## what is the expected number of lumis
    self.wfi = workflowInfo(url, wfo.name)
    wfi = self.wfi
    wfo.wm_status = wfi.request['RequestStatus']

    if wfi.isRelval():
        has_batch_go = False
        batch_name = wfi.getCampaign()
        if not batch_name in batch_go:
            ## do the estimation of whether this can be announced: only once per batch
            in_batches = getWorkflowByCampaign(url, batch_name, details=True)
            batch_go[batch_name] = all(map(lambda s: not s in ['completed', 'running-open', 'running-closed', 'acquired', 'assigned', 'assignment-approved'], [r['RequestStatus'] for r in in_batches]))
        ## already verified
        has_batch_go = batch_go[batch_name]
        if not has_batch_go:
            wfi.sendLog('closor', 'Cannot close for now because the batch <a href=https://dmytro.web.cern.ch/dmytro/cmsprodmon/workflows.php?campaign=%s>%s</a> is not all close' % (batch_name, batch_name))
            return

    if wfi.request['RequestStatus'] in ['announced', 'normal-archived'] and not options.force:
        ## manually announced ??
        self.to_status = 'done'
        self.to_wm_status = wfi.request['RequestStatus']
        wfi.sendLog('closor', '%s is announced already : %s' % (wfo.name, self.to_wm_status))
        return

    if jump_the_line:
        wfi.sendLog('closor', 'Announcing while completing')

    expected_lumis = 1
    if not 'TotalInputLumis' in wfi.request:
        print wfo.name, "has not been assigned yet, or the database is corrupted"
    elif wfi.request['TotalInputLumis'] == 0:
        print wfo.name, "is corrupted with 0 expected lumis"
    else:
        expected_lumis = wfi.request['TotalInputLumis']

    ## what are the outputs
    outputs = wfi.request['OutputDatasets']
    ## check whether the number of lumis is as expected for each
    all_OK = defaultdict(lambda: False)
    stats = defaultdict(int)
    #print outputs
    if len(outputs):
        print wfo.name, wfi.request['RequestStatus']
    for out in outputs:
        event_count, lumi_count = getDatasetEventsAndLumis(dataset=out)
        self.outs.append(Output(datasetname=out))
        odb = self.outs[-1]
        odb.workflow = wfo
        odb.nlumis = lumi_count
        odb.nevents = event_count
        odb.workfow_id = wfo.id
        if odb.expectedlumis < expected_lumis:
            odb.expectedlumis = expected_lumis
        else:
            expected_lumis = odb.expectedlumis
        odb.date = time.mktime(time.gmtime())

        fraction = lumi_count / float(expected_lumis) * 100.
        completion_line = "%60s %d/%d = %3.2f%%" % (out, lumi_count, expected_lumis, fraction)
        wfi.sendLog('closor', "\t%s" % completion_line)
        if wfi.isRelval() and fraction < batch_goodness:
            self.batch_warnings[wfi.getCampaign()].add(completion_line)
        stats[out] = lumi_count
        all_OK[out] = True

    ## check for at least one full copy prior to moving on
    in_full = {}
    for out in outputs:
        in_full[out] = []
        presence = getDatasetPresence(url, out)
        where = [site for site, info in presence.items() if info[0]]
        if where:
            all_OK[out] = True
            print out, "is in full at", ",".join(where)
            in_full[out] = copy.deepcopy(where)
        else:
            going_to = wfi.request['NonCustodialSites'] + wfi.request['CustodialSites']
            wfi.sendLog('closor', "%s is not in full anywhere. send to %s" % (out, ",".join(sorted(going_to))))
            at_destination = dict([(k, v) for (k, v) in presence.items() if k in going_to])
            else_where = dict([(k, v) for (k, v) in presence.items() if not k in going_to])
            print json.dumps(at_destination)
            print json.dumps(else_where, indent=2)
            ## do the full stuck-transfer study: missing files and the like
            for there in going_to:
                late_info = findLateFiles(url, out, going_to=there)
                for l in late_info:
                    l.update({"workflow": wfo.name, "dataset": out})
                self.all_late_files.extend(late_info)
            if check_fullcopy_to_announce:
                ## only set this false if the check is relevant
                all_OK[out] = False

    ## verify if we have to do harvesting
    if not options.no_harvest and not jump_the_line:
        (OK, requests) = spawn_harvesting(url, wfi, in_full)
        all_OK.update(OK)

    ## only that status can let me go into announced
    if all(all_OK.values()) and ((wfi.request['RequestStatus'] in ['closed-out']) or options.force or jump_the_line):
        print wfo.name, "to be announced"
        results = []
        if not results:
            for out in outputs:
                print "dealing with", out
                if out in stats and not stats[out]:
                    continue
                _, dsn, process_string, tier = out.split('/')
                if all_OK[out]:
                    print "setting valid"
                    results.append(setDatasetStatus(out, 'VALID', withFiles=False))
                if all_OK[out] and wfi.isRelval():
                    ## make the specific relval rules and the replicas
                    ## figure the destination(s) out
                    destinations = set()
                    if tier != "RECO" and tier != "ALCARECO":
                        destinations.add('T2_CH_CERN')
                    if tier == "GEN-SIM":
                        destinations.add('T1_US_FNAL_Disk')
                    if tier == "GEN-SIM-DIGI-RAW":
                        destinations.add('T1_US_FNAL_Disk')
                    if tier == "GEN-SIM-RECO":
                        destinations.add('T1_US_FNAL_Disk')
                    if "RelValTTBar" in dsn and "TkAlMinBias" in process_string and tier != "ALCARECO":
                        destinations.add('T2_CH_CERN')
                    if "MinimumBias" in dsn and "SiStripCalMinBias" in process_string and tier != "ALCARECO":
                        destinations.add('T2_CH_CERN')
                    if destinations:
                        wfi.sendLog('closor', '%s to go to %s' % (out, ', '.join(sorted(destinations))))
                    ## call to makereplicarequest under relval => done
                    for site in destinations:
                        result = makeReplicaRequest(url, site, [out], 'Copy for release validation consumption', priority='normal', approve=True, mail=False, group='RelVal')
                        try:
                            request_id = result['phedex']['request_created'][0]['id']
                            results.append(True)
                        except:
                            results.append('Failed relval transfer')
                elif all_OK[out]:
                    campaign = None
                    try:
                        campaign = out.split('/')[2].split('-')[0]
                    except:
                        if 'Campaign' in wfi.request and wfi.request['Campaign']:
                            campaign = wfi.request['Campaign']
                    to_DDM = False
                    ## campaign override
                    if campaign and campaign in CI.campaigns and 'toDDM' in CI.campaigns[campaign] and tier in CI.campaigns[campaign]['toDDM']:
                        to_DDM = True
                    ## by typical enabling
                    if tier in UC.get("tiers_to_DDM"):
                        to_DDM = True
                    ## check for unitarity
                    if not tier in UC.get("tiers_no_DDM") + UC.get("tiers_to_DDM"):
                        print "tier", tier, "neither TO nor NO DDM for", out
                        results.append('Not recognized tier %s' % tier)
                        #sendEmail("failed DDM injection","could not recognize %s for injecting in DDM"% out)
                        sendLog('closor', "could not recognize %s for injecting in DDM" % out, level='critical')
                        continue
                    n_copies = 1
                    destinations = []
                    if to_DDM and campaign and campaign in CI.campaigns and 'DDMcopies' in CI.campaigns[campaign]:
                        ddm_instructions = CI.campaigns[campaign]['DDMcopies']
                        if type(ddm_instructions) == int:
                            n_copies = CI.campaigns[campaign]['DDMcopies']
                        elif type(ddm_instructions) == dict:
                            ## a more fancy configuration
                            for ddmtier, indication in ddm_instructions.items():
                                if ddmtier == tier or ddmtier in ['*', 'all']:
                                    ## this is for us
                                    if 'N' in indication:
                                        n_copies = indication['N']
                                    if 'host' in indication:
                                        destinations = indication['host']
                    destination_spec = ""
                    if destinations:
                        destination_spec = "--destination=" + ",".join(destinations)
                    group_spec = ""  ## not used yet
                    ### should make this a campaign configuration
                    ## inject to DDM when necessary
                    if to_DDM:
                        print "Sending", out, " to DDM"
                        status = pass_to_dynamo([out], N=n_copies, sites=destinations if destinations else None, group=group_spec if group_spec else None)
                        results.append(status)
                        if status == True:
                            wfi.sendLog('closor', '%s is sent to dynamo in %s copies %s %s' % (out, n_copies, sorted(destinations), group_spec))
                        else:
                            sendLog('closor', "could not add " + out + " to dynamo pool. check closor logs.", level='critical')
                            wfi.sendLog('closor', "could not add " + out + " to dynamo pool. check closor logs.")
                else:
                    print wfo.name, "no stats for announcing", out
                    results.append('No Stats')

        if all(map(lambda result: result in ['None', None, True], results)):
            if not jump_the_line:
                ## only announce if all previous are fine
                res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                if not res in ['None', None]:
                    ## check the status again, it might well have toggled
                    wl_bis = workflowInfo(url, wfo.name)
                    self.to_wm_status = wl_bis.request['RequestStatus']
                    if wl_bis.request['RequestStatus'] in ['announced', 'normal-archived']:
                        res = None
                    else:
                        res = reqMgrClient.announceWorkflowCascade(url, wfo.name)
                results.append(res)

        print results
        if all(map(lambda result: result in ['None', None, True], results)):
            if jump_the_line:
                if not 'announced' in wfo.status:
                    self.to_status = wfo.status.replace('announce', 'announced')
            else:
                self.to_status = 'done'
            self.closing = True
            wfi.sendLog('closor', "workflow outputs are announced")
        else:
            wfi.sendLog('closor', "Error with %s to be announced \n%s" % (wfo.name, json.dumps(results)))
    elif wfi.request['RequestStatus'] in ['failed', 'aborted', 'aborted-archived', 'rejected', 'rejected-archived', 'aborted-completed']:
        if wfi.isRelval():
            self.to_status = 'forget'
            self.to_wm_status = wfi.request['RequestStatus']
            wfi.sendLog('closor', "%s is %s, but will not be set in trouble to find a replacement." % (wfo.name, self.to_wm_status))
        else:
            self.to_status = 'trouble'
            self.to_wm_status = wfi.request['RequestStatus']
    else:
        print wfo.name, "not good for announcing:", wfi.request['RequestStatus']
        wfi.sendLog('closor', "cannot be announced")
        self.held.add(wfo.name)
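
## Hedged sketch (not part of the original module): the announce gate used
## throughout treats the literal string 'None', None itself and True as
## success markers in the accumulated results list; anything else (e.g.
## 'No Stats' or a failure message) blocks the announcement.
def all_results_ok(results):
    """True when every accumulated result counts as a success."""
    return all(r in ['None', None, True] for r in results)

## e.g. all_results_ok([None, True, 'None']) -> True
##      all_results_ok([None, 'No Stats'])   -> False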