Example #1
def getWorkflowInfo(workflow):
    """
    creates a single dictionary with all workflow information
    """
    #TODO replace all with info or cache
    wfObject = reqMgr.Workflow(workflow)
    #wlinfo = getWorkloadParameters(workflow)


    #Global stuff - common to all request types
    wlinfo = {    # 'batch':batch  IGNORED - not useful
                'processingstring': wfObject.info['ProcessingString'],
                'requestname': workflow,
                'prepid': wfObject.info['PrepID'],
                'globaltag': wfObject.info['GlobalTag'],
                'timeev': wfObject.info['TimePerEvent'],
                'sizeev': wfObject.info['SizePerEvent'],
                'priority': wfObject.info['RequestPriority'],
                'sites': wfObject.info['Site Whitelist'],
                'acquisitionEra':wfObject.info['AcquisitionEra'],
                'processingVersion': wfObject.info['ProcessingVersion'],
                'campaign':wfObject.info['Campaign'],
                'cmssw':wfObject.info['CMSSWVersion'],
                'mergedLFNBase':wfObject.info['MergedLFNBase'],
                'type' : wfObject.type,
                'status':wfObject.status,
            }
    #get the given key from data, or 0 by default
    getValue = lambda data, k, dflt=0: data[k] if k in data else dflt

    #calculate general stuff
    #parse and format date
    now = datetime.datetime.now()
    dateArr = wfObject.info['RequestDate']
    #pad missing date fields with zeros
    if len(dateArr) != 6:
        dateArr += (6-len(dateArr))*[0]
    reqdate = datetime.datetime.strptime(
                    "%s-%s-%s %s:%s:%s.0"%tuple(dateArr)
                    ,  DATE_FORMAT)
    wlinfo['reqdate'] = reqdate.strftime(DATE_FORMAT)
    #calculate the age of the request in days
    delta = now - reqdate
    days = delta.days + delta.seconds / 3600.0 / 24.0
    wlinfo['requestdays'] = days
    #assignment team and date
    if 'Assignments' in wfObject.info and wfObject.info['Assignments']:
        wlinfo['team'] = wfObject.info['Assignments'][0]
    else:
        wlinfo['team'] = ''
    wlinfo['update'] = getAssignmentDate(wfObject)

    wlinfo['expectedevents'] = getValue(wfObject.cache, 'TotalInputEvents')
    wlinfo['expectedjobs'] = getValue(wfObject.cache, 'TotalEstimatedJobs')
    
    #Job Information if available
    wlinfo['js'] = getJobSummary(workflow)
    
    #information about input dataset
    inputdataset = {}
    wlinfo['inputdatasetinfo'] = inputdataset
    if 'InputDatasets' in wfObject.info and wfObject.info['InputDatasets']:
        inputdataset['name'] = wfObject.info['InputDatasets'][0]
        
        wlinfo['blockswhitelist'] = getValue(wfObject.info, 'BlockWhitelist', None)
        if wlinfo['blockswhitelist']:
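            # BlockWhitelist presumably arrives as a string representation of a list;
            # eval() converts it back (assumes the value is trusted; ast.literal_eval would be safer)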
            wlinfo['blockswhitelist'] = eval(wlinfo['blockswhitelist'])
        
        inputdataset['events'] = dbs3.getEventCountDataSet(inputdataset['name'])
        dsinfo = dbs3.getDatasetInfo(inputdataset['name'])
        inputdataset['status'], wlinfo['inputdatasetinfo']['createts'], wlinfo['inputdatasetinfo']['lastmodts'] = dsinfo
    
        #block events
        if wlinfo['blockswhitelist']:
            inputdataset['bwevents'] = dbs3.getEventCountDataSetBlockList(inputdataset['name'], wlinfo['blockswhitelist'])
        else:
            inputdataset['bwevents'] = inputdataset['events']
        #load reqlist and subscriptions
        getDatasetPhedexInfo(inputdataset)

    #info about output datasets
    #expectedtotalsize = sizeev * expectedevents / 1000000
    outputdataset = []
    wlinfo['outputdatasetinfo'] = outputdataset
    eventsdone = 0
    if wfObject.status in ['running','running-open','running-closed','completed','closed-out','announced']:
        for o in wfObject.outputDatasets:
            oel = {}
            oel['name'] = o
            
            #[oe,ost,ocreatets,olastmodts] = getdsdetail(o,timestamps=1)
            print "-",o, "-"
            oel['events'] = wfObject.getOutputEvents(o)
            oel['status'], oel['createts'], oel['lastmodts'] = dbs3.getDatasetInfo(o)
            #load reqlist and subscriptions
            getDatasetPhedexInfo(oel)

            eventsdone = eventsdone + oel['events']
            outputdataset.append(oel)


    #look for corresponding ACDCs
    wlinfo['acdc'] = []
    if wlinfo['prepid']:
        for a in allacdc:
            if wlinfo['prepid'] in a:
                wlinfo['acdc'].append(a)

    if wfObject.type == 'TaskChain':
        pass
    #Stuff only for non-taskchain workflows
    else:
        wlinfo['primaryds'] = wfObject.info['PrimaryDataset']

        #get custodial sites from all output
        sites = []
        for ds, info in wfObject.info['SubscriptionInformation'].items():
            sites += info['CustodialSites']
        wlinfo['custodialsites'] = sites
        wlinfo['events_per_job'] = getValue(wfObject.info, 'EventsPerJob')
        wlinfo['lumis_per_job'] =  getValue(wfObject.info, 'LumisPerJob')
        wlinfo['events_per_lumi'] = getValue(wfObject.info, 'EventsPerLumi')
        wlinfo['max_events_per_lumi'] = getValue(wfObject.info, 'max_events_per_lumi')
        wlinfo['filtereff'] = getValue(wfObject.info, 'FilterEfficiency', 1.0)
        
        #calculate cpu hours        
        wlinfo['expectedjobcpuhours'] = wlinfo['timeev'] * wlinfo['expectedevents'] / wlinfo['filtereff']

    return wlinfo
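
A minimal usage sketch of the function above, assuming the module-level dependencies it relies on (reqMgr, dbs3, getJobSummary, getDatasetPhedexInfo, getAssignmentDate, allacdc, DATE_FORMAT) are already configured; the request name and the printed keys are purely illustrative.

# hypothetical request name, for illustration only
wf = 'pdmvserv_task_EXO-RunIIExample-00001_v1'
info = getWorkflowInfo(wf)
print info['type'], info['status'], info['priority']
print "request is %.1f days old" % info['requestdays']
for ods in info['outputdatasetinfo']:
    print ods['name'], ods['events'], ods['status']
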
Example #2
def getWorkflowInfo(workflow):
    """
    creates a single dictionary with all workflow information
    """
    #TODO replace all with info or cache
    wfObject = reqMgr.Workflow(workflow)
    #wlinfo = getWorkloadParameters(workflow)

    #Global stuff - common to all request types
    wlinfo = {    # 'batch':batch  IGNORED - not useful
                'processingstring': wfObject.info['ProcessingString'],
                'requestname': workflow,
                'prepid': wfObject.info['PrepID'],
                'globaltag': wfObject.info['GlobalTag'],
                'timeev': wfObject.info['TimePerEvent'],
                'sizeev': wfObject.info['SizePerEvent'],
                'priority': wfObject.info['RequestPriority'],
                'sites': wfObject.info['Site Whitelist'],
                'acquisitionEra':wfObject.info['AcquisitionEra'],
                'processingVersion': wfObject.info['ProcessingVersion'],
                'campaign':wfObject.info['Campaign'],
                'cmssw':wfObject.info['CMSSWVersion'],
                'mergedLFNBase':wfObject.info['MergedLFNBase'],
                'type' : wfObject.type,
                'status':wfObject.status,
            }
    #get the given key from data, or 0 by default
    getValue = lambda data, k, dflt=0: data[k] if k in data else dflt

    #calculate general stuff
    #parse and format date
    now = datetime.datetime.now()
    dateArr = wfObject.info['RequestDate']
    #pad missing date fields with zeros
    if len(dateArr) != 6:
        dateArr += (6 - len(dateArr)) * [0]
    reqdate = datetime.datetime.strptime(
        "%s-%s-%s %s:%s:%s.0" % tuple(dateArr), DATE_FORMAT)
    wlinfo['reqdate'] = reqdate.strftime(DATE_FORMAT)
    #calculate the age of the request in days
    delta = now - reqdate
    days = delta.days + delta.seconds / 3600.0 / 24.0
    wlinfo['requestdays'] = days
    #assignment team and date
    if 'Assignments' in wfObject.info and wfObject.info['Assignments']:
        wlinfo['team'] = wfObject.info['Assignments'][0]
    else:
        wlinfo['team'] = ''
    wlinfo['update'] = getAssignmentDate(wfObject)

    wlinfo['expectedevents'] = getValue(wfObject.cache, 'TotalInputEvents')
    wlinfo['expectedjobs'] = getValue(wfObject.cache, 'TotalEstimatedJobs')

    #Job Information if available
    wlinfo['js'] = getJobSummary(workflow)

    #information about input dataset
    inputdataset = {}
    wlinfo['inputdatasetinfo'] = inputdataset
    if 'InputDatasets' in wfObject.info and wfObject.info['InputDatasets']:
        inputdataset['name'] = wfObject.info['InputDatasets'][0]

        wlinfo['blockswhitelist'] = getValue(wfObject.info, 'BlockWhitelist',
                                             None)
        if wlinfo['blockswhitelist']:
            wlinfo['blockswhitelist'] = eval(wlinfo['blockswhitelist'])

        inputdataset['events'] = dbs3.getEventCountDataSet(
            inputdataset['name'])
        dsinfo = dbs3.getDatasetInfo(inputdataset['name'])
        inputdataset['status'], wlinfo['inputdatasetinfo']['createts'], wlinfo[
            'inputdatasetinfo']['lastmodts'] = dsinfo

        #block events
        if wlinfo['blockswhitelist']:
            inputdataset['bwevents'] = dbs3.getEventCountDataSetBlockList(
                inputdataset['name'], wlinfo['blockswhitelist'])
        else:
            inputdataset['bwevents'] = inputdataset['events']
        #load reqlist and subscriptions
        getDatasetPhedexInfo(inputdataset)

    #info about output datasets
    #expectedtotalsize = sizeev * expectedevents / 1000000
    outputdataset = []
    wlinfo['outputdatasetinfo'] = outputdataset
    eventsdone = 0
    if wfObject.status in [
            'running', 'running-open', 'running-closed', 'completed',
            'closed-out', 'announced'
    ]:
        for o in wfObject.outputDatasets:
            oel = {}
            oel['name'] = o

            #[oe,ost,ocreatets,olastmodts] = getdsdetail(o,timestamps=1)
            print "-", o, "-"
            oel['events'] = wfObject.getOutputEvents(o)
            oel['status'], oel['createts'], oel[
                'lastmodts'] = dbs3.getDatasetInfo(o)
            #load reqlist and subscriptions
            getDatasetPhedexInfo(oel)

            eventsdone = eventsdone + oel['events']
            outputdataset.append(oel)

    #look for corresponding ACDCs
    wlinfo['acdc'] = []
    if wlinfo['prepid']:
        for a in allacdc:
            if wlinfo['prepid'] in a:
                wlinfo['acdc'].append(a)

    if wfObject.type == 'TaskChain':
        pass
    #Stuff only for non-taskchain workflows
    else:
        wlinfo['primaryds'] = wfObject.info['PrimaryDataset']

        #get custodial sites from all output
        sites = []
        for ds, info in wfObject.info['SubscriptionInformation'].items():
            sites += info['CustodialSites']
        wlinfo['custodialsites'] = sites
        wlinfo['events_per_job'] = getValue(wfObject.info, 'EventsPerJob')
        wlinfo['lumis_per_job'] = getValue(wfObject.info, 'LumisPerJob')
        wlinfo['events_per_lumi'] = getValue(wfObject.info, 'EventsPerLumi')
        wlinfo['max_events_per_lumi'] = getValue(wfObject.info,
                                                 'max_events_per_lumi')
        wlinfo['filtereff'] = getValue(wfObject.info, 'FilterEfficiency', 1.0)

        #calculate cpu hours
        wlinfo['expectedjobcpuhours'] = wlinfo['timeev'] * wlinfo[
            'expectedevents'] / wlinfo['filtereff']

    return wlinfo
Example #3
def getWorkflowInfo(workflow):
    """
    creates a single dictionary with all workflow information
    """
    
    wlinfo = getWorkloadParameters(workflow)
    timeev = wlinfo['timeev']
    sizeev = wlinfo['sizeev']
    prepid = wlinfo['prepid']
    sites = wlinfo['sites']
    custodialsites = wlinfo['custodialsites']
    events_per_job = wlinfo['events_per_job']
    lumis_per_job = wlinfo['lumis_per_job']
    blockswhitelist = wlinfo['blockswhitelist']

    #look for corresponding ACDCs
    acdc = []
    for a in allacdc:
        if prepid in a:
            acdc.append(a)
    #check for one T1 that is in the whitelist
    custodialt1 = '?'
    for i in sites:
        if 'T1_' in i:
            custodialt1 = i
            break
    
    #retrieve reqMgr info
    s = reqMgr.getWorkflowInfo('cmsweb.cern.ch', workflow)
    
    #parse information
    filtereff = float(s['FilterEfficiency']) if 'FilterEfficiency' in s else 1
    team = s['Assignments'] if 'Assignments' in s else ''
    team = team[0] if type(team) is list and team else team
    typee = s['RequestType'] if 'RequestType' in s else ''
    status = s['RequestStatus'] if 'RequestStatus' in s else ''
    
    if 'RequestSizeEvents' in s:
        reqevts = s['RequestSizeEvents']
    elif 'RequestNumEvents' in s:
        reqevts = s['RequestNumEvents']
    else:
        reqevts = 0
    
    inputdataset = {}
    if 'InputDatasets' in s and s['InputDatasets']:
        inputdataset['name'] = s['InputDatasets'][0]

    #TODO complete validation logic and expected run time
    
    if typee in ['MonteCarlo','LHEStepZero']:
    #if reqevts > 0:
        expectedevents = int(reqevts)
        if events_per_job > 0 and filtereff > 0:
            expectedjobs = int(math.ceil(expectedevents/(events_per_job*filtereff)))
            expectedjobcpuhours = int(timeev*(events_per_job*filtereff)/3600)
        else:
            expectedjobs = 0
            expectedjobcpuhours = 0
    elif typee in ['MonteCarloFromGEN','ReReco','ReDigi']:
        #datasets
        inputdataset['events'] = dbs3.getEventCountDataSet(inputdataset['name'])
        inputdataset['status'], inputdataset['createts'], inputdataset['lastmodts'] = dbs3.getDatasetInfo(inputdataset['name'])

        if blockswhitelist != []:
            inputdataset['bwevents'] = dbs3.getEventCountDataSetBlockList(inputdataset['name'],blockswhitelist)
        else:
            inputdataset['bwevents'] = inputdataset['events']

        if inputdataset['bwevents'] > 0 and filtereff > 0:
            expectedevents = int(filtereff*inputdataset['bwevents'])
        else:
            expectedevents = 0
        
        if events_per_job > 0 and filtereff > 0:
            expectedjobs = int(expectedevents/events_per_job)
        else:
            expectedjobs = 0
        
        try:
            expectedjobcpuhours = int(lumis_per_job*timeev*inputdataset['bwevents']/inputdataset['bwlumicount']/3600)
        except:
            expectedjobcpuhours = 0
        #TODO use phedexClient
        url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/RequestList?dataset=' + inputdataset['name']
        try:
            result = json.load(urllib.urlopen(url))
        except:
            print "Cannot get Requests List status from PhEDEx"
        try:
            r = result['phedex']['request']
        except:
            r = ''

        inputdataset['phreqinfo'] = []
        if r:
            for i in range(len(r)):
                phreqinfo = {}
                requested_by = r[i]['requested_by']
                nodes = []
                for j in range(len(r[i]['node'])):
                    nodes.append(r[i]['node'][j]['name'])
                id = r[i]['id']
                phreqinfo['nodes'] = nodes
                phreqinfo['id'] = id
                inputdataset['phreqinfo'].append(phreqinfo)
        #TODO use phedexclient
        url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/Subscriptions?dataset=' + inputdataset['name']
        try:
            result = json.load(urllib.urlopen(url))
        except:
            print "Cannot get Subscriptions from PhEDEx"
        inputdataset['phtrinfo'] = []
        try:
            print result
            rlist = result['phedex']['dataset'][0]['subscription']
            for r in rlist:
                phtrinfo = {}
                node = r['node']
                custodial = r['custodial']
                phtrinfo['node'] = node
                try:
                    phtrinfo['perc'] = int(float(r['percent_files']))
                except:
                    phtrinfo['perc'] = 0
                inputdataset['phtrinfo'].append(phtrinfo)
        except:
            r = {}

    else:
        expectedevents = -1
        expectedjobs = -1
        expectedjobcpuhours = -1
    
    expectedtotalsize = sizeev * expectedevents / 1000000
    conn = httplib.HTTPSConnection('cmsweb.cern.ch', cert_file=os.getenv('X509_USER_PROXY'), key_file=os.getenv('X509_USER_PROXY'))
    conn.request('GET', '/reqmgr/reqMgr/outputDatasetsByRequestName?requestName=' + workflow)
    r2 = conn.getresponse()
    data = r2.read()
    ods = json.loads(data)
    conn.close()
    if len(ods) == 0:
        print "No output datasets for this workflow: " + workflow
    outputdataset = []
    eventsdone = 0
    for o in ods:
        oel = {}
        oel['name'] = o
        if status in ['running','running-open','running-closed','completed','closed-out','announced']:
            #[oe,ost,ocreatets,olastmodts] = getdsdetail(o,timestamps=1)
            print "-",o, "-"
            oe = dbs3.getEventCountDataSet(o)
            ost, ocreatets, olastmodts = dbs3.getDatasetInfo(o)
            oel['events'] = oe
            oel['status'] = ost
            oel['createts'] = ocreatets
            oel['lastmodts'] = olastmodts
        
            phreqinfo = {}
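            # query the PhEDEx data service RequestList for transfer requests on this output dataset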
            url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/requestlist?dataset=' + o
            try:
                result = json.load(urllib.urlopen(url))
            except:
                print "Cannot get request subscription status from PhEDEx"
            try:
                r = result['phedex']['request']
            except:
                r = None
            if r:
                try:
                    for i in range(0,len(r)):
                        approval = r[i]['approval']
                        requested_by = r[i]['requested_by']
                        custodialsite = r[i]['node'][0]['name']
                        id = r[i]['id']
                        if '_MSS' in custodialsite:
                            phreqinfo['custodialsite'] = custodialsite
                            phreqinfo['requested_by'] = requested_by
                            phreqinfo['approval'] = approval
                            phreqinfo['id'] = id
                except:
                    print "Error getting subscription status from Phedex"
                oel['phreqinfo'] = phreqinfo
        
            phtrinfo = {}
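            # query PhEDEx subscriptions to get per-node transfer status for this output dataset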
            url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/subscriptions?dataset=' + o
            try:
                result = json.load(urllib.urlopen(url))
            except:
                print "Cannot get transfer status from PhEDEx"
            try:
                rlist = result['phedex']['dataset'][0]['subscription']
            except:
                rlist = []
            
            phtrinfo = {}
            oel['phtrinfo'] = []
            for r in rlist:
                phtrinfo = {}
                node = r['node']
                custodial = r['custodial']
                if r['move'] == 'n':
                    phtype = 'Replica'
                else:
                    phtype = 'Move'
                phtrinfo['node'] = node
                phtrinfo['custodial'] = r['custodial']
                phtrinfo['time_create'] = datetime.datetime.fromtimestamp(int(r['time_create'])).ctime()
                phtrinfo['time_create_days'] = (datetime.datetime.now() - datetime.datetime.fromtimestamp(int(r['time_create']))).days
                try:
                    phtrinfo['perc'] = int(float(r['percent_files']))
                except:
                    phtrinfo['perc'] = 0
                phtrinfo['type'] = phtype

                oel['phtrinfo'].append(phtrinfo)
            eventsdone = eventsdone + oe
        else:
            eventsdone = 0
        outputdataset.append(oel)

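    # timeev (TimePerEvent) is assumed to be in seconds per event, so dividing by 3600 gives CPU hours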
    cpuhours = timeev*expectedevents/3600
    remainingcpuhours = max(0,timeev*(expectedevents-eventsdone)/3600)

    realremainingcpudays = 0
    totalslots = 0
    #pledge calculation
    for (psite,pslots) in pledged.items():
        if psite in sites:
            totalslots = totalslots + pslots
    if totalslots == 0:
        realremainingcpudays = 0
    else:
        realremainingcpudays = float(remainingcpuhours) / 24 / totalslots 
    try:
        zone = t2zone[custodialsites[0]]
    except:
        zone = '?'
    if workflow in jlist.keys():
        js = jlist[workflow]
    else:
        js = {}
    
    if status in LIVE_STATUS:
        updatedate = getAssignmentDate(workflow)
    else:
        updatedate = None
    wlinfo.update( {'filtereff':filtereff,'type':typee,'status':status,'expectedevents':expectedevents,
                    'inputdatasetinfo':inputdataset,'timeev':timeev,'sizeev':sizeev,'sites':sites,
                    'zone':zone,'js':js,'outputdatasetinfo':outputdataset,'cpuhours':cpuhours,
                    'realremainingcpudays':realremainingcpudays,'remainingcpuhours':remainingcpuhours,
                    'team':team,'expectedjobs':expectedjobs,'expectedjobcpuhours':expectedjobcpuhours,'acdc':acdc,
                    'update':updatedate} )
    return wlinfo