def getWorkflowInfo(workflow): """ creates a single dictionary with all workflow information """ #TODO replace all with info or cache wfObject = reqMgr.Workflow(workflow) #wlinfo = getWorkloadParameters(workflow) #Global stuff - common for all types of request wlinfo = { # 'batch':batch IGNORED - not useful 'processingstring': wfObject.info['ProcessingString'], 'requestname': workflow, 'prepid': wfObject.info['PrepID'], 'globaltag': wfObject.info['GlobalTag'], 'timeev': wfObject.info['TimePerEvent'], 'sizeev': wfObject.info['SizePerEvent'], 'priority': wfObject.info['RequestPriority'], 'sites': wfObject.info['Site Whitelist'], 'acquisitionEra':wfObject.info['AcquisitionEra'], 'processingVersion': wfObject.info['ProcessingVersion'], 'campaign':wfObject.info['Campaign'], 'cmssw':wfObject.info['CMSSWVersion'], 'mergedLFNBase':wfObject.info['MergedLFNBase'], 'type' : wfObject.type, 'status':wfObject.status, } #get the following keys, or 0 by defaulr, getValue = lambda data, k, dflt = 0: data[k] if k in data else dflt; #calculate general stuff #parse and format date now = datetime.datetime.now() dateArr = wfObject.info['RequestDate'] #fill with missing zeros if len(dateArr) != 6: dateArr += (6-len(dateArr))*[0] reqdate = datetime.datetime.strptime( "%s-%s-%s %s:%s:%s.0"%tuple(dateArr) , DATE_FORMAT) wlinfo['reqdate'] = reqdate.strftime(DATE_FORMAT) #calculate days old of the request delta = now - reqdate days = delta.days + delta.seconds / 3600.0 / 24.0 wlinfo['requestdays'] = days #assignment team and date if 'Assignments' in wfObject.info and wfObject.info['Assignments']: wlinfo['team'] = wfObject.info['Assignments'][0] else: wlinfo['team'] = '' wlinfo['update'] = getAssignmentDate(wfObject) wlinfo['expectedevents'] = getValue(wfObject.cache, 'TotalInputEvents') wlinfo['expectedjobs'] = getValue(wfObject.cache, 'TotalEstimatedJobs') #Job Information if available wlinfo['js'] = getJobSummary(workflow) #information about input dataset inputdataset = {} 
wlinfo['inputdatasetinfo'] = inputdataset if 'InputDatasets' in wfObject.info and wfObject.info['InputDatasets']: inputdataset['name'] = wfObject.info['InputDatasets'][0] wlinfo['blockswhitelist'] = getValue(wfObject.info, 'BlockWhitelist', None) if wlinfo['blockswhitelist']: wlinfo['blockswhitelist'] = eval(wlinfo['blockswhitelist']) inputdataset['events'] = dbs3.getEventCountDataSet(inputdataset['name']) dsinfo = dbs3.getDatasetInfo(inputdataset['name']) inputdataset['status'], wlinfo['inputdatasetinfo']['createts'], wlinfo['inputdatasetinfo']['lastmodts'] = dsinfo #block events if wlinfo['blockswhitelist']: inputdataset['bwevents'] = dbs3.getEventCountDataSetBlockList(inputdataset['name'], wlinfo['blockswhitelist']) else: inputdataset['bwevents'] = inputdataset['events'] #load reqlist and subscriptions getDatasetPhedexInfo(inputdataset) #info about output datasets #expectedtotalsize = sizeev * expectedevents / 1000000 outputdataset = [] wlinfo['outputdatasetinfo'] = outputdataset eventsdone = 0 if wfObject.status in ['running','running-open','running-closed','completed','closed-out','announced']: for o in wfObject.outputDatasets: oel = {} oel['name'] = o #[oe,ost,ocreatets,olastmodts] = getdsdetail(o,timestamps=1) print "-",o, "-" oel['events'] = wfObject.getOutputEvents(o) oel['status'], oel['createts'], oel['lastmodts'] = dbs3.getDatasetInfo(o) #load reqlist and subscriptions getDatasetPhedexInfo(oel) eventsdone = eventsdone + oel['events'] outputdataset.append(oel) #look for correspondin acdc's wlinfo['acdc'] = [] if wlinfo['prepid']: for a in allacdc: if wlinfo['prepid'] in a: wlinfo['acdc'].append(a) if wfObject.type == 'TaskChain': pass #Stuff only for non-taskchain workflows else: wlinfo['primaryds'] = wfObject.info['PrimaryDataset'], #get custodial sites from all output sites = [] for ds, info in wfObject.info['SubscriptionInformation'].items(): sites += info['CustodialSites'] wlinfo['custodialsites'] = sites wlinfo['events_per_job'] = 
getValue(wfObject.info, 'EventsPerJob') wlinfo['lumis_per_job'] = getValue(wfObject.info, 'LumisPerJob') wlinfo['events_per_lumi'] = getValue(wfObject.info, 'EventsPerLumi') wlinfo['max_events_per_lumi'] = getValue(wfObject.info, 'max_events_per_lumi') wlinfo['filtereff'] = getValue(wfObject.info, 'FilterEfficiency', 1.0) #calculate cpu hours wlinfo['expectedjobcpuhours'] = wlinfo['timeev'] * wlinfo['expectedevents'] / wlinfo['filtereff'] return wlinfo
def getWorkflowInfo(workflow): """ creates a single dictionary with all workflow information """ #TODO replace all with info or cache wfObject = reqMgr.Workflow(workflow) #wlinfo = getWorkloadParameters(workflow) #Global stuff - common for all types of request wlinfo = { # 'batch':batch IGNORED - not useful 'processingstring': wfObject.info['ProcessingString'], 'requestname': workflow, 'prepid': wfObject.info['PrepID'], 'globaltag': wfObject.info['GlobalTag'], 'timeev': wfObject.info['TimePerEvent'], 'sizeev': wfObject.info['SizePerEvent'], 'priority': wfObject.info['RequestPriority'], 'sites': wfObject.info['Site Whitelist'], 'acquisitionEra':wfObject.info['AcquisitionEra'], 'processingVersion': wfObject.info['ProcessingVersion'], 'campaign':wfObject.info['Campaign'], 'cmssw':wfObject.info['CMSSWVersion'], 'mergedLFNBase':wfObject.info['MergedLFNBase'], 'type' : wfObject.type, 'status':wfObject.status, } #get the following keys, or 0 by defaulr, getValue = lambda data, k, dflt=0: data[k] if k in data else dflt #calculate general stuff #parse and format date now = datetime.datetime.now() dateArr = wfObject.info['RequestDate'] #fill with missing zeros if len(dateArr) != 6: dateArr += (6 - len(dateArr)) * [0] reqdate = datetime.datetime.strptime( "%s-%s-%s %s:%s:%s.0" % tuple(dateArr), DATE_FORMAT) wlinfo['reqdate'] = reqdate.strftime(DATE_FORMAT) #calculate days old of the request delta = now - reqdate days = delta.days + delta.seconds / 3600.0 / 24.0 wlinfo['requestdays'] = days #assignment team and date if 'Assignments' in wfObject.info and wfObject.info['Assignments']: wlinfo['team'] = wfObject.info['Assignments'][0] else: wlinfo['team'] = '' wlinfo['update'] = getAssignmentDate(wfObject) wlinfo['expectedevents'] = getValue(wfObject.cache, 'TotalInputEvents') wlinfo['expectedjobs'] = getValue(wfObject.cache, 'TotalEstimatedJobs') #Job Information if available wlinfo['js'] = getJobSummary(workflow) #information about input dataset inputdataset = {} 
wlinfo['inputdatasetinfo'] = inputdataset if 'InputDatasets' in wfObject.info and wfObject.info['InputDatasets']: inputdataset['name'] = wfObject.info['InputDatasets'][0] wlinfo['blockswhitelist'] = getValue(wfObject.info, 'BlockWhitelist', None) if wlinfo['blockswhitelist']: wlinfo['blockswhitelist'] = eval(wlinfo['blockswhitelist']) inputdataset['events'] = dbs3.getEventCountDataSet( inputdataset['name']) dsinfo = dbs3.getDatasetInfo(inputdataset['name']) inputdataset['status'], wlinfo['inputdatasetinfo']['createts'], wlinfo[ 'inputdatasetinfo']['lastmodts'] = dsinfo #block events if wlinfo['blockswhitelist']: inputdataset['bwevents'] = dbs3.getEventCountDataSetBlockList( inputdataset['name'], wlinfo['blockswhitelist']) else: inputdataset['bwevents'] = inputdataset['events'] #load reqlist and subscriptions getDatasetPhedexInfo(inputdataset) #info about output datasets #expectedtotalsize = sizeev * expectedevents / 1000000 outputdataset = [] wlinfo['outputdatasetinfo'] = outputdataset eventsdone = 0 if wfObject.status in [ 'running', 'running-open', 'running-closed', 'completed', 'closed-out', 'announced' ]: for o in wfObject.outputDatasets: oel = {} oel['name'] = o #[oe,ost,ocreatets,olastmodts] = getdsdetail(o,timestamps=1) print "-", o, "-" oel['events'] = wfObject.getOutputEvents(o) oel['status'], oel['createts'], oel[ 'lastmodts'] = dbs3.getDatasetInfo(o) #load reqlist and subscriptions getDatasetPhedexInfo(oel) eventsdone = eventsdone + oel['events'] outputdataset.append(oel) #look for correspondin acdc's wlinfo['acdc'] = [] if wlinfo['prepid']: for a in allacdc: if wlinfo['prepid'] in a: wlinfo['acdc'].append(a) if wfObject.type == 'TaskChain': pass #Stuff only for non-taskchain workflows else: wlinfo['primaryds'] = wfObject.info['PrimaryDataset'], #get custodial sites from all output sites = [] for ds, info in wfObject.info['SubscriptionInformation'].items(): sites += info['CustodialSites'] wlinfo['custodialsites'] = sites wlinfo['events_per_job'] = 
getValue(wfObject.info, 'EventsPerJob') wlinfo['lumis_per_job'] = getValue(wfObject.info, 'LumisPerJob') wlinfo['events_per_lumi'] = getValue(wfObject.info, 'EventsPerLumi') wlinfo['max_events_per_lumi'] = getValue(wfObject.info, 'max_events_per_lumi') wlinfo['filtereff'] = getValue(wfObject.info, 'FilterEfficiency', 1.0) #calculate cpu hours wlinfo['expectedjobcpuhours'] = wlinfo['timeev'] * wlinfo[ 'expectedevents'] / wlinfo['filtereff'] return wlinfo
def getWorkflowInfo(workflow):
    """ creates a single dictionary with all workflow information """
    # NOTE(review): this is the legacy implementation; two newer versions of
    # the same function exist earlier in this file, and this definition is
    # the one in effect since it is defined last.
    # Start from the workload parameters and enrich them below.
    wlinfo = getWorkloadParameters(workflow)
    timeev = wlinfo['timeev']
    sizeev = wlinfo['sizeev']
    prepid = wlinfo['prepid']
    sites = wlinfo['sites']
    custodialsites = wlinfo['custodialsites']
    events_per_job = wlinfo['events_per_job']
    lumis_per_job = wlinfo['lumis_per_job']
    blockswhitelist = wlinfo['blockswhitelist']
    #look for corresponding acdc's (matched by prepid substring)
    acdc = []
    for a in allacdc:
        if prepid in a:
            acdc.append(a)
    #check for one T1 that is in the whitelist
    # NOTE(review): custodialt1 is computed but never used below.
    custodialt1 = '?'
    for i in sites:
        if 'T1_' in i:
            custodialt1 = i
            break
    #retrieve reqMgr info
    s = reqMgr.getWorkflowInfo('cmsweb.cern.ch', workflow)
    #parse information, falling back to defaults for missing keys
    filtereff = float(s['FilterEfficiency']) if 'FilterEfficiency' in s else 1
    team = s['Assignments'] if 'Assignments' in s else ''
    team = team[0] if type(team) is list and team else team
    typee = s['RequestType'] if 'RequestType' in s else ''
    status = s['RequestStatus'] if 'RequestStatus' in s else ''
    # NOTE(review): if neither key is present, reqevts stays unbound and the
    # MonteCarlo branch below will raise NameError — confirm upstream always
    # provides one of the two.
    if 'RequestSizeEvents' in s:
        reqevts = s['RequestSizeEvents']
    elif 'RequestNumEvents' in s:
        reqevts = s['RequestNumEvents']
    inputdataset = {}
    if 'InputDatasets' in s and s['InputDatasets']:
        inputdataset['name'] = s['InputDatasets'][0]
    #TODO complete validation logic and expected run time
    if typee in ['MonteCarlo','LHEStepZero']:
        # Generator-level request: estimates derive from the requested events
        #if reqevts > 0:
        expectedevents = int(reqevts)
        if events_per_job > 0 and filtereff > 0:
            expectedjobs = int(math.ceil(expectedevents/(events_per_job*filtereff)))
            expectedjobcpuhours = int(timeev*(events_per_job*filtereff)/3600)
        else:
            expectedjobs = 0
            expectedjobcpuhours = 0
    elif typee in ['MonteCarloFromGEN','ReReco','ReDigi']:
        #datasets: estimates derive from the input dataset event counts
        inputdataset['events'] = dbs3.getEventCountDataSet(inputdataset['name'])
        inputdataset['status'], inputdataset['createts'], inputdataset['lastmodts'] = dbs3.getDatasetInfo(inputdataset['name'])
        # Restrict the event count to the block whitelist when one is set
        if blockswhitelist != []:
            inputdataset['bwevents'] = dbs3.getEventCountDataSetBlockList(inputdataset['name'],blockswhitelist)
        else:
            inputdataset['bwevents'] = inputdataset['events']
        if inputdataset['bwevents'] > 0 and filtereff > 0:
            expectedevents = int(filtereff*inputdataset['bwevents'])
        else:
            expectedevents = 0
        if events_per_job > 0 and filtereff > 0:
            expectedjobs = int(expectedevents/events_per_job)
        else:
            expectedjobs = 0
        # NOTE(review): 'bwlumicount' is not set anywhere in this function,
        # so this bare except likely always fires — confirm intent.
        try:
            expectedjobcpuhours = int(lumis_per_job*timeev*inputdataset['bwevents']/inputdataset['bwlumicount']/3600)
        except:
            expectedjobcpuhours = 0
        #TODO use phedexClient
        # PhEDEx transfer-request list for the input dataset
        url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/RequestList?dataset=' + inputdataset['name']
        # NOTE(review): if the fetch fails, 'result' keeps its previous value
        # (or is unbound) and the next try/except silently falls back.
        try:
            result = json.load(urllib.urlopen(url))
        except:
            print "Cannot get Requests List status from PhEDEx"
        try:
            r = result['phedex']['request']
        except:
            r = ''
        inputdataset['phreqinfo'] = []
        if r:
            for i in range(len(r)):
                phreqinfo = {}
                # NOTE(review): requested_by is read but never stored.
                requested_by = r[i]['requested_by']
                nodes = []
                for j in range(len(r[i]['node'])):
                    nodes.append(r[i]['node'][j]['name'])
                id = r[i]['id']
                phreqinfo['nodes'] = nodes
                phreqinfo['id'] = id
                inputdataset['phreqinfo'].append(phreqinfo)
        #TODO use phedexclient
        # PhEDEx subscriptions (transfer state) for the input dataset
        url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/Subscriptions?dataset=' + inputdataset['name']
        try:
            result = json.load(urllib.urlopen(url))
        except:
            print "Cannot get Subscriptions from PhEDEx"
        inputdataset['phtrinfo'] = []
        try:
            print result
            rlist = result['phedex']['dataset'][0]['subscription']
            for r in rlist:
                phtrinfo = {}
                node = r['node']
                # NOTE(review): custodial is read but never stored here.
                custodial = r['custodial']
                phtrinfo['node'] = node
                try:
                    phtrinfo['perc'] = int(float(r['percent_files']))
                except:
                    phtrinfo['perc'] = 0
                inputdataset['phtrinfo'].append(phtrinfo)
        except:
            r = {}
    else:
        # Unknown request type: mark all estimates as unavailable
        expectedevents = -1
        expectedjobs = -1
        expectedjobcpuhours = -1
    # NOTE(review): expectedtotalsize is computed but never used or returned.
    expectedtotalsize = sizeev * expectedevents / 1000000
    # Fetch the list of output datasets from ReqMgr over HTTPS with the
    # user's grid proxy as both certificate and key.
    conn = httplib.HTTPSConnection('cmsweb.cern.ch', cert_file = os.getenv('X509_USER_PROXY'), key_file = os.getenv('X509_USER_PROXY'))
    # NOTE(review): HTTPConnection.request() returns None, so r1 is unused.
    r1=conn.request('GET','/reqmgr/reqMgr/outputDatasetsByRequestName?requestName=' + workflow)
    r2=conn.getresponse()
    data = r2.read()
    ods = json.loads(data)
    conn.close()
    if len(ods)==0:
        print "No Outpudatasets for this workflow: "+workflow
    # Per-output-dataset details (events + PhEDEx request/transfer state),
    # only gathered once the request is at least running.
    outputdataset = []
    eventsdone = 0
    for o in ods:
        oel = {}
        oel['name'] = o
        if status in ['running','running-open','running-closed','completed','closed-out','announced']:
            #[oe,ost,ocreatets,olastmodts] = getdsdetail(o,timestamps=1)
            print "-",o, "-"
            oe = dbs3.getEventCountDataSet(o)
            ost, ocreatets, olastmodts = dbs3.getDatasetInfo(o)
            oel['events'] = oe
            oel['status'] = ost
            oel['createts'] = ocreatets
            oel['lastmodts'] = olastmodts
            # PhEDEx transfer requests for this output dataset; keep only the
            # custodial (tape, '_MSS') destination if present.
            phreqinfo = {}
            url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/requestlist?dataset=' + o
            try:
                result = json.load(urllib.urlopen(url))
            except:
                print "Cannot get request subscription status from PhEDEx"
            try:
                r = result['phedex']['request']
            except:
                r = None
            if r:
                try:
                    for i in range(0,len(r)):
                        approval = r[i]['approval']
                        requested_by = r[i]['requested_by']
                        custodialsite = r[i]['node'][0]['name']
                        id = r[i]['id']
                        if '_MSS' in custodialsite:
                            phreqinfo['custodialsite'] = custodialsite
                            phreqinfo['requested_by'] = requested_by
                            phreqinfo['approval'] = approval
                            phreqinfo['id'] = id
                except:
                    print "Error getting subscription status from Phedex"
            oel['phreqinfo'] = phreqinfo
            # PhEDEx subscriptions (replica/move, completion %) per node
            phtrinfo = {}
            url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/subscriptions?dataset=' + o
            try:
                result = json.load(urllib.urlopen(url))
            except:
                print "Cannot get transfer status from PhEDEx"
            try:
                rlist = result['phedex']['dataset'][0]['subscription']
            except:
                rlist = []
            phtrinfo = {}
            oel['phtrinfo'] = []
            for r in rlist:
                phtrinfo = {}
                node = r['node']
                custodial = r['custodial']
                # 'move' flag distinguishes replica vs. move subscriptions
                if r['move'] == 'n':
                    phtype = 'Replica'
                else:
                    phtype = 'Move'
                phtrinfo['node'] = node
                phtrinfo['custodial'] = r['custodial']
                phtrinfo['time_create'] = datetime.datetime.fromtimestamp(int(r['time_create'])).ctime()
                phtrinfo['time_create_days'] = (datetime.datetime.now() - datetime.datetime.fromtimestamp(int(r['time_create']))).days
                try:
                    phtrinfo['perc'] = int(float(r['percent_files']))
                except:
                    phtrinfo['perc'] = 0
                phtrinfo['type'] = phtype
                oel['phtrinfo'].append(phtrinfo)
            eventsdone = eventsdone + oe
        else:
            # NOTE(review): this resets (not just skips) the running total —
            # confirm that is intended for non-live statuses.
            eventsdone = 0
        outputdataset.append(oel)
    # CPU accounting: total, remaining, and remaining days scaled by the
    # pledged slots at the whitelisted sites.
    cpuhours = timeev*expectedevents/3600
    remainingcpuhours = max(0,timeev*(expectedevents-eventsdone)/3600)
    realremainingcpudays = 0
    totalslots = 0
    #pledge calculation
    for (psite,pslots) in pledged.items():
        if psite in sites:
            totalslots = totalslots + pslots
    if totalslots == 0:
        realremainingcpudays = 0
    else:
        realremainingcpudays = float(remainingcpuhours) / 24 / totalslots
    # Zone of the first custodial site; '?' when unknown or list is empty
    try:
        zone = t2zone[custodialsites[0]]
    except:
        zone = '?'
    # Job summary for this workflow, if one was collected
    if workflow in jlist.keys():
        js = jlist[workflow]
    else:
        js = {}
    # Assignment date only makes sense while the request is live
    if status in LIVE_STATUS:
        updatedate = getAssignmentDate(workflow)
    else:
        updatedate = None
    # Merge everything gathered above into the returned dictionary
    wlinfo.update( {'filtereff':filtereff,'type':typee,'status':status,'expectedevents':expectedevents,
     'inputdatasetinfo':inputdataset,'timeev':timeev,'sizeev':sizeev,'sites':sites,
     'zone':zone,'js':js,'outputdatasetinfo':outputdataset,'cpuhours':cpuhours,
     'realremainingcpudays':realremainingcpudays,'remainingcpuhours':remainingcpuhours,
     'team':team,'expectedjobs':expectedjobs,'expectedjobcpuhours':expectedjobcpuhours,'acdc':acdc,
     'update':updatedate} )
    return wlinfo