def getWorkflowInfo(workflow):
    """Create a single dictionary with the basic workflow information.

    Starts from the dictionary returned by ``getWorkloadParameters(workflow)``
    and adds the request attributes read from ReqMgr on ``cmsweb.cern.ch``.

    NOTE(review): this file defines ``getWorkflowInfo`` a second time further
    down; a later ``def`` rebinds the name, so this reduced version is
    presumably dead code -- confirm before relying on it.

    Args:
        workflow: name of the workflow/request to look up.

    Returns:
        The workload-parameter dictionary, updated in place with the keys
        ``filtereff``, ``type``, ``status``, ``inputdatasetinfo``, ``timeev``,
        ``sizeev``, ``sites``, ``js``, ``team``, ``acdc`` and ``update``.

    Raises:
        KeyError: if the workload parameters lack ``prepid``, ``timeev``,
            ``sizeev`` or ``sites``.
    """
    wlinfo = getWorkloadParameters(workflow)
    prepid = wlinfo['prepid']

    # look for the corresponding ACDCs: every known ACDC name containing the prepid
    acdc = [a for a in allacdc if prepid in a]

    # retrieve reqMgr info
    s = reqMgr.getWorkflowInfo('cmsweb.cern.ch', workflow)

    # parse information, falling back to neutral defaults for missing fields
    filtereff = float(s['FilterEfficiency']) if 'FilterEfficiency' in s else 1
    team = s.get('Assignments', '')
    if isinstance(team, list) and team:
        # only the first assigned team is reported
        team = team[0]
    typee = s.get('RequestType', '')
    status = s.get('RequestStatus', '')

    # TODO complete validation logic and expected run time
    inputdataset = {'phreqinfo': [], 'phtrinfo': []}
    if s.get('InputDatasets'):
        inputdataset['name'] = s['InputDatasets'][0]

    js = jlist.get(workflow, {})

    # the assignment date is only looked up for workflows in a live status
    updatedate = getAssignmentDate(workflow) if status in LIVE_STATUS else None

    wlinfo.update({
        'filtereff': filtereff,
        'type': typee,
        'status': status,
        'inputdatasetinfo': inputdataset,
        'timeev': wlinfo['timeev'],
        'sizeev': wlinfo['sizeev'],
        'sites': wlinfo['sites'],
        'js': js,
        'team': team,
        'acdc': acdc,
        'update': updatedate,
    })
    return wlinfo
def getAssignmentDate(workflow):
    """Return when *workflow* was moved to the 'assigned' status.

    The request is fetched from ReqMgr on ``cmsweb.cern.ch`` and its
    ``RequestTransition`` list is scanned. Each entry's ``UpdateTime`` is an
    epoch timestamp; it is converted with ``datetime.fromtimestamp`` and
    rendered as ``'%Y-%m-%d %H:%M:%S.%f'``.

    Returns:
        The formatted date string of the last 'assigned' transition in the
        list, or None when the list holds no such transition.
    """
    request = reqMgr.getWorkflowInfo('cmsweb.cern.ch', workflow)
    assigned = None
    for transition in request['RequestTransition']:
        if transition['Status'] != 'assigned':
            continue
        # no early exit: a later 'assigned' entry overrides an earlier one
        stamp = datetime.datetime.fromtimestamp(transition['UpdateTime'])
        assigned = stamp.strftime('%Y-%m-%d %H:%M:%S.%f')
    return assigned
def _dig(payload, *path):
    """Walk *path* (dict keys / list indices) through nested JSON data.

    Returns the value found, or None when any step is missing or the value at
    that step cannot be indexed (this includes ``payload`` being None).
    """
    node = payload
    try:
        for step in path:
            node = node[step]
    except (KeyError, IndexError, TypeError):
        return None
    return node


def _loadPhedexJson(url, errmsg):
    """Fetch *url* and decode the response body as JSON.

    On any failure *errmsg* is printed and None is returned, so a caller can
    never end up inspecting the response of an earlier, unrelated request.
    """
    try:
        response = urllib.urlopen(url)
        try:
            return json.load(response)
        finally:
            response.close()
    except Exception:
        # best effort: the PhEDEx details are optional, so report and carry on
        print(errmsg)
        return None


def _getOutputDatasetNames(workflow):
    """Ask ReqMgr on cmsweb.cern.ch for the output datasets of *workflow*.

    The file named by the X509_USER_PROXY environment variable is passed as
    both certificate and key. Returns the decoded JSON response body.
    """
    proxy = os.getenv('X509_USER_PROXY')
    conn = httplib.HTTPSConnection('cmsweb.cern.ch', cert_file=proxy, key_file=proxy)
    try:
        conn.request('GET', '/reqmgr/reqMgr/outputDatasetsByRequestName?requestName=' + workflow)
        return json.loads(conn.getresponse().read())
    finally:
        # release the connection even when the request or the decoding fails
        conn.close()


def _inputDatasetPhedexInfo(name):
    """Collect the PhEDEx requests and subscriptions of input dataset *name*.

    Returns a tuple ``(phreqinfo, phtrinfo)``: a list of ``{'nodes', 'id'}``
    dictionaries (one per request) and a list of ``{'node', 'perc'}``
    dictionaries (one per subscription of the first dataset entry).
    """
    # TODO use phedexClient
    payload = _loadPhedexJson(
        'https://cmsweb.cern.ch/phedex/datasvc/json/prod/RequestList?dataset=' + name,
        "Cannot get Requests List status from PhEDEx")
    phreqinfo = []
    for req in _dig(payload, 'phedex', 'request') or []:
        phreqinfo.append({
            'nodes': [node['name'] for node in req['node']],
            'id': req['id'],
        })

    # TODO use phedexclient
    payload = _loadPhedexJson(
        'https://cmsweb.cern.ch/phedex/datasvc/json/prod/Subscriptions?dataset=' + name,
        "Cannot get Subscriptions from PhEDEx")
    phtrinfo = []
    try:
        for sub in _dig(payload, 'phedex', 'dataset', 0, 'subscription') or []:
            entry = {'node': sub['node']}
            try:
                entry['perc'] = int(float(sub['percent_files']))
            except (KeyError, TypeError, ValueError):
                entry['perc'] = 0
            phtrinfo.append(entry)
    except (KeyError, TypeError):
        # best effort: a malformed entry ends the scan, keeping what was read
        pass
    return phreqinfo, phtrinfo


def _outputDatasetDetails(name):
    """Collect the DBS and PhEDEx details of output dataset *name*.

    Returns a tuple ``(details, events)`` where ``details`` holds the keys
    ``events``, ``status``, ``createts``, ``lastmodts``, ``phreqinfo`` and
    ``phtrinfo``, and ``events`` is the event count reported by DBS.
    """
    print("- %s -" % (name,))
    events = dbs3.getEventCountDataSet(name)
    dsstatus, createts, lastmodts = dbs3.getDatasetInfo(name)
    details = {
        'events': events,
        'status': dsstatus,
        'createts': createts,
        'lastmodts': lastmodts,
    }

    # transfer request whose first node is an _MSS node; the last match wins
    payload = _loadPhedexJson(
        'https://cmsweb.cern.ch/phedex/datasvc/json/prod/requestlist?dataset=' + name,
        "Cannot get request subscription status from PhEDEx")
    phreqinfo = {}
    requests = _dig(payload, 'phedex', 'request')
    if requests:
        try:
            for req in requests:
                approval = req['approval']
                requested_by = req['requested_by']
                custodialsite = req['node'][0]['name']
                reqid = req['id']
                if '_MSS' in custodialsite:
                    phreqinfo['custodialsite'] = custodialsite
                    phreqinfo['requested_by'] = requested_by
                    phreqinfo['approval'] = approval
                    phreqinfo['id'] = reqid
        except (KeyError, IndexError, TypeError):
            print("Error getting subscription status from Phedex")
    details['phreqinfo'] = phreqinfo

    # one entry per subscription of the first dataset entry in the response
    payload = _loadPhedexJson(
        'https://cmsweb.cern.ch/phedex/datasvc/json/prod/subscriptions?dataset=' + name,
        "Cannot get transfer status from PhEDEx")
    phtrinfo = []
    for sub in _dig(payload, 'phedex', 'dataset', 0, 'subscription') or []:
        created = datetime.datetime.fromtimestamp(int(sub['time_create']))
        try:
            perc = int(float(sub['percent_files']))
        except (KeyError, TypeError, ValueError):
            perc = 0
        phtrinfo.append({
            'node': sub['node'],
            'custodial': sub['custodial'],
            'time_create': created.ctime(),
            'time_create_days': (datetime.datetime.now() - created).days,
            'perc': perc,
            'type': 'Replica' if sub['move'] == 'n' else 'Move',
        })
    details['phtrinfo'] = phtrinfo
    return details, events


def getWorkflowInfo(workflow):
    """Create a single dictionary with all workflow information.

    Combines the workload parameters from ``getWorkloadParameters``, the
    request attributes from ReqMgr, event counts from DBS, transfer details
    from PhEDEx and a CPU-time estimate based on the module-level ``pledged``
    slots.

    Args:
        workflow: name of the workflow/request to look up.

    Returns:
        The workload-parameter dictionary, updated in place with the keys
        ``filtereff``, ``type``, ``status``, ``expectedevents``,
        ``inputdatasetinfo``, ``timeev``, ``sizeev``, ``sites``, ``zone``,
        ``js``, ``outputdatasetinfo``, ``cpuhours``, ``realremainingcpudays``,
        ``remainingcpuhours``, ``team``, ``expectedjobs``,
        ``expectedjobcpuhours``, ``acdc`` and ``update``. For request types
        this function does not know, the three ``expected*`` values are -1.

    Raises:
        KeyError: if the workload parameters lack one of the keys read below,
            or if the request type is dataset based ('MonteCarloFromGEN',
            'ReReco', 'ReDigi') but ReqMgr reported no input dataset.
    """
    wlinfo = getWorkloadParameters(workflow)
    timeev = wlinfo['timeev']
    sizeev = wlinfo['sizeev']
    prepid = wlinfo['prepid']
    sites = wlinfo['sites']
    custodialsites = wlinfo['custodialsites']
    events_per_job = wlinfo['events_per_job']
    lumis_per_job = wlinfo['lumis_per_job']
    blockswhitelist = wlinfo['blockswhitelist']

    # look for the corresponding ACDCs: every known ACDC name containing the prepid
    acdc = [a for a in allacdc if prepid in a]

    # retrieve reqMgr info
    s = reqMgr.getWorkflowInfo('cmsweb.cern.ch', workflow)

    # parse information, falling back to neutral defaults for missing fields
    filtereff = float(s['FilterEfficiency']) if 'FilterEfficiency' in s else 1
    team = s.get('Assignments', '')
    if isinstance(team, list) and team:
        # only the first assigned team is reported
        team = team[0]
    typee = s.get('RequestType', '')
    status = s.get('RequestStatus', '')
    # 0 when the request carries neither event-count field, so the
    # generator branch below cannot hit an unbound name
    reqevts = s.get('RequestSizeEvents', s.get('RequestNumEvents', 0))

    inputdataset = {}
    if s.get('InputDatasets'):
        inputdataset['name'] = s['InputDatasets'][0]

    # TODO complete validation logic and expected run time
    if typee in ('MonteCarlo', 'LHEStepZero'):
        expectedevents = int(reqevts)
        if events_per_job > 0 and filtereff > 0:
            # float() keeps the quotient fractional even when filtereff is the
            # integer default; otherwise ceil() would see a floored value
            eventsperjob = float(events_per_job) * filtereff
            expectedjobs = int(math.ceil(expectedevents / eventsperjob))
            expectedjobcpuhours = int(timeev * eventsperjob / 3600)
        else:
            expectedjobs = 0
            expectedjobcpuhours = 0
    elif typee in ('MonteCarloFromGEN', 'ReReco', 'ReDigi'):
        # datasets
        dsname = inputdataset['name']
        inputdataset['events'] = dbs3.getEventCountDataSet(dsname)
        (inputdataset['status'],
         inputdataset['createts'],
         inputdataset['lastmodts']) = dbs3.getDatasetInfo(dsname)
        if blockswhitelist != []:
            inputdataset['bwevents'] = dbs3.getEventCountDataSetBlockList(dsname, blockswhitelist)
        else:
            inputdataset['bwevents'] = inputdataset['events']

        if inputdataset['bwevents'] > 0 and filtereff > 0:
            expectedevents = int(filtereff * inputdataset['bwevents'])
        else:
            expectedevents = 0
        if events_per_job > 0 and filtereff > 0:
            expectedjobs = int(expectedevents / events_per_job)
        else:
            expectedjobs = 0
        try:
            # NOTE(review): 'bwlumicount' is never stored in inputdataset by
            # this function, so this currently always falls back to 0
            expectedjobcpuhours = int(
                lumis_per_job * timeev * inputdataset['bwevents']
                / inputdataset['bwlumicount'] / 3600)
        except (KeyError, TypeError, ValueError, ZeroDivisionError):
            expectedjobcpuhours = 0

        (inputdataset['phreqinfo'],
         inputdataset['phtrinfo']) = _inputDatasetPhedexInfo(dsname)
    else:
        expectedevents = -1
        expectedjobs = -1
        expectedjobcpuhours = -1

    ods = _getOutputDatasetNames(workflow)
    if len(ods) == 0:
        print("No Outpudatasets for this workflow: " + workflow)

    # dataset details are only collected once the workflow has started running
    collectdetails = status in ['running', 'running-open', 'running-closed',
                                'completed', 'closed-out', 'announced']
    outputdataset = []
    eventsdone = 0
    for o in ods:
        oel = {'name': o}
        if collectdetails:
            details, events = _outputDatasetDetails(o)
            oel.update(details)
            eventsdone = eventsdone + events
        outputdataset.append(oel)

    cpuhours = timeev * expectedevents / 3600
    remainingcpuhours = max(0, timeev * (expectedevents - eventsdone) / 3600)

    # pledge calculation: slots pledged by the sites this workflow may run at
    totalslots = sum(pslots for psite, pslots in pledged.items() if psite in sites)
    if totalslots == 0:
        realremainingcpudays = 0
    else:
        realremainingcpudays = float(remainingcpuhours) / 24 / totalslots

    try:
        zone = t2zone[custodialsites[0]]
    except (KeyError, IndexError, TypeError):
        zone = '?'

    js = jlist.get(workflow, {})

    # the assignment date is only looked up for workflows in a live status
    updatedate = getAssignmentDate(workflow) if status in LIVE_STATUS else None

    wlinfo.update({
        'filtereff': filtereff,
        'type': typee,
        'status': status,
        'expectedevents': expectedevents,
        'inputdatasetinfo': inputdataset,
        'timeev': timeev,
        'sizeev': sizeev,
        'sites': sites,
        'zone': zone,
        'js': js,
        'outputdatasetinfo': outputdataset,
        'cpuhours': cpuhours,
        'realremainingcpudays': realremainingcpudays,
        'remainingcpuhours': remainingcpuhours,
        'team': team,
        'expectedjobs': expectedjobs,
        'expectedjobcpuhours': expectedjobcpuhours,
        'acdc': acdc,
        'update': updatedate,
    })
    return wlinfo