def retrieveSites(self):
    """
    _retrieveSites_

    Load every site known to the ResourceControl DB into this object so
    that the plugins can read it.

    For each site name the site record is stored in self.allSites, and
    its thresholds, attributes and performance (over
    self.performanceInterval) are cached per site.  Names of sites whose
    record has Active == True are appended to self.activeSites.
    Afterwards a JobQueueDB is created and its queued/active job counts
    are stored in self.sitejobs.

    Nothing is returned; all results are left on the instance.

    """
    Session.set_database(dbConfig)
    Session.connect()
    Session.start_transaction()

    controlDB = ResourceControlDB()
    for siteName in controlDB.siteNames():
        record = controlDB.getSiteData(siteName)
        self.allSites[siteName] = record
        index = record['SiteIndex']

        if record['Active'] == True:
            self.activeSites.append(siteName)

        # Thresholds, attributes and performance are cached for every
        # site, not only for the active ones.
        self.siteThresholds[siteName] = controlDB.siteThresholds(index)
        self.siteAttributes[siteName] = controlDB.siteAttributes(index)
        self.sitePerformance[siteName] = selectRcSitePerformance(
            index, self.performanceInterval)

    del controlDB

    self.jq = JobQueueDB()
    self.sitejobs = self.jq.countQueuedActiveJobs()

    Session.commit_all()
    Session.close_all()
    return
def getNewRunNumber(workflowID, amount=1):
    """
    __getNewRunNumber__

    Reserve ``amount`` new run numbers for a workflow and return them as
    a sorted list.

    The workflow's run_number_count is advanced by amount * increment in
    a single UPDATE and then read back.  The reserved numbers are found
    by counting down from the value read back in steps of ``increment``.

    ``increment`` is a module-level value.  According to the original
    notes it is an upper bound on the number of prodagents, each of
    which owns a unique run number offset larger than 0 and smaller than
    ``increment``.

    """
    Session.execute(
        "UPDATE we_Workflow SET run_number_count = run_number_count+ %s "
        "WHERE id='%s' " % (str(amount * increment), workflowID))

    Session.execute(
        "SELECT run_number_count FROM we_Workflow "
        "WHERE id='%s' " % (workflowID))
    rows = Session.fetchall()

    # rows[0][0] is the highest reserved number; count back from it.
    numbers = [rows[0][0] - step * increment for step in range(0, amount)]
    numbers.sort()
    return numbers
def retrieve(serverURL=None,method_name=None,componentID=None): try: if serverURL==None and method_name==None and componentID==None: sqlStr="""SELECT server_url,service_call,component_id, max(log_time) FROM ws_last_call WHERE call_state="call_placed" GROUP BY server_url; """ elif serverURL==None and method_name==None and componentID!=None: sqlStr="""SELECT server_url,service_call,component_id, max(log_time) FROM ws_last_call WHERE component_id="%s" AND call_state="call_placed" GROUP BY server_url; """ %(componentID) elif serverURL==None and method_name!=None and componentID!=None: sqlStr="""SELECT server_url,service_call,component_id, max(log_time) FROM ws_last_call WHERE component_id="%s" AND service_call="%s" AND call_state="call_placed" GROUP BY server_url; """ %(componentID,method_name) elif serverURL!=None and method_name==None and componentID!=None: sqlStr="""SELECT server_url,service_call,component_id, max(log_time) FROM ws_last_call WHERE component_id="%s" AND server_url="%s" AND call_state="call_placed" GROUP BY server_url; """ %(componentID,serverURL) Session.execute(sqlStr) rows=Session.fetchall() if len(rows)==0: raise ProdException("No result in local last service call table with componentID :"+\ str(componentID),1000) server_url=rows[0][0] service_call=rows[0][1] component_id=rows[0][2] return [server_url,service_call,component_id] except Exception,ex: raise ProdAgentException("Service commit Error: "+str(ex))
def index(self, workflow):
    """
    _index_

    Render a stacked bar graph of job states for a workflow and return
    an HTML page that embeds it.

    input:
    -workflow (string). Name of the workflow to plot.

    output: HTML string.  If graphtool cannot be imported a fixed error
    page is returned instead and nothing is queried or written.

    For every state in _States the number of "Processing" and "Merge"
    jobs of the workflow is counted, the graph is written to
    <workingDir>/<workflow>-WorkflowGraph.png and the page references it
    through self.imageServer.

    """
    errHtml = "<html><body><h2>No Graph Tools installed!!!</h2>\n "
    errHtml += "</body></html>"
    try:
        from graphtool.graphs.common_graphs import StackedBarGraph
    except ImportError:
        return errHtml

    Session.set_database(dbConfig)
    Session.connect()
    Session.start_transaction()

    procStatus = {}
    mergeStatus = {}
    for state in _States:
        procStatus[state] = len(
            WEUtil.jobsForWorkflow(workflow, "Processing", state))
        mergeStatus[state] = len(
            WEUtil.jobsForWorkflow(workflow, "Merge", state))

    Session.commit_all()
    Session.close_all()

    pngfile = os.path.join(self.workingDir,
                           "%s-WorkflowGraph.png" % workflow)
    pngfileUrl = "%s?filepath=%s" % (self.imageServer, pngfile)

    data = {"Processing" : procStatus, "Merge" : mergeStatus}
    metadata = {"title" : "Job States for %s" % workflow}

    # PNG is binary data: open in binary mode and make sure the handle
    # is released even if the plotting call raises.
    plotfile = open(pngfile, 'wb')
    try:
        SBG = StackedBarGraph()
        SBG(data, plotfile, metadata)
    finally:
        plotfile.close()

    html = "<html><body><img src=\"%s\"></body></html>" % pngfileUrl
    return html
def addFlag(self, triggerId, jobSpecId , flagId):
    """
    _addFlag_

    Adds one or more flags to a trigger.  Each flag is inserted into
    tr_Trigger with the value "start".

    input:
    -triggerId (string). Id of the trigger
    -jobSpecId (string). Id of the job specification
    -flagId (string or list of strings). Id of the flag, or a list of
     flag ids that are inserted with one multi-row statement.

    output:

    nothing, or a ProdException (code 3003) if the insert fails, e.g.
    because the flag already existed.  An empty list produces an
    incomplete statement and therefore also ends in that exception.

    """
    try:
        if isinstance(flagId, list):
            # One VALUES tuple per flag, comma separated.
            valueRows = [
                """("%s","%s","%s","start") """ % (jobSpecId, triggerId, flag)
                for flag in flagId]
            sqlStr = """ INSERT INTO tr_Trigger(JobSpecID,TriggerID,FlagID,FlagValue) VALUES """
            sqlStr += ','.join(valueRows)
        else:
            sqlStr = """ INSERT INTO tr_Trigger(JobSpecID,TriggerID,FlagID,FlagValue) VALUES("%s","%s","%s","start") ;""" % (jobSpecId, triggerId, flagId)
        Session.execute(sqlStr)
    except Exception:
        raise ProdException(exceptions[3003], 3003)
def storeMessage(message):
    """
    _storeMessage_

    Put a message into the MessageQueue under "ProdMgrInterface" with a
    delay of "00:00:10", log that the server could not be reached, and
    commit the session.

    input:
    -message (dict). Must provide the keys 'state', 'server_url',
     'type' and 'parameters'.

    """
    state = message['state']
    serverUrl = message['server_url']
    messageType = message['type']
    parameters = message['parameters']

    MessageQueue.insert("ProdMgrInterface", state, serverUrl,
                        messageType, parameters, "00:00:10")

    logging.debug("Problem connecting to server " + message['server_url'])
    logging.debug("Attempt stored in message queue for later retries")
    Session.commit()
def retrieve(serverURL=None, method_name=None, componentID=None): try: if serverURL == None and method_name == None and componentID == None: sqlStr = """SELECT server_url,service_call,component_id, max(log_time) FROM ws_last_call WHERE call_state="call_placed" GROUP BY server_url; """ elif serverURL == None and method_name == None and componentID != None: sqlStr = """SELECT server_url,service_call,component_id, max(log_time) FROM ws_last_call WHERE component_id="%s" AND call_state="call_placed" GROUP BY server_url; """ % (componentID) elif serverURL == None and method_name != None and componentID != None: sqlStr = """SELECT server_url,service_call,component_id, max(log_time) FROM ws_last_call WHERE component_id="%s" AND service_call="%s" AND call_state="call_placed" GROUP BY server_url; """ % (componentID, method_name) elif serverURL != None and method_name == None and componentID != None: sqlStr = """SELECT server_url,service_call,component_id, max(log_time) FROM ws_last_call WHERE component_id="%s" AND server_url="%s" AND call_state="call_placed" GROUP BY server_url; """ % (componentID, serverURL) Session.execute(sqlStr) rows = Session.fetchall() if len(rows) == 0: raise ProdException("No result in local last service call table with componentID :"+\ str(componentID),1000) server_url = rows[0][0] service_call = rows[0][1] component_id = rows[0][2] return [server_url, service_call, component_id] except Exception, ex: raise ProdAgentException("Service commit Error: " + str(ex))
def setAction(self, jobSpecId, triggerId, actionName): """ _setAction_ Sets the associated action that will be called if all flags are set. This action is registered in the action registery. If this trigger already had an action, this action will replace it. input: -triggerId (string). Id of the trigger -actionName (string). Name of the action output: nothing or an exception if the trigger does not exists. output: nothing or an exception if the flag already existed. """ try: sqlStr = """INSERT INTO tr_Action(jobSpecId,triggerId,actionName) VALUES("%s","%s","%s") ;""" % (jobSpecId, triggerId, actionName) Session.execute(sqlStr) except Exception,ex: msg = ":"+str(jobSpecId)+","+str(triggerId)+","+str(actionName) logging.error(exceptions[3002]+msg+str(ex), 3002)
def queryJobsByStatus(status):
    """
    _queryJobsByStatus_

    Return the rows of the job_emulator table that have the given
    status, ordered by start_time.

    Each list item is a 5-tuple built from the first five columns of the
    row.  The inline notes of the original name them as
    (job_id, job_type, start_time, status, worker_node_id).

    """
    sqlStr = (" SELECT * FROM job_emulator WHERE status = '%s' "
              "order by start_time ") % status
    Session.execute(sqlStr)

    jobs = []
    for row in Session.fetchall():
        jobs.append((row[0], row[1], row[2], row[3], row[4]))
    return jobs
def attributes(site = None):
    """
    _attributes_

    Return a dictionary mapping site name to the attributes stored for
    that site in the ResourceControl DB.

    If a site is given only that site is looked up, and an empty
    dictionary is returned when it has no site data.  Without a site
    argument the sites returned by activeSiteData() are used.

    NOTE(review): a session is connected and a transaction started here
    but neither committed nor closed in this function - presumably the
    caller is expected to do that; confirm.

    """
    Session.connect()
    Session.start_transaction()

    controlDB = ResourceControlDB()
    if site is None:
        siteRecords = activeSiteData()
    else:
        record = controlDB.getSiteData(site)
        if record == None:
            return {}
        siteRecords = [record]

    result = {}
    for record in siteRecords:
        siteName = record['SiteName']
        siteAttrs = controlDB.siteAttributes(record['SiteIndex'])
        result[siteName] = siteAttrs
    return result
def __call__(self):
    """
    _operator()_

    Evaluate the status of this workflow from the WorkflowEntities data
    and publish any events that are triggered.

    When self.processingComplete() is true, every dataset returned by
    self.unmergedDatasets() is handled: with self.doMigration set a
    "DBSInterface:MigrateDatasetToGlobal" message is published for the
    dataset, and with self.doInjection set a
    "StoreResultsAccountant:PollMigration" message carrying
    self.workflowFile is published with a "00:02:00" delay.  The
    workflow is then marked finished and removed through WEWorkflow.

    NOTE(review): the nesting of the statements after the dataset loop
    was reconstructed from a flattened source; confirm that the
    commit/setFinished/remove calls belong inside the
    processingComplete() branch as laid out here.

    """
    if self.processingComplete():
        logging.info("Processing Complete for %s" % self.workflow)
        for dataset in self.unmergedDatasets():
            if self.doMigration:
                logging.debug(
                    "Publishing MigrateToGlobal for %s" % dataset)
                self.msgSvcRef.publish(
                    "DBSInterface:MigrateDatasetToGlobal",
                    dataset)
                # each published message is committed right away
                self.msgSvcRef.commit()
            if self.doInjection:
                logging.debug("Publishing PollMigration for %s" % dataset)
                self.msgSvcRef.publish("StoreResultsAccountant:PollMigration",
                                       self.workflowFile, "00:02:00")
                self.msgSvcRef.commit()

        Session.commit_all()

        # mark the workflow as done, then drop it
        WEWorkflow.setFinished(self.workflow)
        WEWorkflow.remove(self.workflow)
        Session.commit_all()

    return
def workmapWFName2ID(workmap):
    """
    _workmapWFName2ID_

    Translate the keys of a {workflow name: cmssw_version} dictionary
    into ids where possible and return the translated dictionary.

    The name to id mapping is read from the merge_dataset table.
    Workflows without a merge_dataset entry keep their full name as key
    (according to the original notes these should not be processing).

    """
    Session.execute('select workflow,id from merge_dataset;')

    nameToId = {}
    for row in Session.fetchall():
        nameToId[str(row[0])] = str(row[1])
    logging.debug("workmapWFName2ID "+str(nameToId))

    translated = {}
    for name, version in workmap.items():
        # fall back to the name itself when no id is known
        translated[nameToId.get(name, name)] = version
    return translated
def startComponent(self):
    """
    _startComponent_

    Start the component: register with the message service as
    "RelValInjector", subscribe to its messages, publish the first
    "RelValInjector:Poll" (delayed by self.args['PollInterval']) and
    then loop forever handing each received message to self.__call__.

    Every loop iteration opens its own session and transaction, and
    commits and closes all sessions after the message was handled.

    """
    self.ms = MessageService()

    # register
    self.ms.registerAs("RelValInjector")

    # subscribe to messages
    subscriptions = (
        "RelValInjector:StartDebug",
        "RelValInjector:EndDebug",
        "RelValInjector:Inject",
        "JobSuccess",
        "GeneralJobFailure",
        "RelValInjector:Poll",
    )
    for topic in subscriptions:
        self.ms.subscribeTo(topic)

    self.ms.publish("RelValInjector:Poll", "",
                    self.args['PollInterval'])
    self.ms.commit()

    while True:
        Session.set_database(dbConfig)
        Session.connect()
        Session.start_transaction()

        messageType, payload = self.ms.get()
        self.ms.commit()
        logging.debug("RelValInjector: %s, %s" % (messageType, payload))
        self.__call__(messageType, payload)

        Session.commit_all()
        Session.close_all()
def retrieveReleasedJobs(self, count = 1, jobType = None, workflow = None):
    """
    _retrieveReleasedJobs_

    Return the job_index values of jobs in jq_queue whose status is
    'released', without looking at site information.  (Described in the
    original as a history method for the job queue.)

    input:
    -count. Maximum number of rows (used as the LIMIT).
    -jobType (optional). Only jobs with this job_type.
    -workflow (optional). Only jobs with this workflow_id.

    Rows are ordered by priority and then time, both descending.

    """
    pieces = [" SELECT DISTINCT job_index FROM jq_queue "
              "WHERE status = 'released' "]
    if workflow != None:
        pieces.append(' AND workflow_id="%s" ' % workflow)
    if jobType != None:
        pieces.append(' AND job_type="%s" ' % jobType)
    pieces.append(" ORDER BY priority DESC, time DESC LIMIT %s;" % count)

    Session.execute("".join(pieces))
    return [row[0] for row in Session.fetchall()]
def register(jobID,fileIDs=[]):
    """
    __register__

    Registers a set of files for a job id by inserting one row per file
    into we_File with a single multi-row statement.

    fileIDs is a list of dictionaries, each providing 'lfn' (used as the
    file id) and 'events' (the number of events for that file).  With an
    empty list nothing is done.

    """
    if len(fileIDs) == 0:
        return
    logging.debug("Registering files for job: "+str(jobID))

    valueRows = []
    for entry in fileIDs:
        valueRows.append('("%s","%s","%s")' % (
            str(entry['lfn']), str(entry['events']), str(jobID)))

    sqlStr = ("INSERT INTO we_File(id,events_processed,job_id) VALUES"
              + ','.join(valueRows))
    Session.execute(sqlStr)
def get(allocationID=[]):
    """
    __get__

    Returns the we_Allocation entries for one id or a list of ids.

    A value that is not a list is treated as a single id.  The result is
    None for an empty id list or when nothing matched, a single entry
    when exactly one row matched, and a list of entries otherwise.  The
    'details' column is passed to Session.convert for decoding.

    """
    description = ['id', 'events_missed', 'events_allocated',
                   'events_missed_cumul', 'events_processed', 'details',
                   'prod_mgr_url', 'workflow_id', 'allocation_spec_file']

    if type(allocationID) != list:
        allocationID = [str(allocationID)]
    if not allocationID:
        return

    if len(allocationID) == 1:
        condition = 'id="%s" ' % (str(allocationID[0]))
    else:
        # the tuple representation doubles as the SQL IN list
        condition = 'id IN %s ' % (str(tuple(allocationID)))

    sqlStr = ("SELECT " + ",".join(description)
              + " FROM we_Allocation WHERE " + condition)
    Session.execute(sqlStr)

    result = Session.convert(description, Session.fetchall(),
                             oneItem=False, decode=['details'])
    found = len(result)
    if found == 0:
        return None
    if found == 1:
        return result[0]
    return result
def setEventsMissed(allocationID,eventsMissed=0):
    """
    __setEventsMissed__

    Overwrite the events_missed column of one we_Allocation row.

    input:
    -allocationID. Id of the allocation to update.
    -eventsMissed. New value for events_missed (default 0).

    """
    values = (str(eventsMissed), str(allocationID))
    Session.execute(
        'UPDATE we_Allocation SET events_missed=%s WHERE id="%s" ' % values)
def get(workflowID=[]):
    """
    __getWorkflows__

    Returns the we_Workflow entries for one id or a list of ids.

    A value that is not a list is treated as a single id.  An empty id
    list gives an empty list.  When exactly one entry was found that
    entry is returned on its own; otherwise whatever Session.convert
    produced is returned.

    """
    if type(workflowID) != list:
        workflowID = [str(workflowID)]
    if not workflowID:
        return []

    selectPart = ("SELECT events_processed,id,owner,priority,prod_mgr_url, "
                  "workflow_spec_file,workflow_type,max_sites "
                  "FROM we_Workflow WHERE ")
    if len(workflowID) == 1:
        sqlStr = selectPart + 'id="%s" ' % (str(workflowID[0]))
    else:
        # the tuple representation doubles as the SQL IN list
        sqlStr = selectPart + 'id IN %s ' % (str(tuple(workflowID)))
    Session.execute(sqlStr)

    description = ['events_processed', 'id', 'owner', 'priority',
                   'prod_mgr_url', 'workflow_spec_file', 'workflow_type',
                   'max_sites']
    result = Session.convert(description, Session.fetchall())
    if len(result) == 1:
        return result[0]
    return result
def constraintID2WFname(constr):
    """
    _constraintID2WFname_

    Translate a workflow constraint (a comma separated list of workflow
    ids) into a comma separated, sorted list of workflow names.

    The id to name mapping is read from the merge_dataset table.
    Entries without a merge_dataset row are kept unchanged (according to
    the original notes these should not be processing).

    An empty constraint gives None.

    """
    if not constr:
        return None

    Session.execute('select id,workflow from merge_dataset;')
    idToName = dict([(str(row[0]), str(row[1]))
                     for row in Session.fetchall()])

    names = [idToName.get(wf, wf) for wf in constr.split(',')]
    names.sort()
    return ','.join(names)
def setEventsMissedIncrement(allocationID,eventsMissed=0):
    """
    __setEventsMissedIncrement__

    Add eventsMissed to both events_missed and events_missed_cumul of
    one we_Allocation row.

    input:
    -allocationID. Id of the allocation to update.
    -eventsMissed. Amount to add to both counters (default 0).

    """
    increase = str(eventsMissed)
    sqlStr = ('UPDATE we_Allocation SET events_missed=events_missed+%s, '
              'events_missed_cumul=events_missed_cumul+%s '
              'WHERE id="%s" ') % (increase, increase, str(allocationID))
    Session.execute(sqlStr)
def deleteTable(tableName):
    """
    _deleteTable_

    Delete every row of the named table (the table itself is kept).

    """
    Session.execute(" DELETE from %s" % tableName)
    return
def hasURL(url):
    """
    _hasURL_

    Tell whether ws_queue holds at least one row for the given server
    url.  Returns True when the row count is not zero, False otherwise.

    """
    Session.execute(
        'SELECT COUNT(*) FROM ws_queue WHERE server_url="%s"; ' % (url))
    counted = Session.fetchall()
    return counted[0][0] != 0
def reQueueJob(self, job_spec_id):
    """
    _reQueueJob_

    Put a job back in the queue - generally used after a failure.

    The jq_queue row of the job spec id gets status 'new' and its time
    is set to NOW().

    """
    statement = ("UPDATE jq_queue SET status = 'new', time = NOW() "
                 'WHERE job_spec_id = "%s" ')
    Session.execute(statement % job_spec_id)
    return
def hasURL(url):
    """
    _hasURL_

    Tell whether pm_cooloff holds at least one row for the given url.
    Returns True when the row count is not zero, False otherwise.

    """
    Session.execute(
        'SELECT COUNT(*) FROM pm_cooloff WHERE url = "%s"; ' % (url))
    counted = Session.fetchall()
    return counted[0][0] != 0
def recordLog(workflow, se, log):
    """
    _recordLog_

    Record a log file in the log_input table.

    input:
    -workflow. Stored in the workflow column.
    -se. Stored in the se_name column.
    -log. Stored in the lfn column.

    """
    statement = ('INSERT INTO log_input (lfn, se_name, workflow) '
                 'VALUES("%s", "%s", "%s")')
    Session.execute(statement % (log, se, workflow))
def setWorkflowLocation(workflowID,workflowLocation):
    """
    __setWorkflowLocation__

    Sets the (local) location of the workflow as downloaded, by storing
    it in the workflow_spec_file column of the we_Workflow row.

    """
    values = (str(workflowLocation), str(workflowID))
    Session.execute(
        'UPDATE we_Workflow SET workflow_spec_file="%s" WHERE id="%s" '
        % values)
def register(workflowID, parameters={}, renew = False):
    """
    __register__

    Register a workflow in we_Workflow, or update it if a row with the
    same key already exists (INSERT ... ON DUPLICATE KEY UPDATE).

    input:
    -workflowID. Id of the workflow.
    -parameters (dict). Accepted keys: priority, request_type (stored
     as workflow_type), prod_mgr_url, workflow_spec_file, owner,
     max_sites and - only when renew is False - run_number_count.
     Any other key raises a KeyError.
    -renew (bool). When False, run_number_count is set to the module
     level ``offset`` (overriding a value passed in).  When True the
     run number counter is left out of the statement.

    The caller's dictionary is not modified: the run_number_count entry
    is added to a private copy.  (It used to be written into the
    argument itself, and so also into the shared default dictionary.)

    """
    columnFor = {'priority':'priority', 'request_type':'workflow_type',
                 'prod_mgr_url':'prod_mgr_url',
                 'workflow_spec_file':'workflow_spec_file',
                 'owner':'owner', 'max_sites':'max_sites'}

    # Work on a copy so neither the caller's dict nor the default
    # argument is altered.
    values = dict(parameters)
    if not renew:
        columnFor['run_number_count'] = 'run_number_count'
        values['run_number_count'] = offset

    attributes = list(values.keys())
    columns = [columnFor[attribute] for attribute in attributes]
    quoted = ['"' + str(values[attribute]) + '"'
              for attribute in attributes]

    # column list and value list, each followed by the workflow id
    sqlStr = "INSERT INTO we_Workflow(" + ','.join(columns + ['id']) + ")"
    sqlStr += " VALUES("
    sqlStr += ','.join(quoted + ['"' + str(workflowID) + '"'])
    sqlStr += ")"

    # on a key clash the same values overwrite the existing row
    sqlStr += " ON DUPLICATE KEY UPDATE "
    sqlStr += ','.join([column + '=' + value
                        for column, value in zip(columns, quoted)])
    Session.execute(sqlStr)
def remove(client_id,service_call): global db_config logging.debug('removing logged service call') try: sqlStr=Dialect.buildQuery("ProdCom.Query4",{'client_id':client_id,\ 'service_call':service_call}) Session.execute(sqlStr) except Exception,ex: raise ProdException(str(ex),1001)
def execute(self):
    """
    _execute_

    Cleanup state: reset the stored state of "ProdMgrInterface" to
    "start" with an empty parameter dictionary, commit, and switch the
    session back to "default".

    Returns the new component state ("start").

    """
    logging.debug("Executing state: Cleanup")

    nextState = "start"
    State.setState("ProdMgrInterface", nextState)
    State.setParameters("ProdMgrInterface", {})
    Session.commit()

    # set session back to default
    Session.set_session("default")
    return nextState
def log(serverUrl,method_name,args,componentID="defaultComponent"): try: sqlStr="""INSERT INTO ws_last_call(server_url,component_id,service_call,service_parameters,call_state) VALUES("%s","%s","%s","%s","%s") ON DUPLICATE KEY UPDATE service_parameters="%s", call_state="%s"; """ %(serverUrl,componentID,method_name,base64.encodestring(cPickle.dumps(args)),"call_placed",base64.encodestring(cPickle.dumps(args)),"call_placed") Session.execute(sqlStr) lastCall=(serverUrl,method_name,componentID) except Exception,ex: raise ProdAgentException("Service logging Error: "+str(ex))
def remove(client_id, service_call): global db_config logging.debug('removing logged service call') try: sqlStr=Dialect.buildQuery("ProdCom.Query4",{'client_id':client_id,\ 'service_call':service_call}) Session.execute(sqlStr) except Exception, ex: raise ProdException(str(ex), 1001)
def hasMissingEvents():
    """
    _hasMissingEvents_

    Return the we_Allocation entries that have events_missed > 0 and
    whose id is not used as allocation_id by any row of we_Job.

    The 'details' column is passed to Session.convert for decoding.  An
    empty list is returned when nothing matches.

    """
    description = ['id', 'events_missed', 'events_missed_cumul',
                   'events_processed', 'details', 'prod_mgr_url',
                   'workflow_id', 'allocation_spec_file']
    selected = ",".join(["we_Allocation." + column
                         for column in description])

    sqlStr = (" SELECT " + selected + " FROM we_Allocation "
              "WHERE we_Allocation.id NOT IN "
              "(SELECT DISTINCT allocation_id FROM we_Job) "
              "AND events_missed > 0; ")
    Session.execute(sqlStr)

    result = Session.convert(description, Session.fetchall(),
                             oneItem=False, decode=['details'])
    if len(result) == 0:
        return []
    return result
def log(serverUrl, method_name, args, componentID="defaultComponent"): try: sqlStr = """INSERT INTO ws_last_call(server_url,component_id,service_call,service_parameters,call_state) VALUES("%s","%s","%s","%s","%s") ON DUPLICATE KEY UPDATE service_parameters="%s", call_state="%s"; """ % (serverUrl, componentID, method_name, base64.encodestring(cPickle.dumps(args)), "call_placed", base64.encodestring(cPickle.dumps(args)), "call_placed") Session.execute(sqlStr) lastCall = (serverUrl, method_name, componentID) except Exception, ex: raise ProdAgentException("Service logging Error: " + str(ex))
def commit(serverUrl=None, method_name=None, componentID=None): global lastCall try: if (serverUrl == None) or (method_name == None) or (componentID == None): serverUrl, method_name, componentID = lastCall sqlStr = """UPDATE ws_last_call SET call_state="result_retrieved" WHERE server_url="%s" AND component_id="%s" AND service_call="%s"; """ % (serverUrl, componentID, method_name) Session.execute(sqlStr) except Exception, ex: raise ProdAgentException("Service commit Error: " + str(ex))
def log(client_id, service_call, service_parameters, service_result, client_tag='0'): global db_config logging.debug('logging service call') try: sqlStr=Dialect.buildQuery("ProdCom.Query1",{'client_id':client_id,\ 'service_call':service_call,'service_parameters':service_parameters,\ 'service_result':service_result,'client_tag':client_tag,}) # NOTE: this has to be done different, we do this to keep the log time unique Session.execute(sqlStr) logging.debug('service call logged') except Exception, ex: logging.debug('ERROR in logging call: ' + str(ex)) raise ProdException(str(ex), 1001)
def retrieve(client_id, client_tag='0', service_call=None): global db_config logging.debug('retrieving logged service call ' + str(client_id) + ',' + str(client_tag) + ',' + str(service_call)) try: if service_call == None: sqlStr=Dialect.buildQuery("ProdCom.Query2",{'client_id':client_id,\ 'client_tag':client_tag}) Session.execute(sqlStr) rows = Session.fetchall() if len(rows) != 1: raise ProdException( "No entries found for client ID: " + str(client_id) + " and tag " + str(client_tag), 1002) service_results = cPickle.loads(base64.decodestring(rows[0][0])) service_parameters = cPickle.loads(base64.decodestring(rows[0][2])) return [str(rows[0][1]), service_parameters, service_results] sqlStr=Dialect.buildQuery("ProdCom.Query3",{'client_id':client_id,\ 'client_tag':client_tag,'service_call':service_call}) Session.execute(sqlStr) rows = Session.fetchall() if len(rows) != 1: raise ProdException( "No entries found for client ID: " + str(client_id) + " and tag " + str(client_tag), 1002) service_results = cPickle.loads(base64.decodestring(rows[0][0])) service_parameters = cPickle.loads(base64.decodestring(rows[0][1])) return [service_results, service_parameters] except Exception, ex: raise ProdException(exceptions[4001] + str(ex), 4001)
def setProdCommonDB():
    """
    _setProdCommonDB_

    Point the Session at the database settings stored under 'DB' in
    Cconfiguration, connect a session named 'ProdCommon', make it the
    current session and start a transaction on it.

    """
    dbSettings = Cconfiguration.get('DB')
    Session.set_database(dbSettings)

    sessionName = 'ProdCommon'
    Session.connect(sessionName)
    Session.set_session(sessionName)
    Session.start_transaction()
def fail():
    """
    _fail_

    Roll back all sessions and then close all of them.

    """
    Session.rollback_all()
    Session.close_all()
def finish():
    """
    _finish_

    Commit all sessions and then close all of them.

    """
    Session.commit_all()
    Session.close_all()