class JobMonitoringHandler(RequestHandler):
  """ Service handler exposing read-only job information from the WMS databases
      (JobDB, JobLoggingDB, TaskQueueDB and, optionally, ElasticJobParametersDB).
  """

  @classmethod
  def initializeHandler(cls, svcInfoDict):
    """ Instantiate the database clients shared by all requests of this service.

        The ElasticSearch backend for job parameters is only created when the
        Operations flag /Services/JobMonitoring/useESForJobParametersFlag is True.

        :param dict svcInfoDict: service information dictionary (unused here)
        :return: S_OK()
    """
    cls.gJobDB = JobDB()
    cls.gJobLoggingDB = JobLoggingDB()
    cls.gTaskQueueDB = TaskQueueDB()
    cls.gElasticJobParametersDB = None
    useESForJobParametersFlag = Operations().getValue(
        '/Services/JobMonitoring/useESForJobParametersFlag', False)
    if useESForJobParametersFlag:
      cls.gElasticJobParametersDB = ElasticJobParametersDB()
    return S_OK()

  def initialize(self):
    """
    Flags useESForJobParametersFlag (in /Operations/[]/Services/JobMonitoring/)
    have bool value (True/False) and determines the switching of backends from
    MySQL to ElasticSearch for the JobParameters DB table.
    For version v7r0, the MySQL backend is (still) the default.

    Also caches the caller's credentials and builds the per-request JobPolicy.
    """
    credDict = self.getRemoteCredentials()
    self.ownerDN = credDict['DN']
    self.ownerGroup = credDict['group']
    operations = Operations(group=self.ownerGroup)
    self.globalJobsInfo = operations.getValue('/Services/JobMonitoring/GlobalJobsInfo', True)
    self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.globalJobsInfo)
    self.jobPolicy.jobDB = self.gJobDB
    return S_OK()

  @classmethod
  def getAttributesForJobList(cls, *args, **kwargs):
    """ Utility function for unpacking: fetches job attributes from JobDB and
        converts the (string) job IDs in the result keys back to integers.
    """
    res = cls.gJobDB.getAttributesForJobList(*args, **kwargs)
    if not res['OK']:
      return res
    return S_OK(strToIntDict(res['Value']))

  ##############################################################################
  types_getApplicationStates = []

  @classmethod
  def export_getApplicationStates(cls, condDict=None, older=None, newer=None):
    """ Return Distinct Values of ApplicationStatus job Attribute in WMS
    """
    return cls.gJobDB.getDistinctJobAttributes('ApplicationStatus', condDict, older, newer)

  ##############################################################################
  types_getJobTypes = []

  @classmethod
  def export_getJobTypes(cls, condDict=None, older=None, newer=None):
    """ Return Distinct Values of JobType job Attribute in WMS
    """
    return cls.gJobDB.getDistinctJobAttributes('JobType', condDict, older, newer)

  ##############################################################################
  types_getOwners = []

  @classmethod
  def export_getOwners(cls, condDict=None, older=None, newer=None):
    """ Return Distinct Values of Owner job Attribute in WMS
    """
    return cls.gJobDB.getDistinctJobAttributes('Owner', condDict, older, newer)

  ##############################################################################
  types_getProductionIds = []

  @classmethod
  def export_getProductionIds(cls, condDict=None, older=None, newer=None):
    """ Return Distinct Values of ProductionId job Attribute in WMS
    """
    # The production ID is stored in the JobGroup attribute of the Jobs table
    return cls.gJobDB.getDistinctJobAttributes('JobGroup', condDict, older, newer)

  ##############################################################################
  types_getJobGroups = []

  @classmethod
  def export_getJobGroups(cls, condDict=None, older=None, cutDate=None):
    """ Return Distinct Values of JobGroup job Attribute in WMS
    """
    return cls.gJobDB.getDistinctJobAttributes('JobGroup', condDict, older, newer=cutDate)

  ##############################################################################
  types_getSites = []

  @classmethod
  def export_getSites(cls, condDict=None, older=None, newer=None):
    """ Return Distinct Values of Site job Attribute in WMS
    """
    return cls.gJobDB.getDistinctJobAttributes('Site', condDict, older, newer)

  ##############################################################################
  types_getStates = []

  @classmethod
  def export_getStates(cls, condDict=None, older=None, newer=None):
    """ Return Distinct Values of Status job Attribute in WMS
    """
    return cls.gJobDB.getDistinctJobAttributes('Status', condDict, older, newer)

  ##############################################################################
  types_getMinorStates = []

  @classmethod
  def export_getMinorStates(cls, condDict=None, older=None, newer=None):
    """ Return Distinct Values of Minor Status job Attribute in WMS
    """
    return cls.gJobDB.getDistinctJobAttributes('MinorStatus', condDict, older, newer)
  ##############################################################################
  types_getJobs = []

  @classmethod
  def export_getJobs(cls, attrDict=None, cutDate=None):
    """ Return list of JobIds matching the condition given in attrDict
    """
    return cls.gJobDB.selectJobs(attrDict, newer=cutDate)

  ##############################################################################
  types_getCounters = [list]

  @classmethod
  def export_getCounters(cls, attrList, attrDict=None, cutDate=''):
    """ Retrieve list of distinct attributes values from attrList
        with attrDict as condition.
        For each set of distinct values, count number of occurences.
        Return a list. Each item is a list with 2 items, the list of distinct
        attribute values and the counter
    """
    cutDate = str(cutDate)
    if not attrDict:
      attrDict = {}
    return cls.gJobDB.getCounters('Jobs', attrList, attrDict, newer=cutDate, timeStamp='LastUpdateTime')

  ##############################################################################
  types_getCurrentJobCounters = []

  @classmethod
  def export_getCurrentJobCounters(cls, attrDict=None):
    """ Get job counters per Status with attrDict selection.
        Final statuses are given for the last day.
    """
    if not attrDict:
      attrDict = {}
    result = cls.gJobDB.getCounters('Jobs', ['Status'], attrDict, timeStamp='LastUpdateTime')
    if not result['OK']:
      return result
    # Second query restricted to the last 24 hours, used to re-count final states
    last_update = Time.dateTime() - Time.day
    resultDay = cls.gJobDB.getCounters('Jobs', ['Status'], attrDict, newer=last_update,
                                       timeStamp='LastUpdateTime')
    if not resultDay['OK']:
      return resultDay
    resultDict = {}
    for statusDict, count in result['Value']:
      status = statusDict['Status']
      resultDict[status] = count
      if status in FINAL_STATES:
        # For final states, report only the jobs of the last day
        resultDict[status] = 0
        for statusDayDict, ccount in resultDay['Value']:
          if status == statusDayDict['Status']:
            resultDict[status] = ccount
            break
    return S_OK(resultDict)

  ##############################################################################
  types_getJobStatus = [int]

  @classmethod
  def export_getJobStatus(cls, jobID):
    """ Return the Status attribute of one job """
    return cls.gJobDB.getJobAttribute(jobID, 'Status')

  ##############################################################################
  types_getJobOwner = [int]

  @classmethod
  def export_getJobOwner(cls, jobID):
    """ Return the Owner attribute of one job """
    return cls.gJobDB.getJobAttribute(jobID, 'Owner')

  ##############################################################################
  types_getJobSite = [int]

  @classmethod
  def export_getJobSite(cls, jobID):
    """ Return the Site attribute of one job """
    return cls.gJobDB.getJobAttribute(jobID, 'Site')

  ##############################################################################
  types_getJobJDL = [int, bool]

  @classmethod
  def export_getJobJDL(cls, jobID, original):
    """ Return the job JDL; the original one as submitted if `original` is True """
    return cls.gJobDB.getJobJDL(jobID, original=original)

  ##############################################################################
  types_getJobLoggingInfo = [int]

  @classmethod
  def export_getJobLoggingInfo(cls, jobID):
    """ Return the status logging history of one job """
    return cls.gJobLoggingDB.getJobLoggingInfo(jobID)

  ##############################################################################
  types_getJobsParameters = [list, list]

  @classmethod
  @ignoreEncodeWarning
  def export_getJobsParameters(cls, jobIDs, parameters):
    """ Return the requested attributes for a list of jobs """
    if not (jobIDs and parameters):
      return S_OK({})
    return cls.getAttributesForJobList(jobIDs, parameters)

  ##############################################################################
  types_getJobsStatus = [list]

  @classmethod
  @ignoreEncodeWarning
  def export_getJobsStatus(cls, jobIDs):
    """ Return the Status attribute for a list of jobs """
    if not jobIDs:
      return S_OK({})
    return cls.getAttributesForJobList(jobIDs, ['Status'])

  ##############################################################################
  types_getJobsMinorStatus = [list]

  @classmethod
  @ignoreEncodeWarning
  def export_getJobsMinorStatus(cls, jobIDs):
    """ Return the MinorStatus attribute for a list of jobs """
    return cls.getAttributesForJobList(jobIDs, ['MinorStatus'])

  ##############################################################################
  types_getJobsApplicationStatus = [list]

  @classmethod
  @ignoreEncodeWarning
  def export_getJobsApplicationStatus(cls, jobIDs):
    """ Return the ApplicationStatus attribute for a list of jobs """
    return cls.getAttributesForJobList(jobIDs, ['ApplicationStatus'])

  ##############################################################################
  types_getJobsSites = [list]

  @classmethod
  @ignoreEncodeWarning
  def export_getJobsSites(cls, jobIDs):
    """ Return the Site attribute for a list of jobs """
    return cls.getAttributesForJobList(jobIDs, ['Site'])

  ##############################################################################
  types_getJobSummary = [int]

  @classmethod
  def export_getJobSummary(cls, jobID):
    """ Return the SUMMARY attribute set for one job """
    return cls.gJobDB.getJobAttributes(jobID, SUMMARY)

  ##############################################################################
  types_getJobPrimarySummary = [int]

  @classmethod
  def export_getJobPrimarySummary(cls, jobID):
    """ Return the PRIMARY_SUMMARY attribute set for one job """
    return cls.gJobDB.getJobAttributes(jobID, PRIMARY_SUMMARY)

  ##############################################################################
  types_getJobsSummary = [list]

  @classmethod
  def export_getJobsSummary(cls, jobIDs):
    """ Return the SUMMARY attributes for a list of jobs, as a stringified dict """
    if not jobIDs:
      return S_ERROR('JobMonitoring.getJobsSummary: Received empty job list')
    result = cls.getAttributesForJobList(jobIDs, SUMMARY)
    # NOTE(review): result['OK'] is not checked before accessing result['Value'] —
    # a DB failure would raise KeyError here instead of returning S_ERROR; confirm
    # and add the check. The stringified-dict return is legacy behavior.
    # return result
    restring = str(result['Value'])
    return S_OK(restring)

  ##############################################################################
  types_getJobPageSummaryWeb = [dict, list, int, int]

  def export_getJobPageSummaryWeb(self, selectDict, sortList, startItem, maxItems, selectJobs=True):
    """ Get the summary of the job information for a given page in the
        job monitor in a generic format

        :param dict selectDict: selection criteria; special keys FromDate/ToDate
                                (or legacy LastUpdate) and PilotJobReference are
                                extracted and removed before the DB query
        :param list sortList: at most one (attribute, direction) pair
        :param int startItem: index of the first record of the page
        :param int maxItems: page size
        :param bool selectJobs: if False, only counters are returned
    """
    resultDict = {}
    startDate = selectDict.get('FromDate', None)
    if startDate:
      del selectDict['FromDate']
    # For backward compatibility
    if startDate is None:
      startDate = selectDict.get('LastUpdate', None)
      if startDate:
        del selectDict['LastUpdate']
    endDate = selectDict.get('ToDate', None)
    if endDate:
      del selectDict['ToDate']
    # Provide JobID bound to a specific PilotJobReference
    # There is no reason to have both PilotJobReference and JobID in selectDict
    # If that occurs, use the JobID instead of the PilotJobReference
    pilotJobRefs = selectDict.get('PilotJobReference')
    if pilotJobRefs:
      del selectDict['PilotJobReference']
      if 'JobID' not in selectDict or not selectDict['JobID']:
        if not isinstance(pilotJobRefs, list):
          pilotJobRefs = [pilotJobRefs]
        selectDict['JobID'] = []
        for pilotJobRef in pilotJobRefs:
          res = PilotManagerClient().getPilotInfo(pilotJobRef)
          if res['OK'] and 'Jobs' in res['Value'][pilotJobRef]:
            selectDict['JobID'].extend(res['Value'][pilotJobRef]['Jobs'])
    result = self.jobPolicy.getControlledUsers(RIGHT_GET_INFO)
    if not result['OK']:
      return S_ERROR('Failed to evaluate user rights')
    if result['Value'] != 'ALL':
      # Restrict the selection to the (owner, group) pairs the caller may see
      selectDict[('Owner', 'OwnerGroup')] = result['Value']
    # Sorting instructions. Only one for the moment.
    if sortList:
      orderAttribute = sortList[0][0] + ":" + sortList[0][1]
    else:
      orderAttribute = None
    statusDict = {}
    result = self.gJobDB.getCounters('Jobs', ['Status'], selectDict,
                                     newer=startDate, older=endDate,
                                     timeStamp='LastUpdateTime')
    nJobs = 0
    if result['OK']:
      for stDict, count in result['Value']:
        nJobs += count
        statusDict[stDict['Status']] = count
    resultDict['TotalRecords'] = nJobs
    if nJobs == 0:
      return S_OK(resultDict)
    resultDict['Extras'] = statusDict
    if selectJobs:
      iniJob = startItem
      if iniJob >= nJobs:
        return S_ERROR('Item number out of range')
      result = self.gJobDB.selectJobs(selectDict, orderAttribute=orderAttribute,
                                      newer=startDate, older=endDate,
                                      limit=(maxItems, iniJob))
      if not result['OK']:
        return S_ERROR('Failed to select jobs: ' + result['Message'])
      summaryJobList = result['Value']
      if not self.globalJobsInfo:
        validJobs, _invalidJobs, _nonauthJobs, _ownJobs = self.jobPolicy.evaluateJobRights(summaryJobList,
                                                                                           RIGHT_GET_INFO)
        summaryJobList = validJobs
      result = self.getAttributesForJobList(summaryJobList, SUMMARY)
      if not result['OK']:
        return S_ERROR('Failed to get job summary: ' + result['Message'])
      summaryDict = result['Value']
      # Evaluate last sign of life time
      for jobID, jobDict in summaryDict.items():
        if jobDict['HeartBeatTime'] == 'None':
          jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']
        else:
          lastTime = Time.fromString(jobDict['LastUpdateTime'])
          hbTime = Time.fromString(jobDict['HeartBeatTime'])
          # Not only Stalled jobs but also Failed jobs because Stalled
          if ((hbTime - lastTime) > timedelta(0) or
              jobDict['Status'] == "Stalled" or
              jobDict['MinorStatus'].startswith('Job stalled') or
              jobDict['MinorStatus'].startswith('Stalling')):
            jobDict['LastSignOfLife'] = jobDict['HeartBeatTime']
          else:
            jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']
      tqDict = {}
      result = self.gTaskQueueDB.getTaskQueueForJobs(summaryJobList)
      if result['OK']:
        tqDict = result['Value']
      # If no jobs can be selected after the properties check
      if not summaryDict.keys():
        return S_OK(resultDict)
      # prepare the standard structure now
      # NOTE: dict.keys()[0] is Python 2 only (keys() is a list there)
      key = summaryDict.keys()[0]
      paramNames = summaryDict[key].keys()
      records = []
      for jobID, jobDict in summaryDict.items():
        jParList = []
        for pname in paramNames:
          jParList.append(jobDict[pname])
        jParList.append(tqDict.get(jobID, 0))
        records.append(jParList)
      resultDict['ParameterNames'] = paramNames + ['TaskQueueID']
      resultDict['Records'] = records
    return S_OK(resultDict)

  ##############################################################################
  types_getJobStats = [basestring, dict]

  @classmethod
  def export_getJobStats(cls, attribute, selectDict):
    """ Get job statistics distribution per attribute value with a given selection
    """
    startDate = selectDict.get('FromDate', None)
    if startDate:
      del selectDict['FromDate']
    # For backward compatibility
    if startDate is None:
      startDate = selectDict.get('LastUpdate', None)
      if startDate:
        del selectDict['LastUpdate']
    endDate = selectDict.get('ToDate', None)
    if endDate:
      del selectDict['ToDate']
    result = cls.gJobDB.getCounters('Jobs', [attribute], selectDict,
                                    newer=startDate, older=endDate,
                                    timeStamp='LastUpdateTime')
    resultDict = {}
    if result['OK']:
      for cDict, count in result['Value']:
        resultDict[cDict[attribute]] = count
    return S_OK(resultDict)

  ##############################################################################
  types_getJobsPrimarySummary = [list]

  @classmethod
  @ignoreEncodeWarning
  def export_getJobsPrimarySummary(cls, jobIDs):
    """ Return the PRIMARY_SUMMARY attributes for a list of jobs """
    return cls.getAttributesForJobList(jobIDs, PRIMARY_SUMMARY)

  ##############################################################################
  types_getJobParameter = [[basestring, int, long], basestring]

  @classmethod
  @ignoreEncodeWarning
  def export_getJobParameter(cls, jobID, parName):
    """
    :param str/int/long jobID: one single Job ID
    :param str parName: one single parameter name
    """
    # Try the ElasticSearch backend first when it is enabled;
    # fall back to MySQL (JobDB) when the parameter is not found there
    if cls.gElasticJobParametersDB:
      res = cls.gElasticJobParametersDB.getJobParameters(jobID, [parName])
      if not res['OK']:
        return res
      if res['Value'].get(int(jobID)):
        return S_OK(res['Value'][int(jobID)])
    res = cls.gJobDB.getJobParameters(jobID, [parName])
    if not res['OK']:
      return res
    return S_OK(res['Value'].get(int(jobID), {}))

  ##############################################################################
  types_getJobOptParameters = [int]

  @classmethod
  def export_getJobOptParameters(cls, jobID):
    """ Return the optimizer parameters of one job """
    return cls.gJobDB.getJobOptParameters(jobID)

  ##############################################################################
  types_getJobParameters = [[basestring, int, long, list]]

  @classmethod
  @ignoreEncodeWarning
  def export_getJobParameters(cls, jobIDs, parName=None):
    """
    :param str/int/long/list jobIDs: one single job ID or a list of them
    :param str parName: one single parameter name, a list or None (meaning all of them)
    """
    if cls.gElasticJobParametersDB:
      if not isinstance(jobIDs, list):
        jobIDs = [jobIDs]
      parameters = {}
      for jobID in jobIDs:
        res = cls.gElasticJobParametersDB.getJobParameters(jobID, parName)
        if not res['OK']:
          return res
        parameters.update(res['Value'])
      # Need anyway to get also from JobDB, for those jobs with parameters
      # registered in MySQL or in both backends
      res = cls.gJobDB.getJobParameters(jobIDs, parName)
      if not res['OK']:
        return res
      parametersM = res['Value']
      # and now combine
      final = dict(parametersM)
      # if job in JobDB, update with parameters from ES if any
      for jobID in final:
        final[jobID].update(parameters.get(jobID, {}))
      # if job in ES and not in JobDB, take ES
      for jobID in parameters:
        if jobID not in final:
          final[jobID] = parameters[jobID]
      return S_OK(final)
    return cls.gJobDB.getJobParameters(jobIDs, parName)

  ##############################################################################
  types_traceJobParameter = [basestring, [basestring, int, long, list],
                             basestring, [basestring, None], [basestring, None]]

  @classmethod
  def export_traceJobParameter(cls, site, localID, parameter, date, until):
    """ Forward to JobDB.traceJobParameter for the given site / local job ID """
    return cls.gJobDB.traceJobParameter(site, localID, parameter, date, until)
  ##############################################################################
  types_traceJobParameters = [basestring, [basestring, int, long, list],
                              [list, None], [list, None],
                              [basestring, None], [basestring, None]]

  @classmethod
  def export_traceJobParameters(cls, site, localID, parameterList, attributeList, date, until):
    """ Forward to JobDB.traceJobParameters for the given site / local job ID(s) """
    return cls.gJobDB.traceJobParameters(site, localID, parameterList, attributeList, date, until)

  ##############################################################################
  types_getAtticJobParameters = [[int, long]]

  @classmethod
  def export_getAtticJobParameters(cls, jobID, parameters=None, rescheduleCycle=-1):
    """ Return the attic (pre-reschedule) parameters of one job.

        :param int/long jobID: one single Job ID
        :param list parameters: optional list of parameter names (all by default)
        :param int rescheduleCycle: reschedule cycle to look at (-1 for all)
    """
    if not parameters:
      parameters = []
    return cls.gJobDB.getAtticJobParameters(jobID, parameters, rescheduleCycle)

  ##############################################################################
  types_getJobAttributes = [int]

  @classmethod
  def export_getJobAttributes(cls, jobID, attrList=None):
    """
    :param int jobID: one single Job ID
    :param list attrList: optional list of attributes
    """
    return cls.gJobDB.getJobAttributes(jobID, attrList=attrList)

  ##############################################################################
  types_getJobAttribute = [int, basestring]

  @classmethod
  def export_getJobAttribute(cls, jobID, attribute):
    """
    :param int jobID: one single Job ID
    :param str attribute: one single attribute name
    """
    return cls.gJobDB.getJobAttribute(jobID, attribute)

  ##############################################################################
  types_getSiteSummary = []

  @classmethod
  def export_getSiteSummary(cls):
    """ Return the job summary per site from JobDB """
    return cls.gJobDB.getSiteSummary()

  ##############################################################################
  types_getJobHeartBeatData = [int]

  @classmethod
  def export_getJobHeartBeatData(cls, jobID):
    """ Return the heartbeat data of one job """
    return cls.gJobDB.getHeartBeatData(jobID)

  ##############################################################################
  types_getInputData = [[int, long]]

  @classmethod
  def export_getInputData(cls, jobID):
    """ Get input data for the specified jobs """
    return cls.gJobDB.getInputData(jobID)

  ##############################################################################
  types_getOwnerGroup = []

  @classmethod
  def export_getOwnerGroup(cls):
    """ Return Distinct Values of OwnerGroup from the JobsDB
    """
    return cls.gJobDB.getDistinctJobAttributes('OwnerGroup')
class JobMonitoringHandler(RequestHandler):
  # NOTE(review): this is a SECOND definition of JobMonitoringHandler, which at
  # import time shadows the class defined above. It looks like an older revision
  # (module-level gJobDB/gJobLoggingDB/gTaskQueueDB globals, Python 2 `print`
  # statement below) concatenated into the same file — confirm and keep only one.

  def initialize(self):
    """ Cache the caller's credentials and build the per-request JobPolicy. """
    credDict = self.getRemoteCredentials()
    self.ownerDN = credDict['DN']
    self.ownerGroup = credDict['group']
    operations = Operations(group=self.ownerGroup)
    self.globalJobsInfo = operations.getValue('/Services/JobMonitoring/GlobalJobsInfo', True)
    self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.globalJobsInfo)
    self.jobPolicy.setJobDB(gJobDB)
    return S_OK()

  ##############################################################################
  types_getApplicationStates = []

  @staticmethod
  def export_getApplicationStates():
    """ Return Distinct Values of ApplicationStatus job Attribute in WMS
    """
    return gJobDB.getDistinctJobAttributes('ApplicationStatus')

  ##############################################################################
  types_getJobTypes = []

  @staticmethod
  def export_getJobTypes():
    """ Return Distinct Values of JobType job Attribute in WMS
    """
    return gJobDB.getDistinctJobAttributes('JobType')

  ##############################################################################
  types_getOwners = []

  @staticmethod
  def export_getOwners():
    """ Return Distinct Values of Owner job Attribute in WMS
    """
    return gJobDB.getDistinctJobAttributes('Owner')

  ##############################################################################
  types_getProductionIds = []

  @staticmethod
  def export_getProductionIds():
    """ Return Distinct Values of ProductionId job Attribute in WMS
    """
    # The production ID is stored in the JobGroup attribute of the Jobs table
    return gJobDB.getDistinctJobAttributes('JobGroup')

  ##############################################################################
  types_getJobGroups = []

  @staticmethod
  def export_getJobGroups(condDict=None, cutDate=None):
    """ Return Distinct Values of JobGroup job Attribute in WMS
    """
    return gJobDB.getDistinctJobAttributes('JobGroup', condDict, newer=cutDate)

  ##############################################################################
  types_getSites = []

  @staticmethod
  def export_getSites():
    """ Return Distinct Values of Site job Attribute in WMS
    """
    return gJobDB.getDistinctJobAttributes('Site')

  ##############################################################################
  types_getStates = []

  @staticmethod
  def export_getStates():
    """ Return Distinct Values of Status job Attribute in WMS
    """
    return gJobDB.getDistinctJobAttributes('Status')

  ##############################################################################
  types_getMinorStates = []

  @staticmethod
  def export_getMinorStates():
    """ Return Distinct Values of Minor Status job Attribute in WMS
    """
    return gJobDB.getDistinctJobAttributes('MinorStatus')

  ##############################################################################
  types_getJobs = []

  @staticmethod
  def export_getJobs(attrDict=None, cutDate=None):
    """ Return list of JobIds matching the condition given in attrDict
    """
    # NOTE(review): leftover debug print (Python 2 statement syntax) — dumps the
    # caller's selection to the service log on every call; should be removed.
    print attrDict
    return gJobDB.selectJobs(attrDict, newer=cutDate)

  ##############################################################################
  types_getCounters = [list]

  @staticmethod
  def export_getCounters(attrList, attrDict=None, cutDate=''):
    """ Retrieve list of distinct attributes values from attrList
        with attrDict as condition.
        For each set of distinct values, count number of occurences.
        Return a list. Each item is a list with 2 items, the list of distinct
        attribute values and the counter
    """
    cutDate = str(cutDate)
    if not attrDict:
      attrDict = {}
    return gJobDB.getCounters('Jobs', attrList, attrDict, newer=cutDate, timeStamp='LastUpdateTime')

  ##############################################################################
  types_getCurrentJobCounters = []

  @staticmethod
  def export_getCurrentJobCounters(attrDict=None):
    """ Get job counters per Status with attrDict selection.
        Final statuses are given for the last day.
    """
    if not attrDict:
      attrDict = {}
    result = gJobDB.getCounters('Jobs', ['Status'], attrDict, timeStamp='LastUpdateTime')
    if not result['OK']:
      return result
    # Second query restricted to the last 24 hours, used to re-count final states
    last_update = Time.dateTime() - Time.day
    resultDay = gJobDB.getCounters('Jobs', ['Status'], attrDict, newer=last_update,
                                   timeStamp='LastUpdateTime')
    if not resultDay['OK']:
      return resultDay
    resultDict = {}
    for statusDict, count in result['Value']:
      status = statusDict['Status']
      resultDict[status] = count
      if status in FINAL_STATES:
        # For final states, report only the jobs of the last day
        resultDict[status] = 0
        for statusDayDict, ccount in resultDay['Value']:
          if status == statusDayDict['Status']:
            resultDict[status] = ccount
            break
    return S_OK(resultDict)

  ##############################################################################
  types_getJobStatus = [int]

  @staticmethod
  def export_getJobStatus(jobID):
    """ Return the Status attribute of one job """
    return gJobDB.getJobAttribute(jobID, 'Status')

  ##############################################################################
  types_getJobOwner = [int]

  @staticmethod
  def export_getJobOwner(jobID):
    """ Return the Owner attribute of one job """
    return gJobDB.getJobAttribute(jobID, 'Owner')

  ##############################################################################
  types_getJobSite = [int]

  @staticmethod
  def export_getJobSite(jobID):
    """ Return the Site attribute of one job """
    return gJobDB.getJobAttribute(jobID, 'Site')

  ##############################################################################
  types_getJobJDL = [int, bool]

  @staticmethod
  def export_getJobJDL(jobID, original):
    """ Return the job JDL; the original one as submitted if `original` is True """
    return gJobDB.getJobJDL(jobID, original=original)

  ##############################################################################
  types_getJobLoggingInfo = [int]

  @staticmethod
  def export_getJobLoggingInfo(jobID):
    """ Return the status logging history of one job """
    return gJobLoggingDB.getJobLoggingInfo(jobID)

  ##############################################################################
  types_getJobsParameters = [list, list]

  @staticmethod
  def export_getJobsParameters(jobIDs, parameters):
    """ Return the requested attributes for a list of jobs """
    if not (jobIDs and parameters):
      return S_OK({})
    return gJobDB.getAttributesForJobList(jobIDs, parameters)

  ##############################################################################
  types_getJobsStatus = [list]

  @staticmethod
  def export_getJobsStatus(jobIDs):
    """ Return the Status attribute for a list of jobs """
    if not jobIDs:
      return S_OK({})
    return gJobDB.getAttributesForJobList(jobIDs, ['Status'])

  ##############################################################################
  types_getJobsMinorStatus = [list]

  @staticmethod
  def export_getJobsMinorStatus(jobIDs):
    """ Return the MinorStatus attribute for a list of jobs """
    return gJobDB.getAttributesForJobList(jobIDs, ['MinorStatus'])

  ##############################################################################
  types_getJobsApplicationStatus = [list]

  @staticmethod
  def export_getJobsApplicationStatus(jobIDs):
    """ Return the ApplicationStatus attribute for a list of jobs """
    return gJobDB.getAttributesForJobList(jobIDs, ['ApplicationStatus'])

  ##############################################################################
  types_getJobsSites = [list]

  @staticmethod
  def export_getJobsSites(jobIDs):
    """ Return the Site attribute for a list of jobs """
    return gJobDB.getAttributesForJobList(jobIDs, ['Site'])

  ##############################################################################
  types_getJobSummary = [int]

  @staticmethod
  def export_getJobSummary(jobID):
    """ Return the SUMMARY attribute set for one job """
    return gJobDB.getJobAttributes(jobID, SUMMARY)

  ##############################################################################
  types_getJobPrimarySummary = [int]

  @staticmethod
  def export_getJobPrimarySummary(jobID):
    """ Return the PRIMARY_SUMMARY attribute set for one job """
    return gJobDB.getJobAttributes(jobID, PRIMARY_SUMMARY)

  ##############################################################################
  types_getJobsSummary = [list]

  @staticmethod
  def export_getJobsSummary(jobIDs):
    """ Return the SUMMARY attributes for a list of jobs, as a stringified dict """
    if not jobIDs:
      return S_ERROR('JobMonitoring.getJobsSummary: Received empty job list')
    result = gJobDB.getAttributesForJobList(jobIDs, SUMMARY)
    # NOTE(review): result['OK'] is not checked before accessing result['Value'] —
    # a DB failure would raise KeyError here instead of returning S_ERROR; confirm
    # and add the check. The stringified-dict return is legacy behavior.
    # return result
    restring = str(result['Value'])
    return S_OK(restring)

  ##############################################################################
  types_getJobPageSummaryWeb = [dict, list, int, int]

  def export_getJobPageSummaryWeb(self, selectDict, sortList, startItem, maxItems, selectJobs=True):
    """ Get the summary of the job information for a given page in the
        job monitor in a generic format

        :param dict selectDict: selection criteria; special keys FromDate/ToDate
                                (or legacy LastUpdate) are extracted and removed
                                before the DB query
        :param list sortList: at most one (attribute, direction) pair
        :param int startItem: index of the first record of the page
        :param int maxItems: page size
        :param bool selectJobs: if False, only counters are returned
    """
    resultDict = {}
    startDate = selectDict.get('FromDate', None)
    if startDate:
      del selectDict['FromDate']
    # For backward compatibility
    if startDate is None:
      startDate = selectDict.get('LastUpdate', None)
      if startDate:
        del selectDict['LastUpdate']
    endDate = selectDict.get('ToDate', None)
    if endDate:
      del selectDict['ToDate']
    result = self.jobPolicy.getControlledUsers(RIGHT_GET_INFO)
    if not result['OK']:
      return S_ERROR('Failed to evaluate user rights')
    if result['Value'] != 'ALL':
      # Restrict the selection to the (owner, group) pairs the caller may see
      selectDict[('Owner', 'OwnerGroup')] = result['Value']
    # Sorting instructions. Only one for the moment.
    if sortList:
      orderAttribute = sortList[0][0] + ":" + sortList[0][1]
    else:
      orderAttribute = None
    statusDict = {}
    result = gJobDB.getCounters('Jobs', ['Status'], selectDict,
                                newer=startDate, older=endDate,
                                timeStamp='LastUpdateTime')
    nJobs = 0
    if result['OK']:
      for stDict, count in result['Value']:
        nJobs += count
        statusDict[stDict['Status']] = count
    resultDict['TotalRecords'] = nJobs
    if nJobs == 0:
      return S_OK(resultDict)
    resultDict['Extras'] = statusDict
    if selectJobs:
      iniJob = startItem
      if iniJob >= nJobs:
        return S_ERROR('Item number out of range')
      result = gJobDB.selectJobs(selectDict, orderAttribute=orderAttribute,
                                 newer=startDate, older=endDate,
                                 limit=(maxItems, iniJob))
      if not result['OK']:
        return S_ERROR('Failed to select jobs: ' + result['Message'])
      summaryJobList = result['Value']
      if not self.globalJobsInfo:
        validJobs, _invalidJobs, _nonauthJobs, _ownJobs = self.jobPolicy.evaluateJobRights(summaryJobList,
                                                                                           RIGHT_GET_INFO)
        summaryJobList = validJobs
      result = gJobDB.getAttributesForJobList(summaryJobList, SUMMARY)
      if not result['OK']:
        return S_ERROR('Failed to get job summary: ' + result['Message'])
      summaryDict = result['Value']
      # Evaluate last sign of life time
      for jobID, jobDict in summaryDict.items():
        if jobDict['HeartBeatTime'] == 'None':
          jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']
        else:
          lastTime = Time.fromString(jobDict['LastUpdateTime'])
          hbTime = Time.fromString(jobDict['HeartBeatTime'])
          # There is no way to express a timedelta of 0 ;-)
          # Not only Stalled jobs but also Failed jobs because Stalled
          if ((hbTime - lastTime) > (lastTime - lastTime) or
              jobDict['Status'] == "Stalled" or
              jobDict['MinorStatus'].startswith('Job stalled') or
              jobDict['MinorStatus'].startswith('Stalling')):
            jobDict['LastSignOfLife'] = jobDict['HeartBeatTime']
          else:
            jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']
      tqDict = {}
      result = gTaskQueueDB.getTaskQueueForJobs(summaryJobList)
      if result['OK']:
        tqDict = result['Value']
      # If no jobs can be selected after the properties check
      if not summaryDict.keys():
        return S_OK(resultDict)
      # prepare the standard structure now
      # NOTE: dict.keys()[0] is Python 2 only (keys() is a list there)
      key = summaryDict.keys()[0]
      paramNames = summaryDict[key].keys()
      records = []
      for jobID, jobDict in summaryDict.items():
        jParList = []
        for pname in paramNames:
          jParList.append(jobDict[pname])
        jParList.append(tqDict.get(jobID, 0))
        records.append(jParList)
      resultDict['ParameterNames'] = paramNames + ['TaskQueueID']
      resultDict['Records'] = records
    return S_OK(resultDict)

  ##############################################################################
  types_getJobStats = [basestring, dict]

  @staticmethod
  def export_getJobStats(attribute, selectDict):
    """ Get job statistics distribution per attribute value with a given selection
    """
    startDate = selectDict.get('FromDate', None)
    if startDate:
      del selectDict['FromDate']
    # For backward compatibility
    if startDate is None:
      startDate = selectDict.get('LastUpdate', None)
      if startDate:
        del selectDict['LastUpdate']
    endDate = selectDict.get('ToDate', None)
    if endDate:
      del selectDict['ToDate']
    result = gJobDB.getCounters('Jobs', [attribute], selectDict,
                                newer=startDate, older=endDate,
                                timeStamp='LastUpdateTime')
    resultDict = {}
    if result['OK']:
      for cDict, count in result['Value']:
        resultDict[cDict[attribute]] = count
    return S_OK(resultDict)

  ##############################################################################
  types_getJobsPrimarySummary = [list]

  @staticmethod
  def export_getJobsPrimarySummary(jobIDs):
    """ Return the PRIMARY_SUMMARY attributes for a list of jobs """
    return gJobDB.getAttributesForJobList(jobIDs, PRIMARY_SUMMARY)

  ##############################################################################
  types_getJobParameter = [[basestring, int, long], basestring]

  @staticmethod
  def export_getJobParameter(jobID, parName):
    """ Return one single parameter of one job """
    return gJobDB.getJobParameters(jobID, [parName])

  ##############################################################################
  types_getJobParameters = [[int, long]]

  @staticmethod
  def export_getJobParameters(jobID):
    """ Return all parameters of one job """
    return gJobDB.getJobParameters(jobID)
  ##############################################################################
  types_traceJobParameter = [basestring, [basestring, int, long, list],
                             basestring, [basestring, None], [basestring, None]]

  @staticmethod
  def export_traceJobParameter(site, localID, parameter, date, until):
    """ Forward to JobDB.traceJobParameter for the given site / local job ID """
    return gJobDB.traceJobParameter(site, localID, parameter, date, until)

  ##############################################################################
  types_traceJobParameters = [basestring, [basestring, int, long, list],
                              [list, None], [list, None],
                              [basestring, None], [basestring, None]]

  @staticmethod
  def export_traceJobParameters(site, localID, parameterList, attributeList, date, until):
    """ Forward to JobDB.traceJobParameters for the given site / local job ID(s) """
    return gJobDB.traceJobParameters(site, localID, parameterList, attributeList, date, until)

  ##############################################################################
  types_getAtticJobParameters = [[int, long]]

  @staticmethod
  def export_getAtticJobParameters(jobID, parameters=None, rescheduleCycle=-1):
    """ Return the attic (pre-reschedule) parameters of one job.

        :param int/long jobID: one single Job ID
        :param list parameters: optional list of parameter names (all by default)
        :param int rescheduleCycle: reschedule cycle to look at (-1 for all)
    """
    if not parameters:
      parameters = []
    return gJobDB.getAtticJobParameters(jobID, parameters, rescheduleCycle)

  ##############################################################################
  types_getJobAttributes = [int]

  @staticmethod
  def export_getJobAttributes(jobID):
    """ Return all attributes of one job """
    return gJobDB.getJobAttributes(jobID)

  ##############################################################################
  types_getJobAttribute = [int, basestring]

  @staticmethod
  def export_getJobAttribute(jobID, attribute):
    """ Return one single attribute of one job """
    return gJobDB.getJobAttribute(jobID, attribute)

  ##############################################################################
  types_getSiteSummary = []

  @staticmethod
  def export_getSiteSummary():
    """ Return the job summary per site from JobDB """
    return gJobDB.getSiteSummary()

  ##############################################################################
  types_getJobHeartBeatData = [int]

  @staticmethod
  def export_getJobHeartBeatData(jobID):
    """ Return the heartbeat data of one job """
    return gJobDB.getHeartBeatData(jobID)

  ##############################################################################
  types_getInputData = [[int, long]]

  @staticmethod
  def export_getInputData(jobID):
    """ Get input data for the specified jobs """
    return gJobDB.getInputData(jobID)

  ##############################################################################
  types_getOwnerGroup = []

  @staticmethod
  def export_getOwnerGroup():
    """ Return Distinct Values of OwnerGroup from the JobsDB
    """
    return gJobDB.getDistinctJobAttributes('OwnerGroup')
class JobManagerHandler(RequestHandler):
  """ RequestHandler implementation of the JobManager service:
      job submission, reschedule, kill, delete and reset operations.
  """

  @classmethod
  def initializeHandler(cls, serviceInfoDict):
    """ Service-level initialization: connect to the OptimizationMind
        message service and keep retrying the connection every 60 s.
    """
    cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
    cls.__connectToOptMind()
    gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
    return S_OK()

  @classmethod
  def __connectToOptMind(cls):
    # Best-effort (re)connection: a failure is only logged, the periodic
    # task will try again later.
    if not cls.msgClient.connected:
      result = cls.msgClient.connect(JobManager=True)
      if not result['OK']:
        cls.log.warn("Cannot connect to OptimizationMind!", result['Message'])

  def initialize(self):
    """ Per-request initialization: cache the caller's credentials and
        build the JobPolicy object used for all rights evaluations.
    """
    credDict = self.getRemoteCredentials()
    self.ownerDN = credDict['DN']
    self.ownerGroup = credDict['group']
    self.userProperties = credDict['properties']
    self.owner = credDict['username']
    self.peerUsesLimitedProxy = credDict['isLimitedProxy']
    self.diracSetup = self.serviceInfoDict['clientSetup']
    self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS)
    self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
    self.jobPolicy.setJobDB(gJobDB)
    return S_OK()

  def __sendNewJobsToMind(self, jids):
    """ Notify the OptimizationMind about new/rescheduled jobs.
        Silently does nothing if the message client is not connected.
    """
    if not self.msgClient.connected:
      return
    result = self.msgClient.createMessage("OptimizeJobs")
    if not result['OK']:
      self.log.error("Cannot create Optimize message: %s" % result['Message'])
      return
    msgObj = result['Value']
    msgObj.jids = jids
    result = self.msgClient.sendMessage(msgObj)
    if not result['OK']:
      self.log.error("Cannot send Optimize message: %s" % result['Message'])
      return
    self.log.info("Optimize msg sent for %s jobs" % len(jids))

  ###########################################################################
  types_submitJob = [StringType]

  def export_submitJob(self, jobDesc):
    """ Submit a single job to DIRAC WMS """
    if self.peerUsesLimitedProxy:
      return S_ERROR("Can't submit using a limited proxy! (bad boy!)")

    # Check job submission permission
    result = self.jobPolicy.getJobPolicy()
    if not result['OK']:
      return S_ERROR('Failed to get job policies')
    policyDict = result['Value']
    if not policyDict[RIGHT_SUBMIT]:
      return S_ERROR('Job submission not authorized')

    #jobDesc is JDL for now
    jobDesc = jobDesc.strip()
    # Normalize the JDL: make sure it is enclosed in brackets
    if jobDesc[0] != "[":
      jobDesc = "[%s" % jobDesc
    if jobDesc[-1] != "]":
      jobDesc = "%s]" % jobDesc

    # Check if the job is a parameteric one
    jobClassAd = ClassAd(jobDesc)
    parametricJob = False
    if jobClassAd.lookupAttribute('Parameters'):
      parametricJob = True
      if jobClassAd.isAttributeList('Parameters'):
        # Explicit list of parameter values
        parameterList = jobClassAd.getListFromExpression('Parameters')
      else:
        # Arithmetic/geometric progression:
        # p(0) = ParameterStart; p(i+1) = p(i) * ParameterFactor + ParameterStep
        pStep = 0
        pFactor = 1
        pStart = 1
        nParameters = jobClassAd.getAttributeInt('Parameters')
        if not nParameters:
          value = jobClassAd.get_expression('Parameters')
          return S_ERROR('Illegal value for Parameters JDL field: %s' % value)
        if jobClassAd.lookupAttribute('ParameterStart'):
          value = jobClassAd.get_expression('ParameterStart').replace('"', '')
          try:
            pStart = int(value)
          except:
            try:
              pStart = float(value)
            except:
              return S_ERROR('Illegal value for ParameterStart JDL field: %s' % value)
        if jobClassAd.lookupAttribute('ParameterStep'):
          # Try integer first, then float
          pStep = jobClassAd.getAttributeInt('ParameterStep')
          if not pStep:
            pStep = jobClassAd.getAttributeFloat('ParameterStep')
            if not pStep:
              value = jobClassAd.get_expression('ParameterStep')
              return S_ERROR('Illegal value for ParameterStep JDL field: %s' % value)
        if jobClassAd.lookupAttribute('ParameterFactor'):
          pFactor = jobClassAd.getAttributeInt('ParameterFactor')
          if not pFactor:
            pFactor = jobClassAd.getAttributeFloat('ParameterFactor')
            if not pFactor:
              value = jobClassAd.get_expression('ParameterFactor')
              return S_ERROR('Illegal value for ParameterFactor JDL field: %s' % value)
        parameterList = list()
        parameterList.append(pStart)
        for i in range(nParameters - 1):
          parameterList.append(parameterList[i] * pFactor + pStep)
      if len(parameterList) > self.maxParametricJobs:
        return S_ERROR('The number of parametric jobs exceeded the limit of %d' % self.maxParametricJobs)
      # Generate one JDL per parameter value, substituting %s / %n placeholders
      jobDescList = []
      nParam = len(parameterList) - 1
      for n, p in enumerate(parameterList):
        newJobDesc = jobDesc.replace('%s', str(p)).replace('%n', str(n).zfill(len(str(nParam))))
        newClassAd = ClassAd(newJobDesc)
        for attr in ['Parameters', 'ParameterStep', 'ParameterFactor']:
          newClassAd.deleteAttribute(attr)
        if type(p) == type(' ') and p.startswith('{'):
          newClassAd.insertAttributeInt('Parameter', str(p))
        else:
          newClassAd.insertAttributeString('Parameter', str(p))
        newClassAd.insertAttributeInt('ParameterNumber', n)
        newJDL = newClassAd.asJDL()
        jobDescList.append(newJDL)
    else:
      jobDescList = [jobDesc]

    jobIDList = []
    for jobDescription in jobDescList:
      result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup)
      if not result['OK']:
        return result
      # NOTE(review): insertNewJobIntoDB presumably also sets 'JobID',
      # 'Status' and 'MinorStatus' keys on the result — defined elsewhere.
      jobID = result['JobID']
      gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup))
      gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager')
      jobIDList.append(jobID)

    #Set persistency flag
    retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
    if 'Value' not in retVal or not retVal['Value']:
      gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

    if parametricJob:
      result = S_OK(jobIDList)
    else:
      result = S_OK(jobIDList[0])
    result['JobID'] = result['Value']
    result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
    self.__sendNewJobsToMind(jobIDList)
    return result

  ###########################################################################
  def __checkIfProxyUploadIsRequired(self):
    """ Return True if the caller should (re)upload a proxy:
        either the check failed or there is no proxy valid for >= 5 hours.
    """
    result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
    if not result['OK']:
      gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
      return True
    #Check if an upload is required
    return result['Value'] == False

  ###########################################################################
  types_invalidateJob = [IntType]

  def invalidateJob(self, jobID):
    """ Make job with jobID invalid, e.g. because of the sandbox submission errors.
    """
    # Not exported (no export_ prefix) and not implemented.
    pass

  ###########################################################################
  def __get_job_list(self, jobInput):
    """ Evaluate the jobInput into a list of ints
        Accepts an int, a numeric string or a list of either;
        returns [] for anything that cannot be converted.
    """
    if type(jobInput) == IntType:
      return [jobInput]
    if type(jobInput) == StringType:
      try:
        ijob = int(jobInput)
        return [ijob]
      except:
        return []
    if type(jobInput) == ListType:
      try:
        ljob = [int(x) for x in jobInput]
        return ljob
      except:
        return []
    return []

  ###########################################################################
  types_rescheduleJob = []

  def export_rescheduleJob(self, jobIDs):
    """ Reschedule a single job. If the optional proxy parameter is given
        it will be used to refresh the proxy in the Proxy Repository
    """
    jobList = self.__get_job_list(jobIDs)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDs))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESCHEDULE)
    for jobID in validJobList:
      # Remove the job from the task queue before rescheduling it
      gtaskQueueDB.deleteJob(jobID)
      #gJobDB.deleteJobFromQueue(jobID)
      result = gJobDB.rescheduleJob(jobID)
      gLogger.debug(str(result))
      if not result['OK']:
        return result
      gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager')

    if invalidJobList or nonauthJobList:
      result = S_ERROR('Some jobs failed deletion')
      if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      return result

    result = S_OK(validJobList)
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    self.__sendNewJobsToMind(validJobList)
    return result

  def __deleteJob(self, jobID):
    """ Delete one job """
    result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
    if not result['OK']:
      return result
    # TaskQueue removal failure is non-fatal: only log a warning
    result = gtaskQueueDB.deleteJob(jobID)
    if not result['OK']:
      gLogger.warn('Failed to delete job from the TaskQueue')
    return S_OK()

  def __killJob(self, jobID):
    """ Kill one job """
    result = gJobDB.setJobCommand(jobID, 'Kill')
    if not result['OK']:
      return result
    else:
      gLogger.info('Job %d is marked for termination' % jobID)
    # Status/TaskQueue failures below are non-fatal: only warnings are logged
    result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
    if not result['OK']:
      gLogger.warn('Failed to set job Killed status')
    result = gtaskQueueDB.deleteJob(jobID)
    if not result['OK']:
      gLogger.warn('Failed to delete job from the TaskQueue')
    return S_OK()

  def __kill_delete_jobs(self, jobIDList, right):
    """ Kill or delete jobs as necessary """
    jobList = self.__get_job_list(jobIDList)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDList))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right)

    # Get job status to see what is to be killed or deleted
    result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
    if not result['OK']:
      return result
    killJobList = []
    deleteJobList = []
    for jobID, sDict in result['Value'].items():
      if sDict['Status'] in ['Running', 'Matched', 'Stalled']:
        killJobList.append(jobID)
      elif sDict['Status'] in ['Done', 'Failed']:
        # Finished jobs can only be deleted, not killed
        if not right == RIGHT_KILL:
          deleteJobList.append(jobID)
      else:
        deleteJobList.append(jobID)

    bad_ids = []
    for jobID in killJobList:
      result = self.__killJob(jobID)
      if not result['OK']:
        bad_ids.append(jobID)
    for jobID in deleteJobList:
      result = self.__deleteJob(jobID)
      if not result['OK']:
        bad_ids.append(jobID)

    if invalidJobList or nonauthJobList or bad_ids:
      result = S_ERROR('Some jobs failed deletion')
      if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      if bad_ids:
        result['FailedJobIDs'] = bad_ids
      return result

    result = S_OK(validJobList)
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    return result

  ###########################################################################
  types_deleteJob = []

  def export_deleteJob(self, jobIDs):
    """ Delete jobs specified in the jobIDs list """
    return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

  ###########################################################################
  types_killJob = []

  def export_killJob(self, jobIDs):
    """ Kill jobs specified in the jobIDs list """
    return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

  ###########################################################################
  types_resetJob = []

  def export_resetJob(self, jobIDs):
    """ Reset jobs specified in the jobIDs list """
    jobList = self.__get_job_list(jobIDs)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDs))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESET)

    bad_ids = []
    good_ids = []
    for jobID in validJobList:
      # Reset the reschedule counter first so the job can be rescheduled
      result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1)
      if not result['OK']:
        bad_ids.append(jobID)
      else:
        gtaskQueueDB.deleteJob(jobID)
        #gJobDB.deleteJobFromQueue(jobID)
        result = gJobDB.rescheduleJob(jobID)
        if not result['OK']:
          bad_ids.append(jobID)
        else:
          good_ids.append(jobID)
        gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager')

    self.__sendNewJobsToMind(good_ids)
    if invalidJobList or nonauthJobList or bad_ids:
      result = S_ERROR('Some jobs failed resetting')
      if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      if bad_ids:
        result['FailedJobIDs'] = bad_ids
      return result

    result = S_OK()
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    return result
class JobManagerHandler(RequestHandler):
  """ RequestHandler implementation of the JobManager service (newer
      variant: parametric-job handling is delegated to helper functions,
      and kill/delete also handles Staging jobs via the StorageManager).
  """

  @classmethod
  def initializeHandler(cls, serviceInfoDict):
    """ Service-level initialization: connect to the OptimizationMind
        message service and keep retrying the connection every 60 s.
    """
    cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
    cls.__connectToOptMind()
    gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
    return S_OK()

  @classmethod
  def __connectToOptMind(cls):
    # Best-effort (re)connection: a failure is only logged, the periodic
    # task will try again later.
    if not cls.msgClient.connected:
      result = cls.msgClient.connect(JobManager=True)
      if not result['OK']:
        cls.log.warn("Cannot connect to OptimizationMind!", result['Message'])

  def initialize(self):
    """ Per-request initialization: cache the caller's credentials and
        build the JobPolicy object used for all rights evaluations.
    """
    credDict = self.getRemoteCredentials()
    self.ownerDN = credDict['DN']
    self.ownerGroup = credDict['group']
    self.userProperties = credDict['properties']
    self.owner = credDict['username']
    self.peerUsesLimitedProxy = credDict['isLimitedProxy']
    self.diracSetup = self.serviceInfoDict['clientSetup']
    self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS)
    self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
    self.jobPolicy.setJobDB(gJobDB)
    return S_OK()

  def __sendJobsToOptimizationMind(self, jids):
    """ Notify the OptimizationMind about new/rescheduled jobs.
        Silently does nothing if the message client is not connected.
    """
    if not self.msgClient.connected:
      return
    result = self.msgClient.createMessage("OptimizeJobs")
    if not result['OK']:
      self.log.error("Cannot create Optimize message: %s" % result['Message'])
      return
    msgObj = result['Value']
    # Send the job IDs in a deterministic (sorted) order
    msgObj.jids = list(sorted(jids))
    result = self.msgClient.sendMessage(msgObj)
    if not result['OK']:
      self.log.error("Cannot send Optimize message: %s" % result['Message'])
      return
    self.log.info("Optimize msg sent for %s jobs" % len(jids))

  ###########################################################################
  types_submitJob = [StringTypes]

  def export_submitJob(self, jobDesc):
    """ Submit a single job to DIRAC WMS """
    if self.peerUsesLimitedProxy:
      return S_ERROR("Can't submit using a limited proxy! (bad boy!)")

    # Check job submission permission
    result = self.jobPolicy.getJobPolicy()
    if not result['OK']:
      return S_ERROR('Failed to get job policies')
    policyDict = result['Value']
    if not policyDict[RIGHT_SUBMIT]:
      return S_ERROR('Job submission not authorized')

    #jobDesc is JDL for now
    jobDesc = jobDesc.strip()
    # Normalize the JDL: make sure it is enclosed in brackets
    if jobDesc[0] != "[":
      jobDesc = "[%s" % jobDesc
    if jobDesc[-1] != "]":
      jobDesc = "%s]" % jobDesc

    # Check if the job is a parametric one
    jobClassAd = ClassAd(jobDesc)
    nParameters = getNumberOfParameters(jobClassAd)
    parametricJob = False
    if nParameters > 0:
      parametricJob = True
      # Expansion of the parametric JDL is delegated to a helper
      result = generateParametricJobs(jobClassAd)
      if not result['OK']:
        return result
      jobDescList = result['Value']
    else:
      jobDescList = [jobDesc]

    jobIDList = []
    for jobDescription in jobDescList:
      result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup)
      if not result['OK']:
        return result
      # NOTE(review): insertNewJobIntoDB presumably also sets 'JobID',
      # 'Status' and 'MinorStatus' keys on the result — defined elsewhere.
      jobID = result['JobID']
      gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup))
      gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager')
      jobIDList.append(jobID)

    #Set persistency flag
    retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
    if 'Value' not in retVal or not retVal['Value']:
      gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

    if parametricJob:
      result = S_OK(jobIDList)
    else:
      result = S_OK(jobIDList[0])
    result['JobID'] = result['Value']
    result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
    self.__sendJobsToOptimizationMind(jobIDList)
    return result

  ###########################################################################
  def __checkIfProxyUploadIsRequired(self):
    """ Return True if the caller should (re)upload a proxy:
        either the check failed or there is no proxy valid for >= 5 hours.
    """
    result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
    if not result['OK']:
      gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
      return True
    #Check if an upload is required
    return result['Value'] == False

  ###########################################################################
  types_invalidateJob = [IntType]

  def invalidateJob(self, jobID):
    """ Make job with jobID invalid, e.g. because of the sandbox submission errors.
    """
    # Not exported (no export_ prefix) and not implemented.
    pass

  ###########################################################################
  def __get_job_list(self, jobInput):
    """ Evaluate the jobInput into a list of ints
        Accepts an int, a numeric string or a list of either;
        returns [] for anything that cannot be converted.
    """
    if isinstance(jobInput, int):
      return [jobInput]
    if isinstance(jobInput, basestring):
      try:
        ijob = int(jobInput)
        return [ijob]
      except:
        return []
    if isinstance(jobInput, list):
      try:
        ljob = [int(x) for x in jobInput]
        return ljob
      except:
        return []
    return []

  ###########################################################################
  types_rescheduleJob = []

  def export_rescheduleJob(self, jobIDs):
    """ Reschedule a single job. If the optional proxy parameter is given
        it will be used to refresh the proxy in the Proxy Repository
    """
    jobList = self.__get_job_list(jobIDs)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDs))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESCHEDULE)
    for jobID in validJobList:
      # Remove the job from the task queue before rescheduling it
      gtaskQueueDB.deleteJob(jobID)
      #gJobDB.deleteJobFromQueue(jobID)
      result = gJobDB.rescheduleJob(jobID)
      gLogger.debug(str(result))
      if not result['OK']:
        return result
      gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager')

    if invalidJobList or nonauthJobList:
      result = S_ERROR('Some jobs failed reschedule')
      if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      return result

    result = S_OK(validJobList)
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    self.__sendJobsToOptimizationMind(validJobList)
    return result

  def __deleteJob(self, jobID):
    """ Delete one job """
    result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
    if not result['OK']:
      return result
    # TaskQueue removal failure is non-fatal: only log a warning
    result = gtaskQueueDB.deleteJob(jobID)
    if not result['OK']:
      gLogger.warn('Failed to delete job from the TaskQueue')
    return S_OK()

  def __killJob(self, jobID, sendKillCommand=True):
    """ Kill one job

        :param sendKillCommand: when False, only mark the job Killed
          without issuing a Kill command (used for jobs that never ran).
    """
    if sendKillCommand:
      result = gJobDB.setJobCommand(jobID, 'Kill')
      if not result['OK']:
        return result
    gLogger.info('Job %d is marked for termination' % jobID)
    # Status/TaskQueue failures below are non-fatal: only warnings are logged
    result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
    if not result['OK']:
      gLogger.warn('Failed to set job Killed status')
    result = gtaskQueueDB.deleteJob(jobID)
    if not result['OK']:
      gLogger.warn('Failed to delete job from the TaskQueue')
    return S_OK()

  def __kill_delete_jobs(self, jobIDList, right):
    """ Kill or delete jobs as necessary """
    jobList = self.__get_job_list(jobIDList)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDList))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right)

    # Get job status to see what is to be killed or deleted
    result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
    if not result['OK']:
      return result
    killJobList = []
    deleteJobList = []
    markKilledJobList = []
    stagingJobList = []
    for jobID, sDict in result['Value'].items():
      if sDict['Status'] in ['Running', 'Matched', 'Stalled']:
        killJobList.append(jobID)
      elif sDict['Status'] in ['Done', 'Failed']:
        # Finished jobs can only be deleted, not killed
        if not right == RIGHT_KILL:
          deleteJobList.append(jobID)
      else:
        # Jobs not yet running: mark them Killed without a Kill command
        markKilledJobList.append(jobID)
      if sDict['Status'] in ['Staging']:
        stagingJobList.append(jobID)

    bad_ids = []
    for jobID in markKilledJobList:
      result = self.__killJob(jobID, sendKillCommand=False)
      if not result['OK']:
        bad_ids.append(jobID)
    for jobID in killJobList:
      result = self.__killJob(jobID)
      if not result['OK']:
        bad_ids.append(jobID)
    for jobID in deleteJobList:
      result = self.__deleteJob(jobID)
      if not result['OK']:
        bad_ids.append(jobID)

    if stagingJobList:
      # Also cancel the associated staging requests
      stagerClient = StorageManagerClient()
      gLogger.info('Going to send killing signal to stager as well!')
      result = stagerClient.killTasksBySourceTaskID(stagingJobList)
      if not result['OK']:
        gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message'])

    if nonauthJobList or bad_ids:
      result = S_ERROR('Some jobs failed deletion')
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      if bad_ids:
        result['FailedJobIDs'] = bad_ids
      return result

    result = S_OK(validJobList)
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    # Invalid IDs are reported on the success result here, not as an error
    if invalidJobList:
      result['InvalidJobIDs'] = invalidJobList
    return result

  ###########################################################################
  types_deleteJob = []

  def export_deleteJob(self, jobIDs):
    """ Delete jobs specified in the jobIDs list """
    return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

  ###########################################################################
  types_killJob = []

  def export_killJob(self, jobIDs):
    """ Kill jobs specified in the jobIDs list """
    return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

  ###########################################################################
  types_resetJob = []

  def export_resetJob(self, jobIDs):
    """ Reset jobs specified in the jobIDs list """
    jobList = self.__get_job_list(jobIDs)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDs))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESET)

    bad_ids = []
    good_ids = []
    for jobID in validJobList:
      # Reset the reschedule counter first so the job can be rescheduled
      result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1)
      if not result['OK']:
        bad_ids.append(jobID)
      else:
        gtaskQueueDB.deleteJob(jobID)
        #gJobDB.deleteJobFromQueue(jobID)
        result = gJobDB.rescheduleJob(jobID)
        if not result['OK']:
          bad_ids.append(jobID)
        else:
          good_ids.append(jobID)
        gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager')

    self.__sendJobsToOptimizationMind(good_ids)
    if invalidJobList or nonauthJobList or bad_ids:
      result = S_ERROR('Some jobs failed resetting')
      if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      if bad_ids:
        result['FailedJobIDs'] = bad_ids
      return result

    result = S_OK()
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    return result
class JobMonitoringHandler(RequestHandler):
  """ Read-only RequestHandler exposing job information from the JobDB
      and related databases.
  """

  def initialize(self):
    """ Per-request initialization: cache the caller's credentials and
        build the JobPolicy used for info-access rights evaluation.
    """
    credDict = self.getRemoteCredentials()
    self.ownerDN = credDict['DN']
    self.ownerGroup = credDict['group']
    operations = Operations(group=self.ownerGroup)
    self.globalJobsInfo = operations.getValue('/Services/JobMonitoring/GlobalJobsInfo', True)
    self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.globalJobsInfo)
    self.jobPolicy.setJobDB(gJobDB)
    return S_OK()

  ##############################################################################
  types_getApplicationStates = []

  @staticmethod
  def export_getApplicationStates():
    """ Return Distinct Values of ApplicationStatus job Attribute in WMS """
    return gJobDB.getDistinctJobAttributes('ApplicationStatus')

  ##############################################################################
  types_getJobTypes = []

  @staticmethod
  def export_getJobTypes():
    """ Return Distinct Values of JobType job Attribute in WMS """
    return gJobDB.getDistinctJobAttributes('JobType')

  ##############################################################################
  types_getOwners = []

  @staticmethod
  def export_getOwners():
    """ Return Distinct Values of Owner job Attribute in WMS """
    return gJobDB.getDistinctJobAttributes('Owner')

  ##############################################################################
  types_getProductionIds = []

  @staticmethod
  def export_getProductionIds():
    """ Return Distinct Values of ProductionId job Attribute in WMS """
    # Production IDs are stored in the JobGroup attribute
    return gJobDB.getDistinctJobAttributes('JobGroup')

  ##############################################################################
  types_getJobGroups = []

  @staticmethod
  def export_getJobGroups(condDict=None, cutDate=None):
    """ Return Distinct Values of ProductionId job Attribute in WMS """
    return gJobDB.getDistinctJobAttributes('JobGroup', condDict, newer=cutDate)

  ##############################################################################
  types_getSites = []

  @staticmethod
  def export_getSites():
    """ Return Distinct Values of Site job Attribute in WMS """
    return gJobDB.getDistinctJobAttributes('Site')

  ##############################################################################
  types_getStates = []

  @staticmethod
  def export_getStates():
    """ Return Distinct Values of Status job Attribute in WMS """
    return gJobDB.getDistinctJobAttributes('Status')

  ##############################################################################
  types_getMinorStates = []

  @staticmethod
  def export_getMinorStates():
    """ Return Distinct Values of Minor Status job Attribute in WMS """
    return gJobDB.getDistinctJobAttributes('MinorStatus')

  ##############################################################################
  types_getJobs = []

  @staticmethod
  def export_getJobs(attrDict=None, cutDate=None):
    """ Return list of JobIds matching the condition given in attrDict """
    # NOTE(review): attrDict is passed to the DB unfiltered; validation
    # against an allowed-attributes list used to be here but is disabled.
    # Fix: removed a leftover debug `print attrDict` statement that dumped
    # every query's selection dict to the service's stdout.
    return gJobDB.selectJobs(attrDict, newer=cutDate)

  ##############################################################################
  types_getCounters = [ListType]

  @staticmethod
  def export_getCounters(attrList, attrDict=None, cutDate=''):
    """ Retrieve list of distinct attributes values from attrList
        with attrDict as condition.
        For each set of distinct values, count number of occurences.
        Return a list. Each item is a list with 2 items, the list of
        distinct attribute values and the counter
    """
    # NOTE(review): attrList/attrDict are passed to the DB unfiltered;
    # validation against an allowed-attributes list is disabled.
    cutDate = str(cutDate)
    if not attrDict:
      attrDict = {}
    return gJobDB.getCounters('Jobs', attrList, attrDict, newer=cutDate, timeStamp='LastUpdateTime')

  ##############################################################################
  types_getCurrentJobCounters = []

  @staticmethod
  def export_getCurrentJobCounters(attrDict=None):
    """ Get job counters per Status with attrDict selection. Final statuses are given for
        the last day.
    """
    if not attrDict:
      attrDict = {}
    result = gJobDB.getCounters('Jobs', ['Status'], attrDict, timeStamp='LastUpdateTime')
    if not result['OK']:
      return result
    last_update = Time.dateTime() - Time.day
    resultDay = gJobDB.getCounters('Jobs', ['Status'], attrDict, newer=last_update, timeStamp='LastUpdateTime')
    if not resultDay['OK']:
      return resultDay

    resultDict = {}
    for statusDict, count in result['Value']:
      status = statusDict['Status']
      resultDict[status] = count
      # For final states, report only the count accumulated in the last day
      if status in FINAL_STATES:
        resultDict[status] = 0
        for statusDayDict, ccount in resultDay['Value']:
          if status == statusDayDict['Status']:
            resultDict[status] = ccount
            break
    return S_OK(resultDict)

  ##############################################################################
  types_getJobStatus = [IntType]

  @staticmethod
  def export_getJobStatus(jobID):
    """ Return the Status attribute of the given job """
    return gJobDB.getJobAttribute(jobID, 'Status')

  ##############################################################################
  types_getJobOwner = [IntType]

  @staticmethod
  def export_getJobOwner(jobID):
    """ Return the Owner attribute of the given job """
    return gJobDB.getJobAttribute(jobID, 'Owner')

  ##############################################################################
  types_getJobSite = [IntType]

  @staticmethod
  def export_getJobSite(jobID):
    """ Return the Site attribute of the given job """
    return gJobDB.getJobAttribute(jobID, 'Site')

  ##############################################################################
  types_getJobJDL = [IntType, BooleanType]

  @staticmethod
  def export_getJobJDL(jobID, original):
    """ Return the job JDL; the original as-submitted one if original is True """
    return gJobDB.getJobJDL(jobID, original=original)

  ##############################################################################
# -- JobMonitoringHandler (continued) --
  types_getJobLoggingInfo = [IntType]

  @staticmethod
  def export_getJobLoggingInfo(jobID):
    # Status-transition history from the JobLoggingDB
    return gJobLoggingDB.getJobLoggingInfo(jobID)

  ##############################################################################
  types_getJobsParameters = [ListType, ListType]

  @staticmethod
  def export_getJobsParameters(jobIDs, parameters):
    # Empty job or parameter list short-circuits to an empty result
    if not (jobIDs and parameters):
      return S_OK({})
    return gJobDB.getAttributesForJobList(jobIDs, parameters)

  ##############################################################################
  types_getJobsStatus = [ListType]

  @staticmethod
  def export_getJobsStatus(jobIDs):
    if not jobIDs:
      return S_OK({})
    return gJobDB.getAttributesForJobList(jobIDs, ['Status'])

  ##############################################################################
  types_getJobsMinorStatus = [ListType]

  @staticmethod
  def export_getJobsMinorStatus(jobIDs):
    return gJobDB.getAttributesForJobList(jobIDs, ['MinorStatus'])

  ##############################################################################
  types_getJobsApplicationStatus = [ListType]

  @staticmethod
  def export_getJobsApplicationStatus(jobIDs):
    return gJobDB.getAttributesForJobList(jobIDs, ['ApplicationStatus'])

  ##############################################################################
  types_getJobsSites = [ListType]

  @staticmethod
  def export_getJobsSites(jobIDs):
    return gJobDB.getAttributesForJobList(jobIDs, ['Site'])

  ##############################################################################
  types_getJobSummary = [IntType]

  @staticmethod
  def export_getJobSummary(jobID):
    # SUMMARY is the module-level list of summary attribute names
    return gJobDB.getJobAttributes(jobID, SUMMARY)

  ##############################################################################
  types_getJobPrimarySummary = [IntType]

  @staticmethod
  def export_getJobPrimarySummary(jobID):
    return gJobDB.getJobAttributes(jobID, PRIMARY_SUMMARY)

  ##############################################################################
  types_getJobsSummary = [ListType]

  @staticmethod
  def export_getJobsSummary(jobIDs):
    if not jobIDs:
      return S_ERROR('JobMonitoring.getJobsSummary: Received empty job list')
    result = gJobDB.getAttributesForJobList(jobIDs, SUMMARY)
    # return result
    # NOTE(review): the summary dict is returned stringified, apparently
    # for client compatibility — confirm before changing.
    restring = str(result['Value'])
    return S_OK(restring)

  ##############################################################################
  types_getJobPageSummaryWeb = [DictType, ListType, IntType, IntType]

  def export_getJobPageSummaryWeb(self, selectDict, sortList, startItem, maxItems, selectJobs=True):
    """ Get the summary of the job information for a given page in the
        job monitor in a generic format
    """
    resultDict = {}
    # Extract the optional date-range selection keys
    startDate = selectDict.get('FromDate', None)
    if startDate:
      del selectDict['FromDate']
    # For backward compatibility
    if startDate is None:
      startDate = selectDict.get('LastUpdate', None)
      if startDate:
        del selectDict['LastUpdate']
    endDate = selectDict.get('ToDate', None)
    if endDate:
      del selectDict['ToDate']

    # Restrict the selection to the owners the caller may inspect
    result = self.jobPolicy.getControlledUsers(RIGHT_GET_INFO)
    if not result['OK']:
      return S_ERROR('Failed to evaluate user rights')
    if result['Value'] != 'ALL':
      selectDict[('Owner', 'OwnerGroup')] = result['Value']

    # Sorting instructions. Only one for the moment.
    if sortList:
      orderAttribute = sortList[0][0] + ":" + sortList[0][1]
    else:
      orderAttribute = None

    statusDict = {}
    result = gJobDB.getCounters('Jobs', ['Status'], selectDict, newer=startDate, older=endDate, timeStamp='LastUpdateTime')
    nJobs = 0
    if result['OK']:
      for stDict, count in result['Value']:
        nJobs += count
        statusDict[stDict['Status']] = count

    resultDict['TotalRecords'] = nJobs
    if nJobs == 0:
      return S_OK(resultDict)
    resultDict['Extras'] = statusDict

    if selectJobs:
      iniJob = startItem
      if iniJob >= nJobs:
        return S_ERROR('Item number out of range')
      result = gJobDB.selectJobs(selectDict, orderAttribute=orderAttribute, newer=startDate, older=endDate, limit=(maxItems, iniJob))
      if not result['OK']:
        return S_ERROR('Failed to select jobs: ' + result['Message'])

      summaryJobList = result['Value']
      # Without global info access, filter to the jobs the caller may see
      if not self.globalJobsInfo:
        validJobs, _invalidJobs, _nonauthJobs, _ownJobs = self.jobPolicy.evaluateJobRights(summaryJobList, RIGHT_GET_INFO)
        summaryJobList = validJobs

      result = gJobDB.getAttributesForJobList(summaryJobList, SUMMARY)
      if not result['OK']:
        return S_ERROR('Failed to get job summary: ' + result['Message'])
      summaryDict = result['Value']

      # Evaluate last sign of life time
      for jobID, jobDict in summaryDict.items():
        if jobDict['HeartBeatTime'] == 'None':
          jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']
        else:
          lastTime = Time.fromString(jobDict['LastUpdateTime'])
          hbTime = Time.fromString(jobDict['HeartBeatTime'])
          # NOTE(review): (lastTime - lastTime) is a zero timedelta, so this
          # effectively tests hbTime > lastTime — confirm that was intended.
          if (hbTime - lastTime) > (lastTime - lastTime) or jobDict['Status'] == "Stalled":
            jobDict['LastSignOfLife'] = jobDict['HeartBeatTime']
          else:
            jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']

      tqDict = {}
      result = gTaskQueueDB.getTaskQueueForJobs(summaryJobList)
      if result['OK']:
        tqDict = result['Value']

      # If no jobs can be selected after the properties check
      if not summaryDict.keys():
        return S_OK(resultDict)

      # prepare the standard structure now
      key = summaryDict.keys()[0]
      paramNames = summaryDict[key].keys()

      records = []
      for jobID, jobDict in summaryDict.items():
        jParList = []
        for pname in paramNames:
          jParList.append(jobDict[pname])
        jParList.append(tqDict.get(jobID, 0))
        records.append(jParList)

      resultDict['ParameterNames'] = paramNames + ['TaskQueueID']
      resultDict['Records'] = records

    return S_OK(resultDict)

  ##############################################################################
  types_getJobStats = [StringTypes, DictType]

  @staticmethod
  def export_getJobStats(attribute, selectDict):
    """ Get job statistics distribution per attribute value with a given selection
    """
    startDate = selectDict.get('FromDate', None)
    if startDate:
      del selectDict['FromDate']
    # For backward compatibility
    if startDate is None:
      startDate = selectDict.get('LastUpdate', None)
      if startDate:
        del selectDict['LastUpdate']
    endDate = selectDict.get('ToDate', None)
    if endDate:
      del selectDict['ToDate']

    result = gJobDB.getCounters('Jobs', [attribute], selectDict, newer=startDate, older=endDate, timeStamp='LastUpdateTime')
    resultDict = {}
    if result['OK']:
      for cDict, count in result['Value']:
        resultDict[cDict[attribute]] = count
    return S_OK(resultDict)

  ##############################################################################
  types_getJobsPrimarySummary = [ListType]

  @staticmethod
  def export_getJobsPrimarySummary(jobIDs):
    return gJobDB.getAttributesForJobList(jobIDs, PRIMARY_SUMMARY)

  ##############################################################################
  types_getJobParameter = [[StringType, IntType, LongType], StringTypes]

  @staticmethod
  def export_getJobParameter(jobID, parName):
    return gJobDB.getJobParameters(jobID, [parName])

  ##############################################################################
  types_getJobParameters = [[IntType, LongType]]

  @staticmethod
  def export_getJobParameters(jobID):
    return gJobDB.getJobParameters(jobID)

  ##############################################################################
  types_traceJobParameter = [
      StringTypes,
      [IntType, StringType, LongType, ListType],
      StringTypes,
      [StringType, NoneType],
      [StringType, NoneType]
  ]

  @staticmethod
  def export_traceJobParameter(site, localID, parameter, date, until):
    return gJobDB.traceJobParameter(site, localID, parameter, date, until)

  ##############################################################################
  types_traceJobParameters = [
      StringTypes,
      [IntType, StringType, LongType, ListType],
      [ListType, NoneType],
      [ListType, NoneType],
      [StringType, NoneType],
      [StringType, NoneType]
  ]

  @staticmethod
  def export_traceJobParameters(site, localID, parameterList, attributeList, date, until):
    return gJobDB.traceJobParameters(site, localID, parameterList, attributeList, date, until)

  ##############################################################################
  types_getAtticJobParameters = [[IntType, LongType]]

  @staticmethod
  def export_getAtticJobParameters(jobID, parameters=None, rescheduleCycle=-1):
    # None/empty means all parameters
    if not parameters:
      parameters = []
    return gJobDB.getAtticJobParameters(jobID, parameters, rescheduleCycle)

  ##############################################################################
  types_getJobAttributes = [IntType]

  @staticmethod
  def export_getJobAttributes(jobID):
    return gJobDB.getJobAttributes(jobID)

  ##############################################################################
  types_getJobAttribute = [IntType, StringTypes]

  @staticmethod
  def export_getJobAttribute(jobID, attribute):
    return gJobDB.getJobAttribute(jobID, attribute)

  ##############################################################################
  types_getSiteSummary = []

  @staticmethod
  def export_getSiteSummary():
    return gJobDB.getSiteSummary()

  ##############################################################################
  types_getJobHeartBeatData = [IntType]

  @staticmethod
  def export_getJobHeartBeatData(jobID):
    return gJobDB.getHeartBeatData(jobID)

  ##############################################################################
  types_getInputData = [[IntType, LongType]]

  @staticmethod
  # NOTE: this definition is truncated at the edge of this chunk and
  # continues beyond it.
  def export_getInputData(jobID):
    """ Get input data for the specified
jobs """ return gJobDB.getInputData(jobID) ############################################################################## types_getOwnerGroup = [] @staticmethod def export_getOwnerGroup(): """ Return Distinct Values of OwnerGroup from the JobsDB """ return gJobDB.getDistinctJobAttributes('OwnerGroup')
class JobManagerHandler(RequestHandler):
  """ RequestHandler implementation of the JobManager """

  @classmethod
  def initializeHandler(cls, serviceInfoDict):
    """ Service-level initialization: create the OptimizationMind message
        client, connect to it, and keep retrying the connection every minute.
    """
    cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
    cls.__connectToOptMind()
    gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
    return S_OK()

  @classmethod
  def __connectToOptMind(cls):
    """ (Re)connect the message client to the OptimizationMind service.
        Failures are only logged; the periodic task will retry.
    """
    if not cls.msgClient.connected:
      result = cls.msgClient.connect(JobManager=True)
      if not result['OK']:
        cls.log.warn("Cannot connect to OptimizationMind!", result['Message'])

  def initialize(self):
    """ Per-request initialization: cache the caller's credentials and build
        the JobPolicy object used for all authorization checks.
    """
    credDict = self.getRemoteCredentials()
    self.ownerDN = credDict['DN']
    self.ownerGroup = credDict['group']
    self.userProperties = credDict['properties']
    self.owner = credDict['username']
    self.peerUsesLimitedProxy = credDict['isLimitedProxy']
    self.diracSetup = self.serviceInfoDict['clientSetup']
    self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS)
    self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
    self.jobPolicy.jobDB = gJobDB
    return S_OK()

  def __sendJobsToOptimizationMind(self, jids):
    """ Best-effort notification of the OptimizationMind about new/changed
        jobs; errors are logged and swallowed (optimization will catch up).

        :param jids: iterable of job IDs
    """
    if not self.msgClient.connected:
      return
    result = self.msgClient.createMessage("OptimizeJobs")
    if not result['OK']:
      self.log.error("Cannot create Optimize message: %s" % result['Message'])
      return
    msgObj = result['Value']
    msgObj.jids = list(sorted(jids))
    result = self.msgClient.sendMessage(msgObj)
    if not result['OK']:
      self.log.error("Cannot send Optimize message: %s" % result['Message'])
      return
    self.log.info("Optimize msg sent for %s jobs" % len(jids))

  ###########################################################################
  types_submitJob = [basestring]

  def export_submitJob(self, jobDesc):
    """ Submit a job to DIRAC WMS.
        The job can be a single job, or a parametric job.
        If it is a parametric job, then the parameters will need to be unpacked.

        :param str jobDesc: job description JDL (of a single or parametric job)
        :return: S_OK/S_ERROR, a list of newly created job IDs in case of S_OK.
    """
    if self.peerUsesLimitedProxy:
      return S_ERROR(EWMSSUBM, "Can't submit using a limited proxy")

    # Check job submission permission
    result = self.jobPolicy.getJobPolicy()
    if not result['OK']:
      return S_ERROR(EWMSSUBM, 'Failed to get job policies')
    policyDict = result['Value']
    if not policyDict[RIGHT_SUBMIT]:
      return S_ERROR(EWMSSUBM, 'Job submission not authorized')

    # jobDesc is JDL for now; make sure it is bracketed.
    jobDesc = jobDesc.strip()
    if jobDesc[0] != "[":
      jobDesc = "[%s" % jobDesc
    if jobDesc[-1] != "]":
      jobDesc = "%s]" % jobDesc

    # Check if the job is a parametric one
    jobClassAd = ClassAd(jobDesc)
    result = getParameterVectorLength(jobClassAd)
    if not result['OK']:
      return result
    nJobs = result['Value']
    parametricJob = False
    if nJobs > 0:
      # if we are here, then jobDesc was the description of a parametric job. So we start unpacking
      parametricJob = True
      if nJobs > self.maxParametricJobs:
        return S_ERROR(EWMSJDL, "Number of parametric jobs exceeds the limit of %d" % self.maxParametricJobs)
      result = generateParametricJobs(jobClassAd)
      if not result['OK']:
        return result
      jobDescList = result['Value']
    else:
      # if we are here, then jobDesc was the description of a single job.
      jobDescList = [jobDesc]

    jobIDList = []
    # Parametric jobs are inserted in a holding state and only activated by a
    # subsequent confirmBulkSubmission call.
    if parametricJob:
      initialStatus = 'Submitting'
      initialMinorStatus = 'Bulk transaction confirmation'
    else:
      initialStatus = 'Received'
      initialMinorStatus = 'Job accepted'

    for jobDescription in jobDescList:  # jobDescList because there might be a list generated by a parametric job
      result = gJobDB.insertNewJobIntoDB(jobDescription,
                                         self.owner,
                                         self.ownerDN,
                                         self.ownerGroup,
                                         self.diracSetup,
                                         initialStatus=initialStatus,
                                         initialMinorStatus=initialMinorStatus)
      if not result['OK']:
        return result
      jobID = result['JobID']
      gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup))
      gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager')
      jobIDList.append(jobID)

    # Set persistency flag
    retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
    if 'Value' not in retVal or not retVal['Value']:
      gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

    if parametricJob:
      result = S_OK(jobIDList)
    else:
      result = S_OK(jobIDList[0])
    # Legacy extras expected by clients alongside the plain S_OK value.
    result['JobID'] = result['Value']
    result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
    return result

  ###########################################################################
  types_confirmBulkSubmission = [list]

  def export_confirmBulkSubmission(self, jobIDs):
    """ Confirm the possibility to proceed with processing of the jobs specified
        by the jobIDList

        :param jobIDList: list of job IDs
        :return: confirmed job IDs
    """
    jobList = self.__getJobList(jobIDs)
    if not jobList:
      return S_ERROR(EWMSSUBM, 'Invalid job specification: ' + str(jobIDs))
    validJobList, _invalidJobList, _nonauthJobList, _ownerJobList = \
        self.jobPolicy.evaluateJobRights(jobList, RIGHT_SUBMIT)
    # Check that all the requested jobs are eligible
    if set(jobList) != set(validJobList):
      return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')
    result = gJobDB.getAttributesForJobList(jobList, ['Status', 'MinorStatus'])
    if not result['OK']:
      return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')
    jobStatusDict = result['Value']
    # Check if the jobs are already activated (idempotent confirmation).
    jobEnabledList = [jobID for jobID in jobList
                      if jobStatusDict[jobID]['Status'] in ["Received", "Checking", "Waiting",
                                                            "Matched", "Running"]]
    if set(jobEnabledList) == set(jobList):
      return S_OK(jobList)
    # Check that requested job are in Submitting status
    jobUpdateStatusList = list(jobID for jobID in jobList
                               if jobStatusDict[jobID]['Status'] == "Submitting")
    if set(jobUpdateStatusList) != set(jobList):
      return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')
    # Update status of all the requested jobs in one transaction
    result = gJobDB.setJobAttributes(jobUpdateStatusList,
                                     ['Status', 'MinorStatus'],
                                     ['Received', 'Job accepted'])
    if not result['OK']:
      return result
    self.__sendJobsToOptimizationMind(jobUpdateStatusList)
    return S_OK(jobUpdateStatusList)

  ###########################################################################
  def __checkIfProxyUploadIsRequired(self):
    """ Return True when the caller has no long-enough (5h) proxy uploaded,
        or when the check itself fails (fail-safe: ask for an upload).
    """
    result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
    if not result['OK']:
      gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
      return True
    # Check if an upload is required
    return not result['Value']

  ###########################################################################
  def __getJobList(self, jobInput):
    """ Evaluate the jobInput into a list of ints

        :param jobInput: one or more job IDs in int or str form
        :type jobInput: str or int or list
        :return : a list of int job IDs
    """
    if isinstance(jobInput, int):
      return [jobInput]
    if isinstance(jobInput, basestring):
      try:
        ijob = int(jobInput)
        return [ijob]
      except BaseException:
        return []
    if isinstance(jobInput, list):
      try:
        ljob = [int(x) for x in jobInput]
        return ljob
      except BaseException:
        return []
    # Unrecognized input type: treated the same as an invalid specification.
    return []

  ###########################################################################
  types_rescheduleJob = []

  def export_rescheduleJob(self, jobIDs):
    """ Reschedule a single job. If the optional proxy parameter is given
        it will be used to refresh the proxy in the Proxy Repository

        :param jobIDList: list of job IDs
        :return: confirmed job IDs
    """
    jobList = self.__getJobList(jobIDs)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDs))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = \
        self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESCHEDULE)
    for jobID in validJobList:
      # Remove from the task queue first, then reschedule in the JobDB.
      gtaskQueueDB.deleteJob(jobID)
      # gJobDB.deleteJobFromQueue(jobID)
      result = gJobDB.rescheduleJob(jobID)
      gLogger.debug(str(result))
      if not result['OK']:
        return result
      gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                     application='Unknown', source='JobManager')
    if invalidJobList or nonauthJobList:
      result = S_ERROR('Some jobs failed reschedule')
      if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      return result
    result = S_OK(validJobList)
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    self.__sendJobsToOptimizationMind(validJobList)
    return result

  def __deleteJob(self, jobID):
    """ Delete one job: mark it Deleted in the JobDB, remove it from the task
        queue, and clean up pilots (and their logging) left without jobs.
    """
    result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
    if not result['OK']:
      return result
    result = gtaskQueueDB.deleteJob(jobID)
    if not result['OK']:
      gLogger.warn('Failed to delete job from the TaskQueue')
    # if it was the last job for the pilot, clear PilotsLogging about it
    result = gPilotAgentsDB.getPilotsForJobID(jobID)
    if not result['OK']:
      gLogger.error("Failed to get Pilots for JobID", result['Message'])
      return result
    for pilot in result['Value']:
      res = gPilotAgentsDB.getJobsForPilot(pilot['PilotID'])
      if not res['OK']:
        gLogger.error("Failed to get jobs for pilot", res['Message'])
        return res
      if not res['Value']:
        # if list of jobs for pilot is empty, delete pilot and pilotslogging
        result = gPilotAgentsDB.getPilotInfo(pilotID=pilot['PilotID'])
        if not result['OK']:
          gLogger.error("Failed to get pilot info", result['Message'])
          return result
        # NOTE(review): indexing the S_OK dict directly with [0] looks wrong —
        # presumably this should be result['Value'][0]; confirm against the
        # getPilotInfo return structure before relying on this path.
        pilotRef = result[0]['PilotJobReference']
        ret = gPilotAgentsDB.deletePilot(pilot['PilotID'])
        if not ret['OK']:
          gLogger.error("Failed to delete pilot from PilotAgentsDB", ret['Message'])
          return ret
        if enablePilotsLogging:
          ret = gPilotsLoggingDB.deletePilotsLogging(pilotRef)
          if not ret['OK']:
            gLogger.error("Failed to delete pilot logging from PilotAgentsDB", ret['Message'])
            return ret
    return S_OK()

  def __killJob(self, jobID, sendKillCommand=True):
    """  Kill one job

         :param jobID: job identifier
         :param bool sendKillCommand: when False only the status is updated
                (used for jobs that are not running and need no signal)
    """
    if sendKillCommand:
      result = gJobDB.setJobCommand(jobID, 'Kill')
      if not result['OK']:
        return result
    gLogger.info('Job %d is marked for termination' % jobID)
    result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
    if not result['OK']:
      gLogger.warn('Failed to set job Killed status', result['Message'])
    result = gtaskQueueDB.deleteJob(jobID)
    if not result['OK']:
      gLogger.warn('Failed to delete job from the TaskQueue', result['Message'])
    return S_OK()

  def __kill_delete_jobs(self, jobIDList, right):
    """  Kill or delete jobs as necessary

         :param jobIDList: job IDs (int/str/list, normalized via __getJobList)
         :param right: RIGHT_KILL or RIGHT_DELETE, decides the action per status
    """
    jobList = self.__getJobList(jobIDList)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDList))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = \
        self.jobPolicy.evaluateJobRights(jobList, right)
    # Get job status to see what is to be killed or deleted
    result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
    if not result['OK']:
      return result
    killJobList = []
    deleteJobList = []
    markKilledJobList = []
    stagingJobList = []
    for jobID, sDict in result['Value'].items():
      if sDict['Status'] in ['Running', 'Matched', 'Stalled']:
        # Active jobs need an actual kill command.
        killJobList.append(jobID)
      elif sDict['Status'] in ['Done', 'Failed', 'Killed']:
        # Finished jobs: delete on a delete request, just mark on a kill.
        if not right == RIGHT_KILL:
          deleteJobList.append(jobID)
        else:
          markKilledJobList.append(jobID)
      if sDict['Status'] in ['Staging']:
        stagingJobList.append(jobID)
    badIDs = []
    for jobID in markKilledJobList:
      result = self.__killJob(jobID, sendKillCommand=False)
      if not result['OK']:
        badIDs.append(jobID)
    for jobID in killJobList:
      result = self.__killJob(jobID)
      if not result['OK']:
        badIDs.append(jobID)
    for jobID in deleteJobList:
      result = self.__deleteJob(jobID)
      if not result['OK']:
        badIDs.append(jobID)
    if stagingJobList:
      # Jobs being staged also need their stager tasks killed.
      stagerClient = StorageManagerClient()
      gLogger.info('Going to send killing signal to stager as well!')
      result = stagerClient.killTasksBySourceTaskID(stagingJobList)
      if not result['OK']:
        gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message'])
    if nonauthJobList or badIDs:
      result = S_ERROR('Some jobs failed deletion')
      if nonauthJobList:
        gLogger.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList))
        result['NonauthorizedJobIDs'] = nonauthJobList
      if badIDs:
        gLogger.warn("JobIDs failed to be deleted", str(badIDs))
        result['FailedJobIDs'] = badIDs
      return result
    result = S_OK(validJobList)
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    if invalidJobList:
      result['InvalidJobIDs'] = invalidJobList
    return result

  ###########################################################################
  types_deleteJob = []

  def export_deleteJob(self, jobIDs):
    """ Delete jobs specified in the jobIDs list

        :param jobIDList: list of job IDs
        :return: S_OK/S_ERROR
    """
    return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

  ###########################################################################
  types_killJob = []

  def export_killJob(self, jobIDs):
    """ Kill jobs specified in the jobIDs list

        :param jobIDList: list of job IDs
        :return: S_OK/S_ERROR
    """
    return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

  ###########################################################################
  types_resetJob = []

  def export_resetJob(self, jobIDs):
    """ Reset jobs specified in the jobIDs list

        :param jobIDList: list of job IDs
        :return: S_OK/S_ERROR
    """
    jobList = self.__getJobList(jobIDs)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDs))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = \
        self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESET)
    badIDs = []
    good_ids = []
    for jobID in validJobList:
      # Reset the reschedule counter, then force a reschedule.
      result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1)
      if not result['OK']:
        badIDs.append(jobID)
      else:
        gtaskQueueDB.deleteJob(jobID)
        # gJobDB.deleteJobFromQueue(jobID)
        result = gJobDB.rescheduleJob(jobID)
        if not result['OK']:
          badIDs.append(jobID)
        else:
          good_ids.append(jobID)
        gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                       application='Unknown', source='JobManager')
    self.__sendJobsToOptimizationMind(good_ids)
    if invalidJobList or nonauthJobList or badIDs:
      result = S_ERROR('Some jobs failed resetting')
      if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      if badIDs:
        result['FailedJobIDs'] = badIDs
      return result
    result = S_OK()
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    return result
# NOTE(review): this is a SECOND definition of JobManagerHandler in the same
# module (an older revision left in place). At import time it shadows the
# class defined above — confirm which copy is intended and remove the other.
class JobManagerHandler(RequestHandler):
  """ Legacy RequestHandler implementation of the JobManager (duplicate). """

  @classmethod
  def initializeHandler(cls, serviceInfoDict):
    """ Service-level initialization: connect to the OptimizationMind and
        retry the connection every minute.
    """
    cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
    cls.__connectToOptMind()
    gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
    return S_OK()

  @classmethod
  def __connectToOptMind(cls):
    """ (Re)connect the message client; failures are only logged. """
    if not cls.msgClient.connected:
      result = cls.msgClient.connect(JobManager=True)
      if not result['OK']:
        cls.log.warn("Cannot connect to OptimizationMind!", result['Message'])

  def initialize(self):
    """ Per-request initialization: cache caller credentials and build the
        JobPolicy used for authorization checks.
    """
    credDict = self.getRemoteCredentials()
    self.ownerDN = credDict['DN']
    self.ownerGroup = credDict['group']
    self.userProperties = credDict['properties']
    self.owner = credDict['username']
    self.peerUsesLimitedProxy = credDict['isLimitedProxy']
    self.diracSetup = self.serviceInfoDict['clientSetup']
    self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS)
    self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
    self.jobPolicy.setJobDB(gJobDB)
    return S_OK()

  def __sendNewJobsToMind(self, jids):
    """ Best-effort notification of the OptimizationMind about new jobs;
        errors are logged and swallowed.
    """
    if not self.msgClient.connected:
      return
    result = self.msgClient.createMessage("OptimizeJobs")
    if not result['OK']:
      self.log.error("Cannot create Optimize message: %s" % result['Message'])
      return
    msgObj = result['Value']
    msgObj.jids = jids
    result = self.msgClient.sendMessage(msgObj)
    if not result['OK']:
      self.log.error("Cannot send Optimize message: %s" % result['Message'])
      return
    self.log.info("Optimize msg sent for %s jobs" % len(jids))

  ###########################################################################
  types_submitJob = [StringType]

  def export_submitJob(self, jobDesc):
    """ Submit a single job to DIRAC WMS """
    if self.peerUsesLimitedProxy:
      return S_ERROR("Can't submit using a limited proxy! (bad boy!)")

    # Check job submission permission
    result = self.jobPolicy.getJobPolicy()
    if not result['OK']:
      return S_ERROR('Failed to get job policies')
    policyDict = result['Value']
    if not policyDict[RIGHT_SUBMIT]:
      return S_ERROR('Job submission not authorized')

    # jobDesc is JDL for now; make sure it is bracketed.
    jobDesc = jobDesc.strip()
    if jobDesc[0] != "[":
      jobDesc = "[%s" % jobDesc
    if jobDesc[-1] != "]":
      jobDesc = "%s]" % jobDesc

    # Check if the job is a parameteric one: the legacy inline expansion of
    # the Parameters/ParameterStart/ParameterStep/ParameterFactor JDL fields.
    jobClassAd = ClassAd(jobDesc)
    parametricJob = False
    if jobClassAd.lookupAttribute('Parameters'):
      parametricJob = True
      if jobClassAd.isAttributeList('Parameters'):
        # Explicit list of parameter values.
        parameterList = jobClassAd.getListFromExpression('Parameters')
      else:
        # Numeric progression: p[i+1] = p[i]*factor + step, starting at pStart.
        pStep = 0
        pFactor = 1
        pStart = 1
        nParameters = jobClassAd.getAttributeInt('Parameters')
        if not nParameters:
          value = jobClassAd.get_expression('Parameters')
          return S_ERROR('Illegal value for Parameters JDL field: %s' % value)
        if jobClassAd.lookupAttribute('ParameterStart'):
          value = jobClassAd.get_expression('ParameterStart').replace('"', '')
          # NOTE(review): bare except clauses below swallow everything,
          # including KeyboardInterrupt; should be `except ValueError:`.
          try:
            pStart = int(value)
          except:
            try:
              pStart = float(value)
            except:
              return S_ERROR('Illegal value for ParameterStart JDL field: %s' % value)
        if jobClassAd.lookupAttribute('ParameterStep'):
          pStep = jobClassAd.getAttributeInt('ParameterStep')
          if not pStep:
            pStep = jobClassAd.getAttributeFloat('ParameterStep')
            if not pStep:
              value = jobClassAd.get_expression('ParameterStep')
              return S_ERROR('Illegal value for ParameterStep JDL field: %s' % value)
        if jobClassAd.lookupAttribute('ParameterFactor'):
          pFactor = jobClassAd.getAttributeInt('ParameterFactor')
          if not pFactor:
            pFactor = jobClassAd.getAttributeFloat('ParameterFactor')
            if not pFactor:
              value = jobClassAd.get_expression('ParameterFactor')
              return S_ERROR('Illegal value for ParameterFactor JDL field: %s' % value)
        parameterList = list()
        parameterList.append(pStart)
        for i in range(nParameters - 1):
          parameterList.append(parameterList[i] * pFactor + pStep)
      if len(parameterList) > self.maxParametricJobs:
        return S_ERROR('The number of parametric jobs exceeded the limit of %d' % self.maxParametricJobs)
      # Expand one JDL per parameter value, substituting %s / zero-padded %n.
      jobDescList = []
      nParam = len(parameterList) - 1
      for n, p in enumerate(parameterList):
        newJobDesc = jobDesc.replace('%s', str(p)).replace('%n', str(n).zfill(len(str(nParam))))
        newClassAd = ClassAd(newJobDesc)
        for attr in ['Parameters', 'ParameterStep', 'ParameterFactor']:
          newClassAd.deleteAttribute(attr)
        if type(p) == type(' ') and p.startswith('{'):
          newClassAd.insertAttributeInt('Parameter', str(p))
        else:
          newClassAd.insertAttributeString('Parameter', str(p))
        newClassAd.insertAttributeInt('ParameterNumber', n)
        newJDL = newClassAd.asJDL()
        jobDescList.append(newJDL)
    else:
      jobDescList = [jobDesc]

    jobIDList = []
    for jobDescription in jobDescList:
      result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN,
                                         self.ownerGroup, self.diracSetup)
      if not result['OK']:
        return result
      jobID = result['JobID']
      gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup))
      gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager')
      jobIDList.append(jobID)

    # Set persistency flag
    retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
    if 'Value' not in retVal or not retVal['Value']:
      gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

    if parametricJob:
      result = S_OK(jobIDList)
    else:
      result = S_OK(jobIDList[0])
    # Legacy extras expected by clients alongside the plain S_OK value.
    result['JobID'] = result['Value']
    result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
    self.__sendNewJobsToMind(jobIDList)
    return result

  ###########################################################################
  def __checkIfProxyUploadIsRequired(self):
    """ Return True when the caller has no long-enough (5h) proxy uploaded,
        or when the check itself fails (fail-safe: ask for an upload).
    """
    result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
    if not result['OK']:
      gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
      return True
    # Check if an upload is required
    # NOTE(review): `== False` comparison; the newer copy of this class uses
    # `not result['Value']` — equivalent only if 'Value' is a strict bool.
    return result['Value'] == False

  ###########################################################################
  types_invalidateJob = [IntType]

  def invalidateJob(self, jobID):
    """ Make job with jobID invalid, e.g. because of the sandbox submission
        errors.
    """
    # Intentionally not implemented (placeholder; also not exported).
    pass

  ###########################################################################
  def __get_job_list(self, jobInput):
    """ Evaluate the jobInput into a list of ints

        :param jobInput: one or more job IDs in int or str form
        :return: a list of int job IDs (empty on any parse failure)
    """
    if type(jobInput) == IntType:
      return [jobInput]
    if type(jobInput) == StringType:
      try:
        ijob = int(jobInput)
        return [ijob]
      except:
        return []
    if type(jobInput) == ListType:
      try:
        ljob = [int(x) for x in jobInput]
        return ljob
      except:
        return []
    return []

  ###########################################################################
  types_rescheduleJob = []

  def export_rescheduleJob(self, jobIDs):
    """ Reschedule a single job. If the optional proxy parameter is given
        it will be used to refresh the proxy in the Proxy Repository
    """
    jobList = self.__get_job_list(jobIDs)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDs))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = \
        self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESCHEDULE)
    for jobID in validJobList:
      # Remove from the task queue first, then reschedule in the JobDB.
      gtaskQueueDB.deleteJob(jobID)
      # gJobDB.deleteJobFromQueue(jobID)
      result = gJobDB.rescheduleJob(jobID)
      gLogger.debug(str(result))
      if not result['OK']:
        return result
      gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                     application='Unknown', source='JobManager')
    if invalidJobList or nonauthJobList:
      # NOTE(review): copy-pasted message — says "deletion" in the reschedule
      # path (the newer copy of this class says "Some jobs failed reschedule").
      result = S_ERROR('Some jobs failed deletion')
      if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      return result
    result = S_OK(validJobList)
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    self.__sendNewJobsToMind(validJobList)
    return result

  def __deleteJob(self, jobID):
    """ Delete one job: mark it Deleted in the JobDB and drop it from the
        task queue (no pilot cleanup in this legacy copy).
    """
    result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
    if not result['OK']:
      return result
    result = gtaskQueueDB.deleteJob(jobID)
    if not result['OK']:
      gLogger.warn('Failed to delete job from the TaskQueue')
    return S_OK()

  def __killJob(self, jobID):
    """ Kill one job: send the Kill command, mark it Killed, and drop it from
        the task queue. Status/queue failures are logged but not fatal.
    """
    result = gJobDB.setJobCommand(jobID, 'Kill')
    if not result['OK']:
      return result
    else:
      gLogger.info('Job %d is marked for termination' % jobID)
    result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
    if not result['OK']:
      gLogger.warn('Failed to set job Killed status')
    result = gtaskQueueDB.deleteJob(jobID)
    if not result['OK']:
      gLogger.warn('Failed to delete job from the TaskQueue')
    return S_OK()

  def __kill_delete_jobs(self, jobIDList, right):
    """ Kill or delete jobs as necessary

        :param jobIDList: job IDs (int/str/list, normalized via __get_job_list)
        :param right: RIGHT_KILL or RIGHT_DELETE
    """
    jobList = self.__get_job_list(jobIDList)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDList))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = \
        self.jobPolicy.evaluateJobRights(jobList, right)
    # Get job status to see what is to be killed or deleted
    result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
    if not result['OK']:
      return result
    killJobList = []
    deleteJobList = []
    stagingJobList = []
    for jobID, sDict in result['Value'].items():
      if sDict['Status'] in ['Running', 'Matched', 'Stalled']:
        killJobList.append(jobID)
      elif sDict['Status'] in ['Done', 'Failed']:
        # NOTE(review): both arms append to deleteJobList, so finished jobs
        # are always deleted regardless of `right` (the newer copy of this
        # class instead only marks them Killed on a kill request).
        if not right == RIGHT_KILL:
          deleteJobList.append(jobID)
        else:
          deleteJobList.append(jobID)
      if sDict['Status'] in ['Staging']:
        stagingJobList.append(jobID)
    bad_ids = []
    for jobID in killJobList:
      result = self.__killJob(jobID)
      if not result['OK']:
        bad_ids.append(jobID)
    for jobID in deleteJobList:
      result = self.__deleteJob(jobID)
      if not result['OK']:
        bad_ids.append(jobID)
    if stagingJobList:
      # Jobs being staged also need their stager tasks killed.
      stagerClient = StorageManagerClient()
      gLogger.info('Going to send killing signal to stager as well!')
      result = stagerClient.killTasksBySourceTaskID(stagingJobList)
      if not result['OK']:
        gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message'])
    if invalidJobList or nonauthJobList or bad_ids:
      result = S_ERROR('Some jobs failed deletion')
      if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      if bad_ids:
        result['FailedJobIDs'] = bad_ids
      return result
    result = S_OK(validJobList)
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    return result

  ###########################################################################
  types_deleteJob = []

  def export_deleteJob(self, jobIDs):
    """ Delete jobs specified in the jobIDs list """
    return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

  ###########################################################################
  types_killJob = []

  def export_killJob(self, jobIDs):
    """ Kill jobs specified in the jobIDs list """
    return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

  ###########################################################################
  types_resetJob = []

  def export_resetJob(self, jobIDs):
    """ Reset jobs specified in the jobIDs list """
    jobList = self.__get_job_list(jobIDs)
    if not jobList:
      return S_ERROR('Invalid job specification: ' + str(jobIDs))
    validJobList, invalidJobList, nonauthJobList, ownerJobList = \
        self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESET)
    bad_ids = []
    good_ids = []
    for jobID in validJobList:
      # Reset the reschedule counter, then force a reschedule.
      result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1)
      if not result['OK']:
        bad_ids.append(jobID)
      else:
        gtaskQueueDB.deleteJob(jobID)
        # gJobDB.deleteJobFromQueue(jobID)
        result = gJobDB.rescheduleJob(jobID)
        if not result['OK']:
          bad_ids.append(jobID)
        else:
          good_ids.append(jobID)
        gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                       application='Unknown', source='JobManager')
    self.__sendNewJobsToMind(good_ids)
    if invalidJobList or nonauthJobList or bad_ids:
      result = S_ERROR('Some jobs failed resetting')
      if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList
      if nonauthJobList:
        result['NonauthorizedJobIDs'] = nonauthJobList
      if bad_ids:
        result['FailedJobIDs'] = bad_ids
      return result
    result = S_OK()
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    return result
class JobManagerHandler(RequestHandler):
    """DISET request handler for the WMS JobManager service.

    Exposes job submission, rescheduling, kill/delete and reset operations.
    Newly submitted or rescheduled jobs are forwarded to the
    WorkloadManagement/OptimizationMind via its message client.
    """

    @classmethod
    def initializeHandler(cls, serviceInfoDict):
        """One-time service initialization.

        Creates the OptimizationMind message client, attempts an initial
        connection and schedules a reconnection attempt every 60 seconds.

        :param dict serviceInfoDict: service information dictionary (unused here)
        :return: S_OK
        """
        cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
        cls.__connectToOptMind()
        gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
        return S_OK()

    @classmethod
    def __connectToOptMind(cls):
        """(Re)connect the message client to the OptimizationMind if not connected.

        Failure is only logged: the service keeps working, jobs simply are not
        pushed to the optimizer until the periodic task reconnects.
        """
        if not cls.msgClient.connected:
            result = cls.msgClient.connect(JobManager=True)
            if not result["OK"]:
                cls.log.warn("Cannot connect to OptimizationMind!", result["Message"])

    def initialize(self):
        """Per-request initialization: cache credentials and build the JobPolicy.

        :return: S_OK
        """
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict["DN"]
        self.ownerGroup = credDict["group"]
        self.userProperties = credDict["properties"]
        self.owner = credDict["username"]
        self.peerUsesLimitedProxy = credDict["isLimitedProxy"]
        self.diracSetup = self.serviceInfoDict["clientSetup"]
        self.maxParametricJobs = self.srv_getCSOption("MaxParametricJobs", MAX_PARAMETRIC_JOBS)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
        self.jobPolicy.setJobDB(gJobDB)
        return S_OK()

    def __sendJobsToOptimizationMind(self, jids):
        """Best-effort push of job IDs to the OptimizationMind.

        Silently returns when not connected; errors are logged, never raised.

        :param jids: iterable of job IDs
        """
        if not self.msgClient.connected:
            return
        result = self.msgClient.createMessage("OptimizeJobs")
        if not result["OK"]:
            self.log.error("Cannot create Optimize message: %s" % result["Message"])
            return
        msgObj = result["Value"]
        msgObj.jids = list(sorted(jids))
        result = self.msgClient.sendMessage(msgObj)
        if not result["OK"]:
            self.log.error("Cannot send Optimize message: %s" % result["Message"])
            return
        self.log.info("Optimize msg sent for %s jobs" % len(jids))

    ###########################################################################
    types_submitJob = [StringTypes]

    def export_submitJob(self, jobDesc):
        """ Submit a single job to DIRAC WMS

            :param str jobDesc: job description JDL
            :return: S_OK with the new job ID(s) / S_ERROR
        """
        if self.peerUsesLimitedProxy:
            return S_ERROR("Can't submit using a limited proxy! (bad boy!)")

        # Check job submission permission
        result = self.jobPolicy.getJobPolicy()
        if not result["OK"]:
            return S_ERROR("Failed to get job policies")
        policyDict = result["Value"]
        if not policyDict[RIGHT_SUBMIT]:
            return S_ERROR("Job submission not authorized")

        # jobDesc is JDL for now: make sure it is bracket-enclosed
        jobDesc = jobDesc.strip()
        if jobDesc[0] != "[":
            jobDesc = "[%s" % jobDesc
        if jobDesc[-1] != "]":
            jobDesc = "%s]" % jobDesc

        # Check if the job is a parametric one; if so expand it into
        # one description per parameter value
        jobClassAd = ClassAd(jobDesc)
        nParameters = getNumberOfParameters(jobClassAd)
        parametricJob = False
        if nParameters > 0:
            parametricJob = True
            result = generateParametricJobs(jobClassAd)
            if not result["OK"]:
                return result
            jobDescList = result["Value"]
        else:
            jobDescList = [jobDesc]

        jobIDList = []
        for jobDescription in jobDescList:
            result = gJobDB.insertNewJobIntoDB(
                jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup
            )
            if not result["OK"]:
                return result
            jobID = result["JobID"]
            gLogger.info("Job %s added to the JobDB for %s/%s" % (jobID, self.ownerDN, self.ownerGroup))
            gJobLoggingDB.addLoggingRecord(jobID, result["Status"], result["MinorStatus"], source="JobManager")
            jobIDList.append(jobID)

        # Set persistency flag so the proxy stays available for the jobs
        retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
        if "Value" not in retVal or not retVal["Value"]:
            gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

        # A parametric job returns the full ID list, a single job just its ID
        if parametricJob:
            result = S_OK(jobIDList)
        else:
            result = S_OK(jobIDList[0])
        result["JobID"] = result["Value"]
        result["requireProxyUpload"] = self.__checkIfProxyUploadIsRequired()
        self.__sendJobsToOptimizationMind(jobIDList)
        return result

    ###########################################################################
    def __checkIfProxyUploadIsRequired(self):
        """Check whether the owner needs to upload a proxy.

        :return: bool -- True when no valid proxy (>= 5h) is in the repository,
                 or when the check itself fails (be safe and ask for an upload)
        """
        result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
        if not result["OK"]:
            gLogger.error("Can't check if the user has proxy uploaded:", result["Message"])
            return True
        # An upload is required exactly when the user has no proxy
        # (was `== False`; `not` matches the newer handler variant)
        return not result["Value"]

    ###########################################################################
    types_invalidateJob = [IntType]

    def invalidateJob(self, jobID):
        """ Make job with jobID invalid, e.g. because of the sandbox submission
            errors.
        """
        # NOTE(review): intentionally a no-op in this variant
        pass

    ###########################################################################
    def __get_job_list(self, jobInput):
        """ Evaluate the jobInput into a list of ints

            :param jobInput: one job ID or a list of job IDs, as int or str
            :return: list of int job IDs (empty list on unparsable input)
        """
        if isinstance(jobInput, int):
            return [jobInput]
        if isinstance(jobInput, basestring):
            try:
                ijob = int(jobInput)
                return [ijob]
            except ValueError:  # was a bare except: only int() conversion can fail here
                return []
        if isinstance(jobInput, list):
            try:
                ljob = [int(x) for x in jobInput]
                return ljob
            except (ValueError, TypeError):  # was a bare except: non-numeric or non-castable entries
                return []
        return []

    ###########################################################################
    types_rescheduleJob = []

    def export_rescheduleJob(self, jobIDs):
        """ Reschedule a single job. If the optional proxy parameter is given
            it will be used to refresh the proxy in the Proxy Repository

            :param jobIDs: one or more job IDs
            :return: S_OK with the rescheduled job IDs / S_ERROR
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR("Invalid job specification: " + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
            jobList, RIGHT_RESCHEDULE
        )
        for jobID in validJobList:
            # Drop the job from the task queue before rescheduling it
            gtaskQueueDB.deleteJob(jobID)
            # gJobDB.deleteJobFromQueue(jobID)
            result = gJobDB.rescheduleJob(jobID)
            gLogger.debug(str(result))
            if not result["OK"]:
                return result
            gJobLoggingDB.addLoggingRecord(
                result["JobID"], result["Status"], result["MinorStatus"], application="Unknown", source="JobManager"
            )

        if invalidJobList or nonauthJobList:
            result = S_ERROR("Some jobs failed reschedule")
            if invalidJobList:
                result["InvalidJobIDs"] = invalidJobList
            if nonauthJobList:
                result["NonauthorizedJobIDs"] = nonauthJobList
            return result

        result = S_OK(validJobList)
        result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        self.__sendJobsToOptimizationMind(validJobList)
        return result

    def __deleteJob(self, jobID):
        """ Delete one job: mark it Deleted and drop it from the task queue.

            :param int jobID: job ID
            :return: S_OK / S_ERROR (task-queue failure is only a warning)
        """
        result = gJobDB.setJobStatus(jobID, "Deleted", "Checking accounting")
        if not result["OK"]:
            return result
        result = gtaskQueueDB.deleteJob(jobID)
        if not result["OK"]:
            gLogger.warn("Failed to delete job from the TaskQueue")
        return S_OK()

    def __killJob(self, jobID, sendKillCommand=True):
        """ Kill one job

            :param int jobID: job ID
            :param bool sendKillCommand: when False, only mark the status
                (used for jobs that no longer run anywhere)
            :return: S_OK / S_ERROR
        """
        if sendKillCommand:
            result = gJobDB.setJobCommand(jobID, "Kill")
            if not result["OK"]:
                return result
        gLogger.info("Job %d is marked for termination" % jobID)
        result = gJobDB.setJobStatus(jobID, "Killed", "Marked for termination")
        if not result["OK"]:
            # Include the reason (the newer handler variant does the same)
            gLogger.warn("Failed to set job Killed status", result["Message"])
        result = gtaskQueueDB.deleteJob(jobID)
        if not result["OK"]:
            gLogger.warn("Failed to delete job from the TaskQueue", result["Message"])
        return S_OK()

    def __kill_delete_jobs(self, jobIDList, right):
        """ Kill or delete jobs as necessary

            :param jobIDList: one or more job IDs
            :param right: RIGHT_KILL or RIGHT_DELETE
            :return: S_OK with valid job IDs / S_ERROR with failure details
        """
        jobList = self.__get_job_list(jobIDList)
        if not jobList:
            return S_ERROR("Invalid job specification: " + str(jobIDList))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right)

        # Get job status to see what is to be killed or deleted
        result = gJobDB.getAttributesForJobList(validJobList, ["Status"])
        if not result["OK"]:
            return result

        killJobList = []
        deleteJobList = []
        markKilledJobList = []
        stagingJobList = []
        for jobID, sDict in result["Value"].items():
            # Actively running jobs need a real kill command
            if sDict["Status"] in ["Running", "Matched", "Stalled"]:
                killJobList.append(jobID)
            elif sDict["Status"] in ["Done", "Failed"]:
                # Finished jobs: nothing to kill; delete only when deleting
                if not right == RIGHT_KILL:
                    deleteJobList.append(jobID)
            else:
                # Any other state: just mark Killed, no kill command needed
                markKilledJobList.append(jobID)
            if sDict["Status"] in ["Staging"]:
                stagingJobList.append(jobID)

        bad_ids = []
        for jobID in markKilledJobList:
            result = self.__killJob(jobID, sendKillCommand=False)
            if not result["OK"]:
                bad_ids.append(jobID)

        for jobID in killJobList:
            result = self.__killJob(jobID)
            if not result["OK"]:
                bad_ids.append(jobID)

        for jobID in deleteJobList:
            result = self.__deleteJob(jobID)
            if not result["OK"]:
                bad_ids.append(jobID)

        if stagingJobList:
            # Staging jobs also have stager tasks that must be stopped
            stagerClient = StorageManagerClient()
            gLogger.info("Going to send killing signal to stager as well!")
            result = stagerClient.killTasksBySourceTaskID(stagingJobList)
            if not result["OK"]:
                gLogger.warn("Failed to kill some Stager tasks: %s" % result["Message"])

        if nonauthJobList or bad_ids:
            result = S_ERROR("Some jobs failed deletion")
            if nonauthJobList:
                result["NonauthorizedJobIDs"] = nonauthJobList
            if bad_ids:
                result["FailedJobIDs"] = bad_ids
            return result

        result = S_OK(validJobList)
        result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        if invalidJobList:
            result["InvalidJobIDs"] = invalidJobList
        return result

    ###########################################################################
    types_deleteJob = []

    def export_deleteJob(self, jobIDs):
        """ Delete jobs specified in the jobIDs list

            :param jobIDs: one or more job IDs
            :return: S_OK / S_ERROR
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

    ###########################################################################
    types_killJob = []

    def export_killJob(self, jobIDs):
        """ Kill jobs specified in the jobIDs list

            :param jobIDs: one or more job IDs
            :return: S_OK / S_ERROR
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

    ###########################################################################
    types_resetJob = []

    def export_resetJob(self, jobIDs):
        """ Reset jobs specified in the jobIDs list

            :param jobIDs: one or more job IDs
            :return: S_OK / S_ERROR
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR("Invalid job specification: " + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
            jobList, RIGHT_RESET
        )

        bad_ids = []
        good_ids = []
        for jobID in validJobList:
            # Reset the reschedule counter so the job gets a fresh allowance
            result = gJobDB.setJobAttribute(jobID, "RescheduleCounter", -1)
            if not result["OK"]:
                bad_ids.append(jobID)
            else:
                gtaskQueueDB.deleteJob(jobID)
                # gJobDB.deleteJobFromQueue(jobID)
                result = gJobDB.rescheduleJob(jobID)
                if not result["OK"]:
                    bad_ids.append(jobID)
                else:
                    good_ids.append(jobID)
                gJobLoggingDB.addLoggingRecord(
                    result["JobID"], result["Status"], result["MinorStatus"], application="Unknown", source="JobManager"
                )

        self.__sendJobsToOptimizationMind(good_ids)

        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR("Some jobs failed resetting")
            if invalidJobList:
                result["InvalidJobIDs"] = invalidJobList
            if nonauthJobList:
                result["NonauthorizedJobIDs"] = nonauthJobList
            if bad_ids:
                result["FailedJobIDs"] = bad_ids
            return result

        result = S_OK()
        result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result
class JobManagerHandler(RequestHandler):
    """ RequestHandler implementation of the JobManager

        Handles job submission (single, parametric/bulk), rescheduling,
        kill/delete and reset, and forwards eligible jobs to the
        WorkloadManagement/OptimizationMind.
    """

    @classmethod
    def initializeHandler(cls, serviceInfoDict):
        """One-time service initialization.

        Creates the OptimizationMind message client, attempts an initial
        connection and schedules a reconnection attempt every 60 seconds.

        :param dict serviceInfoDict: service information dictionary (unused here)
        :return: S_OK
        """
        cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
        cls.__connectToOptMind()
        gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
        return S_OK()

    @classmethod
    def __connectToOptMind(cls):
        """(Re)connect the message client to the OptimizationMind if not connected."""
        if not cls.msgClient.connected:
            result = cls.msgClient.connect(JobManager=True)
            if not result['OK']:
                cls.log.warn("Cannot connect to OptimizationMind!", result['Message'])

    def initialize(self):
        """Per-request initialization: cache credentials and build the JobPolicy.

        :return: S_OK
        """
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict['DN']
        self.ownerGroup = credDict['group']
        self.userProperties = credDict['properties']
        self.owner = credDict['username']
        self.peerUsesLimitedProxy = credDict['isLimitedProxy']
        self.diracSetup = self.serviceInfoDict['clientSetup']
        self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
        self.jobPolicy.jobDB = gJobDB
        return S_OK()

    def __sendJobsToOptimizationMind(self, jids):
        """Best-effort push of job IDs to the OptimizationMind.

        Silently returns when not connected; errors are logged, never raised.

        :param jids: iterable of job IDs
        """
        if not self.msgClient.connected:
            return
        result = self.msgClient.createMessage("OptimizeJobs")
        if not result['OK']:
            self.log.error("Cannot create Optimize message: %s" % result['Message'])
            return
        msgObj = result['Value']
        msgObj.jids = list(sorted(jids))
        result = self.msgClient.sendMessage(msgObj)
        if not result['OK']:
            self.log.error("Cannot send Optimize message: %s" % result['Message'])
            return
        self.log.info("Optimize msg sent for %s jobs" % len(jids))

    ###########################################################################
    types_getMaxParametricJobs = []

    def export_getMaxParametricJobs(self):
        """ Get the maximum number of parametric jobs

            :return: S_OK(int)
        """
        return S_OK(self.maxParametricJobs)

    types_submitJob = [basestring]

    def export_submitJob(self, jobDesc):
        """ Submit a job to DIRAC WMS.
            The job can be a single job, or a parametric job.
            If it is a parametric job, then the parameters will need to be unpacked.

            :param str jobDesc: job description JDL (of a single or parametric job)
            :return: S_OK/S_ERROR, a list of newly created job IDs in case of S_OK.
        """
        if self.peerUsesLimitedProxy:
            return S_ERROR(EWMSSUBM, "Can't submit using a limited proxy")

        # Check job submission permission
        result = self.jobPolicy.getJobPolicy()
        if not result['OK']:
            return S_ERROR(EWMSSUBM, 'Failed to get job policies')
        policyDict = result['Value']
        if not policyDict[RIGHT_SUBMIT]:
            return S_ERROR(EWMSSUBM, 'Job submission not authorized')

        # jobDesc is JDL for now: make sure it is bracket-enclosed
        jobDesc = jobDesc.strip()
        if jobDesc[0] != "[":
            jobDesc = "[%s" % jobDesc
        if jobDesc[-1] != "]":
            jobDesc = "%s]" % jobDesc

        # Check if the job is a parametric one
        jobClassAd = ClassAd(jobDesc)
        result = getParameterVectorLength(jobClassAd)
        if not result['OK']:
            gLogger.error("Issue with getParameterVectorLength:", result['Message'])
            return result
        nJobs = result['Value']
        parametricJob = False
        # Guard against nJobs being None (non-parametric job); the final
        # handler version uses the same explicit check
        if nJobs is not None and nJobs > 0:
            # jobDesc was the description of a parametric job: unpack it
            parametricJob = True
            if nJobs > self.maxParametricJobs:
                gLogger.error("Maximum of parametric jobs exceeded:",
                              "limit %d smaller than number of jobs %d" % (self.maxParametricJobs, nJobs))
                return S_ERROR(EWMSJDL, "Number of parametric jobs exceeds the limit of %d" % self.maxParametricJobs)
            result = generateParametricJobs(jobClassAd)
            if not result['OK']:
                return result
            jobDescList = result['Value']
        else:
            # jobDesc was the description of a single job
            jobDescList = [jobDesc]

        jobIDList = []

        # Parametric (bulk) jobs wait in Submitting until the bulk transaction
        # is confirmed; single jobs are accepted immediately
        if parametricJob:
            initialStatus = 'Submitting'
            initialMinorStatus = 'Bulk transaction confirmation'
        else:
            initialStatus = 'Received'
            initialMinorStatus = 'Job accepted'

        for jobDescription in jobDescList:
            # jobDescList because there might be a list generated by a parametric job
            result = gJobDB.insertNewJobIntoDB(jobDescription,
                                               self.owner,
                                               self.ownerDN,
                                               self.ownerGroup,
                                               self.diracSetup,
                                               initialStatus=initialStatus,
                                               initialMinorStatus=initialMinorStatus)
            if not result['OK']:
                return result
            jobID = result['JobID']
            gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup))
            gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager')
            jobIDList.append(jobID)

        # Set persistency flag so the proxy stays available for the jobs
        retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
        if 'Value' not in retVal or not retVal['Value']:
            gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

        if parametricJob:
            result = S_OK(jobIDList)
        else:
            result = S_OK(jobIDList[0])

        result['JobID'] = result['Value']
        result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
        # Ensure non-parametric jobs (i.e. non-bulk) get sent to optimizer immediately;
        # bulk jobs go only after confirmBulkSubmission
        if not parametricJob:
            self.__sendJobsToOptimizationMind(jobIDList)
        return result

    ###########################################################################
    types_confirmBulkSubmission = [list]

    def export_confirmBulkSubmission(self, jobIDs):
        """ Confirm the possibility to proceed with processing of the jobs specified
            by the jobIDList

            :param jobIDs: list of job IDs
            :return: S_OK with confirmed job IDs / S_ERROR
        """
        jobList = self.__getJobList(jobIDs)
        if not jobList:
            gLogger.error("Issue with __getJobList", ": invalid job specification %s" % str(jobIDs))
            return S_ERROR(EWMSSUBM, 'Invalid job specification: ' + str(jobIDs))

        validJobList, _invalidJobList, _nonauthJobList, _ownerJobList = self.jobPolicy.evaluateJobRights(jobList,
                                                                                                        RIGHT_SUBMIT)

        # Check that all the requested jobs are eligible
        if set(jobList) != set(validJobList):
            return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')

        result = gJobDB.getAttributesForJobList(jobList, ['Status', 'MinorStatus'])
        if not result['OK']:
            return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')
        jobStatusDict = result['Value']

        # Check if the jobs are already activated
        jobEnabledList = [jobID for jobID in jobList
                          if jobStatusDict[jobID]['Status'] in ["Received", "Checking", "Waiting",
                                                                "Matched", "Running"]]
        if set(jobEnabledList) == set(jobList):
            return S_OK(jobList)

        # Check that requested job are in Submitting status
        jobUpdateStatusList = list(jobID for jobID in jobList if jobStatusDict[jobID]['Status'] == "Submitting")
        if set(jobUpdateStatusList) != set(jobList):
            return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')

        # Update status of all the requested jobs in one transaction
        result = gJobDB.setJobAttributes(jobUpdateStatusList,
                                         ['Status', 'MinorStatus'],
                                         ['Received', 'Job accepted'])
        if not result['OK']:
            return result

        self.__sendJobsToOptimizationMind(jobUpdateStatusList)
        return S_OK(jobUpdateStatusList)

    ###########################################################################
    def __checkIfProxyUploadIsRequired(self):
        """Check whether the owner needs to upload a proxy.

        :return: bool -- True when no valid proxy (>= 5h) is in the repository,
                 or when the check itself fails (be safe and ask for an upload)
        """
        result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
        if not result['OK']:
            gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
            return True
        # Check if an upload is required
        return not result['Value']

    ###########################################################################
    @staticmethod
    def __getJobList(jobInput):
        """ Evaluate the jobInput into a list of ints

            :param jobInput: one or more job IDs in int or str form
            :type jobInput: str or int or list
            :return : a list of int job IDs (empty list on unparsable input)
        """
        if isinstance(jobInput, int):
            return [jobInput]
        if isinstance(jobInput, basestring):
            try:
                ijob = int(jobInput)
                return [ijob]
            except ValueError:  # was BaseException: that also swallowed KeyboardInterrupt/SystemExit
                return []
        if isinstance(jobInput, list):
            try:
                ljob = [int(x) for x in jobInput]
                return ljob
            except (ValueError, TypeError):  # was BaseException: only int() conversion can fail here
                return []
        return []

    ###########################################################################
    types_rescheduleJob = []

    def export_rescheduleJob(self, jobIDs):
        """ Reschedule a single job. If the optional proxy parameter is given
            it will be used to refresh the proxy in the Proxy Repository

            :param jobIDs: one or more job IDs
            :return: S_OK with the rescheduled job IDs / S_ERROR
        """
        jobList = self.__getJobList(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList,
                                                                                                      RIGHT_RESCHEDULE)
        for jobID in validJobList:
            # Drop the job from the task queue before rescheduling it
            gtaskQueueDB.deleteJob(jobID)
            # gJobDB.deleteJobFromQueue(jobID)
            result = gJobDB.rescheduleJob(jobID)
            gLogger.debug(str(result))
            if not result['OK']:
                return result
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        if invalidJobList or nonauthJobList:
            result = S_ERROR('Some jobs failed reschedule')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        self.__sendJobsToOptimizationMind(validJobList)
        return result

    @staticmethod
    def __deleteJob(jobID):
        """ Delete one job: mark it Deleted, drop it from the task queue and,
            when the job was the last one of its pilot, clean up the pilot
            (and its PilotsLogging when enabled).

            :param int jobID: job ID
            :return: S_OK / S_ERROR
        """
        result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
        if not result['OK']:
            return result

        result = gtaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            gLogger.warn('Failed to delete job from the TaskQueue')

        # if it was the last job for the pilot, clear PilotsLogging about it
        result = gPilotAgentsDB.getPilotsForJobID(jobID)
        if not result['OK']:
            gLogger.error("Failed to get Pilots for JobID", result['Message'])
            return result
        for pilot in result['Value']:
            res = gPilotAgentsDB.getJobsForPilot(pilot)
            if not res['OK']:
                gLogger.error("Failed to get jobs for pilot", res['Message'])
                return res
            if not res['Value']:  # if list of jobs for pilot is empty, delete pilot and pilotslogging
                result = gPilotAgentsDB.getPilotInfo(pilotID=pilot)
                if not result['OK']:
                    gLogger.error("Failed to get pilot info", result['Message'])
                    return result
                # NOTE(review): this indexes the S_OK wrapper (keys 'OK'/'Value'),
                # not the returned value, so it looks like a guaranteed KeyError.
                # Probably result['Value'] should be indexed instead -- confirm
                # against PilotAgentsDB.getPilotInfo's return shape before changing.
                pilotRef = result[0]['PilotJobReference']
                ret = gPilotAgentsDB.deletePilot(pilot)
                if not ret['OK']:
                    gLogger.error("Failed to delete pilot from PilotAgentsDB", ret['Message'])
                    return ret
                if enablePilotsLogging:
                    ret = gPilotsLoggingDB.deletePilotsLogging(pilotRef)
                    if not ret['OK']:
                        gLogger.error("Failed to delete pilot logging from PilotAgentsDB", ret['Message'])
                        return ret
        return S_OK()

    @staticmethod
    def __killJob(jobID, sendKillCommand=True):
        """ Kill one job

            :param int jobID: job ID
            :param bool sendKillCommand: when False, only mark the status
                (used for jobs that no longer run anywhere)
            :return: S_OK / S_ERROR
        """
        if sendKillCommand:
            result = gJobDB.setJobCommand(jobID, 'Kill')
            if not result['OK']:
                return result

        gLogger.info('Job %d is marked for termination' % jobID)
        result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
        if not result['OK']:
            gLogger.warn('Failed to set job Killed status', result['Message'])
        result = gtaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            gLogger.warn('Failed to delete job from the TaskQueue', result['Message'])
        return S_OK()

    def __kill_delete_jobs(self, jobIDList, right):
        """ Kill or delete jobs as necessary

            :param jobIDList: one or more job IDs
            :param right: RIGHT_KILL or RIGHT_DELETE
            :return: S_OK with valid job IDs / S_ERROR with failure details
        """
        jobList = self.__getJobList(jobIDList)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDList))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right)

        # Get job status to see what is to be killed or deleted
        result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
        if not result['OK']:
            return result

        killJobList = []
        deleteJobList = []
        markKilledJobList = []
        stagingJobList = []
        for jobID, sDict in result['Value'].items():
            # Actively running jobs need a real kill command
            if sDict['Status'] in ['Running', 'Matched', 'Stalled']:
                killJobList.append(jobID)
            elif sDict['Status'] in ['Done', 'Failed', 'Killed']:
                # Finished jobs: nothing to kill; delete only when deleting
                if not right == RIGHT_KILL:
                    deleteJobList.append(jobID)
            else:
                # Any other state: just mark Killed, no kill command needed
                markKilledJobList.append(jobID)
            if sDict['Status'] in ['Staging']:
                stagingJobList.append(jobID)

        badIDs = []
        for jobID in markKilledJobList:
            result = self.__killJob(jobID, sendKillCommand=False)
            if not result['OK']:
                badIDs.append(jobID)

        for jobID in killJobList:
            result = self.__killJob(jobID)
            if not result['OK']:
                badIDs.append(jobID)

        for jobID in deleteJobList:
            result = self.__deleteJob(jobID)
            if not result['OK']:
                badIDs.append(jobID)

        if stagingJobList:
            # Staging jobs also have stager tasks that must be stopped
            stagerClient = StorageManagerClient()
            gLogger.info('Going to send killing signal to stager as well!')
            result = stagerClient.killTasksBySourceTaskID(stagingJobList)
            if not result['OK']:
                gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message'])

        if nonauthJobList or badIDs:
            result = S_ERROR('Some jobs failed deletion')
            if nonauthJobList:
                gLogger.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList))
                result['NonauthorizedJobIDs'] = nonauthJobList
            if badIDs:
                gLogger.warn("JobIDs failed to be deleted", str(badIDs))
                result['FailedJobIDs'] = badIDs
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        if invalidJobList:
            result['InvalidJobIDs'] = invalidJobList
        return result

    ###########################################################################
    types_deleteJob = []

    def export_deleteJob(self, jobIDs):
        """ Delete jobs specified in the jobIDs list

            :param jobIDs: list of job IDs
            :return: S_OK/S_ERROR
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

    ###########################################################################
    types_killJob = []

    def export_killJob(self, jobIDs):
        """ Kill jobs specified in the jobIDs list

            :param jobIDs: list of job IDs
            :return: S_OK/S_ERROR
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

    ###########################################################################
    types_resetJob = []

    def export_resetJob(self, jobIDs):
        """ Reset jobs specified in the jobIDs list

            :param jobIDs: list of job IDs
            :return: S_OK/S_ERROR
        """
        jobList = self.__getJobList(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList,
                                                                                                      RIGHT_RESET)

        badIDs = []
        good_ids = []
        for jobID in validJobList:
            # Reset the reschedule counter so the job gets a fresh allowance
            result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1)
            if not result['OK']:
                badIDs.append(jobID)
            else:
                gtaskQueueDB.deleteJob(jobID)
                # gJobDB.deleteJobFromQueue(jobID)
                result = gJobDB.rescheduleJob(jobID)
                if not result['OK']:
                    badIDs.append(jobID)
                else:
                    good_ids.append(jobID)
                gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                               application='Unknown', source='JobManager')

        self.__sendJobsToOptimizationMind(good_ids)

        if invalidJobList or nonauthJobList or badIDs:
            result = S_ERROR('Some jobs failed resetting')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if badIDs:
                result['FailedJobIDs'] = badIDs
            return result

        result = S_OK()
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result
class JobManagerHandlerMixin: """RequestHandler implementation of the JobManager""" @classmethod def initializeHandler(cls, serviceInfoDict): """Initialization of DB objects and OptimizationMind""" try: result = ObjectLoader().loadObject( "WorkloadManagementSystem.DB.JobDB", "JobDB") if not result["OK"]: return result cls.jobDB = result["Value"](parentLogger=cls.log) result = ObjectLoader().loadObject( "WorkloadManagementSystem.DB.JobLoggingDB", "JobLoggingDB") if not result["OK"]: return result cls.jobLoggingDB = result["Value"](parentLogger=cls.log) result = ObjectLoader().loadObject( "WorkloadManagementSystem.DB.TaskQueueDB", "TaskQueueDB") if not result["OK"]: return result cls.taskQueueDB = result["Value"](parentLogger=cls.log) result = ObjectLoader().loadObject( "WorkloadManagementSystem.DB.PilotAgentsDB", "PilotAgentsDB") if not result["OK"]: return result cls.pilotAgentsDB = result["Value"](parentLogger=cls.log) except RuntimeError as excp: return S_ERROR("Can't connect to DB: %s" % excp) cls.pilotsLoggingDB = None enablePilotsLogging = Operations().getValue( "/Services/JobMonitoring/usePilotsLoggingFlag", False) if enablePilotsLogging: try: result = ObjectLoader().loadObject( "WorkloadManagementSystem.DB.PilotsLoggingDB", "PilotsLoggingDB") if not result["OK"]: return result cls.pilotsLoggingDB = result["Value"](parentLogger=cls.log) except RuntimeError as excp: return S_ERROR("Can't connect to DB: %s" % excp) cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind") result = cls.msgClient.connect(JobManager=True) if not result["OK"]: cls.log.warn("Cannot connect to OptimizationMind!", result["Message"]) return S_OK() def initializeRequest(self): credDict = self.getRemoteCredentials() self.ownerDN = credDict["DN"] self.ownerGroup = credDict["group"] self.userProperties = credDict["properties"] self.owner = credDict["username"] self.peerUsesLimitedProxy = credDict["isLimitedProxy"] self.maxParametricJobs = 
self.srv_getCSOption("MaxParametricJobs", MAX_PARAMETRIC_JOBS) self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties) self.jobPolicy.jobDB = self.jobDB return S_OK() def __sendJobsToOptimizationMind(self, jids): if not self.msgClient.connected: result = self.msgClient.connect(JobManager=True) if not result["OK"]: self.log.warn("Cannot connect to OptimizationMind!", result["Message"]) return result = self.msgClient.createMessage("OptimizeJobs") if not result["OK"]: self.log.error("Cannot create Optimize message", result["Message"]) return msgObj = result["Value"] msgObj.jids = list(sorted(jids)) result = self.msgClient.sendMessage(msgObj) if not result["OK"]: self.log.error("Cannot send Optimize message", result["Message"]) return self.log.info("Optimize msg sent", "for %s jobs" % len(jids)) ########################################################################### types_getMaxParametricJobs = [] def export_getMaxParametricJobs(self): """Get the maximum number of parametric jobs :return: S_OK()/S_ERROR() """ return S_OK(self.maxParametricJobs) types_submitJob = [str] def export_submitJob(self, jobDesc): """Submit a job to DIRAC WMS. The job can be a single job, or a parametric job. If it is a parametric job, then the parameters will need to be unpacked. :param str jobDesc: job description JDL (of a single or parametric job) :return: S_OK/S_ERROR, a list of newly created job IDs in case of S_OK. 
""" if self.peerUsesLimitedProxy: return S_ERROR(EWMSSUBM, "Can't submit using a limited proxy") # Check job submission permission result = self.jobPolicy.getJobPolicy() if not result["OK"]: return S_ERROR(EWMSSUBM, "Failed to get job policies") policyDict = result["Value"] if not policyDict[RIGHT_SUBMIT]: return S_ERROR(EWMSSUBM, "Job submission not authorized") # jobDesc is JDL for now jobDesc = jobDesc.strip() if jobDesc[0] != "[": jobDesc = "[%s" % jobDesc if jobDesc[-1] != "]": jobDesc = "%s]" % jobDesc # Check if the job is a parametric one jobClassAd = ClassAd(jobDesc) result = getParameterVectorLength(jobClassAd) if not result["OK"]: self.log.error("Issue with getParameterVectorLength", result["Message"]) return result nJobs = result["Value"] parametricJob = False if nJobs is not None and nJobs > 0: # if we are here, then jobDesc was the description of a parametric job. So we start unpacking parametricJob = True if nJobs > self.maxParametricJobs: self.log.error( "Maximum of parametric jobs exceeded:", "limit %d smaller than number of jobs %d" % (self.maxParametricJobs, nJobs), ) return S_ERROR( EWMSJDL, "Number of parametric jobs exceeds the limit of %d" % self.maxParametricJobs) result = generateParametricJobs(jobClassAd) if not result["OK"]: return result jobDescList = result["Value"] else: # if we are here, then jobDesc was the description of a single job. 
jobDescList = [jobDesc] jobIDList = [] if parametricJob: initialStatus = JobStatus.SUBMITTING initialMinorStatus = "Bulk transaction confirmation" else: initialStatus = JobStatus.RECEIVED initialMinorStatus = "Job accepted" for jobDescription in jobDescList: # jobDescList because there might be a list generated by a parametric job result = self.jobDB.insertNewJobIntoDB( jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup, initialStatus=initialStatus, initialMinorStatus=initialMinorStatus, ) if not result["OK"]: return result jobID = result["JobID"] self.log.info('Job added to the JobDB", "%s for %s/%s' % (jobID, self.ownerDN, self.ownerGroup)) self.jobLoggingDB.addLoggingRecord(jobID, result["Status"], result["MinorStatus"], source="JobManager") jobIDList.append(jobID) # Set persistency flag retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup) if "Value" not in retVal or not retVal["Value"]: gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True) if parametricJob: result = S_OK(jobIDList) else: result = S_OK(jobIDList[0]) result["JobID"] = result["Value"] result["requireProxyUpload"] = self.__checkIfProxyUploadIsRequired() # Ensure non-parametric jobs (i.e. 
non-bulk) get sent to optimizer immediately if not parametricJob: self.__sendJobsToOptimizationMind(jobIDList) return result ########################################################################### types_confirmBulkSubmission = [list] def export_confirmBulkSubmission(self, jobIDs): """Confirm the possibility to proceed with processing of the jobs specified by the jobIDList :param list jobIDs: list of job IDs :return: S_OK(list)/S_ERROR() -- confirmed job IDs """ jobList = self.__getJobList(jobIDs) if not jobList: self.log.error("Issue with __getJobList", ": invalid job specification %s" % str(jobIDs)) return S_ERROR(EWMSSUBM, "Invalid job specification: " + str(jobIDs)) validJobList, _invalidJobList, _nonauthJobList, _ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_SUBMIT) # Check that all the requested jobs are eligible if set(jobList) != set(validJobList): return S_ERROR( EWMSSUBM, "Requested jobs for bulk transaction are not valid") result = self.jobDB.getJobsAttributes(jobList, ["Status", "MinorStatus"]) if not result["OK"]: return S_ERROR( EWMSSUBM, "Requested jobs for bulk transaction are not valid") js_dict = strToIntDict(result["Value"]) # Check if the jobs are already activated jobEnabledList = [ jobID for jobID in jobList if js_dict[jobID]["Status"] in [ JobStatus.RECEIVED, JobStatus.CHECKING, JobStatus.WAITING, JobStatus.MATCHED, JobStatus.RUNNING ] ] if set(jobEnabledList) == set(jobList): return S_OK(jobList) # Check that requested job are in Submitting status jobUpdateStatusList = list( jobID for jobID in jobList if js_dict[jobID]["Status"] == JobStatus.SUBMITTING) if set(jobUpdateStatusList) != set(jobList): return S_ERROR( EWMSSUBM, "Requested jobs for bulk transaction are not valid") # Update status of all the requested jobs in one transaction result = self.jobDB.setJobAttributes( jobUpdateStatusList, ["Status", "MinorStatus"], [JobStatus.RECEIVED, "Job accepted"]) if not result["OK"]: return result 
self.__sendJobsToOptimizationMind(jobUpdateStatusList) return S_OK(jobUpdateStatusList) ########################################################################### def __checkIfProxyUploadIsRequired(self): """Check if an upload is required :return: bool """ result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000) if not result["OK"]: self.log.error("Can't check if the user has proxy uploaded", result["Message"]) return True # Check if an upload is required return not result["Value"] ########################################################################### @staticmethod def __getJobList(jobInput): """Evaluate the jobInput into a list of ints :param jobInput: one or more job IDs in int or str form :type jobInput: str or int or list :return : a list of int job IDs """ if isinstance(jobInput, int): return [jobInput] if isinstance(jobInput, str): try: ijob = int(jobInput) return [ijob] except ValueError: return [] if isinstance(jobInput, list): try: ljob = [int(x) for x in jobInput] return ljob except ValueError: return [] return [] ########################################################################### types_rescheduleJob = [] def export_rescheduleJob(self, jobIDs): """Reschedule a single job. 
If the optional proxy parameter is given it will be used to refresh the proxy in the Proxy Repository :param list jobIDs: list of job IDs :return: S_OK()/S_ERROR() -- confirmed job IDs """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_RESCHEDULE) for jobID in validJobList: self.taskQueueDB.deleteJob(jobID) # gJobDB.deleteJobFromQueue(jobID) result = self.jobDB.rescheduleJob(jobID) self.log.debug(str(result)) if not result["OK"]: return result self.jobLoggingDB.addLoggingRecord( result["JobID"], status=result["Status"], minorStatus=result["MinorStatus"], applicationStatus="Unknown", source="JobManager", ) if invalidJobList or nonauthJobList: result = S_ERROR("Some jobs failed reschedule") if invalidJobList: result["InvalidJobIDs"] = invalidJobList if nonauthJobList: result["NonauthorizedJobIDs"] = nonauthJobList return result result = S_OK(validJobList) result["requireProxyUpload"] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() self.__sendJobsToOptimizationMind(validJobList) return result types_removeJob = [] def export_removeJob(self, jobIDs): """ Completely remove a list of jobs, also from TaskQueueDB, and including its JobLogging info. Only authorized users are allowed to remove jobs. 
:param list jobIDs: list of job IDs :return: S_OK()/S_ERROR() -- confirmed job IDs """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, _ = self.jobPolicy.evaluateJobRights( jobList, RIGHT_DELETE) count = 0 error_count = 0 if validJobList: self.log.verbose("Removing jobs", "(n=%d)" % len(validJobList)) result = self.jobDB.removeJobFromDB(validJobList) if not result["OK"]: self.log.error("Failed to remove jobs from JobDB", "(n=%d)" % len(validJobList)) else: self.log.info("Removed jobs from JobDB", "(n=%d)" % len(validJobList)) for jobID in validJobList: resultTQ = self.taskQueueDB.deleteJob(jobID) if not resultTQ["OK"]: self.log.warn("Failed to remove job from TaskQueueDB", "(%d): %s" % (jobID, resultTQ["Message"])) error_count += 1 else: count += 1 result = self.jobLoggingDB.deleteJob(validJobList) if not result["OK"]: self.log.error("Failed to remove jobs from JobLoggingDB", "(n=%d)" % len(validJobList)) else: self.log.info("Removed jobs from JobLoggingDB", "(n=%d)" % len(validJobList)) if count > 0 or error_count > 0: self.log.info( "Removed jobs from DB", "(%d jobs with %d errors)" % (count, error_count)) if invalidJobList or nonauthJobList: self.log.error( "Jobs can not be removed", ": %d invalid and %d in nonauthJobList" % (len(invalidJobList), len(nonauthJobList)), ) errMsg = "Some jobs failed removal" res = S_ERROR() if invalidJobList: self.log.debug("Invalid jobs: %s" % ",".join(str(ij) for ij in invalidJobList)) res["InvalidJobIDs"] = invalidJobList errMsg += ": invalid jobs" if nonauthJobList: self.log.debug("nonauthJobList jobs: %s" % ",".join(str(nj) for nj in nonauthJobList)) res["NonauthorizedJobIDs"] = nonauthJobList errMsg += ": non-authorized jobs" res["Message"] = errMsg return res return S_OK(validJobList) def __deleteJob(self, jobID): """Set the job status to "Deleted" and remove the pilot that ran and its logging info if the pilot is 
finished. :param int jobID: job ID :return: S_OK()/S_ERROR() """ result = self.jobDB.setJobStatus(jobID, JobStatus.DELETED, "Checking accounting") if not result["OK"]: return result result = self.taskQueueDB.deleteJob(jobID) if not result["OK"]: self.log.warn("Failed to delete job from the TaskQueue") # if it was the last job for the pilot, clear PilotsLogging about it result = self.pilotAgentsDB.getPilotsForJobID(jobID) if not result["OK"]: self.log.error("Failed to get Pilots for JobID", result["Message"]) return result for pilot in result["Value"]: res = self.pilotAgentsDB.getJobsForPilot(pilot) if not res["OK"]: self.log.error("Failed to get jobs for pilot", res["Message"]) return res if not res[ "Value"]: # if list of jobs for pilot is empty, delete pilot and pilotslogging result = self.pilotAgentsDB.getPilotInfo(pilotID=pilot) if not result["OK"]: self.log.error("Failed to get pilot info", result["Message"]) return result pilotRef = result[0]["PilotJobReference"] ret = self.pilotAgentsDB.deletePilot(pilot) if not ret["OK"]: self.log.error("Failed to delete pilot from PilotAgentsDB", ret["Message"]) return ret if self.pilotsLoggingDB: ret = self.pilotsLoggingDB.deletePilotsLogging(pilotRef) if not ret["OK"]: self.log.error( "Failed to delete pilot logging from PilotAgentsDB", ret["Message"]) return ret return S_OK() def __killJob(self, jobID, sendKillCommand=True): """Kill one job :param int jobID: job ID :param bool sendKillCommand: send kill command :return: S_OK()/S_ERROR() """ if sendKillCommand: result = self.jobDB.setJobCommand(jobID, "Kill") if not result["OK"]: return result self.log.info("Job marked for termination", jobID) result = self.jobDB.setJobStatus(jobID, JobStatus.KILLED, "Marked for termination") if not result["OK"]: self.log.warn("Failed to set job Killed status", result["Message"]) result = self.taskQueueDB.deleteJob(jobID) if not result["OK"]: self.log.warn("Failed to delete job from the TaskQueue", result["Message"]) return S_OK() def 
__kill_delete_jobs(self, jobIDList, right): """Kill (== set the status to "KILLED") or delete (== set the status to "DELETED") jobs as necessary :param list jobIDList: job IDs :param str right: right :return: S_OK()/S_ERROR() """ jobList = self.__getJobList(jobIDList) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDList)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, right) badIDs = [] if validJobList: # Get job status to see what is to be killed or deleted result = self.jobDB.getJobsAttributes(validJobList, ["Status"]) if not result["OK"]: return result killJobList = [] deleteJobList = [] markKilledJobList = [] stagingJobList = [] for jobID, sDict in result["Value"].items(): # can be an iterator if sDict["Status"] in (JobStatus.RUNNING, JobStatus.MATCHED, JobStatus.STALLED): killJobList.append(jobID) elif sDict["Status"] in ( JobStatus.SUBMITTING, JobStatus.RECEIVED, JobStatus.CHECKING, JobStatus.WAITING, JobStatus.RESCHEDULED, JobStatus.DONE, JobStatus.FAILED, JobStatus.KILLED, ): if not right == RIGHT_KILL: deleteJobList.append(jobID) else: markKilledJobList.append(jobID) if sDict["Status"] in [JobStatus.STAGING]: stagingJobList.append(jobID) for jobID in markKilledJobList: result = self.__killJob(jobID, sendKillCommand=False) if not result["OK"]: badIDs.append(jobID) for jobID in killJobList: result = self.__killJob(jobID) if not result["OK"]: badIDs.append(jobID) for jobID in deleteJobList: result = self.__deleteJob(jobID) if not result["OK"]: badIDs.append(jobID) if stagingJobList: stagerClient = StorageManagerClient() self.log.info( "Going to send killing signal to stager as well!") result = stagerClient.killTasksBySourceTaskID(stagingJobList) if not result["OK"]: self.log.warn("Failed to kill some Stager tasks", result["Message"]) if nonauthJobList or badIDs: result = S_ERROR("Some jobs failed deletion") if nonauthJobList: self.log.warn("Non-authorized JobIDs won't be 
deleted", str(nonauthJobList)) result["NonauthorizedJobIDs"] = nonauthJobList if badIDs: self.log.warn("JobIDs failed to be deleted", str(badIDs)) result["FailedJobIDs"] = badIDs return result result = S_OK(validJobList) result["requireProxyUpload"] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() if invalidJobList: result["InvalidJobIDs"] = invalidJobList return result ########################################################################### types_deleteJob = [] def export_deleteJob(self, jobIDs): """Delete jobs specified in the jobIDs list :param list jobIDs: list of job IDs :return: S_OK/S_ERROR """ return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE) ########################################################################### types_killJob = [] def export_killJob(self, jobIDs): """Kill jobs specified in the jobIDs list :param list jobIDs: list of job IDs :return: S_OK/S_ERROR """ return self.__kill_delete_jobs(jobIDs, RIGHT_KILL) ########################################################################### types_resetJob = [] def export_resetJob(self, jobIDs): """Reset jobs specified in the jobIDs list :param list jobIDs: list of job IDs :return: S_OK/S_ERROR """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_RESET) badIDs = [] good_ids = [] for jobID in validJobList: result = self.jobDB.setJobAttribute(jobID, "RescheduleCounter", -1) if not result["OK"]: badIDs.append(jobID) else: self.taskQueueDB.deleteJob(jobID) # gJobDB.deleteJobFromQueue(jobID) result = self.jobDB.rescheduleJob(jobID) if not result["OK"]: badIDs.append(jobID) else: good_ids.append(jobID) self.jobLoggingDB.addLoggingRecord( result["JobID"], status=result["Status"], minorStatus=result["MinorStatus"], applicationStatus="Unknown", source="JobManager", ) self.__sendJobsToOptimizationMind(good_ids) if 
invalidJobList or nonauthJobList or badIDs: result = S_ERROR("Some jobs failed resetting") if invalidJobList: result["InvalidJobIDs"] = invalidJobList if nonauthJobList: result["NonauthorizedJobIDs"] = nonauthJobList if badIDs: result["FailedJobIDs"] = badIDs return result result = S_OK() result["requireProxyUpload"] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result
def export_getJobPageSummaryWeb(self, selectDict, sortList, startItem, maxItems, selectJobs=True): """Get the summary of the job information for a given page in the job monitor in a generic format """ resultDict = {} startDate, endDate, selectDict = self.parseSelectors(selectDict) # initialize jobPolicy credDict = self.getRemoteCredentials() ownerDN = credDict["DN"] ownerGroup = credDict["group"] operations = Operations(group=ownerGroup) globalJobsInfo = operations.getValue( "/Services/JobMonitoring/GlobalJobsInfo", True) jobPolicy = JobPolicy(ownerDN, ownerGroup, globalJobsInfo) jobPolicy.jobDB = self.jobDB result = jobPolicy.getControlledUsers(RIGHT_GET_INFO) if not result["OK"]: return result if not result["Value"]: return S_ERROR( "User and group combination has no job rights (%r, %r)" % (ownerDN, ownerGroup)) if result["Value"] != "ALL": selectDict[("Owner", "OwnerGroup")] = result["Value"] # Sorting instructions. Only one for the moment. if sortList: orderAttribute = sortList[0][0] + ":" + sortList[0][1] else: orderAttribute = None result = self.jobDB.getCounters("Jobs", ["Status"], selectDict, newer=startDate, older=endDate, timeStamp="LastUpdateTime") if not result["OK"]: return result statusDict = {} nJobs = 0 for stDict, count in result["Value"]: nJobs += count statusDict[stDict["Status"]] = count resultDict["TotalRecords"] = nJobs if nJobs == 0: return S_OK(resultDict) resultDict["Extras"] = statusDict if selectJobs: iniJob = startItem if iniJob >= nJobs: return S_ERROR("Item number out of range") result = self.jobDB.selectJobs(selectDict, orderAttribute=orderAttribute, newer=startDate, older=endDate, limit=(maxItems, iniJob)) if not result["OK"]: return result summaryJobList = result["Value"] if not globalJobsInfo: validJobs, _invalidJobs, _nonauthJobs, _ownJobs = jobPolicy.evaluateJobRights( summaryJobList, RIGHT_GET_INFO) summaryJobList = validJobs result = self.getJobsAttributes(summaryJobList, SUMMARY) if not result["OK"]: return result summaryDict 
= result["Value"] # If no jobs can be selected after the properties check if not summaryDict: return S_OK(resultDict) # Evaluate last sign of life time for jobDict in summaryDict.values(): if not jobDict.get( "HeartBeatTime") or jobDict["HeartBeatTime"] == "None": jobDict["LastSignOfLife"] = jobDict["LastUpdateTime"] elif False: # Code kept in case this is not working, but if we update the HeartBeatTime # at each status change from the jobs it should not be needed # Items are always strings lastTime = TimeUtilities.fromString( jobDict["LastUpdateTime"]) hbTime = TimeUtilities.fromString(jobDict["HeartBeatTime"]) # Try and identify statuses not set by the job itself as too expensive to get logging info # Not only Stalled jobs but also Failed jobs because Stalled if (hbTime > lastTime or jobDict["Status"] == JobStatus.STALLED or jobDict["MinorStatus"] in ( JobMinorStatus.REQUESTS_DONE, JobMinorStatus.STALLED_PILOT_NOT_RUNNING, ) or jobDict["MinorStatus"].startswith("Stalling")): jobDict["LastSignOfLife"] = jobDict["HeartBeatTime"] else: jobDict["LastSignOfLife"] = jobDict["LastUpdateTime"] else: jobDict["LastSignOfLife"] = jobDict["HeartBeatTime"] # prepare the standard structure now # This should be faster than making a list of values() for jobDict in summaryDict.values(): paramNames = list(jobDict) break records = [ list(jobDict.values()) for jobDict in summaryDict.values() ] resultDict["ParameterNames"] = paramNames resultDict["Records"] = records return S_OK(resultDict)