def __getJobSiteRequirement(self, job, classAdJob):
    """Work out which sites a job asks for and which ones it bans.

    The candidate sites are read from the job's 'Site' attribute in the job
    DB, the banned ones from the 'BannedSites' attribute of the ClassAd.

    :param job: job identifier, passed to the job DB and used in log messages
    :param classAdJob: ClassAd of the job
    :return: the structure returned by S_OK(), extended with two list
             entries: 'Sites' and 'BannedSites'
    """
    siteLookup = self.jobDB.getJobAttribute(job, 'Site')
    # A failed lookup is handled as if the job named no site at all
    site = List.fromChar(siteLookup['Value']) if siteLookup['OK'] else []

    result = S_OK()

    # Remove the braces of the JDL list notation before handing the string
    # to List.fromChar
    rawBanned = classAdJob.getAttributeString('BannedSites')
    bannedSites = List.fromChar(rawBanned.replace('{', '').replace('}', ''))

    if 'Multiple' in site:
        # 'Multiple' means the actual list has to be read from the ClassAd
        result['Sites'] = classAdJob.getListFromExpression('Site')
    elif 'ANY' in site or 'Unknown' in site:
        result['Sites'] = []
    else:
        if len(site) == 1:
            self.log.info('Job %s has single chosen site %s specified in JDL' % (job, site[0]))
        result['Sites'] = site

    if bannedSites:
        self.log.info('Job %s has JDL requirement to ban %s' % (job, bannedSites))
        result['BannedSites'] = bannedSites
    else:
        result['BannedSites'] = []
    return result
def checkJob( self, job, classAdJob ):
    """This method controls the checking of the job.

       It determines the optimizer path of the job and passes it to
       processJob. The path is either taken verbatim from the 'JobPath'
       expression of the ClassAd or assembled from self.basePath, the output
       of the VO plugin (or, when no plugin is configured, self.inputData if
       the job has input data) and self.endPath.

       :param job: job identifier
       :param classAdJob: ClassAd of the job
       :return: the result of processJob, or the failing result / S_ERROR of
                the step that went wrong
    """
    # The JDL must be loadable as a JobDescription, otherwise the job is failed
    jobDesc = JobDescription()
    result = jobDesc.loadDescription( classAdJob.asJDL() )
    if not result[ 'OK' ]:
        self.setFailedJob( job, result['Message'], classAdJob )
        return result
    self.__syncJobDesc( job, jobDesc, classAdJob )

    #Check if job defines a path itself
    # FIXME: only some group might be able to overwrite the jobPath
    jobPath = classAdJob.get_expression( 'JobPath' ).replace( '"', '' ).replace( 'Unknown', '' )
    #jobPath = jobDesc.getVarWithDefault( 'JobPath' ).replace( 'Unknown', '' )
    if jobPath:
        # HACK: Remove the { and } to ensure we have a simple string
        jobPath = jobPath.replace( "{", "" ).replace( "}", "" )
        self.log.info( 'Job %s defines its own optimizer chain %s' % ( job, jobPath ) )
        return self.processJob( job, List.fromChar( jobPath ) )

    #If no path, construct based on JDL and VO path module if present
    # Work on a copy so that self.basePath itself is never extended
    path = list( self.basePath )
    if self.voPlugin:
        argumentsDict = {'JobID':job, 'ClassAd':classAdJob, 'ConfigPath':self.am_getModuleParam( "section" )}
        moduleFactory = ModuleFactory()
        moduleInstance = moduleFactory.getModule( self.voPlugin, argumentsDict )
        if not moduleInstance['OK']:
            self.log.error( 'Could not instantiate module:', '%s' % ( self.voPlugin ) )
            self.setFailedJob( job, 'Could not instantiate module: %s' % ( self.voPlugin ), classAdJob )
            return S_ERROR( 'Holding pending jobs' )

        module = moduleInstance['Value']
        result = module.execute()
        if not result['OK']:
            self.log.warn( 'Execution of %s failed' % ( self.voPlugin ) )
            return result
        # The plugin's value is converted to a list of extra optimizers
        extraPath = List.fromChar( result['Value'] )
        if extraPath:
            path.extend( extraPath )
            self.log.verbose( 'Adding extra VO specific optimizers to path: %s' % ( extraPath ) )
    else:
        self.log.verbose( 'No VO specific plugin module specified' )
        #Should only rely on an input data setting in absence of VO plugin
        # NOTE(review): nesting of this section under the else branch is
        # reconstructed from the comment above - confirm against the original
        result = self.jobDB.getInputData( job )
        if not result['OK']:
            self.log.error( 'Failed to get input data from JobDB', job )
            self.log.warn( result['Message'] )
            return result

        if result['Value']:
            # if the returned tuple is not empty it will evaluate true
            self.log.info( 'Job %s has an input data requirement' % ( job ) )
            path.extend( self.inputData )
        else:
            self.log.info( 'Job %s has no input data requirement' % ( job ) )

    path.extend( self.endPath )
    self.log.info( 'Constructed path for job %s is: %s' % ( job, path ) )
    return self.processJob( job, path )
def __getJobSiteRequirement(self, job, classAdJob):
    """Work out which sites a job asks for and which ones it bans.

    The candidate sites are read from the job's 'Site' attribute in the job
    DB. The banned sites are read from the ClassAd attribute 'BannedSite',
    falling back to the legacy name 'BannedSites' when the former is empty.

    :param job: job identifier, passed to the job DB and used in log messages
    :param classAdJob: ClassAd of the job
    :return: the structure returned by S_OK(), extended with two list
             entries: 'Sites' and 'BannedSites'
    """
    siteLookup = self.jobDB.getJobAttribute(job, 'Site')
    # A failed lookup is handled as if the job named no site at all
    site = List.fromChar(siteLookup['Value']) if siteLookup['OK'] else []

    result = S_OK()

    # The legacy attribute name is only consulted when the new one is empty
    rawBanned = (classAdJob.getAttributeString('BannedSite')
                 or classAdJob.getAttributeString('BannedSites'))
    # Remove the braces of the JDL list notation before handing the string
    # to List.fromChar
    bannedSites = List.fromChar(rawBanned.replace('{', '').replace('}', ''))

    # True as soon as one entry contains the substring "Group"
    groupFlag = any("Group" in entry for entry in site)

    if 'Multiple' in site or groupFlag:
        # The actual list of sites has to be read from the ClassAd
        result['Sites'] = classAdJob.getListFromExpression('Site')
        # We might also be here after a Staging Request where several Sites are allowed
        if 'ANY' in result['Sites'] or '' in result['Sites']:
            result['Sites'] = []
    elif 'ANY' in site or 'Unknown' in site:
        result['Sites'] = []
    else:
        if len(site) == 1:
            self.log.info('Job %s has single chosen site %s specified in JDL' % (job, site[0]))
        result['Sites'] = site

    if bannedSites:
        self.log.info('Job %s has JDL requirement to ban %s' % (job, bannedSites))
        result['BannedSites'] = bannedSites
    else:
        result['BannedSites'] = []
    return result
def __getJobSiteRequirement(self, job, classAdJob):
    """Return the candidate sites and the banned sites of a job.

    Candidate sites: the 'Site' attribute stored for the job in the job DB.
    Banned sites: the ClassAd attribute 'BannedSite' or, if that is empty,
    its legacy spelling 'BannedSites'.

    :param job: job identifier, passed to the job DB and used in log messages
    :param classAdJob: ClassAd of the job
    :return: the structure returned by S_OK(), extended with two list
             entries: 'Sites' and 'BannedSites'
    """
    dbAnswer = self.jobDB.getJobAttribute(job, 'Site')
    if dbAnswer['OK']:
        site = List.fromChar(dbAnswer['Value'])
    else:
        # No usable answer from the job DB: behave as if no site was given
        site = []

    result = S_OK()

    bannedString = classAdJob.getAttributeString('BannedSite')
    if not bannedString:
        # Just try out the legacy option variant
        bannedString = classAdJob.getAttributeString('BannedSites')
    # The braces of the JDL list notation are dropped first
    bannedString = bannedString.replace('{', '').replace('}', '')
    bannedSites = List.fromChar(bannedString)

    # True as soon as one entry contains the substring "Group"
    groupFlag = any("Group" in name for name in site)

    if 'Multiple' in site or groupFlag:
        # The actual list of sites has to be read from the ClassAd
        candidates = classAdJob.getListFromExpression('Site')
        result['Sites'] = candidates
        # We might also be here after a Staging Request where several Sites are allowed
        if 'ANY' in candidates or '' in candidates:
            result['Sites'] = []
    elif 'ANY' in site or 'Unknown' in site:
        result['Sites'] = []
    else:
        if len(site) == 1:
            self.log.info('Job %s has single chosen site %s specified in JDL' % (job, site[0]))
        result['Sites'] = site

    if bannedSites:
        self.log.info('Job %s has JDL requirement to ban %s' % (job, bannedSites))
        result['BannedSites'] = bannedSites
    else:
        result['BannedSites'] = []
    return result
def __getJobSiteRequirement(self, job, classAdJob):
    """Work out which sites a job asks for and which ones it bans.

    The candidate sites are read from the job's "Site" attribute in the job
    DB, the banned ones from the "BannedSites" attribute of the ClassAd.

    :param job: job identifier, passed to the job DB and used in log messages
    :param classAdJob: ClassAd of the job
    :return: the structure returned by S_OK(), extended with two list
             entries: "Sites" and "BannedSites"
    """
    siteLookup = self.jobDB.getJobAttribute(job, "Site")
    # A failed lookup is handled as if the job named no site at all
    site = List.fromChar(siteLookup["Value"]) if siteLookup["OK"] else []

    result = S_OK()

    # Remove the braces of the JDL list notation before handing the string
    # to List.fromChar
    rawBanned = classAdJob.getAttributeString("BannedSites")
    bannedSites = List.fromChar(rawBanned.replace("{", "").replace("}", ""))

    # True as soon as one entry contains the substring "Group"
    groupFlag = any("Group" in entry for entry in site)

    if "Multiple" in site or groupFlag:
        # The actual list of sites has to be read from the ClassAd
        result["Sites"] = classAdJob.getListFromExpression("Site")
        # We might also be here after a Staging Request where several Sites are allowed
        if "ANY" in result["Sites"] or "" in result["Sites"]:
            result["Sites"] = []
    elif "ANY" in site or "Unknown" in site:
        result["Sites"] = []
    else:
        if len(site) == 1:
            self.log.info("Job %s has single chosen site %s specified in JDL" % (job, site[0]))
        result["Sites"] = site

    if bannedSites:
        self.log.info("Job %s has JDL requirement to ban %s" % (job, bannedSites))
        result["BannedSites"] = bannedSites
    else:
        result["BannedSites"] = []
    return result
def __executeVOPlugin( self, voPlugin, jobState ):
    """ Make sure the VO plugin named by voPlugin is loaded.

        On first use the module voPlugin is imported and the attribute named
        after its last dotted component is stored in self.__voPlugins.

        NOTE(review): only the loading part of this method is visible here;
        the definition appears to be truncated, so jobState is not used in
        the visible code and no success value is returned - confirm against
        the complete file.

        :param voPlugin: dotted module path of the plugin
        :param jobState: not used in the visible part
        :return: S_ERROR if the module cannot be imported or lacks the
                 expected attribute
    """
    if voPlugin not in self.__voPlugins:
        # The plugin object must be named like the last component of the module path
        modName = List.fromChar( voPlugin, "." )[-1]
        try:
            module = __import__( voPlugin, globals(), locals(), [ modName ] )
        except ImportError, excp:
            self.jobLog.exception( "Could not import VO plugin %s" % voPlugin )
            return S_ERROR( "Could not import VO plugin %s: %s" % ( voPlugin, excp ) )

        try:
            # Cache the plugin so that the import is done only once per plugin name
            self.__voPlugins[ voPlugin ] = getattr( module, modName )
        except AttributeError, excp:
            return S_ERROR( "Could not get plugin %s from module %s: %s" % ( modName, voPlugin, str( excp ) ) )
def __getJobSiteRequirement(self, job, classAdJob):
    """Return the candidate sites and the banned sites of a job.

    Candidate sites: the 'Site' attribute stored for the job in the job DB.
    Banned sites: the 'BannedSites' attribute of the job's ClassAd.

    :param job: job identifier, passed to the job DB and used in log messages
    :param classAdJob: ClassAd of the job
    :return: the structure returned by S_OK(), extended with two list
             entries: 'Sites' and 'BannedSites'
    """
    dbAnswer = self.jobDB.getJobAttribute(job, 'Site')
    if dbAnswer['OK']:
        site = List.fromChar(dbAnswer['Value'])
    else:
        # No usable answer from the job DB: behave as if no site was given
        site = []

    result = S_OK()

    # The braces of the JDL list notation are dropped first
    bannedString = classAdJob.getAttributeString('BannedSites')
    bannedString = bannedString.replace('{', '').replace('}', '')
    bannedSites = List.fromChar(bannedString)

    if 'Multiple' in site:
        # 'Multiple' means the actual list has to be read from the ClassAd
        result['Sites'] = classAdJob.getListFromExpression('Site')
    elif 'ANY' in site or 'Unknown' in site:
        result['Sites'] = []
    else:
        if len(site) == 1:
            self.log.info('Job %s has single chosen site %s specified in JDL' % (job, site[0]))
        result['Sites'] = site

    if bannedSites:
        self.log.info('Job %s has JDL requirement to ban %s' % (job, bannedSites))
        result['BannedSites'] = bannedSites
    else:
        result['BannedSites'] = []
    return result
def parseJobSubmitStdout(self, proxy, cmd, taskQueueID, rb):
    """ Run a Job Submit command and extract the pilot reference from its stdout.

        :param proxy: proxy handed to executeGridCommand
        :param cmd: command handed to executeGridCommand
        :param taskQueueID: TaskQueue identifier, only used in log messages
        :param rb: broker name used when an error mail is sent for a failed
                   command; on success it is re-derived from the reference
        :return: False if the command cannot be executed, exits with a
                 non-zero status or prints no ``https:`` reference; otherwise
                 the tuple (reference, rb) where reference is the last
                 ``https:`` URL found in stdout and rb is the part captured
                 by ``https://(.+):.+`` in the first reference that matches
                 that pattern ('' if none does)
    """
    start = time.time()
    self.log.verbose('Executing Job Submit for TaskQueue', taskQueueID)

    ret = executeGridCommand(proxy, cmd, self.gridEnv)
    if not ret['OK']:
        self.log.error('Failed to execute Job Submit:', ret['Message'])
        self.__sendErrorMail(rb, 'Job Submit', cmd, ret, proxy)
        return False
    if ret['Value'][0] != 0:
        self.log.error('Error executing Job Submit:',
                       str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
        self.__sendErrorMail(rb, 'Job Submit', cmd, ret, proxy)
        return False
    self.log.info('Job Submit Execution Time: %.2f for TaskQueue %d' % ((time.time() - start), taskQueueID))

    stdout = ret['Value'][1]

    glite_id = None
    rb = ''
    for line in List.fromChar(stdout, '\n'):
        refMatch = re.search(r"(https:\S+)", line)
        if not refMatch:
            continue
        glite_id = refMatch.group(1)
        if not rb:
            # A reference without a "host:port" part must not abort the
            # parsing: re.search returns None in that case
            hostMatch = re.search(r"https://(.+):.+", glite_id)
            if hostMatch:
                rb = hostMatch.group(1)

    if glite_id is None:
        self.log.error('Job Submit returns no Reference:',
                       str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
        return False

    self.log.info('Reference %s for TaskQueue %s' % (glite_id, taskQueueID))
    return glite_id, rb
def parseListMatchStdout(self, proxy, cmd, taskQueueID, rb):
    """ Run a List Match command and collect the matched CE lines from its stdout.

        :param proxy: proxy handed to executeGridCommand
        :param cmd: command handed to executeGridCommand
        :param taskQueueID: TaskQueue identifier, only used in log messages
        :param rb: broker name used when an error mail is sent
        :return: False if the command cannot be executed or exits with a
                 non-zero status, otherwise the (possibly empty) list of
                 stdout lines containing '/jobmanager-' or '/cream-'
    """
    self.log.verbose('Executing List Match for TaskQueue', taskQueueID)
    startTime = time.time()
    ret = executeGridCommand(proxy, cmd, self.gridEnv)

    if not ret['OK']:
        self.log.error('Failed to execute List Match:', ret['Message'])
        self.__sendErrorMail(rb, 'List Match', cmd, ret, proxy)
        return False

    if ret['Value'][0] != 0:
        self.log.error('Error executing List Match:',
                       str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
        self.__sendErrorMail(rb, 'List Match', cmd, ret, proxy)
        return False

    elapsed = time.time() - startTime
    self.log.info('List Match Execution Time: %.2f for TaskQueue %d' % (elapsed, taskQueueID))

    stdout = ret['Value'][1]
    stderr = ret['Value'][2]

    # Parse std.out
    # TODO: the line has to be stripped from extra info
    availableCEs = [
        line for line in List.fromChar(stdout, '\n')
        if re.search('/jobmanager-', line) or re.search('/cream-', line)
    ]

    if availableCEs:
        self.log.debug('List-Match returns:',
                       str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
        self.log.info('List-Match found %s CEs for TaskQueue' % len(availableCEs), taskQueueID)
        self.log.verbose(', '.join(availableCEs))
    else:
        # Nothing matched: dump both streams to the log
        self.log.info('List-Match failed to find CEs for TaskQueue', taskQueueID)
        self.log.info(stdout)
        self.log.info(stderr)

    return availableCEs
def parseJobSubmitStdout(self, proxy, cmd, taskQueueID, rb):
    """Run a Job Submit command and extract the pilot reference from its stdout.

    :param proxy: proxy handed to executeGridCommand
    :param cmd: command handed to executeGridCommand
    :param taskQueueID: TaskQueue identifier, only used in log messages
    :param rb: broker name used when an error mail is sent for a failed
               command; on success it is re-derived from the reference
    :return: False if the command cannot be executed, exits with a non-zero
             status or prints no ``https:`` reference; otherwise the tuple
             (reference, rb) where reference is the last ``https:`` URL
             found in stdout and rb is the part captured by
             ``https://(.+):.+`` in the first reference that matches that
             pattern ("" if none does)
    """
    start = time.time()
    self.log.verbose("Executing Job Submit for TaskQueue", taskQueueID)

    ret = executeGridCommand(proxy, cmd, self.gridEnv)
    if not ret["OK"]:
        self.log.error("Failed to execute Job Submit:", ret["Message"])
        self.__sendErrorMail(rb, "Job Submit", cmd, ret, proxy)
        return False
    if ret["Value"][0] != 0:
        self.log.error("Error executing Job Submit:", str(ret["Value"][0]) + "\n".join(ret["Value"][1:3]))
        self.__sendErrorMail(rb, "Job Submit", cmd, ret, proxy)
        return False
    self.log.info("Job Submit Execution Time: %.2f for TaskQueue %d" % ((time.time() - start), taskQueueID))

    stdout = ret["Value"][1]

    glite_id = None
    rb = ""
    for line in List.fromChar(stdout, "\n"):
        refMatch = re.search(r"(https:\S+)", line)
        if not refMatch:
            continue
        glite_id = refMatch.group(1)
        if not rb:
            # re.search returns None for a reference without "host:port";
            # leave rb empty instead of failing on .group()
            hostMatch = re.search(r"https://(.+):.+", glite_id)
            if hostMatch:
                rb = hostMatch.group(1)

    if glite_id is None:
        self.log.error("Job Submit returns no Reference:", str(ret["Value"][0]) + "\n".join(ret["Value"][1:3]))
        return False

    self.log.info("Reference %s for TaskQueue %s" % (glite_id, taskQueueID))
    return glite_id, rb
def parseJobSubmitStdout( self, proxy, cmd, taskQueueID, rb ):
    """ Run a Job Submit command and extract the pilot reference from its stdout.

        :param proxy: proxy handed to executeGridCommand
        :param cmd: command handed to executeGridCommand
        :param taskQueueID: TaskQueue identifier, only used in log messages
        :param rb: broker name used when an error mail is sent for a failed
                   command; on success it is re-derived from the reference
        :return: False if the command cannot be executed, exits with a
                 non-zero status or prints no ``https:`` reference; otherwise
                 the tuple ( reference, rb ) where reference is the last
                 ``https:`` URL found in stdout and rb is the part captured
                 by ``https://(.+):.+`` in the first reference that matches
                 that pattern ( '' if none does )
    """
    start = time.time()
    self.log.verbose( 'Executing Job Submit for TaskQueue', taskQueueID )

    ret = executeGridCommand( proxy, cmd, self.gridEnv )
    if not ret['OK']:
        self.log.error( 'Failed to execute Job Submit:', ret['Message'] )
        self.__sendErrorMail( rb, 'Job Submit', cmd, ret, proxy )
        return False
    if ret['Value'][0] != 0:
        self.log.error( 'Error executing Job Submit:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
        self.__sendErrorMail( rb, 'Job Submit', cmd, ret, proxy )
        return False
    self.log.info( 'Job Submit Execution Time: %.2f for TaskQueue %d' % ( ( time.time() - start ), taskQueueID ) )

    stdout = ret['Value'][1]

    glite_id = None
    rb = ''
    for line in List.fromChar( stdout, '\n' ):
        refMatch = re.search( r"(https:\S+)", line )
        if not refMatch:
            continue
        glite_id = refMatch.group( 1 )
        if not rb:
            # re.search gives None when the reference has no "host:port"
            # part; keep rb empty rather than raising on .group()
            hostMatch = re.search( r"https://(.+):.+", glite_id )
            if hostMatch:
                rb = hostMatch.group( 1 )

    if glite_id is None:
        self.log.error( 'Job Submit returns no Reference:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
        return False

    self.log.info( 'Reference %s for TaskQueue %s' % ( glite_id, taskQueueID ) )
    return glite_id, rb
def _getChildrenReferences(self, proxy, parentReference, taskQueueID):
    """ Get the references of all children of a parent job.

        Runs ``glite-wms-job-status`` on the parent reference and collects
        every distinct reference announced by a "Status info for the Job :"
        line, except the parent's own.

        :param proxy: proxy handed to executeGridCommand
        :param parentReference: reference of the parent job
        :param taskQueueID: TaskQueue identifier, only used in log messages
        :return: False if the command cannot be executed or exits with a
                 non-zero status; [parentReference] if stdout contains no
                 status line at all; otherwise the list of child references
    """
    cmd = ['glite-wms-job-status', parentReference]

    start = time.time()
    self.log.verbose('Executing Job Status for TaskQueue', taskQueueID)

    ret = executeGridCommand(proxy, cmd, self.gridEnv)
    if not ret['OK']:
        self.log.error('Failed to execute Job Status', ret['Message'])
        return False
    if ret['Value'][0] != 0:
        self.log.error('Error executing Job Status:',
                       str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
        return False
    self.log.info('Job Status Execution Time: %.2f' % (time.time() - start))

    stdout = ret['Value'][1]

    references = []
    # NOTE(review): any status line counts as success, including the
    # parent's own - confirm this nesting against the original indentation
    foundStatusLine = False
    for line in List.fromChar(stdout, '\n'):
        match = re.search(r"Status info for the Job : (https:\S+)", line)
        if not match:
            continue
        foundStatusLine = True
        glite_id = match.group(1)
        if glite_id not in references and glite_id != parentReference:
            references.append(glite_id)

    if not foundStatusLine:
        error = str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3])
        self.log.error('Job Status returns no Child Reference:', error)
        return [parentReference]

    return references
def parseListMatchStdout(self, proxy, cmd, taskQueueID, rb):
    """ Execute a List Match command and return the CE lines it reports.

        :param proxy: proxy handed to executeGridCommand
        :param cmd: command handed to executeGridCommand
        :param taskQueueID: TaskQueue identifier, only used in log messages
        :param rb: broker name used when an error mail is sent
        :return: False if the command cannot be executed or exits with a
                 non-zero status, otherwise the (possibly empty) list of
                 stdout lines containing '/jobmanager-' or '/cream-'
    """
    self.log.verbose('Executing List Match for TaskQueue', taskQueueID)
    begin = time.time()
    ret = executeGridCommand(proxy, cmd, self.gridEnv)

    if not ret['OK']:
        self.log.error('Failed to execute List Match:', ret['Message'])
        self.__sendErrorMail(rb, 'List Match', cmd, ret, proxy)
        return False

    if ret['Value'][0] != 0:
        self.log.error('Error executing List Match:',
                       str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
        self.__sendErrorMail(rb, 'List Match', cmd, ret, proxy)
        return False

    self.log.info('List Match Execution Time: %.2f for TaskQueue %d' % ((time.time() - begin), taskQueueID))

    stdout = ret['Value'][1]
    stderr = ret['Value'][2]

    # Parse std.out
    # TODO: the line has to be stripped from extra info
    availableCEs = [
        outLine for outLine in List.fromChar(stdout, '\n')
        if re.search('/jobmanager-', outLine) or re.search('/cream-', outLine)
    ]

    if availableCEs:
        self.log.debug('List-Match returns:',
                       str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
        self.log.info('List-Match found %s CEs for TaskQueue' % len(availableCEs), taskQueueID)
        self.log.verbose(', '.join(availableCEs))
    else:
        # Nothing matched: dump both streams to the log
        self.log.info('List-Match failed to find CEs for TaskQueue', taskQueueID)
        self.log.info(stdout)
        self.log.info(stderr)

    return availableCEs
def _getChildrenReferences(self, proxy, parentReference, taskQueueID):
    """Get the references of all children of a parent job.

    Runs ``glite-wms-job-status`` on the parent reference and collects every
    distinct reference announced by a "Status info for the Job :" line,
    except the parent's own.

    :param proxy: proxy handed to executeGridCommand
    :param parentReference: reference of the parent job
    :param taskQueueID: TaskQueue identifier, only used in log messages
    :return: [] if the command cannot be executed or exits with a non-zero
             status; [parentReference] if stdout contains no status line at
             all; otherwise the list of child references
    """
    cmd = ["glite-wms-job-status", parentReference]

    start = time.time()
    self.log.verbose("Executing Job Status for TaskQueue", taskQueueID)

    ret = executeGridCommand(proxy, cmd, self.gridEnv)
    if not ret["OK"]:
        self.log.error("Failed to execute Job Status", ret["Message"])
        return []
    if ret["Value"][0] != 0:
        self.log.error("Error executing Job Status:", str(ret["Value"][0]) + "\n".join(ret["Value"][1:3]))
        return []
    self.log.info("Job Status Execution Time: %.2f" % (time.time() - start))

    stdout = ret["Value"][1]

    references = []
    # NOTE(review): any status line counts as success, including the
    # parent's own - confirm this nesting against the original indentation
    foundStatusLine = False
    for line in List.fromChar(stdout, "\n"):
        match = re.search(r"Status info for the Job : (https:\S+)", line)
        if not match:
            continue
        foundStatusLine = True
        glite_id = match.group(1)
        if glite_id not in references and glite_id != parentReference:
            references.append(glite_id)

    if not foundStatusLine:
        error = str(ret["Value"][0]) + "\n".join(ret["Value"][1:3])
        self.log.error("Job Status returns no Child Reference:", error)
        return [parentReference]

    return references
def _getChildrenReferences( self, proxy, parentReference, taskQueueID ):
    """ Get the references of all children of a parent job.

        Runs ``glite-wms-job-status`` on the parent reference and collects
        every distinct reference announced by a "Status info for the Job :"
        line, except the parent's own.

        :param proxy: proxy handed to executeGridCommand
        :param parentReference: reference of the parent job
        :param taskQueueID: TaskQueue identifier, only used in log messages
        :return: False if the command cannot be executed or exits with a
                 non-zero status; [parentReference] if stdout contains no
                 status line at all; otherwise the list of child references
    """
    cmd = [ 'glite-wms-job-status', parentReference ]

    start = time.time()
    self.log.verbose( 'Executing Job Status for TaskQueue', taskQueueID )

    ret = executeGridCommand( proxy, cmd, self.gridEnv )
    if not ret['OK']:
        self.log.error( 'Failed to execute Job Status', ret['Message'] )
        return False
    if ret['Value'][0] != 0:
        self.log.error( 'Error executing Job Status:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
        return False
    self.log.info( 'Job Status Execution Time: %.2f' % ( time.time() - start ) )

    stdout = ret['Value'][1]

    references = []
    # NOTE(review): any status line counts as success, including the
    # parent's own - confirm this nesting against the original indentation
    foundStatusLine = False
    for line in List.fromChar( stdout, '\n' ):
        match = re.search( r"Status info for the Job : (https:\S+)", line )
        if not match:
            continue
        foundStatusLine = True
        glite_id = match.group( 1 )
        if glite_id not in references and glite_id != parentReference:
            references.append( glite_id )

    if not foundStatusLine:
        error = str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] )
        self.log.error( 'Job Status returns no Child Reference:', error )
        return [parentReference]

    return references
def parseListMatchStdout(self, proxy, cmd, taskQueueID, rb):
    """Run a List Match command and collect the matched CE lines from its stdout.

    :param proxy: proxy handed to executeGridCommand
    :param cmd: command handed to executeGridCommand
    :param taskQueueID: TaskQueue identifier, only used in log messages
    :param rb: broker name used when an error mail is sent
    :return: False if the command cannot be executed or exits with a
             non-zero status, otherwise the (possibly empty) list of stdout
             lines containing "/jobmanager-" or "/cream-"
    """
    self.log.verbose("Executing List Match for TaskQueue", taskQueueID)
    startTime = time.time()
    ret = executeGridCommand(proxy, cmd, self.gridEnv)

    if not ret["OK"]:
        self.log.error("Failed to execute List Match:", ret["Message"])
        self.__sendErrorMail(rb, "List Match", cmd, ret, proxy)
        return False

    if ret["Value"][0] != 0:
        self.log.error("Error executing List Match:", str(ret["Value"][0]) + "\n".join(ret["Value"][1:3]))
        self.__sendErrorMail(rb, "List Match", cmd, ret, proxy)
        return False

    elapsed = time.time() - startTime
    self.log.info("List Match Execution Time: %.2f for TaskQueue %d" % (elapsed, taskQueueID))

    stdout = ret["Value"][1]
    stderr = ret["Value"][2]

    # Parse std.out
    # TODO: the line has to be stripped from extra info
    availableCEs = [
        line for line in List.fromChar(stdout, "\n")
        if re.search("/jobmanager-", line) or re.search("/cream-", line)
    ]

    if availableCEs:
        self.log.debug("List-Match returns:", str(ret["Value"][0]) + "\n".join(ret["Value"][1:3]))
        self.log.info("List-Match found %s CEs for TaskQueue" % len(availableCEs), taskQueueID)
        self.log.verbose(", ".join(availableCEs))
    else:
        # Nothing matched: dump both streams to the log
        self.log.info("List-Match failed to find CEs for TaskQueue", taskQueueID)
        self.log.info(stdout)
        self.log.info(stderr)

    return availableCEs
argsDict = { 'JobID': jobState.jid, 'JobState' : jobState, 'ConfigPath':self.ex_getProperty( "section" ) } try: modInstance = self.__voPlugins[ voPlugin ]( argsDict ) result = modInstance.execute() except Exception, excp: self.jobLog.exception( "Excp while executing %s" % voPlugin ) return S_ERROR( "Could not execute VO plugin %s: %s" % ( voPlugin, excp ) ) if not result['OK']: return result extraPath = result[ 'Value' ] if type( extraPath ) in types.StringTypes: extraPath = List.fromChar( result['Value'] ) return S_OK( extraPath ) def optimizeJob( self, jid, jobState ): result = jobState.getManifest() if not result[ 'OK' ]: return result jobManifest = result[ 'Value' ] opChain = jobManifest.getOption( "JobPath", [] ) if opChain: self.jobLog.info( 'Job defines its own optimizer chain %s' % opChain ) return self.__setOptimizerChain( jobState, opChain ) #Construct path opPath = self.ex_getOption( 'BasePath', ['JobPath', 'JobSanity'] ) voPlugin = self.ex_getOption( 'VOPlugin', '' )
def getPilotStatus( self, proxy, gridType, pilotRefList ):
    """ Get GRID job status information using the job's owner proxy and GRID job IDs.

        :param proxy: proxy handed to executeGridCommand
        :param gridType: 'LCG' ( runs edg-job-status ) or 'gLite' ( runs glite-wms-job-status )
        :param pilotRefList: pilot references appended to the status command
        :return: S_OK( dict ) mapping every pilot reference found in the
                 command output to a status dictionary; S_ERROR if the grid
                 type is unknown, the command cannot be executed, the broker
                 refuses the connection, or the command fails without
                 reporting any deleted pilot
    """
    if gridType == 'LCG':
        cmd = [ 'edg-job-status' ]
    elif gridType == 'gLite':
        cmd = [ 'glite-wms-job-status' ]
    else:
        return S_ERROR()
    cmd.extend( pilotRefList )

    start = time.time()
    ret = executeGridCommand( proxy, cmd, self.gridEnv )
    self.log.info( '%s Job Status Execution Time for %d jobs:' % ( gridType, len( pilotRefList ) ), time.time() - start )

    if not ret['OK']:
        self.log.error( 'Failed to execute %s Job Status' % gridType, ret['Message'] )
        return S_ERROR()
    if ret['Value'][0] != 0:
        # Non-zero exit status: look for pilots reported as not retrievable
        # and flag them as Deleted
        stderr = ret['Value'][2]
        stdout = ret['Value'][1]
        deleted = 0
        resultDict = {}
        status = 'Deleted'
        destination = 'Unknown'
        # NOTE(review): this single dict object ( and its 'ChildRefs' list ) is
        # stored for every deleted pilot, so all those entries are aliases
        deletedJobDict = { 'Status': status,
                           'DestinationSite': destination,
                           'StatusDate': Time.dateTime(),
                           'isChild': False,
                           'isParent': False,
                           'ParentRef': False,
                           'FinalStatus' : status in self.finalStateList,
                           'ChildRefs' : [] }
        # Glite returns this error for Deleted jobs to std.err
        for job in List.fromChar( stderr, '\nUnable to retrieve the status for:' )[1:]:
            # The reference is the first line of each chunk
            pRef = List.fromChar( job, '\n' )[0].strip()
            resultDict[pRef] = deletedJobDict
            self.pilotDB.setPilotStatus( pRef, "Deleted" )
            deleted += 1
        # EDG returns a similar error for Deleted jobs to std.out
        for job in List.fromChar( stdout, '\nUnable to retrieve the status for:' )[1:]:
            pRef = List.fromChar( job, '\n' )[0].strip()
            if re.search( "No such file or directory: no matching jobs found", job ):
                resultDict[pRef] = deletedJobDict
                self.pilotDB.setPilotStatus( pRef, "Deleted" )
                deleted += 1
            # NOTE(review): "()" in this pattern is an empty regex group, not
            # literal parentheses
            if re.search( "edg_wll_JobStatus: Connection refused: edg_wll_ssl_connect()", job ):
                # the Broker is not accesible
                return S_ERROR( 'Broker not Available' )
        if not deleted:
            self.log.error( 'Error executing %s Job Status:' % gridType, str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
            return S_ERROR()
        return S_OK( resultDict )

    stdout = ret['Value'][1]
    stderr = ret['Value'][2]
    resultDict = {}
    # One chunk per job; the first chunk ( text before the first marker ) is skipped
    for job in List.fromChar( stdout, '\nStatus info for the Job :' )[1:]:
        pRef = List.fromChar( job, '\n' )[0].strip()
        resultDict[pRef] = self.__parseJobStatus( job, gridType )
    return S_OK( resultDict )
def __parseJobStatus( self, job, gridType ):
    """ Parse output of grid pilot status command

        :param job: status text of a single job
        :param gridType: 'LCG' or 'gLite'; selects which date field is parsed
        :return: dictionary with the keys 'Status', 'DestinationSite',
                 'StatusDate', 'isChild', 'isParent', 'ParentRef',
                 'FinalStatus', 'ChildRefs' and 'ChildDicts'
    """
    statusRE = 'Current Status:\s*(\w*)'
    destinationRE = 'Destination:\s*([\w\.-]*)'
    statusDateLCGRE = 'reached on:\s*....(.*)'
    submittedDateRE = 'Submitted:\s*....(.*)'
    statusFailedRE = 'Current Status:.*\(Failed\)'

    # Defaults used for every field that cannot be parsed
    status = None
    destination = 'Unknown'
    statusDate = None
    submittedDate = None

    try:
        status = re.search( statusRE, job ).group( 1 )
        # A 'Done' status carrying the '(Failed)' marker is reported as 'Failed'
        if status == 'Done' and re.search( statusFailedRE, job ):
            status = 'Failed'
        if re.search( destinationRE, job ):
            destination = re.search( destinationRE, job ).group( 1 )
        if gridType == 'LCG' and re.search( statusDateLCGRE, job ):
            statusDate = re.search( statusDateLCGRE, job ).group( 1 )
            statusDate = time.strftime( '%Y-%m-%d %H:%M:%S', time.strptime( statusDate, '%b %d %H:%M:%S %Y' ) )
        if gridType == 'gLite' and re.search( submittedDateRE, job ):
            submittedDate = re.search( submittedDateRE, job ).group( 1 )
            submittedDate = time.strftime( '%Y-%m-%d %H:%M:%S', time.strptime( submittedDate, '%b %d %H:%M:%S %Y %Z' ) )
    except:
        # Any parsing problem is logged; the values parsed so far are kept
        self.log.exception( 'Error parsing %s Job Status output:\n' % gridType, job )

    isParent = False
    if re.search( 'Nodes information', job ):
        isParent = True
    isChild = False
    if re.search( 'Parent Job', job ):
        isChild = True

    if status == "Running":
        # Pilots can be in Running state for too long, due to bugs in the WMS
        if statusDate:
            statusTime = Time.fromString( statusDate )
            delta = Time.dateTime() - statusTime
            if delta > 4 * Time.day:
                self.log.info( 'Setting pilot status to Deleted after 4 days in Running' )
                status = "Deleted"
                statusDate = statusTime + 4 * Time.day
        elif submittedDate:
            statusTime = Time.fromString( submittedDate )
            delta = Time.dateTime() - statusTime
            if delta > 7 * Time.day:
                self.log.info( 'Setting pilot status to Deleted more than 7 days after submission still in Running' )
                status = "Deleted"
                statusDate = statusTime + 7 * Time.day

    childRefs = []
    childDicts = {}
    if isParent:
        # Each child section is parsed recursively with this same method.
        # NOTE(review): the leading whitespace of this separator may have
        # been altered when the file was reflowed - confirm the exact literal
        for subjob in List.fromChar( job, ' Status info for the Job :' )[1:]:
            chRef = List.fromChar( subjob, '\n' )[0].strip()
            childDict = self.__parseJobStatus( subjob, gridType )
            childRefs.append( chRef )
            childDicts[chRef] = childDict

    return { 'Status': status,
             'DestinationSite': destination,
             'StatusDate': statusDate,
             'isChild': isChild,
             'isParent': isParent,
             'ParentRef': False,
             'FinalStatus' : status in self.finalStateList,
             'ChildRefs' : childRefs,
             'ChildDicts' : childDicts }
'JobState': jobState, 'ConfigPath': self.ex_getProperty("section") } try: modInstance = self.__voPlugins[voPlugin](argsDict) result = modInstance.execute() except Exception, excp: self.jobLog.exception("Excp while executing %s" % voPlugin) return S_ERROR("Could not execute VO plugin %s: %s" % (voPlugin, excp)) if not result['OK']: return result extraPath = result['Value'] if type(extraPath) in types.StringTypes: extraPath = List.fromChar(result['Value']) return S_OK(extraPath) def optimizeJob(self, jid, jobState): result = jobState.getManifest() if not result['OK']: return result jobManifest = result['Value'] opChain = jobManifest.getOption("JobPath", []) if opChain: self.jobLog.info('Job defines its own optimizer chain %s' % opChain) return self.__setOptimizerChain(jobState, opChain) #Construct path opPath = self.ex_getOption('BasePath', ['JobPath', 'JobSanity']) voPlugin = self.ex_getOption('VOPlugin', '')
def checkJob(self, job, classAdJob):
    """This method controls the checking of the job.

    It determines the optimizer path of the job and passes it to processJob.
    The path is either taken verbatim from the 'JobPath' expression of the
    ClassAd or assembled from self.basePath, the output of the VO plugin
    (or, when no plugin is configured, self.inputData if the job has input
    data) and self.endPath.

    :param job: job identifier
    :param classAdJob: ClassAd of the job
    :return: the result of processJob, or the failing result / S_ERROR of
             the step that went wrong
    """
    # The JDL must be loadable as a JobDescription, otherwise the job is failed
    jobDesc = JobDescription()
    result = jobDesc.loadDescription(classAdJob.asJDL())
    if not result['OK']:
        self.setFailedJob(job, result['Message'], classAdJob)
        return result
    self.__syncJobDesc(job, jobDesc, classAdJob)

    #Check if job defines a path itself
    # FIXME: only some group might be able to overwrite the jobPath
    jobPath = classAdJob.get_expression('JobPath').replace(
        '"', '').replace('Unknown', '')
    #jobPath = jobDesc.getVarWithDefault( 'JobPath' ).replace( 'Unknown', '' )
    if jobPath:
        # HACK: Remove the { and } to ensure we have a simple string
        jobPath = jobPath.replace("{", "").replace("}", "")
        self.log.info('Job %s defines its own optimizer chain %s' % (job, jobPath))
        return self.processJob(job, List.fromChar(jobPath))

    #If no path, construct based on JDL and VO path module if present
    # Work on a copy so that self.basePath itself is never extended
    path = list(self.basePath)
    if self.voPlugin:
        argumentsDict = {
            'JobID': job,
            'ClassAd': classAdJob,
            'ConfigPath': self.am_getModuleParam("section")
        }
        moduleFactory = ModuleFactory()
        moduleInstance = moduleFactory.getModule(self.voPlugin, argumentsDict)
        if not moduleInstance['OK']:
            self.log.error('Could not instantiate module:', '%s' % (self.voPlugin))
            self.setFailedJob(
                job, 'Could not instantiate module: %s' % (self.voPlugin), classAdJob)
            return S_ERROR('Holding pending jobs')

        module = moduleInstance['Value']
        result = module.execute()
        if not result['OK']:
            self.log.warn('Execution of %s failed' % (self.voPlugin))
            return result
        # The plugin's value is converted to a list of extra optimizers
        extraPath = List.fromChar(result['Value'])
        if extraPath:
            path.extend(extraPath)
            self.log.verbose(
                'Adding extra VO specific optimizers to path: %s' % (extraPath))
    else:
        self.log.verbose('No VO specific plugin module specified')
        #Should only rely on an input data setting in absence of VO plugin
        # NOTE(review): nesting of this section under the else branch is
        # reconstructed from the comment above - confirm against the original
        result = self.jobDB.getInputData(job)
        if not result['OK']:
            self.log.error('Failed to get input data from JobDB', job)
            self.log.warn(result['Message'])
            return result

        if result['Value']:
            # if the returned tuple is not empty it will evaluate true
            self.log.info('Job %s has an input data requirement' % (job))
            path.extend(self.inputData)
        else:
            self.log.info('Job %s has no input data requirement' % (job))

    path.extend(self.endPath)
    self.log.info('Constructed path for job %s is: %s' % (job, path))
    return self.processJob(job, path)