예제 #1
0
  def __getJobSiteRequirement( self, job, classAdJob ):
    """Collect the candidate and banned sites that constrain the scheduling of a job.

       The candidate sites come from the 'Site' attribute stored in the JobDB,
       the banned sites from the 'BannedSites' attribute of the job ClassAd.

       :param job: job identifier, used for the JobDB lookup and in log messages
       :param classAdJob: ClassAd object of the job
       :return: S_OK structure extended with the keys 'Sites' and 'BannedSites'
    """
    lookup = self.jobDB.getJobAttribute( job, 'Site' )
    # A failed lookup is handled like a job without any site
    site = List.fromChar( lookup['Value'] ) if lookup['OK'] else []

    # Strip the braces around the banned sites before splitting them
    rawBanned = classAdJob.getAttributeString( 'BannedSites' )
    bannedSites = List.fromChar( rawBanned.replace( '{', '' ).replace( '}', '' ) )

    if 'Multiple' in site:
      # The complete list is read from the 'Site' expression of the ClassAd
      candidateSites = classAdJob.getListFromExpression( 'Site' )
    elif 'ANY' in site or 'Unknown' in site:
      candidateSites = []
    else:
      if len( site ) == 1:
        self.log.info( 'Job %s has single chosen site %s specified in JDL' % ( job, site[0] ) )
      candidateSites = site

    if bannedSites:
      self.log.info( 'Job %s has JDL requirement to ban %s' % ( job, bannedSites ) )
    else:
      bannedSites = []

    result = S_OK()
    result['Sites'] = candidateSites
    result['BannedSites'] = bannedSites
    return result
예제 #2
0
  def checkJob( self, job, classAdJob ):
    """Work out the optimizer chain of a job and pass the job on to processJob.

       The JDL is first loaded into a JobDescription; a failure marks the job
       as failed. A chain given through the 'JobPath' expression of the JDL is
       used as is. Otherwise the chain is built from basePath, then either the
       optimizers returned by the VO plugin or (without plugin) the inputData
       optimizers when the JobDB reports input data, and finally endPath.

       :param job: job identifier
       :param classAdJob: ClassAd object of the job
       :return: result of processJob, or the error structure of the failing step
    """
    description = JobDescription()
    loaded = description.loadDescription( classAdJob.asJDL() )
    if not loaded[ 'OK' ]:
      self.setFailedJob( job, loaded['Message'], classAdJob )
      return loaded
    self.__syncJobDesc( job, description, classAdJob )

    # A job may define its own path
    # FIXME: only some group might be able to overwrite the jobPath
    ownPath = classAdJob.get_expression( 'JobPath' )
    ownPath = ownPath.replace( '"', '' ).replace( 'Unknown', '' )
    if ownPath:
      # HACK: Remove the { and } to ensure we have a simple string
      ownPath = ownPath.replace( "{", "" ).replace( "}", "" )
      self.log.info( 'Job %s defines its own optimizer chain %s' % ( job, ownPath ) )
      return self.processJob( job, List.fromChar( ownPath ) )

    # No explicit path: start from the base path and add what the job needs
    optimizerPath = list( self.basePath )
    if not self.voPlugin:
      self.log.verbose( 'No VO specific plugin module specified' )
      # The input data setting is only relied upon in absence of a VO plugin
      inputData = self.jobDB.getInputData( job )
      if not inputData['OK']:
        self.log.error( 'Failed to get input data from JobDB', job )
        self.log.warn( inputData['Message'] )
        return inputData

      # A non empty value means the job has input data
      if inputData['Value']:
        self.log.info( 'Job %s has an input data requirement' % ( job ) )
        optimizerPath.extend( self.inputData )
      else:
        self.log.info( 'Job %s has no input data requirement' % ( job ) )
    else:
      pluginArgs = { 'JobID': job,
                     'ClassAd': classAdJob,
                     'ConfigPath': self.am_getModuleParam( "section" ) }
      loadedModule = ModuleFactory().getModule( self.voPlugin, pluginArgs )
      if not loadedModule['OK']:
        self.log.error( 'Could not instantiate module:', '%s' % ( self.voPlugin ) )
        self.setFailedJob( job, 'Could not instantiate module: %s' % ( self.voPlugin ), classAdJob )
        return S_ERROR( 'Holding pending jobs' )

      executed = loadedModule['Value'].execute()
      if not executed['OK']:
        self.log.warn( 'Execution of %s failed' % ( self.voPlugin ) )
        return executed
      extraPath = List.fromChar( executed['Value'] )
      if extraPath:
        optimizerPath.extend( extraPath )
        self.log.verbose( 'Adding extra VO specific optimizers to path: %s' % ( extraPath ) )

    optimizerPath.extend( self.endPath )
    self.log.info( 'Constructed path for job %s is: %s' % ( job, optimizerPath ) )
    return self.processJob( job, optimizerPath )
예제 #3
0
    def __getJobSiteRequirement(self, job, classAdJob):
        """Collect the candidate and banned sites that constrain the scheduling of a job.

        The candidate sites come from the 'Site' attribute stored in the JobDB,
        the banned sites from the 'BannedSite' attribute of the job ClassAd
        (or 'BannedSites' when the former is empty).

        :param job: job identifier, used for the JobDB lookup and in log messages
        :param classAdJob: ClassAd object of the job
        :return: S_OK structure extended with the keys 'Sites' and 'BannedSites'
        """
        lookup = self.jobDB.getJobAttribute(job, 'Site')
        # A failed lookup is handled like a job without any site
        site = List.fromChar(lookup['Value']) if lookup['OK'] else []

        rawBanned = classAdJob.getAttributeString('BannedSite')
        if not rawBanned:
            # Just try out the legacy option variant
            rawBanned = classAdJob.getAttributeString('BannedSites')
        # Strip the braces around the banned sites before splitting them
        bannedSites = List.fromChar(
            rawBanned.replace('{', '').replace('}', ''))

        # True as soon as one of the entries contains "Group"
        hasGroup = any("Group" in entry for entry in site)

        if 'Multiple' in site or hasGroup:
            candidateSites = classAdJob.getListFromExpression('Site')
            # We might also be here after a Staging Request where several Sites are allowed
            if 'ANY' in candidateSites or '' in candidateSites:
                candidateSites = []
        elif 'ANY' in site or 'Unknown' in site:
            candidateSites = []
        else:
            if len(site) == 1:
                self.log.info(
                    'Job %s has single chosen site %s specified in JDL' %
                    (job, site[0]))
            candidateSites = site

        if bannedSites:
            self.log.info('Job %s has JDL requirement to ban %s' %
                          (job, bannedSites))
        else:
            bannedSites = []

        result = S_OK()
        result['Sites'] = candidateSites
        result['BannedSites'] = bannedSites
        return result
예제 #4
0
  def __getJobSiteRequirement( self, job, classAdJob ):
    """Collect the candidate and banned sites that constrain the scheduling of a job.

       The candidate sites come from the 'Site' attribute stored in the JobDB,
       the banned sites from the 'BannedSite' attribute of the job ClassAd
       (or 'BannedSites' when the former is empty).

       :param job: job identifier, used for the JobDB lookup and in log messages
       :param classAdJob: ClassAd object of the job
       :return: S_OK structure extended with the keys 'Sites' and 'BannedSites'
    """
    lookup = self.jobDB.getJobAttribute( job, 'Site' )
    # A failed lookup is handled like a job without any site
    site = List.fromChar( lookup['Value'] ) if lookup['OK'] else []

    rawBanned = classAdJob.getAttributeString( 'BannedSite' )
    if not rawBanned:
      # Just try out the legacy option variant
      rawBanned = classAdJob.getAttributeString( 'BannedSites' )
    # Strip the braces around the banned sites before splitting them
    bannedSites = List.fromChar( rawBanned.replace( '{', '' ).replace( '}', '' ) )

    # True as soon as one of the entries contains "Group"
    hasGroup = any( "Group" in entry for entry in site )

    if 'Multiple' in site or hasGroup:
      candidateSites = classAdJob.getListFromExpression( 'Site' )
      # We might also be here after a Staging Request where several Sites are allowed
      if 'ANY' in candidateSites or '' in candidateSites:
        candidateSites = []
    elif 'ANY' in site or 'Unknown' in site:
      candidateSites = []
    else:
      if len( site ) == 1:
        self.log.info( 'Job %s has single chosen site %s specified in JDL' % ( job, site[0] ) )
      candidateSites = site

    if bannedSites:
      self.log.info( 'Job %s has JDL requirement to ban %s' % ( job, bannedSites ) )
    else:
      bannedSites = []

    result = S_OK()
    result['Sites'] = candidateSites
    result['BannedSites'] = bannedSites
    return result
예제 #5
0
    def __getJobSiteRequirement(self, job, classAdJob):
        """Collect the candidate and banned sites that constrain the scheduling of a job.

        The candidate sites come from the "Site" attribute stored in the JobDB,
        the banned sites from the "BannedSites" attribute of the job ClassAd.

        :param job: job identifier, used for the JobDB lookup and in log messages
        :param classAdJob: ClassAd object of the job
        :return: S_OK structure extended with the keys "Sites" and "BannedSites"
        """
        lookup = self.jobDB.getJobAttribute(job, "Site")
        # A failed lookup is handled like a job without any site
        site = List.fromChar(lookup["Value"]) if lookup["OK"] else []

        # Strip the braces around the banned sites before splitting them
        rawBanned = classAdJob.getAttributeString("BannedSites")
        bannedSites = List.fromChar(rawBanned.replace("{", "").replace("}", ""))

        # True as soon as one of the entries contains "Group"
        hasGroup = any("Group" in entry for entry in site)

        if "Multiple" in site or hasGroup:
            candidateSites = classAdJob.getListFromExpression("Site")
            # We might also be here after a Staging Request where several Sites are allowed
            if "ANY" in candidateSites or "" in candidateSites:
                candidateSites = []
        elif "ANY" in site or "Unknown" in site:
            candidateSites = []
        else:
            if len(site) == 1:
                self.log.info("Job %s has single chosen site %s specified in JDL" % (job, site[0]))
            candidateSites = site

        if bannedSites:
            self.log.info("Job %s has JDL requirement to ban %s" % (job, bannedSites))
        else:
            bannedSites = []

        result = S_OK()
        result["Sites"] = candidateSites
        result["BannedSites"] = bannedSites
        return result
예제 #6
0
파일: JobPath.py 프로젝트: sbel/bes3-jinr
  def __executeVOPlugin( self, voPlugin, jobState ):
    """ Load the VO plugin on first use and cache it in self.__voPlugins.

        NOTE(review): only the loading part of this method is visible here; what
        happens once the plugin is cached is not shown.

        :param voPlugin: dotted module path of the plugin; its last component is
                         also the name of the attribute fetched from the module
        :param jobState: not used in the visible part of the method
        :return: S_ERROR when the module cannot be imported or does not provide
                 the expected attribute
    """
    if voPlugin not in self.__voPlugins:
      # The plugin object is expected to be named like the last module component
      modName = List.fromChar( voPlugin, "." )[-1]
      try:
        # A non empty fromlist makes __import__ return the leaf module itself
        module = __import__( voPlugin, globals(), locals(), [ modName ] )
      except ImportError, excp:
        self.jobLog.exception( "Could not import VO plugin %s" % voPlugin )
        return S_ERROR( "Could not import VO plugin %s: %s" % ( voPlugin, excp ) )

      try:
        # Cache the attribute so that the import is only attempted once per plugin
        self.__voPlugins[ voPlugin ] = getattr( module, modName )
      except AttributeError, excp:
        return S_ERROR( "Could not get plugin %s from module %s: %s" % ( modName, voPlugin, str( excp ) ) )
예제 #7
0
  def __executeVOPlugin( self, voPlugin, jobState ):
    """ Load the VO plugin on first use and cache it in self.__voPlugins.

        NOTE(review): only the loading part of this method is visible here; what
        happens once the plugin is cached is not shown.

        :param voPlugin: dotted module path of the plugin; its last component is
                         also the name of the attribute fetched from the module
        :param jobState: not used in the visible part of the method
        :return: S_ERROR when the module cannot be imported or does not provide
                 the expected attribute
    """
    if voPlugin not in self.__voPlugins:
      # The plugin object is expected to be named like the last module component
      modName = List.fromChar( voPlugin, "." )[-1]
      try:
        # A non empty fromlist makes __import__ return the leaf module itself
        module = __import__( voPlugin, globals(), locals(), [ modName ] )
      except ImportError, excp:
        self.jobLog.exception( "Could not import VO plugin %s" % voPlugin )
        return S_ERROR( "Could not import VO plugin %s: %s" % ( voPlugin, excp ) )

      try:
        # Cache the attribute so that the import is only attempted once per plugin
        self.__voPlugins[ voPlugin ] = getattr( module, modName )
      except AttributeError, excp:
        return S_ERROR( "Could not get plugin %s from module %s: %s" % ( modName, voPlugin, str( excp ) ) )
예제 #8
0
    def __getJobSiteRequirement(self, job, classAdJob):
        """Collect the candidate and banned sites that constrain the scheduling of a job.

        The candidate sites come from the 'Site' attribute stored in the JobDB,
        the banned sites from the 'BannedSites' attribute of the job ClassAd.

        :param job: job identifier, used for the JobDB lookup and in log messages
        :param classAdJob: ClassAd object of the job
        :return: S_OK structure extended with the keys 'Sites' and 'BannedSites'
        """
        lookup = self.jobDB.getJobAttribute(job, 'Site')
        # A failed lookup is handled like a job without any site
        site = List.fromChar(lookup['Value']) if lookup['OK'] else []

        # Strip the braces around the banned sites before splitting them
        rawBanned = classAdJob.getAttributeString('BannedSites')
        bannedSites = List.fromChar(
            rawBanned.replace('{', '').replace('}', ''))

        if 'Multiple' in site:
            # The complete list is read from the 'Site' expression of the ClassAd
            candidateSites = classAdJob.getListFromExpression('Site')
        elif 'ANY' in site or 'Unknown' in site:
            candidateSites = []
        else:
            if len(site) == 1:
                self.log.info(
                    'Job %s has single chosen site %s specified in JDL' %
                    (job, site[0]))
            candidateSites = site

        if bannedSites:
            self.log.info('Job %s has JDL requirement to ban %s' %
                          (job, bannedSites))
        else:
            bannedSites = []

        result = S_OK()
        result['Sites'] = candidateSites
        result['BannedSites'] = bannedSites
        return result
예제 #9
0
    def parseJobSubmitStdout(self, proxy, cmd, taskQueueID, rb):
        """Run the job submit command and extract the pilot reference from its stdout.

        :param proxy: proxy handed to executeGridCommand
        :param cmd: submit command to execute
        :param taskQueueID: TaskQueue identifier, only used for logging
                            (formatted with %d, so it has to be a number)
        :param rb: resource broker name, only used when sending the error mail
        :return: False when the command cannot be executed, exits with a
                 non-zero code or prints no reference. Otherwise the tuple
                 (reference, broker): reference is the last https reference
                 found in stdout, broker is the host of the first reference
                 of the form https://<host>:<rest> ('' if there is none).
        """
        start = time.time()
        self.log.verbose('Executing Job Submit for TaskQueue', taskQueueID)

        ret = executeGridCommand(proxy, cmd, self.gridEnv)

        if not ret['OK']:
            self.log.error('Failed to execute Job Submit:', ret['Message'])
            self.__sendErrorMail(rb, 'Job Submit', cmd, ret, proxy)
            return False
        if ret['Value'][0] != 0:
            self.log.error('Error executing Job Submit:',
                           str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
            self.__sendErrorMail(rb, 'Job Submit', cmd, ret, proxy)
            return False
        self.log.info('Job Submit Execution Time: %.2f for TaskQueue %d' %
                      ((time.time() - start), taskQueueID))

        stdout = ret['Value'][1]

        glite_id = None
        # From here on rb is the broker derived from the output, not the argument
        rb = ''
        for line in List.fromChar(stdout, '\n'):
            refMatch = re.search(r"(https:\S+)", line)
            if not refMatch:
                continue
            glite_id = refMatch.group(1)
            if not rb:
                # A reference without ':<port>' does not match: leave rb empty
                # instead of failing on a None match object
                hostMatch = re.search(r"https://(.+):.+", glite_id)
                if hostMatch:
                    rb = hostMatch.group(1)
        if glite_id is None:
            self.log.error('Job Submit returns no Reference:',
                           str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
            return False

        self.log.info('Reference %s for TaskQueue %s' %
                      (glite_id, taskQueueID))

        return glite_id, rb
예제 #10
0
    def parseListMatchStdout(self, proxy, cmd, taskQueueID, rb):
        """Run the list-match command and collect the CEs reported on its stdout.

        :param proxy: proxy handed to executeGridCommand
        :param cmd: list-match command to execute
        :param taskQueueID: TaskQueue identifier, only used for logging
        :param rb: resource broker name, only used when sending the error mail
        :return: False when the command cannot be executed or exits with a
                 non-zero code, otherwise the (possibly empty) list of stdout
                 lines containing '/jobmanager-' or '/cream-'
        """
        self.log.verbose('Executing List Match for TaskQueue', taskQueueID)

        startTime = time.time()
        outcome = executeGridCommand(proxy, cmd, self.gridEnv)

        if not outcome['OK']:
            self.log.error('Failed to execute List Match:', outcome['Message'])
            self.__sendErrorMail(rb, 'List Match', cmd, outcome, proxy)
            return False

        # Indexed as (return code, stdout, stderr)
        cmdOutput = outcome['Value']
        if cmdOutput[0] != 0:
            self.log.error('Error executing List Match:',
                           str(cmdOutput[0]) + '\n'.join(cmdOutput[1:3]))
            self.__sendErrorMail(rb, 'List Match', cmd, outcome, proxy)
            return False
        elapsed = time.time() - startTime
        self.log.info('List Match Execution Time: %.2f for TaskQueue %d' %
                      (elapsed, taskQueueID))

        stdout = cmdOutput[1]
        stderr = cmdOutput[2]
        # Keep the lines naming a CE
        # TODO: the line has to be stripped from extra info
        availableCEs = [line for line in List.fromChar(stdout, '\n')
                        if '/jobmanager-' in line or '/cream-' in line]

        if availableCEs:
            self.log.debug('List-Match returns:',
                           str(cmdOutput[0]) + '\n'.join(cmdOutput[1:3]))
            self.log.info(
                'List-Match found %s CEs for TaskQueue' % len(availableCEs),
                taskQueueID)
            self.log.verbose(', '.join(availableCEs))
        else:
            self.log.info('List-Match failed to find CEs for TaskQueue',
                          taskQueueID)
            self.log.info(stdout)
            self.log.info(stderr)

        return availableCEs
예제 #11
0
    def parseJobSubmitStdout(self, proxy, cmd, taskQueueID, rb):
        """Run the job submit command and extract the pilot reference from its stdout.

        :param proxy: proxy handed to executeGridCommand
        :param cmd: submit command to execute
        :param taskQueueID: TaskQueue identifier, only used for logging
                            (formatted with %d, so it has to be a number)
        :param rb: resource broker name, only used when sending the error mail
        :return: False when the command cannot be executed, exits with a
                 non-zero code or prints no reference. Otherwise the tuple
                 (reference, broker): reference is the last https reference
                 found in stdout, broker is the host of the first reference
                 of the form https://<host>:<rest> ("" if there is none).
        """
        start = time.time()
        self.log.verbose("Executing Job Submit for TaskQueue", taskQueueID)

        ret = executeGridCommand(proxy, cmd, self.gridEnv)

        if not ret["OK"]:
            self.log.error("Failed to execute Job Submit:", ret["Message"])
            self.__sendErrorMail(rb, "Job Submit", cmd, ret, proxy)
            return False
        if ret["Value"][0] != 0:
            self.log.error("Error executing Job Submit:", str(ret["Value"][0]) + "\n".join(ret["Value"][1:3]))
            self.__sendErrorMail(rb, "Job Submit", cmd, ret, proxy)
            return False
        self.log.info("Job Submit Execution Time: %.2f for TaskQueue %d" % ((time.time() - start), taskQueueID))

        stdout = ret["Value"][1]

        glite_id = None
        # From here on rb is the broker derived from the output, not the argument
        rb = ""
        for line in List.fromChar(stdout, "\n"):
            refMatch = re.search(r"(https:\S+)", line)
            if not refMatch:
                continue
            glite_id = refMatch.group(1)
            if not rb:
                # A reference without ':<port>' does not match: leave rb empty
                # instead of failing on a None match object
                hostMatch = re.search(r"https://(.+):.+", glite_id)
                if hostMatch:
                    rb = hostMatch.group(1)
        if glite_id is None:
            self.log.error("Job Submit returns no Reference:", str(ret["Value"][0]) + "\n".join(ret["Value"][1:3]))
            return False

        self.log.info("Reference %s for TaskQueue %s" % (glite_id, taskQueueID))

        return glite_id, rb
예제 #12
0
  def parseJobSubmitStdout( self, proxy, cmd, taskQueueID, rb ):
    """Run the job submit command and extract the pilot reference from its stdout.

       :param proxy: proxy handed to executeGridCommand
       :param cmd: submit command to execute
       :param taskQueueID: TaskQueue identifier, only used for logging
                           (formatted with %d, so it has to be a number)
       :param rb: resource broker name, only used when sending the error mail
       :return: False when the command cannot be executed, exits with a
                non-zero code or prints no reference. Otherwise the tuple
                (reference, broker): reference is the last https reference
                found in stdout, broker is the host of the first reference
                of the form https://<host>:<rest> ('' if there is none).
    """
    start = time.time()
    self.log.verbose( 'Executing Job Submit for TaskQueue', taskQueueID )

    ret = executeGridCommand( proxy, cmd, self.gridEnv )

    if not ret['OK']:
      self.log.error( 'Failed to execute Job Submit:', ret['Message'] )
      self.__sendErrorMail( rb, 'Job Submit', cmd, ret, proxy )
      return False
    if ret['Value'][0] != 0:
      self.log.error( 'Error executing Job Submit:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
      self.__sendErrorMail( rb, 'Job Submit', cmd, ret, proxy )
      return False
    self.log.info( 'Job Submit Execution Time: %.2f for TaskQueue %d' % ( ( time.time() - start ), taskQueueID ) )

    stdout = ret['Value'][1]

    glite_id = None
    # From here on rb is the broker derived from the output, not the argument
    rb = ''
    for line in List.fromChar( stdout, '\n' ):
      refMatch = re.search( r"(https:\S+)", line )
      if not refMatch:
        continue
      glite_id = refMatch.group( 1 )
      if not rb:
        # A reference without ':<port>' does not match: leave rb empty
        # instead of failing on a None match object
        hostMatch = re.search( r"https://(.+):.+", glite_id )
        if hostMatch:
          rb = hostMatch.group( 1 )
    if glite_id is None:
      self.log.error( 'Job Submit returns no Reference:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
      return False

    self.log.info( 'Reference %s for TaskQueue %s' % ( glite_id, taskQueueID ) )

    return glite_id, rb
예제 #13
0
    def _getChildrenReferences(self, proxy, parentReference, taskQueueID):
        """Get the references of all children of a parent job.

        Runs 'glite-wms-job-status' on the parent reference and collects the
        references announced by 'Status info for the Job :' lines.

        :param proxy: proxy handed to executeGridCommand
        :param parentReference: reference of the parent job
        :param taskQueueID: TaskQueue identifier, only used for logging
        :return: False when the command cannot be executed or exits with a
                 non-zero code; [parentReference] when the output holds no
                 status line at all; otherwise the list of distinct
                 references other than the parent one
        """
        cmd = ['glite-wms-job-status', parentReference]

        start = time.time()
        self.log.verbose('Executing Job Status for TaskQueue', taskQueueID)

        ret = executeGridCommand(proxy, cmd, self.gridEnv)

        if not ret['OK']:
            self.log.error('Failed to execute Job Status', ret['Message'])
            return False
        if ret['Value'][0] != 0:
            self.log.error('Error executing Job Status:',
                           str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
            return False
        self.log.info('Job Status Execution Time: %.2f' %
                      (time.time() - start))

        stdout = ret['Value'][1]

        references = []
        # Set by any status line, including the one of the parent itself
        statusLineFound = False
        for line in List.fromChar(stdout, '\n'):
            match = re.search(r"Status info for the Job : (https:\S+)", line)
            if not match:
                continue
            statusLineFound = True
            glite_id = match.group(1)
            if glite_id not in references and glite_id != parentReference:
                references.append(glite_id)
        if not statusLineFound:
            error = str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3])
            self.log.error('Job Status returns no Child Reference:', error)
            return [parentReference]

        return references
예제 #14
0
  def parseListMatchStdout( self, proxy, cmd, taskQueueID, rb ):
    """Run the list-match command and collect the CEs reported on its stdout.

       :param proxy: proxy handed to executeGridCommand
       :param cmd: list-match command to execute
       :param taskQueueID: TaskQueue identifier, only used for logging
       :param rb: resource broker name, only used when sending the error mail
       :return: False when the command cannot be executed or exits with a
                non-zero code, otherwise the (possibly empty) list of stdout
                lines containing '/jobmanager-' or '/cream-'
    """
    self.log.verbose( 'Executing List Match for TaskQueue', taskQueueID )

    startTime = time.time()
    outcome = executeGridCommand( proxy, cmd, self.gridEnv )

    if not outcome['OK']:
      self.log.error( 'Failed to execute List Match:', outcome['Message'] )
      self.__sendErrorMail( rb, 'List Match', cmd, outcome, proxy )
      return False

    # Indexed as (return code, stdout, stderr)
    cmdOutput = outcome['Value']
    if cmdOutput[0] != 0:
      self.log.error( 'Error executing List Match:', str( cmdOutput[0] ) + '\n'.join( cmdOutput[1:3] ) )
      self.__sendErrorMail( rb, 'List Match', cmd, outcome, proxy )
      return False
    elapsed = time.time() - startTime
    self.log.info( 'List Match Execution Time: %.2f for TaskQueue %d' % ( elapsed, taskQueueID ) )

    stdout = cmdOutput[1]
    stderr = cmdOutput[2]
    # Keep the lines naming a CE
    # TODO: the line has to be stripped from extra info
    availableCEs = [ line for line in List.fromChar( stdout, '\n' )
                     if '/jobmanager-' in line or '/cream-' in line ]

    if availableCEs:
      self.log.debug( 'List-Match returns:', str( cmdOutput[0] ) + '\n'.join( cmdOutput[1:3] ) )
      self.log.info( 'List-Match found %s CEs for TaskQueue' % len( availableCEs ), taskQueueID )
      self.log.verbose( ', '.join( availableCEs ) )
    else:
      self.log.info( 'List-Match failed to find CEs for TaskQueue', taskQueueID )
      self.log.info( stdout )
      self.log.info( stderr )

    return availableCEs
예제 #15
0
    def _getChildrenReferences(self, proxy, parentReference, taskQueueID):
        """Get the references of all children of a parent job.

        Runs "glite-wms-job-status" on the parent reference and collects the
        references announced by "Status info for the Job :" lines.

        :param proxy: proxy handed to executeGridCommand
        :param parentReference: reference of the parent job
        :param taskQueueID: TaskQueue identifier, only used for logging
        :return: [] when the command cannot be executed or exits with a
                 non-zero code; [parentReference] when the output holds no
                 status line at all; otherwise the list of distinct
                 references other than the parent one
        """
        cmd = ["glite-wms-job-status", parentReference]

        start = time.time()
        self.log.verbose("Executing Job Status for TaskQueue", taskQueueID)

        ret = executeGridCommand(proxy, cmd, self.gridEnv)

        if not ret["OK"]:
            self.log.error("Failed to execute Job Status", ret["Message"])
            return []
        if ret["Value"][0] != 0:
            self.log.error("Error executing Job Status:", str(ret["Value"][0]) + "\n".join(ret["Value"][1:3]))
            return []
        self.log.info("Job Status Execution Time: %.2f" % (time.time() - start))

        stdout = ret["Value"][1]

        references = []
        # Set by any status line, including the one of the parent itself
        statusLineFound = False
        for line in List.fromChar(stdout, "\n"):
            match = re.search(r"Status info for the Job : (https:\S+)", line)
            if not match:
                continue
            statusLineFound = True
            glite_id = match.group(1)
            if glite_id not in references and glite_id != parentReference:
                references.append(glite_id)
        if not statusLineFound:
            error = str(ret["Value"][0]) + "\n".join(ret["Value"][1:3])
            self.log.error("Job Status returns no Child Reference:", error)
            return [parentReference]

        return references
예제 #16
0
  def _getChildrenReferences( self, proxy, parentReference, taskQueueID ):
    """Get the references of all children of a parent job.

       Runs 'glite-wms-job-status' on the parent reference and collects the
       references announced by 'Status info for the Job :' lines.

       :param proxy: proxy handed to executeGridCommand
       :param parentReference: reference of the parent job
       :param taskQueueID: TaskQueue identifier, only used for logging
       :return: False when the command cannot be executed or exits with a
                non-zero code; [parentReference] when the output holds no
                status line at all; otherwise the list of distinct
                references other than the parent one
    """
    cmd = [ 'glite-wms-job-status', parentReference ]

    start = time.time()
    self.log.verbose( 'Executing Job Status for TaskQueue', taskQueueID )

    ret = executeGridCommand( proxy, cmd, self.gridEnv )

    if not ret['OK']:
      self.log.error( 'Failed to execute Job Status', ret['Message'] )
      return False
    if ret['Value'][0] != 0:
      self.log.error( 'Error executing Job Status:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
      return False
    self.log.info( 'Job Status Execution Time: %.2f' % ( time.time() - start ) )

    stdout = ret['Value'][1]

    references = []
    # Set by any status line, including the one of the parent itself
    statusLineFound = False
    for line in List.fromChar( stdout, '\n' ):
      match = re.search( r"Status info for the Job : (https:\S+)", line )
      if not match:
        continue
      statusLineFound = True
      glite_id = match.group( 1 )
      if glite_id not in references and glite_id != parentReference:
        references.append( glite_id )
    if not statusLineFound:
      error = str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] )
      self.log.error( 'Job Status returns no Child Reference:', error )
      return [parentReference]

    return references
예제 #17
0
    def parseListMatchStdout(self, proxy, cmd, taskQueueID, rb):
        """Run the list-match command and collect the CEs reported on its stdout.

        :param proxy: proxy handed to executeGridCommand
        :param cmd: list-match command to execute
        :param taskQueueID: TaskQueue identifier, only used for logging
        :param rb: resource broker name, only used when sending the error mail
        :return: False when the command cannot be executed or exits with a
                 non-zero code, otherwise the (possibly empty) list of stdout
                 lines containing "/jobmanager-" or "/cream-"
        """
        self.log.verbose("Executing List Match for TaskQueue", taskQueueID)

        startTime = time.time()
        outcome = executeGridCommand(proxy, cmd, self.gridEnv)

        if not outcome["OK"]:
            self.log.error("Failed to execute List Match:", outcome["Message"])
            self.__sendErrorMail(rb, "List Match", cmd, outcome, proxy)
            return False

        # Indexed as (return code, stdout, stderr)
        cmdOutput = outcome["Value"]
        if cmdOutput[0] != 0:
            self.log.error("Error executing List Match:", str(cmdOutput[0]) + "\n".join(cmdOutput[1:3]))
            self.__sendErrorMail(rb, "List Match", cmd, outcome, proxy)
            return False
        elapsed = time.time() - startTime
        self.log.info("List Match Execution Time: %.2f for TaskQueue %d" % (elapsed, taskQueueID))

        stdout = cmdOutput[1]
        stderr = cmdOutput[2]
        # Keep the lines naming a CE
        # TODO: the line has to be stripped from extra info
        availableCEs = [
            line for line in List.fromChar(stdout, "\n") if "/jobmanager-" in line or "/cream-" in line
        ]

        if availableCEs:
            self.log.debug("List-Match returns:", str(cmdOutput[0]) + "\n".join(cmdOutput[1:3]))
            self.log.info("List-Match found %s CEs for TaskQueue" % len(availableCEs), taskQueueID)
            self.log.verbose(", ".join(availableCEs))
        else:
            self.log.info("List-Match failed to find CEs for TaskQueue", taskQueueID)
            self.log.info(stdout)
            self.log.info(stderr)

        return availableCEs
예제 #18
0
파일: JobPath.py 프로젝트: sbel/bes3-jinr
    argsDict = { 'JobID': jobState.jid,
                 'JobState' : jobState,
                 'ConfigPath':self.ex_getProperty( "section" ) }
    try:
      modInstance = self.__voPlugins[ voPlugin ]( argsDict )
      result = modInstance.execute()
    except Exception, excp:
      self.jobLog.exception( "Excp while executing %s" % voPlugin )
      return S_ERROR( "Could not execute VO plugin %s: %s" % ( voPlugin, excp ) )

    if not result['OK']:
      return result
    extraPath = result[ 'Value' ]
    if type( extraPath ) in types.StringTypes:
      extraPath = List.fromChar( result['Value'] )
    return S_OK( extraPath )


  def optimizeJob( self, jid, jobState ):
    """ Decide which optimizer chain the job has to go through.

        NOTE(review): only the beginning of this method is visible here; the
        construction of the path from BasePath and VOPlugin is not shown.

        :param jid: job identifier, not used in the visible part of the method
        :param jobState: object giving access to the job manifest
        :return: the error structure of getManifest if it fails, or the result of
                 __setOptimizerChain when the manifest defines a 'JobPath'
    """
    result = jobState.getManifest()
    if not result[ 'OK' ]:
      return result
    jobManifest = result[ 'Value' ]
    # A non empty JobPath in the manifest overrides the configured path
    opChain = jobManifest.getOption( "JobPath", [] )
    if opChain:
      self.jobLog.info( 'Job defines its own optimizer chain %s' % opChain )
      return self.__setOptimizerChain( jobState, opChain )
    #Construct path
    # Options read with their defaults: ['JobPath', 'JobSanity'] and no VO plugin
    opPath = self.ex_getOption( 'BasePath', ['JobPath', 'JobSanity'] )
    voPlugin = self.ex_getOption( 'VOPlugin', '' )
예제 #19
0
  def getPilotStatus( self, proxy, gridType, pilotRefList ):
    """ Get GRID job status information using the job's owner proxy and
        GRID job IDs.

        :param proxy: proxy handed to executeGridCommand
        :param gridType: 'LCG' (uses edg-job-status) or 'gLite' (uses
                         glite-wms-job-status)
        :param pilotRefList: list of pilot references appended to the command
        :return: S_ERROR for an unknown gridType, a command that cannot be
                 executed, an unavailable broker or a non-zero exit code
                 without any deleted pilot. Otherwise S_OK with a dictionary
                 mapping each pilot reference found in the output to its
                 status dictionary. When the command exits with a non-zero
                 code only the pilots whose status cannot be retrieved are
                 returned; they are flagged 'Deleted', also in the pilotDB.
    """

    if gridType == 'LCG':
      cmd = [ 'edg-job-status' ]
    elif gridType == 'gLite':
      cmd = [ 'glite-wms-job-status' ]
    else:
      return S_ERROR()
    cmd.extend( pilotRefList )

    start = time.time()
    ret = executeGridCommand( proxy, cmd, self.gridEnv )
    self.log.info( '%s Job Status Execution Time for %d jobs:' %
                   ( gridType, len( pilotRefList ) ), time.time() - start )

    if not ret['OK']:
      self.log.error( 'Failed to execute %s Job Status' % gridType, ret['Message'] )
      return S_ERROR()
    if ret['Value'][0] != 0:
      stderr = ret['Value'][2]
      stdout = ret['Value'][1]
      deleted = 0
      resultDict = {}
      status = 'Deleted'
      destination = 'Unknown'
      # Template only: every pilot gets its own copy (with its own ChildRefs
      # list) so that changing one entry cannot affect the others
      deletedJobDict = { 'Status': status,
             'DestinationSite': destination,
             'StatusDate': Time.dateTime(),
             'isChild': False,
             'isParent': False,
             'ParentRef': False,
             'FinalStatus' : status in self.finalStateList,
             'ChildRefs' : [] }
      # Glite returns this error for Deleted jobs to std.err
      for job in List.fromChar( stderr, '\nUnable to retrieve the status for:' )[1:]:
        pRef = List.fromChar( job, '\n' )[0].strip()
        resultDict[pRef] = dict( deletedJobDict, ChildRefs = [] )
        self.pilotDB.setPilotStatus( pRef, "Deleted" )
        deleted += 1
      # EDG returns a similar error for Deleted jobs to std.out
      for job in List.fromChar( stdout, '\nUnable to retrieve the status for:' )[1:]:
        pRef = List.fromChar( job, '\n' )[0].strip()
        if re.search( "No such file or directory: no matching jobs found", job ):
          resultDict[pRef] = dict( deletedJobDict, ChildRefs = [] )
          self.pilotDB.setPilotStatus( pRef, "Deleted" )
          deleted += 1
        if re.search( "edg_wll_JobStatus: Connection refused: edg_wll_ssl_connect()", job ):
          # the Broker is not accessible
          return S_ERROR( 'Broker not Available' )
      if not deleted:
        # The non-zero exit code is not explained by deleted pilots
        self.log.error( 'Error executing %s Job Status:' %
                        gridType, str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
        return S_ERROR()
      return S_OK( resultDict )

    stdout = ret['Value'][1]
    resultDict = {}
    # One section per pilot; its first line is the pilot reference
    for job in List.fromChar( stdout, '\nStatus info for the Job :' )[1:]:
      pRef = List.fromChar( job, '\n' )[0].strip()
      resultDict[pRef] = self.__parseJobStatus( job, gridType )

    return S_OK( resultDict )
예제 #20
0
  def __parseJobStatus( self, job, gridType ):
    """ Parse output of grid pilot status command

        :param job: text of the status command output for one job
        :param gridType: 'LCG' or 'gLite'; selects which date is extracted
        :return: dictionary with the keys 'Status', 'DestinationSite',
                 'StatusDate', 'isChild', 'isParent', 'ParentRef' (always
                 False), 'FinalStatus', 'ChildRefs' and 'ChildDicts'. For a
                 parent job 'ChildDicts' maps each child reference to the
                 dictionary obtained by parsing its section recursively.
    """

    statusRE = r'Current Status:\s*(\w*)'
    destinationRE = r'Destination:\s*([\w\.-]*)'
    statusDateLCGRE = r'reached on:\s*....(.*)'
    submittedDateRE = r'Submitted:\s*....(.*)'
    statusFailedRE = r'Current Status:.*\(Failed\)'

    status = None
    destination = 'Unknown'
    statusDate = None
    submittedDate = None

    try:
      # A missing status line raises AttributeError, which is logged below
      status = re.search( statusRE, job ).group( 1 )
      if status == 'Done' and re.search( statusFailedRE, job ):
        status = 'Failed'
      destinationMatch = re.search( destinationRE, job )
      if destinationMatch:
        destination = destinationMatch.group( 1 )
      if gridType == 'LCG':
        dateMatch = re.search( statusDateLCGRE, job )
        if dateMatch:
          statusDate = dateMatch.group( 1 )
          statusDate = time.strftime( '%Y-%m-%d %H:%M:%S', time.strptime( statusDate, '%b %d %H:%M:%S %Y' ) )
      if gridType == 'gLite':
        dateMatch = re.search( submittedDateRE, job )
        if dateMatch:
          submittedDate = dateMatch.group( 1 )
          submittedDate = time.strftime( '%Y-%m-%d %H:%M:%S', time.strptime( submittedDate, '%b %d %H:%M:%S %Y %Z' ) )
    except Exception:
      # Best effort parsing: keep whatever was extracted so far, but do not
      # swallow KeyboardInterrupt / SystemExit as a bare except would
      self.log.exception( 'Error parsing %s Job Status output:\n' % gridType, job )

    isParent = bool( re.search( 'Nodes information', job ) )
    isChild = bool( re.search( 'Parent Job', job ) )

    if status == "Running":
      # Pilots can be in Running state for too long, due to bugs in the WMS
      if statusDate:
        statusTime = Time.fromString( statusDate )
        delta = Time.dateTime() - statusTime
        if delta > 4 * Time.day:
          self.log.info( 'Setting pilot status to Deleted after 4 days in Running' )
          status = "Deleted"
          statusDate = statusTime + 4 * Time.day
      elif submittedDate:
        statusTime = Time.fromString( submittedDate )
        delta = Time.dateTime() - statusTime
        if delta > 7 * Time.day:
          self.log.info( 'Setting pilot status to Deleted more than 7 days after submission still in Running' )
          status = "Deleted"
          statusDate = statusTime + 7 * Time.day

    childRefs = []
    childDicts = {}
    if isParent:
      # Each child section starts with its reference on the first line
      for subjob in List.fromChar( job, ' Status info for the Job :' )[1:]:
        chRef = List.fromChar( subjob, '\n' )[0].strip()
        childDict = self.__parseJobStatus( subjob, gridType )
        childRefs.append( chRef )
        childDicts[chRef] = childDict

    return { 'Status': status,
             'DestinationSite': destination,
             'StatusDate': statusDate,
             'isChild': isChild,
             'isParent': isParent,
             'ParentRef': False,
             'FinalStatus' : status in self.finalStateList,
             'ChildRefs' : childRefs,
             'ChildDicts' : childDicts }
예제 #21
0
            'JobState': jobState,
            'ConfigPath': self.ex_getProperty("section")
        }
        try:
            modInstance = self.__voPlugins[voPlugin](argsDict)
            result = modInstance.execute()
        except Exception, excp:
            self.jobLog.exception("Excp while executing %s" % voPlugin)
            return S_ERROR("Could not execute VO plugin %s: %s" %
                           (voPlugin, excp))

        if not result['OK']:
            return result
        extraPath = result['Value']
        if type(extraPath) in types.StringTypes:
            extraPath = List.fromChar(result['Value'])
        return S_OK(extraPath)

    def optimizeJob(self, jid, jobState):
        """Pick the optimizer chain for a job from its manifest.

        The manifest is fetched from ``jobState``; if that fails, the failed
        result structure is handed back to the caller untouched. When the
        manifest carries a non-empty ``JobPath`` option, that chain is logged
        and applied through ``__setOptimizerChain`` and its return value is
        returned.

        Otherwise the ``BasePath`` and ``VOPlugin`` options are read.
        NOTE(review): the visible body stops right after reading them, so on
        this branch the method falls off the end and returns None -- confirm
        against the complete implementation.

        :param jid: job identifier (not used by the visible code)
        :param jobState: object providing ``getManifest()``
        """
        manifestResult = jobState.getManifest()
        if not manifestResult['OK']:
            return manifestResult

        # A chain declared by the job itself takes precedence.
        ownChain = manifestResult['Value'].getOption("JobPath", [])
        if ownChain:
            message = 'Job defines its own optimizer chain %s' % ownChain
            self.jobLog.info(message)
            return self.__setOptimizerChain(jobState, ownChain)

        # Construct path
        opPath = self.ex_getOption('BasePath', ['JobPath', 'JobSanity'])
        voPlugin = self.ex_getOption('VOPlugin', '')
예제 #22
0
    def checkJob(self, job, classAdJob):
        """Work out the optimizer path for a job and hand it to processJob.

        Steps, in order:

        1. Load the JDL of ``classAdJob`` into a JobDescription. On failure
           the job is marked failed and the failed result is returned.
        2. If the JDL carries its own ``JobPath`` expression, use it as the
           optimizer chain verbatim.
        3. Otherwise start from ``self.basePath`` and extend it either with
           whatever the VO plugin module returns (when ``self.voPlugin`` is
           set) or with ``self.inputData`` when the JobDB reports input data
           for the job.
        4. Append ``self.endPath`` and process the job with the final path.

        :param job: job identifier
        :param classAdJob: ClassAd holding the job JDL
        :return: the result of ``self.processJob`` on success; otherwise the
                 failed result of the step that went wrong (an ``S_ERROR``
                 when the VO plugin module cannot be instantiated)
        """
        description = JobDescription()
        loaded = description.loadDescription(classAdJob.asJDL())
        if not loaded['OK']:
            self.setFailedJob(job, loaded['Message'], classAdJob)
            return loaded
        self.__syncJobDesc(job, description, classAdJob)

        # Check if job defines a path itself
        # FIXME: only some group might be able to overwrite the jobPath
        ownPath = classAdJob.get_expression('JobPath')
        for noise in ('"', 'Unknown'):
            ownPath = ownPath.replace(noise, '')
        if ownPath:
            # HACK: Remove the { and } to ensure we have a simple string
            for brace in ("{", "}"):
                ownPath = ownPath.replace(brace, "")
            self.log.info('Job %s defines its own optimizer chain %s' %
                          (job, ownPath))
            return self.processJob(job, List.fromChar(ownPath))

        # If no path, construct based on JDL and VO path module if present
        chain = list(self.basePath)
        if not self.voPlugin:
            self.log.verbose('No VO specific plugin module specified')
            # Should only rely on an input data setting in absence of VO plugin
            inputDataResult = self.jobDB.getInputData(job)
            if not inputDataResult['OK']:
                self.log.error('Failed to get input data from JobDB', job)
                self.log.warn(inputDataResult['Message'])
                return inputDataResult

            # A non-empty returned value evaluates true
            if inputDataResult['Value']:
                self.log.info('Job %s has an input data requirement' % job)
                chain.extend(self.inputData)
            else:
                self.log.info('Job %s has no input data requirement' % job)
        else:
            pluginArgs = {
                'JobID': job,
                'ClassAd': classAdJob,
                'ConfigPath': self.am_getModuleParam("section"),
            }
            factory = ModuleFactory()
            created = factory.getModule(self.voPlugin, pluginArgs)
            if not created['OK']:
                self.log.error('Could not instantiate module:',
                               '%s' % self.voPlugin)
                failureReason = 'Could not instantiate module: %s' % self.voPlugin
                self.setFailedJob(job, failureReason, classAdJob)
                return S_ERROR('Holding pending jobs')

            executed = created['Value'].execute()
            if not executed['OK']:
                self.log.warn('Execution of %s failed' % self.voPlugin)
                return executed

            voOptimizers = List.fromChar(executed['Value'])
            if voOptimizers:
                chain.extend(voOptimizers)
                self.log.verbose(
                    'Adding extra VO specific optimizers to path: %s' %
                    voOptimizers)

        chain.extend(self.endPath)
        self.log.info('Constructed path for job %s is: %s' % (job, chain))
        return self.processJob(job, chain)