Esempio n. 1
0
 def _query(self, cmd, conn=False):
     """ Run a query on the MPIJob DB by delegating to DB._query.

     A "DB3" marker and the query result are printed to stdout. When the
     module-level DEBUG flag is set, the elapsed time and the command (with
     newlines removed) are also written to debugFile.

     Returns whatever DB._query returns.
     """
     print("DB3")
     startedAt = Time.time()
     result = DB._query(self, cmd, conn)
     if DEBUG:
         elapsed = Time.time() - startedAt
         flatCmd = cmd.replace('\n', '')
         print >> debugFile, elapsed, flatCmd
         debugFile.flush()
     print(result)
     return result
Esempio n. 2
0
 def _update( self, cmd, conn=False ):
   """ Run an update on the MPIJob DB by delegating to DB._update.

       A "DB4" marker and the outcome are printed to stdout. When the
       module-level DEBUG flag is set, the elapsed time and the command (with
       newlines removed) are also written to debugFile.

       Returns whatever DB._update returns.
   """
   print( "DB4" )
   t0 = Time.time()
   outcome = DB._update( self, cmd, conn )
   if DEBUG:
     spent = Time.time() - t0
     oneLineCmd = cmd.replace('\n','')
     print >> debugFile, spent, oneLineCmd
     debugFile.flush()
   print( outcome )
   return outcome
Esempio n. 3
0
 def _update(self, cmd, conn=False):
     """ Send an update command to the MPIJob Database via DB._update.

     Prints the "DB4" marker before and the result after the call; with
     DEBUG enabled the duration and the newline-free command are appended
     to debugFile.

     Returns the value produced by DB._update.
     """
     print("DB4")
     begin = Time.time()
     updateResult = DB._update(self, cmd, conn)
     if DEBUG:
         duration = Time.time() - begin
         compactCmd = cmd.replace('\n', '')
         print >> debugFile, duration, compactCmd
         debugFile.flush()
     print(updateResult)
     return updateResult
Esempio n. 4
0
 def _query( self, cmd, conn=False ):
   """ Send a query to the MPIJob DB via DB._query.

       Prints the "DB3" marker before and the result after the call; with
       DEBUG enabled the duration and the newline-free command are appended
       to debugFile.

       Returns the value produced by DB._query.
   """
   print( "DB3" )
   begin = Time.time()
   queryResult = DB._query( self, cmd, conn )
   if DEBUG:
     duration = Time.time() - begin
     compactCmd = cmd.replace('\n','')
     print >> debugFile, duration, compactCmd
     debugFile.flush()
   print( queryResult )
   return queryResult
Esempio n. 5
0
    def addLoggingRecord(self,
                         jobID,
                         status='idem',
                         minor='idem',
                         application='idem',
                         date='',
                         source='Unknown'):
        """ Add a new entry to the LoggingInfo table of the JobLoggingDB.

        One, two or all three status components can be specified; the ones
        left out keep the default value 'idem'.

        :param jobID: job identifier, converted with int() for the INSERT
        :param status: major status string
        :param minor: minor status string
        :param application: application status string
        :param date: optional time stamp, either a string (parsed with
                     Time.fromString) or an instance of Time._dateTimeType.
                     When empty, of another type, or when its evaluation
                     fails, Time.dateTime() is used instead.
        :param source: origin of the record
        :return: the result of self._update() for the INSERT statement
        """

        def _timeOrder(stamp):
            # Seconds relative to MAGIC_EPOC_NUMBER, rounded to milliseconds
            epoc = time.mktime(stamp.timetuple()) + \
                stamp.microsecond / 1000000. - MAGIC_EPOC_NUMBER
            return round(epoc, 3)

        event = 'status/minor/app=%s/%s/%s' % (status, minor, application)
        self.gLogger.info("Adding record for job " + str(jobID) + ": '" +
                          event + "' from " + source)

        time_order = None
        if date:
            try:
                if isinstance(date, StringTypes):
                    # The date is provided as a string in UTC
                    _date = Time.fromString(date)
                    time_order = _timeOrder(_date)
                elif isinstance(date, Time._dateTimeType):
                    _date = date
                    time_order = _timeOrder(_date)
                else:
                    self.gLogger.error('Incorrect date for the logging record')
            except Exception:
                # Deliberately not a bare except: SystemExit and
                # KeyboardInterrupt must not be swallowed here.
                self.gLogger.exception('Exception while date evaluation')
                time_order = None

        if time_order is None:
            # No usable date was given: fall back to the current time. The
            # order value keeps the microseconds so that it stays consistent
            # with the StatusTime string stored next to it.
            _date = Time.dateTime()
            time_order = _timeOrder(_date)

        # NOTE(review): the values are interpolated directly into the SQL
        # text; confirm that callers only pass trusted/escaped strings.
        cmd = ("INSERT INTO LoggingInfo (JobId, Status, MinorStatus, ApplicationStatus, " +
               "StatusTime, StatusTimeOrder, StatusSource) VALUES (%d,'%s','%s','%s','%s',%f,'%s')" %
               (int(jobID), status, minor, application, str(_date), time_order, source))

        return self._update(cmd)
Esempio n. 6
0
  def addLoggingRecord(self,
                       jobID,
                       status='idem',
                       minor='idem',
                       application='idem',
                       date='',
                       source='Unknown'):
    """ Add a new entry to the LoggingInfo table of the JobLoggingDB.

        One, two or all three status components can be specified; the ones
        left out keep the default value 'idem'. Optionally the time stamp of
        the status can be provided as a string (parsed with Time.fromString)
        or as an instance of Time._dateTimeType. If the time stamp is empty,
        of another type, or its evaluation fails, Time.dateTime() is used.

        Returns the result of self._update() for the INSERT statement.
    """

    def _timeOrder( stamp ):
      # Seconds relative to MAGIC_EPOC_NUMBER, rounded to milliseconds
      epoc = time.mktime( stamp.timetuple() ) + stamp.microsecond / 1000000. - MAGIC_EPOC_NUMBER
      return round( epoc, 3 )

    event = 'status/minor/app=%s/%s/%s' % (status,minor,application)
    self.gLogger.info("Adding record for job "+str(jobID)+": '"+event+"' from "+source)

    time_order = None
    if date:
      try:
        if isinstance( date, StringTypes ):
          # The date is provided as a string in UTC
          _date = Time.fromString(date)
          time_order = _timeOrder( _date )
        elif isinstance( date, Time._dateTimeType ):
          _date = date
          time_order = _timeOrder( _date )
        else:
          self.gLogger.error('Incorrect date for the logging record')
      except Exception:
        # Deliberately not a bare except: SystemExit and KeyboardInterrupt
        # must not be swallowed here.
        self.gLogger.exception('Exception while date evaluation')
        time_order = None

    if time_order is None:
      # No usable date was given: fall back to the current time. The order
      # value keeps the microseconds so that it stays consistent with the
      # StatusTime string stored next to it.
      _date = Time.dateTime()
      time_order = _timeOrder( _date )

    # NOTE(review): the values are interpolated directly into the SQL text;
    # confirm that callers only pass trusted/escaped strings.
    cmd = ( "INSERT INTO LoggingInfo (JobId, Status, MinorStatus, ApplicationStatus, " +
            "StatusTime, StatusTimeOrder, StatusSource) VALUES (%d,'%s','%s','%s','%s',%f,'%s')" %
            (int(jobID),status,minor,application,str(_date),time_order,source) )

    return self._update( cmd )
Esempio n. 7
0
  def initialize( self, request ):
    """ Fill the request attributes depending on the kind of `request` given.

        - None: every attribute is set to 'Unknown', then CreationTime,
          Status, DIRACSetup and (if proxy information is available)
          OwnerDN / OwnerGroup are filled in.
        - old-style class instance: attributes and sub-requests are copied
          from it.
        - string: every attribute is set to 'Unknown' and the string is
          handed to self.parseRequest().
        Any other type leaves the object untouched.
    """
    requestType = type( request )

    if requestType == types.NoneType:
      # Set some defaults
      for attrName in self.attributeNames:
        self.attributes[attrName] = 'Unknown'
      self.attributes['CreationTime'] = str( Time.dateTime() )
      self.attributes['Status'] = "New"
      proxyResult = getProxyInfo()
      if proxyResult['OK']:
        proxyInfo = proxyResult[ 'Value' ]
        self.attributes['OwnerDN'] = proxyInfo[ 'identity' ]
        if 'group' in proxyInfo:
          self.attributes['OwnerGroup'] = proxyInfo[ 'group' ]
      self.attributes['DIRACSetup'] = gConfig.getValue( '/DIRAC/Setup', 'Unknown' )

    elif requestType == types.InstanceType:
      # Initialize request from another request
      for attrName in self.attributeNames:
        self.attributes[attrName] = request.attributes[attrName]
      # NOTE(review): reads 'subrequests' (lower case) from the source request
      # but stores into 'subRequests' - confirm the attribute name.
      self.subRequests = copy.deepcopy( request.subrequests )

    elif requestType in types.StringTypes:
      # Initialize request from an XML string
      for attrName in self.attributeNames:
        self.attributes[attrName] = 'Unknown'
      self.parseRequest( request )
Esempio n. 8
0
 def setLastUpdate(self, time=''):
     """ Store the last update time.

     The given value is used when it is non-empty; otherwise the string
     form of Time.dateTime() is stored. Returns S_OK().
     """
     self.LastUpdate = time or str(Time.dateTime())
     return S_OK()
Esempio n. 9
0
 def setCreationTime(self, time=''):
     """ Store the creation time.

     The given value is used when it is non-empty; otherwise the string
     form of Time.dateTime() is stored. Returns S_OK().
     """
     self.CreationTime = time or str(Time.dateTime())
     return S_OK()
Esempio n. 10
0
 def __init__(self):
     """ Create the sub-request attributes with their initial values. """
     # Plain text attributes start out empty
     for attrName in ('RequestType', 'Operation', 'SourceSE', 'TargetSE',
                      'Error', 'Catalog', 'Arguments'):
         setattr(self, attrName, '')

     self.Status = 'Waiting'
     self.SubRequestID = 0

     # Each time stamp gets its own Time.dateTime() call
     for attrName in ('CreationTime', 'SubmissionTime', 'LastUpdate'):
         setattr(self, attrName, str(Time.dateTime()))

     # Containers are created per instance
     self.Files = []
     self.Datasets = []
Esempio n. 11
0
 def setLastUpdate(self,time=''):
   """ Set the last update time; an empty value means "now".

       "Now" is the string form of Time.dateTime(). Returns S_OK().
   """
   self.LastUpdate = time if time else str(Time.dateTime())
   return S_OK()
Esempio n. 12
0
 def updateRing(self,updDict):
   """ Update Ring port and status attributes after master of MPICH2 starts

       Inputs: {Port, RingID, JobID}
       Output: S_OK( {RingID, Status, JobID} ) built from the first row
               returned by selectRings (an empty dict when there is no row).
               If selectRings fails, its result is returned unchanged.
               If the UPDATE itself fails, the failed result is returned
               wrapped in S_OK.
   """
   print("DB15")
   port = updDict['Port']
   ringID = updDict['RingID']
   jobID = updDict['JobID']
   status = 'RingInit'
   req = "UPDATE Rings SET Port=%s, LastTimeUpdate=UTC_TIMESTAMP(), Status=\'%s\' WHERE RingID=%s AND JobID=%s" % (port,status,ringID,jobID)
   # NOTE(review): the UPDATE statement is sent through _query, not _update;
   # confirm this is intended.
   result = self._query(req)
   if not result['OK']:
     print("DB16")
     self.log.info ('UPDATE PORT ERROR')
     # NOTE(review): the failure is reported inside S_OK; kept as is because
     # callers may rely on it.
     return S_OK(result)

   result = self.selectRings({'RingID': ringID, 'JobID': jobID})
   if not result['OK']:
     # Without this check the 'Value' lookup below fails with a KeyError
     return result

   ringInfo = {}
   for ringValue, statusValue, jobValue in result['Value']:
     # Only the first row is reported
     ringInfo['RingID'] = int(str(ringValue).strip('L'))
     ringInfo['Status'] = statusValue
     ringInfo['JobID'] = int(str(jobValue).strip('L'))
     break
   print(ringInfo)
   return S_OK(ringInfo)
Esempio n. 13
0
 def __init__(self):
   """ Set every sub-request attribute to its initial value. """
   # Identification and state
   self.SubRequestID = 0
   self.Status = 'Waiting'

   # Text attributes, empty until filled in
   self.RequestType = self.Operation = ''
   self.SourceSE = self.TargetSE = ''
   self.Error = self.Catalog = self.Arguments = ''

   # Time stamps, each taken with its own Time.dateTime() call
   self.CreationTime = str(Time.dateTime())
   self.SubmissionTime = str(Time.dateTime())
   self.LastUpdate = str(Time.dateTime())

   # Per-instance containers
   self.Files, self.Datasets = [], []
Esempio n. 14
0
  def updateProcessors(self, updDict):
    """ Increment by one the number of processors that are part of a ring.

        Input: {RingID, JobID}
        Output: S_OK( result of self.selectRing( {RingID} ) ) on success,
                S_ERROR when no matching ring row exists or the UPDATE fails.
                If the initial SELECT fails, the failed result is returned
                wrapped in S_OK.
    """
    print("DB23")
    ringID = updDict['RingID']
    jobID = updDict['JobID']
    req = ('SELECT NumberOfProcessorsRing, NumberOfProcessorsJob FROM Rings WHERE RingID=%s AND JobID=%s') % (ringID,jobID)
    result = self._query(req)
    if not result['OK']:
      print("DB24")
      # NOTE(review): the failure is reported inside S_OK; kept as is because
      # callers may rely on it.
      return S_OK(result)

    rows = result['Value']
    if not rows:
      # Nothing to increment: report it instead of failing with a KeyError
      return S_ERROR('No ring found for RingID=%s and JobID=%s' % (ringID, jobID))

    # Only the first row is used; its first column is the ring processor count
    numProc = int(str(rows[0][0]).strip('L')) + 1
    cmd = 'UPDATE Rings SET NumberOfProcessorsRing=%s, LastTimeUpdate=UTC_TIMESTAMP() WHERE RingID=%s AND JobID=%s' % (numProc, ringID,jobID)
    result = self._update(cmd)
    print("RESULT SELF UPDATE %s" % (result,))
    if not result['OK']:
      print("Result no OK %s" % (result,))
      print("DB25")
      return S_ERROR(result['Message'])
    matchDict = {'RingID':ringID}
    result = self.selectRing(matchDict)
    print("VH >>>>>>>>>>>>>  ELIMINE %s" % (result,))
    return S_OK(result)
Esempio n. 15
0
 def updateRing(self, updDict):
     """ Update Ring port and status attributes after master of MPICH2 starts

     Inputs: {Port, RingID, JobID}
     Output: S_OK( {RingID, Status, JobID} ) built from the first row
             returned by selectRings (an empty dict when there is no row).
             If selectRings fails, its result is returned unchanged.
             If the UPDATE itself fails, the failed result is returned
             wrapped in S_OK.
     """
     print("DB15")
     port = updDict['Port']
     ringID = updDict['RingID']
     jobID = updDict['JobID']
     status = 'RingInit'
     req = "UPDATE Rings SET Port=%s, LastTimeUpdate=UTC_TIMESTAMP(), Status=\'%s\' WHERE RingID=%s AND JobID=%s" % (
         port, status, ringID, jobID)
     # NOTE(review): the UPDATE statement is sent through _query, not
     # _update; confirm this is intended.
     result = self._query(req)
     if not result['OK']:
         print("DB16")
         self.log.info('UPDATE PORT ERROR')
         # NOTE(review): the failure is reported inside S_OK; kept as is
         # because callers may rely on it.
         return S_OK(result)

     result = self.selectRings({'RingID': ringID, 'JobID': jobID})
     if not result['OK']:
         # Without this check the 'Value' lookup below fails with a KeyError
         return result

     ringInfo = {}
     for ringValue, statusValue, jobValue in result['Value']:
         # Only the first row is reported
         ringInfo['RingID'] = int(str(ringValue).strip('L'))
         ringInfo['Status'] = statusValue
         ringInfo['JobID'] = int(str(jobValue).strip('L'))
         break
     print(ringInfo)
     return S_OK(ringInfo)
Esempio n. 16
0
 def setCreationTime(self,time=''):
   """ Set the creation time; an empty value means "now".

       "Now" is the string form of Time.dateTime(). Returns S_OK().
   """
   self.CreationTime = time if time else str(Time.dateTime())
   return S_OK()
Esempio n. 17
0
    def setApplicationStatus(self, appStatus, sendFlag=True):
        """ Queue an application status record for the job, optionally sending it.

        Without a jobID nothing is stored and S_OK with an explanatory
        message is returned. Otherwise the status (single quotes removed) is
        appended to self.appStatusInfo together with Time.toString(); with
        sendFlag set the stored records are sent via sendStoredStatusInfo()
        and its result is returned, else S_OK().
        """
        if not self.jobID:
            return S_OK('Local execution, jobID is null.')

        stampedAt = Time.toString()
        cleanStatus = appStatus.replace("'", '')
        self.appStatusInfo.append((cleanStatus, stampedAt))

        if not sendFlag:
            return S_OK()
        return self.sendStoredStatusInfo()
Esempio n. 18
0
    def setJobParameter(self, par_name, par_value, sendFlag=True):
        """ Store one job parameter for the job, optionally sending it.

        Without a jobID nothing is stored and S_OK with an explanatory
        message is returned. Otherwise the value is kept in
        self.jobParameters under par_name together with Time.toString();
        with sendFlag set the stored parameters are sent via
        sendStoredJobParameters() and its result is returned, else S_OK().
        """
        if not self.jobID:
            return S_OK('Local execution, jobID is null.')

        stampedAt = Time.toString()
        self.jobParameters[par_name] = (par_value, stampedAt)

        if not sendFlag:
            return S_OK()
        return self.sendStoredJobParameters()
Esempio n. 19
0
  def setJobParameter( self, par_name, par_value, sendFlag = True ):
    """ Record a job parameter with the current time stamp.

        Returns S_OK with a message when there is no jobID (nothing is
        recorded). With sendFlag set, returns the result of
        sendStoredJobParameters(); otherwise S_OK().
    """
    if not self.jobID:
      return S_OK( 'Local execution, jobID is null.' )

    now = Time.toString()
    record = ( par_value, now )
    self.jobParameters[par_name] = record

    return self.sendStoredJobParameters() if sendFlag else S_OK()
Esempio n. 20
0
  def setApplicationStatus( self, appStatus, sendFlag = True ):
    """ Record an application status with the current time stamp.

        Returns S_OK with a message when there is no jobID (nothing is
        recorded). Single quotes are removed from the status before it is
        appended to self.appStatusInfo. With sendFlag set, returns the result
        of sendStoredStatusInfo(); otherwise S_OK().
    """
    if not self.jobID:
      return S_OK( 'Local execution, jobID is null.' )

    now = Time.toString()
    record = ( appStatus.replace( "'", '' ), now )
    self.appStatusInfo.append( record )

    return self.sendStoredStatusInfo() if sendFlag else S_OK()
Esempio n. 21
0
    def setJobStatus(self, status="", minor="", application="", sendFlag=True):
        """ Queue a job status record for the job, optionally sending it.

        Without a jobID nothing is stored and S_OK with an explanatory
        message is returned. Otherwise (status, minor, time stamp) is
        appended to self.jobStatusInfo and, when an application status is
        given, (application, time stamp) to self.appStatusInfo; single
        quotes are removed from all three strings. With sendFlag set the
        result of sendStoredStatusInfo() is returned, else S_OK().
        """
        if not self.jobID:
            return S_OK("Local execution, jobID is null.")

        stampedAt = Time.toString()

        cleanStatus = status.replace("'", "")
        cleanMinor = minor.replace("'", "")
        self.jobStatusInfo.append((cleanStatus, cleanMinor, stampedAt))

        if application:
            cleanApplication = application.replace("'", "")
            self.appStatusInfo.append((cleanApplication, stampedAt))

        if not sendFlag:
            return S_OK()
        return self.sendStoredStatusInfo()
Esempio n. 22
0
    def setJobParameters(self, parameters, sendFlag=True):
        """ Store several job parameters for the job, optionally sending them.

        `parameters` is iterated as (name, value) pairs; every value is kept
        in self.jobParameters together with the same Time.toString() stamp.
        Without a jobID nothing is stored and S_OK with an explanatory
        message is returned. With sendFlag set the result of
        sendStoredJobParameters() is returned, else S_OK().
        """
        if not self.jobID:
            return S_OK("Local execution, jobID is null.")

        stampedAt = Time.toString()
        for name, value in parameters:
            self.jobParameters[name] = (value, stampedAt)

        if not sendFlag:
            return S_OK()
        return self.sendStoredJobParameters()
Esempio n. 23
0
  def __init__(self,rpcStub= None,executionOrder=0):
    """Set up the sub-request attribute dictionary with its initial values.

       Every name in subAttributeNames starts as "Unknown"; Status,
       SubRequestID, CreationTime and ExecutionOrder are then filled in.
       When an rpcStub is given, its encoded form is stored as Arguments and
       its second element as Operation.
    """
    self.subAttributeNames = ['Status','SubRequestID','Operation','ExecutionOrder','CreationTime','LastUpdate','Arguments']
    self.subAttributes = dict.fromkeys( self.subAttributeNames, "Unknown" )

    # Some initial values
    attrs = self.subAttributes
    attrs['Status'] = "Waiting"
    attrs['SubRequestID'] = makeGuid()
    attrs['CreationTime'] = Time.toString()
    attrs['ExecutionOrder'] = executionOrder

    if not rpcStub:
      return
    attrs['Arguments'] = DEncode.encode(rpcStub)
    attrs['Operation'] = rpcStub[1]
Esempio n. 24
0
    def updateProcessors(self, updDict):
        """ Increment by one the number of processors that are part of a ring.

        Input: {RingID, JobID}
        Output: S_OK( result of self.selectRing( {RingID} ) ) on success,
                S_ERROR when no matching ring row exists or the UPDATE fails.
                If the initial SELECT fails, the failed result is returned
                wrapped in S_OK.
        """
        print("DB23")
        ringID = updDict['RingID']
        jobID = updDict['JobID']
        req = (
            'SELECT NumberOfProcessorsRing, NumberOfProcessorsJob FROM Rings WHERE RingID=%s AND JobID=%s'
        ) % (ringID, jobID)
        result = self._query(req)
        if not result['OK']:
            print("DB24")
            # NOTE(review): the failure is reported inside S_OK; kept as is
            # because callers may rely on it.
            return S_OK(result)

        rows = result['Value']
        if not rows:
            # Nothing to increment: report it instead of failing with a
            # KeyError further down.
            return S_ERROR('No ring found for RingID=%s and JobID=%s' %
                           (ringID, jobID))

        # Only the first row is used; its first column is the ring
        # processor count.
        numProc = int(str(rows[0][0]).strip('L')) + 1
        cmd = 'UPDATE Rings SET NumberOfProcessorsRing=%s, LastTimeUpdate=UTC_TIMESTAMP() WHERE RingID=%s AND JobID=%s' % (
            numProc, ringID, jobID)
        result = self._update(cmd)
        print("RESULT SELF UPDATE %s" % (result,))
        if not result['OK']:
            print("Result no OK %s" % (result,))
            print("DB25")
            return S_ERROR(result['Message'])
        matchDict = {'RingID': ringID}
        result = self.selectRing(matchDict)
        print("VH >>>>>>>>>>>>>  ELIMINE %s" % (result,))
        return S_OK(result)
Esempio n. 25
0
  def _submitPilots( self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit,
                     ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob ):
    """
      This method does the actual pilot submission to the Grid RB
      The logic is as follows:
      - If there are no available RB it return error
      - If there is no VOMS extension in the proxy, return error
      - It creates a temp directory
      - Prepare a JDL
        it has some part common to gLite and LCG (the payload description)
        it has some part specific to each middleware
      - If list-match is enabled, check that some CE is available
      - Submit and register the pilot references in pilotAgentsDB
      - If more pilots are still required, request a new proxy token,
        replace the token option in pilotOptions (in place) and recurse

      The temporary directory is removed as soon as the submission attempt is
      over, also when an exception is raised in between.

      Returns S_OK( number of submitted pilots ) or S_ERROR. When the failure
      happens after some pilots were already submitted, the error structure
      carries their number under 'Value'.
    """
    taskQueueID = taskQueueDict['TaskQueueID']
    credDict = proxy.getCredentials()['Value']
    ownerDN = credDict['identity']
    ownerGroup = credDict[ 'group' ]

    if not self.resourceBrokers:
      # Since we can exclude RBs from the list, it may become empty
      return S_ERROR( ERROR_RB )

    # A VOMS extension is needed for the later interactions with WMS
    ret = gProxyManager.getVOMSAttributes( proxy )
    if not ret['OK']:
      self.log.error( ERROR_VOMS, ret['Message'] )
      return S_ERROR( ERROR_VOMS )
    if not ret['Value']:
      return S_ERROR( ERROR_VOMS )

    workingDirectory = tempfile.mkdtemp( prefix = 'TQ_%s_' % taskQueueID, dir = workDir )
    try:
      self.log.verbose( 'Using working Directory:', workingDirectory )

      # Write JDL
      retDict = self._prepareJDL( taskQueueDict, workingDirectory, pilotOptions, pilotsPerJob,
                                  ceMask, submitPrivatePilot, privateTQ )
      jdl = retDict['JDL']
      pilotRequirements = retDict['Requirements']
      rb = retDict['RB']
      if not jdl:
        return S_ERROR( ERROR_JDL )

      # Check that there are available queues for the Job:
      if self.enableListMatch:
        now = Time.dateTime()
        availableCEs = self.listMatchCache.get( pilotRequirements )
        # False (as opposed to an empty result) triggers a fresh list-match
        if availableCEs == False:
          availableCEs = self._listMatch( proxy, jdl, taskQueueID, rb )
          if availableCEs != False:
            self.log.verbose( 'LastListMatch', now )
            self.log.verbose( 'AvailableCEs ', availableCEs )
            self.listMatchCache.add( pilotRequirements, self.listMatchDelay * 60,
                                     value = availableCEs )                      # it is given in minutes
        if not availableCEs:
          return S_ERROR( ERROR_CE + ' TQ: %d' % taskQueueID )

      # Now we are ready for the actual submission
      self.log.verbose( 'Submitting Pilots for TaskQueue', taskQueueID )
      submitRet = self._submitPilot( proxy, pilotsPerJob, jdl, taskQueueID, rb )
    finally:
      # Best-effort cleanup on every exit path; removal errors are ignored
      shutil.rmtree( workingDirectory, ignore_errors = True )

    if not submitRet:
      return S_ERROR( 'Pilot Submission Failed for TQ %d ' % taskQueueID )

    submittedPilots = 0

    # When parametric jobs are used, each returned reference is a parent
    # whose children are the actual pilots.
    parametric = pilotsPerJob != 1 and len( submitRet ) != pilotsPerJob
    for pilotReference, resourceBroker in submitRet:
      if parametric:
        pilotReferences = self._getChildrenReferences( proxy, pilotReference, taskQueueID )
      else:
        pilotReferences = [pilotReference]
      submittedPilots += len( pilotReferences )
      pilotAgentsDB.addPilotTQReference( pilotReferences, taskQueueID, ownerDN,
                                         ownerGroup, resourceBroker, self.gridMiddleware,
                                         pilotRequirements )

    # add some sleep here
    time.sleep( 0.1 * submittedPilots )

    if pilotsToSubmit > pilotsPerJob:
      # Additional submissions are necessary, need to get a new token and iterate.
      pilotsToSubmit -= pilotsPerJob
      result = gProxyManager.requestToken( ownerDN, ownerGroup, max( pilotsToSubmit, self.maxJobsInFillMode ) )
      if not result[ 'OK' ]:
        self.log.error( ERROR_TOKEN, result['Message'] )
        result = S_ERROR( ERROR_TOKEN )
        result['Value'] = submittedPilots
        return result
      ( token, numberOfUses ) = result[ 'Value' ]

      # Drop every previous token option. The list is rebuilt and assigned in
      # place: removing items while iterating the same list can skip entries,
      # and the caller's list object must still be the one that is modified.
      tokenOption = '-o /Security/ProxyToken='
      pilotOptions[:] = [ option for option in pilotOptions
                          if not option.startswith( tokenOption ) ]
      pilotOptions.append( '-o /Security/ProxyToken=%s' % token )

      pilotsPerJob = max( 1, min( pilotsPerJob, int( numberOfUses / self.maxJobsInFillMode ) ) )
      result = self._submitPilots( workDir, taskQueueDict, pilotOptions,
                                   pilotsToSubmit, ceMask,
                                   submitPrivatePilot, privateTQ,
                                   proxy, pilotsPerJob )
      if not result['OK']:
        # Report the pilots submitted so far together with the error
        if 'Value' not in result:
          result['Value'] = 0
        result['Value'] += submittedPilots
        return result
      submittedPilots += result['Value']

    return S_OK( submittedPilots )
Esempio n. 26
0
  def execute( self ):
    """Main Agent code:
      1.- Query the Matcher service for the TaskQueues matching this director
      2.- Add up their Priorities and waiting Jobs
      3.- Submit pilots for each TaskQueue

      Returns the failed Matcher result, S_ERROR when the priority sum is not
      positive, and S_OK otherwise.
    """

    self.__checkSubmitPools()

    self.directorDict = getResourceDict()
    # Add all submit pools
    self.directorDict[ 'SubmitPool' ] = self.am_getOption( "SubmitPools" )
    # Add all DIRAC platforms if not specified otherwise
    if 'Platform' not in self.directorDict:
      osCompat = gConfig.getOptionsDict( '/Resources/Computing/OSCompatibility' )
      if osCompat['OK']:
        self.directorDict['Platform'] = osCompat['Value'].keys()

    matcher = RPCClient( "WorkloadManagement/Matcher" )
    tqResult = matcher.getMatchingTaskQueues( self.directorDict )
    if not tqResult['OK']:
      self.log.error( 'Could not retrieve TaskQueues from TaskQueueDB', tqResult['Message'] )
      return tqResult
    taskQueueDict = tqResult['Value']

    self.log.info( 'Found %s TaskQueues' % len( taskQueueDict ) )

    if not taskQueueDict:
      self.log.info( 'No TaskQueue to Process' )
      return S_OK()

    # Tag every TaskQueue with its own ID and accumulate the totals
    prioritySum = 0
    waitingJobs = 0
    for tqID in taskQueueDict:
      tqInfo = taskQueueDict[tqID]
      tqInfo['TaskQueueID'] = tqID
      prioritySum += tqInfo['Priority']
      waitingJobs += tqInfo['Jobs']

    self.log.info( 'Sum of Priorities %s' % prioritySum )

    if waitingJobs == 0:
      self.log.info( 'No waiting Jobs' )
      return S_OK( 'No waiting Jobs' )
    if prioritySum <= 0:
      return S_ERROR( 'Wrong TaskQueue Priorities' )

    self.pilotsPerPriority = self.am_getOption( 'pilotsPerIteration' ) / prioritySum
    self.pilotsPerJob = self.am_getOption( 'pilotsPerIteration' ) / waitingJobs

    # The submitted counter is reset under the callback lock
    self.callBackLock.acquire()
    self.submittedPilots = 0
    self.callBackLock.release()
    self.toSubmitPilots = 0

    waitingStatusList = ['Submitted', 'Ready', 'Scheduled', 'Waiting']
    rightNow = Time.dateTime()
    waitingWindow = Time.hour * self.am_getOption( "maxPilotWaitingHours" )
    timeLimitToConsider = Time.toString( rightNow - waitingWindow )

    for tqID in taskQueueDict:
      self.log.verbose( 'Processing TaskQueue', tqID )

      countResult = pilotAgentsDB.countPilots( { 'TaskQueueID': tqID,
                                                 'Status': waitingStatusList},
                                               None, timeLimitToConsider )
      if countResult['OK']:
        waitingPilots = countResult['Value']
        self.log.verbose( 'Waiting Pilots for TaskQueue %s:' % tqID, waitingPilots )
      else:
        self.log.error( 'Fail to get Number of Waiting pilots', countResult['Message'] )
        waitingPilots = 0

      submitResult = self.submitPilotsForTaskQueue( taskQueueDict[tqID], waitingPilots )
      if submitResult['OK']:
        self.toSubmitPilots += submitResult['Value']

    self.log.info( 'Number of pilots to be Submitted %s' % self.toSubmitPilots )

    # Now wait until all Jobs in the Default ThreadPool are processed
    # (only the 'Default' pool is waited for)
    if 'Default' in self.pools:
      self.pools['Default'].processAllResults()

    self.log.info( 'Number of pilots Submitted %s' % self.submittedPilots )

    return S_OK()
Esempio n. 27
0
    def export_setJobStatusBulk(self, jobID, statusDict):
        """ Set various status fields for the job specified by its JobId.

        Only the last status is written to the JobDB, while every entry of
        statusDict is recorded in the JobLoggingDB. statusDict has the time
        stamp as key and a status information dictionary as value (keys
        read here: Status, MinorStatus, ApplicationStatus, Source and the
        optional ApplicationCounter).

        Returns the first failing result, S_ERROR when the job does not
        exist, and S_OK otherwise.
        """
        jobID = int(jobID)

        result = jobDB.getJobAttributes(jobID, ['Status'])
        if not result['OK']:
            return result
        if not result['Value']:
            # if there is no matching Job it returns an empty dictionary
            return S_ERROR('No Matching Job')

        # A Stalled job that reports again is considered Running
        status = 'Running' if result['Value']['Status'] == "Stalled" else ""
        minor = ""
        application = ""
        appCounter = ""
        endDate = ''
        startDate = ''
        startFlag = ''

        # Get the latest WN time stamps of status updates
        result = logDB.getWMSTimeStamps(jobID)
        if not result['OK']:
            return result
        epochs = [float(stamp) for name, stamp in result['Value'].items()
                  if name != 'LastTime']
        latestEpoch = max(epochs)
        from DIRAC import Time
        lastTime = Time.toString(Time.fromEpoch(latestEpoch))

        # Get the last status values
        dates = sorted(statusDict)
        for date in dates:
            # We should only update the status if its time stamp is more
            # recent than the last update
            if not date >= lastTime:
                continue
            sDict = statusDict[date]

            newStatus = sDict['Status']
            if newStatus:
                status = newStatus
                if status in JOB_FINAL_STATES:
                    endDate = date
                if status == "Running":
                    startFlag = 'Running'

            newMinor = sDict['MinorStatus']
            if newMinor:
                minor = newMinor
                if minor == "Application" and startFlag == 'Running':
                    startDate = date

            application = sDict['ApplicationStatus'] or application
            appCounter = sDict.get('ApplicationCounter') or appCounter

        # Only the attributes that ended up with a value are updated
        collected = [('Status', status),
                     ('MinorStatus', minor),
                     ('ApplicationStatus', application),
                     ('ApplicationCounter', appCounter)]
        attrNames = [attrName for attrName, attrValue in collected if attrValue]
        attrValues = [attrValue for attrName, attrValue in collected if attrValue]
        result = jobDB.setJobAttributes(jobID,
                                        attrNames,
                                        attrValues,
                                        update=True)
        if not result['OK']:
            return result

        # The outcome of these two calls is not checked
        if endDate:
            jobDB.setEndExecTime(jobID, endDate)
        if startDate:
            jobDB.setStartExecTime(jobID, startDate)

        # Update the JobLoggingDB records
        for date in dates:
            sDict = statusDict[date]
            recStatus = sDict['Status'] or 'idem'
            recMinor = sDict['MinorStatus'] or 'idem'
            recApplication = sDict['ApplicationStatus']
            if recApplication:
                # An application status implies a running application
                recStatus = "Running"
                recMinor = "Application"
            else:
                recApplication = 'idem'
            source = sDict['Source']
            result = logDB.addLoggingRecord(jobID, recStatus, recMinor,
                                            recApplication, date, source)
            if not result['OK']:
                return result

        return S_OK()
Esempio n. 28
0
  def _submitPilots( self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit,
                     ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob ):
    """
      This method does the actual pilot submission to the Grid RB
      The logic is as follows:
      - If there are no available RB it returns an error
      - If there is no VOMS extension in the proxy, it returns an error
      - It creates a temp directory inside workDir (removed again once the
        submission has been attempted or abandoned)
      - Prepare a JDL
        it has some part common to gLite and LCG (the payload description)
        it has some part specific to each middleware
      - If list-match is enabled, check (through listMatchCache) that at
        least one CE matches the pilot requirements
      - Submit the pilots and register their references in pilotAgentsDB
      - If pilotsToSubmit is larger than pilotsPerJob, request a new proxy
        token and call itself again for the remaining pilots

      Parameters:
      - workDir: directory in which the temporary working directory is created
      - taskQueueDict: TaskQueue description, must contain 'TaskQueueID'
      - pilotOptions: list of pilot option strings; it is modified in place
        when a new proxy token has to be injected
      - pilotsToSubmit: number of pilots still requested
      - ceMask, submitPrivatePilot, privateTQ: passed through to _prepareJDL
      - proxy: proxy object used for the submission
      - pilotsPerJob: number of pilots requested per submitted job

      Returns S_OK( number of registered pilot references ) or S_ERROR.
      When a recursive call fails, its error structure is returned with
      'Value' set to the number of pilots registered by this call.
    """
    taskQueueID = taskQueueDict['TaskQueueID']
    # ownerDN = taskQueueDict['OwnerDN']
    ownerDN = proxy.getCredentials()['Value']['identity']

    if not self.resourceBrokers:
      # Since we can exclude RBs from the list, it may become empty
      return S_ERROR( ERROR_RB )

    # Need to get VOMS extension for the later interactions with WMS
    ret = gProxyManager.getVOMSAttributes( proxy )
    if not ret['OK']:
      self.log.error( ERROR_VOMS, ret['Message'] )
      return S_ERROR( ERROR_VOMS )
    if not ret['Value']:
      return S_ERROR( ERROR_VOMS )
    vomsGroup = ret['Value'][0]

    workingDirectory = tempfile.mkdtemp( prefix = 'TQ_%s_' % taskQueueID, dir = workDir )
    self.log.verbose( 'Using working Directory:', workingDirectory )

    # Write JDL
    retDict = self._prepareJDL( taskQueueDict, workingDirectory, pilotOptions, pilotsPerJob,
                                ceMask, submitPrivatePilot, privateTQ )
    jdl = retDict['JDL']
    pilotRequirements = retDict['Requirements']
    rb = retDict['RB']
    if not jdl:
      # Best-effort cleanup: a failed removal must not hide the real error
      shutil.rmtree( workingDirectory, ignore_errors = True )
      return S_ERROR( ERROR_JDL )

    # Check that there are available queues for the Job:
    if self.enableListMatch:
      now = Time.dateTime()
      availableCEs = self.listMatchCache.get( pilotRequirements )
      # The cache is compared against False (not tested for truth) so that a
      # cached empty list of CEs is not mistaken for a cache miss
      if availableCEs == False:
        availableCEs = self._listMatch( proxy, jdl, taskQueueID, rb )
        if availableCEs != False:
          self.log.verbose( 'LastListMatch', now )
          self.log.verbose( 'AvailableCEs ', availableCEs )
          self.listMatchCache.add( pilotRequirements, self.listMatchDelay * 60,
                                   value = availableCEs )                      # it is given in minutes
      if not availableCEs:
        shutil.rmtree( workingDirectory, ignore_errors = True )
        return S_ERROR( ERROR_CE + ' TQ: %d' % taskQueueID )

    # Now we are ready for the actual submission, so

    self.log.verbose( 'Submitting Pilots for TaskQueue', taskQueueID )
    submitRet = self._submitPilot( proxy, pilotsPerJob, jdl, taskQueueID, rb )
    # The working directory is not needed any more, whatever the outcome
    shutil.rmtree( workingDirectory, ignore_errors = True )
    if not submitRet:
      return S_ERROR( 'Pilot Submission Failed for TQ %d ' % taskQueueID )
    # pilotReference, resourceBroker = submitRet

    submittedPilots = 0

    if pilotsPerJob != 1 and len( submitRet ) != pilotsPerJob:
      # Parametric jobs are used
      for pilotReference, resourceBroker in submitRet:
        pilotReference = self._getChildrenReferences( proxy, pilotReference, taskQueueID )
        submittedPilots += len( pilotReference )
        pilotAgentsDB.addPilotTQReference( pilotReference, taskQueueID, ownerDN,
                      vomsGroup, resourceBroker, self.gridMiddleware,
                      pilotRequirements )
    else:
      for pilotReference, resourceBroker in submitRet:
        pilotReference = [pilotReference]
        submittedPilots += len( pilotReference )
        pilotAgentsDB.addPilotTQReference( pilotReference, taskQueueID, ownerDN,
                      vomsGroup, broker = resourceBroker, gridType = self.gridMiddleware,
                      requirements = pilotRequirements )

    # add some sleep here
    time.sleep( 0.1 * submittedPilots )

    if pilotsToSubmit > pilotsPerJob:
      # Additional submissions are necessary, need to get a new token and iterate.
      pilotsToSubmit -= pilotsPerJob
      genericDN = self.genericPilotDN
      genericGroup = self.genericPilotGroup
      result = gProxyManager.requestToken( genericDN, genericGroup,
                                           max( pilotsToSubmit, self.maxJobsInFillMode ) )
      if not result[ 'OK' ]:
        self.log.error( ERROR_TOKEN, result['Message'] )
        return S_ERROR( ERROR_TOKEN )
      ( token, numberOfUses ) = result[ 'Value' ]
      # Drop every previous token option before adding the new one. The list
      # is rebuilt in place: removing items from a list while looping over it
      # skips the element following each removal and could leave stale tokens.
      tokenOption = '-o /Security/ProxyToken='
      pilotOptions[:] = [ option for option in pilotOptions
                          if not option.startswith( tokenOption ) ]
      pilotOptions.append( '%s%s' % ( tokenOption, token ) )
      pilotsPerJob = min( pilotsPerJob, int( numberOfUses / self.maxJobsInFillMode ) )
      result = self._submitPilots( workDir, taskQueueDict, pilotOptions,
                                   pilotsToSubmit, ceMask,
                                   submitPrivatePilot, privateTQ,
                                   proxy, pilotsPerJob )
      if not result['OK']:
        # Report what was submitted at this level together with the error
        result['Value'] = submittedPilots
        return result
      submittedPilots += result['Value']

    return S_OK( submittedPilots )
Esempio n. 29
0
    def execute(self):
        """Run one agent cycle.

        Steps:
          1.- Ask the Matcher service for the TaskQueues matching this director
          2.- Add up their priorities and their waiting jobs
          3.- Trigger the pilot submission for every TaskQueue

        Returns S_OK() when the cycle completes (also when there is nothing
        to do), the failed Matcher result when the TaskQueues cannot be
        retrieved, or S_ERROR when the sum of priorities is not positive.
        """

        self.__checkSubmitPools()

        # Describe this director to the Matcher, advertising all submit pools
        self.directorDict = getResourceDict()
        self.directorDict['SubmitPool'] = self.am_getOption("SubmitPools")

        matcher = RPCClient("WorkloadManagement/Matcher")
        matched = matcher.getMatchingTaskQueues(self.directorDict)
        if not matched['OK']:
            self.log.error('Could not retrieve TaskQueues from TaskQueueDB',
                           matched['Message'])
            return matched
        taskQueueDict = matched['Value']

        self.log.info('Found %s TaskQueues' % len(taskQueueDict))
        if not taskQueueDict:
            self.log.info('No TaskQueue to Process')
            return S_OK()

        # Tag every TaskQueue with its own ID while accumulating the totals
        prioritySum = 0
        waitingJobs = 0
        for tqID, tqInfo in taskQueueDict.items():
            tqInfo['TaskQueueID'] = tqID
            prioritySum += tqInfo['Priority']
            waitingJobs += tqInfo['Jobs']

        self.log.info('Sum of Priorities %s' % prioritySum)

        if waitingJobs == 0:
            self.log.info('No waiting Jobs')
            return S_OK('No waiting Jobs')
        if prioritySum <= 0:
            return S_ERROR('Wrong TaskQueue Priorities')

        # Share the pilots of this iteration among priorities and jobs
        self.pilotsPerPriority = (
            self.am_getOption('pilotsPerIteration') / prioritySum)
        self.pilotsPerJob = (
            self.am_getOption('pilotsPerIteration') / waitingJobs)

        # submittedPilots is reset under callBackLock
        self.callBackLock.acquire()
        try:
            self.submittedPilots = 0
        finally:
            self.callBackLock.release()
        self.toSubmitPilots = 0

        waitingStatusList = ['Submitted', 'Ready', 'Scheduled', 'Waiting']
        # Time limit handed to countPilots below
        now = Time.dateTime()
        waitingWindow = Time.hour * self.am_getOption("maxPilotWaitingHours")
        timeLimitToConsider = Time.toString(now - waitingWindow)

        for tqID in taskQueueDict:
            self.log.verbose('Processing TaskQueue', tqID)

            pilotFilter = {'TaskQueueID': tqID, 'Status': waitingStatusList}
            counted = pilotAgentsDB.countPilots(pilotFilter, None,
                                                timeLimitToConsider)
            if counted['OK']:
                waitingPilots = counted['Value']
                self.log.verbose('Waiting Pilots for TaskQueue %s:' % tqID,
                                 waitingPilots)
            else:
                # A failed count is treated as "no waiting pilots"
                self.log.error('Fail to get Number of Waiting pilots',
                               counted['Message'])
                waitingPilots = 0

            submission = self.submitPilotsForTaskQueue(taskQueueDict[tqID],
                                                       waitingPilots)
            if submission['OK']:
                self.toSubmitPilots += submission['Value']

        self.log.info('Number of pilots to be Submitted %s' %
                      self.toSubmitPilots)

        # Now wait until all Jobs in the Default ThreadPool are processed;
        # the other pools are not waited for here
        if 'Default' in self.pools:
            self.pools['Default'].processAllResults()

        self.log.info('Number of pilots Submitted %s' % self.submittedPilots)

        return S_OK()
  def export_setJobStatusBulk( self, jobID, statusDict ):
    """ Apply a batch of status updates to the job given by its JobId.
        Only the most recent status values are stored in the JobDB, while
        every entry of the batch is recorded in the JobLoggingDB.
        The statusDict has datetime as a key and a status information
        dictionary as value; each of them is read for 'Status',
        'MinorStatus', 'ApplicationStatus' and 'Source', and optionally for
        'ApplicationCounter'.
        Returns S_OK() on success, or the first failing result / S_ERROR.
    """
    jobID = int( jobID )

    result = jobDB.getJobAttributes( jobID, ['Status'] )
    if not result['OK']:
      return result
    currentAttributes = result['Value']
    if not currentAttributes:
      # if there is no matching Job it returns an empty dictionary
      return S_ERROR( 'No Matching Job' )

    # A job currently flagged as Stalled goes back to Running unless one of
    # the updates below provides another status
    status = 'Running' if currentAttributes['Status'] == "Stalled" else ""
    minor = ""
    application = ""
    appCounter = ""
    endDate = ''
    startDate = ''
    seenRunning = False

    # Get the latest WN time stamps of status updates
    result = logDB.getWMSTimeStamps( jobID )
    if not result['OK']:
      return result
    timeStamps = result['Value']
    latestEpoch = max( [float( timeStamps[name] ) for name in timeStamps if name != 'LastTime'] )
    from DIRAC import Time
    lastTime = Time.toString( Time.fromEpoch( latestEpoch ) )

    # Get the last status values
    dates = sorted( statusDict )
    for date in dates:
      # We should only update the status if its time stamp is more recent than the last update
      if not date >= lastTime:
        continue
      entry = statusDict[date]
      if entry['Status']:
        status = entry['Status']
        if status in JOB_FINAL_STATES:
          endDate = date
        if status == "Running":
          seenRunning = True
      if entry['MinorStatus']:
        minor = entry['MinorStatus']
        if minor == "Application" and seenRunning:
          startDate = date
      if entry['ApplicationStatus']:
        application = entry['ApplicationStatus']
      counter = entry.get( 'ApplicationCounter' )
      if counter:
        appCounter = counter

    # Only the attributes that received a value are written to the JobDB
    attrNames = []
    attrValues = []
    for name, value in ( ( 'Status', status ),
                         ( 'MinorStatus', minor ),
                         ( 'ApplicationStatus', application ),
                         ( 'ApplicationCounter', appCounter ) ):
      if value:
        attrNames.append( name )
        attrValues.append( value )
    result = jobDB.setJobAttributes( jobID, attrNames, attrValues, update = True )
    if not result['OK']:
      return result

    # NOTE(review): the outcome of these two calls is not checked - confirm
    # that ignoring a failure here is intended
    if endDate:
      jobDB.setEndExecTime( jobID, endDate )
    if startDate:
      jobDB.setStartExecTime( jobID, startDate )

    # Update the JobLoggingDB records: every entry is logged, including the
    # ones older than the last recorded time stamp
    for date in dates:
      entry = statusDict[date]
      status = entry['Status'] or 'idem'
      minor = entry['MinorStatus'] or 'idem'
      application = entry['ApplicationStatus']
      if application:
        # An entry carrying an application status is logged as
        # Running/Application whatever its own Status and MinorStatus are
        status = "Running"
        minor = "Application"
      else:
        application = 'idem'
      source = entry['Source']
      result = logDB.addLoggingRecord( jobID, status, minor, application, date, source )
      if not result['OK']:
        return result

    return S_OK()
Esempio n. 31
0
    def execute(self):
        """Run one agent cycle.

        Steps:
          1.- Ask the Matcher service for the TaskQueues matching this director
          2.- Add up their priorities and their waiting jobs
          3.- Trigger the pilot submission for every TaskQueue

        Returns S_OK() when the cycle completes (also when there is nothing
        to do), the failed Matcher result when the TaskQueues cannot be
        retrieved, or S_ERROR when the sum of priorities is not positive.
        """

        self.__checkSubmitPools()

        self.directorDict = getResourceDict()
        # Add all submit pools
        self.directorDict["SubmitPool"] = self.am_getOption("SubmitPools")
        # Add all DIRAC platforms if not specified otherwise; a failure to
        # get them simply leaves the entry out
        if "Platform" not in self.directorDict:
            platforms = getDIRACPlatforms()
            if platforms["OK"]:
                self.directorDict["Platform"] = platforms["Value"]

        matcher = RPCClient("WorkloadManagement/Matcher")
        matched = matcher.getMatchingTaskQueues(self.directorDict)
        if not matched["OK"]:
            self.log.error("Could not retrieve TaskQueues from TaskQueueDB", matched["Message"])
            return matched
        taskQueueDict = matched["Value"]

        self.log.info("Found %s TaskQueues" % len(taskQueueDict))
        if not taskQueueDict:
            self.log.info("No TaskQueue to Process")
            return S_OK()

        # Tag every TaskQueue with its own ID while accumulating the totals
        prioritySum = 0
        waitingJobs = 0
        for tqID, tqInfo in taskQueueDict.items():
            tqInfo["TaskQueueID"] = tqID
            prioritySum += tqInfo["Priority"]
            waitingJobs += tqInfo["Jobs"]

        self.log.info("Sum of Priorities %s" % prioritySum)

        if waitingJobs == 0:
            self.log.info("No waiting Jobs")
            return S_OK("No waiting Jobs")
        if prioritySum <= 0:
            return S_ERROR("Wrong TaskQueue Priorities")

        # Share the pilots of this iteration among priorities and jobs
        self.pilotsPerPriority = self.am_getOption("pilotsPerIteration") / prioritySum
        self.pilotsPerJob = self.am_getOption("pilotsPerIteration") / waitingJobs

        # submittedPilots is reset under callBackLock
        self.callBackLock.acquire()
        try:
            self.submittedPilots = 0
        finally:
            self.callBackLock.release()
        self.toSubmitPilots = 0

        waitingStatusList = ["Submitted", "Ready", "Scheduled", "Waiting"]
        # Time limit handed to countPilots below
        now = Time.dateTime()
        waitingWindow = Time.hour * self.am_getOption("maxPilotWaitingHours")
        timeLimitToConsider = Time.toString(now - waitingWindow)

        for tqID in taskQueueDict:
            self.log.verbose("Processing TaskQueue", tqID)

            pilotFilter = {"TaskQueueID": tqID, "Status": waitingStatusList}
            counted = pilotAgentsDB.countPilots(pilotFilter, None, timeLimitToConsider)
            if counted["OK"]:
                waitingPilots = counted["Value"]
                self.log.verbose("Waiting Pilots for TaskQueue %s:" % tqID, waitingPilots)
            else:
                # A failed count is treated as "no waiting pilots"
                self.log.error("Fail to get Number of Waiting pilots", counted["Message"])
                waitingPilots = 0

            submission = self.submitPilotsForTaskQueue(taskQueueDict[tqID], waitingPilots)
            if submission["OK"]:
                self.toSubmitPilots += submission["Value"]

        self.log.info("Number of pilots to be Submitted %s" % self.toSubmitPilots)

        # Now wait until all Jobs in the Default ThreadPool are processed;
        # the other pools are not waited for here
        if "Default" in self.pools:
            self.pools["Default"].processAllResults()

        self.log.info("Number of pilots Submitted %s" % self.submittedPilots)

        return S_OK()