class TaskCallbacksTests(unittest.TestCase):
    """
    .. class:: TaskCallbacksTests

    test case for ProcessPool, using per-task result and exception callbacks
    """

    def setUp(self):
        """Create the sub-logger and a daemonized ProcessPool( 4, 8, 8 )."""
        gLogger.showHeaders(True)
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.processPool = ProcessPool(4, 8, 8)
        self.processPool.daemonize()

    def _enqueueTasks(self, taskCallable, taskLabel, nTasks=10):
        """Queue nTasks tasks running taskCallable, then finalize the pool.

        :param taskCallable: callable handed to createAndQueueTask
        :param str taskLabel: name of the callable used in the log message
        :param int nTasks: number of tasks that have to be queued successfully
        """
        i = 0
        while i < nTasks:
            # Spin until the pool reports a free slot
            if self.processPool.getFreeSlots() <= 0:
                continue
            timeWait = random.randint(0, 5)
            # A zero wait time is also used as the "raise an exception" flag
            raiseException = not timeWait
            result = self.processPool.createAndQueueTask(taskCallable,
                                                         taskID=i,
                                                         args=(i, timeWait, raiseException),
                                                         callback=ResultCallback,
                                                         exceptionCallback=ExceptionCallback,
                                                         blocking=True)
            # Only move on to the next task ID once this one was accepted
            if result["OK"]:
                self.log.always("%s enqueued to task %s" % (taskLabel, i))
                i += 1
        self.processPool.finalize(2)

    def testCallableClass(self):
        """ CallableClass and task callbacks test """
        self._enqueueTasks(CallableClass, "CallableClass")

    def testCallableFunc(self):
        """ CallableFunc and task callbacks test """
        # The log message used to say "CallableClass" here (copy-paste slip)
        self._enqueueTasks(CallableFunc, "CallableFunc")
class TaskCallbacksTests(unittest.TestCase):
    """
    .. class:: TaskCallbacksTests

    test case for ProcessPool, using per-task result and exception callbacks
    """

    def setUp(self):
        """Create the sub-logger and a daemonized ProcessPool( 4, 8, 8 )."""
        gLogger.showHeaders(True)
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.processPool = ProcessPool(4, 8, 8)
        self.processPool.daemonize()

    def _enqueueTasks(self, taskCallable, taskLabel, nTasks=10):
        """Queue nTasks tasks running taskCallable, then finalize the pool.

        :param taskCallable: callable handed to createAndQueueTask
        :param str taskLabel: name of the callable used in the log message
        :param int nTasks: number of tasks that have to be queued successfully
        """
        i = 0
        while i < nTasks:
            # Spin until the pool reports a free slot
            if self.processPool.getFreeSlots() <= 0:
                continue
            timeWait = random.randint(0, 5)
            # A zero wait time is also used as the "raise an exception" flag
            raiseException = not timeWait
            result = self.processPool.createAndQueueTask(taskCallable,
                                                         taskID=i,
                                                         args=(i, timeWait, raiseException),
                                                         callback=ResultCallback,
                                                         exceptionCallback=ExceptionCallback,
                                                         blocking=True)
            # Only move on to the next task ID once this one was accepted
            if result["OK"]:
                self.log.always("%s enqueued to task %s" % (taskLabel, i))
                i += 1
        self.processPool.finalize(2)

    def testCallableClass(self):
        """ CallableClass and task callbacks test """
        self._enqueueTasks(CallableClass, "CallableClass")

    def testCallableFunc(self):
        """ CallableFunc and task callbacks test """
        # The log message used to say "CallableClass" here (copy-paste slip)
        self._enqueueTasks(CallableFunc, "CallableFunc")
def runTest():
    """Run the configured test function in nClients pool tasks and print statistics.

    Reads the module-level settings nClients, nQueries, testType, testDir and
    lfnListFile, and resets the module-level resultTest list before queueing.
    resultTest is expected to hold (testTime, success, failure) tuples once the
    pool results are processed -- presumably filled by the finalize callback;
    confirm against its definition.

    :return: ((averageTime, errorTime), (averageRate, errorRate)) from doStats
    """
    global nClients, nQueries, testType, resultTest, testDir, lfnListFile

    resultTest = []
    pp = ProcessPool(nClients)
    # NOTE(review): eval() executes arbitrary code. This is only acceptable
    # while testType comes from a trusted source -- confirm where it is set.
    testFunction = eval(testType)
    for _ in range(nClients):
        result = pp.createAndQueueTask(testFunction, [nQueries],
                                       callback=finalize,
                                       exceptionCallback=doException)
        # A task that could not be queued never reports back; say so instead
        # of silently producing statistics over fewer clients.
        if not result['OK']:
            print("Failed queueing test task: %s" % result.get('Message', 'unknown error'))
    pp.processAllResults(3600)
    pp.finalize(0)

    # Flatten the per-task time lists into one list of query times
    timeResult = []
    for testTime, _success, _failure in resultTest:
        timeResult += testTime

    averageTime, errorTime = doStats(timeResult)
    rateResult = [nClients / t for t in timeResult]
    averageRate, errorRate = doStats(rateResult)

    testTarget = testDir if testDir else lfnListFile
    print("\nTest results for clients %d, %s" % (nClients, testTarget))
    print("Query time: %.2f +/- %.2f" % (averageTime, errorTime))
    print("Query rate: %.2f +/- %.2f" % (averageRate, errorRate))

    return ((averageTime, errorTime), (averageRate, errorRate))
def runTest():
    """Run the configured test function in nClients pool tasks and print statistics.

    Reads the module-level settings nClients, nQueries, testType, testDir and
    lfnListFile, and resets the module-level resultTest list before queueing.
    resultTest is expected to hold (testTime, success, failure) tuples once the
    pool results are processed -- presumably filled by the finalize callback;
    confirm against its definition.

    :return: ((averageTime, errorTime), (averageRate, errorRate)) from doStats
    """
    global nClients, nQueries, testType, resultTest, testDir, lfnListFile

    resultTest = []
    pp = ProcessPool(nClients)
    # NOTE(review): eval() executes arbitrary code. This is only acceptable
    # while testType comes from a trusted source -- confirm where it is set.
    testFunction = eval(testType)
    for _ in range(nClients):
        result = pp.createAndQueueTask(testFunction, [nQueries],
                                       callback=finalize,
                                       exceptionCallback=doException)
        # A task that could not be queued never reports back; say so instead
        # of silently producing statistics over fewer clients.
        if not result['OK']:
            print("Failed queueing test task: %s" % result.get('Message', 'unknown error'))
    pp.processAllResults(3600)
    pp.finalize(0)

    # Flatten the per-task time lists into one list of query times
    timeResult = []
    for testTime, _success, _failure in resultTest:
        timeResult += testTime

    averageTime, errorTime = doStats(timeResult)
    rateResult = [nClients / t for t in timeResult]
    averageRate, errorRate = doStats(rateResult)

    testTarget = testDir if testDir else lfnListFile
    print("\nTest results for clients %d, %s" % (nClients, testTarget))
    print("Query time: %.2f +/- %.2f" % (averageTime, errorTime))
    print("Query rate: %.2f +/- %.2f" % (averageRate, errorRate))

    return ((averageTime, errorTime), (averageRate, errorRate))
class PoolComputingElement(ComputingElement):
    """Computing element that queues executeJob tasks into a local ProcessPool.

    The number of processors granted to each queued task is kept in
    processorsPerTask so that further submissions can be refused once the
    configured number of processors is used up.
    """

    mandatoryParameters = MandatoryParameters

    #############################################################################
    def __init__(self, ceUniqueID):
        """ Standard constructor.

        :param ceUniqueID: identifier handed over to ComputingElement.__init__
        """
        ComputingElement.__init__(self, ceUniqueID)
        self.ceType = "Pool"
        self.log = gLogger.getSubLogger('Pool')
        self.submittedJobs = 0
        self.processors = 1
        self.pPool = None
        self.taskID = 0
        # taskID -> number of processors granted to that task
        self.processorsPerTask = {}
        # taskID -> index of the sudo unix user assigned to that task
        self.userNumberPerTask = {}
        self.useSudo = False

    #############################################################################
    def _addCEConfigDefaults(self):
        """Method to make sure all necessary Configuration Parameters are defined
        """
        # First assure that any global parameters are loaded
        ComputingElement._addCEConfigDefaults(self)

    def _reset(self):
        """ Update internal variables after some extra parameters are added

        :return: None
        """
        self.processors = int(self.ceParameters.get('NumberOfProcessors', self.processors))
        self.ceParameters['MaxTotalJobs'] = self.processors
        self.useSudo = self.ceParameters.get('SudoExecution', False)

    def _preparePool(self):
        """ Create the ProcessPool on first use and process its pending results,
        which triggers finalizeJob for the tasks that have finished
        """
        if self.pPool is None:
            self.pPool = ProcessPool(minSize=self.processors,
                                     maxSize=self.processors,
                                     poolCallback=self.finalizeJob)
        self.pPool.processResults()

    def getProcessorsInUse(self):
        """ Get the number of currently allocated processor cores

        :return: number of processor cores
        """
        return sum(self.processorsPerTask.values())

    #############################################################################
    def submitJob(self, executableFile, proxy, **kwargs):
        """ Method to submit job.

        Recognised keyword arguments: wholeNode (request all processors, only
        granted when none is in use) and numberOfProcessors; one processor is
        requested when neither is given.

        :param str executableFile: location of the executable file
        :param str proxy: payload proxy

        :return: S_OK/S_ERROR of the result of the job submission
        """
        self._preparePool()

        processorsInUse = self.getProcessorsInUse()
        availableProcessors = self.processors - processorsInUse
        if kwargs.get('wholeNode'):
            if processorsInUse > 0:
                return S_ERROR('Can not take WholeNode job')
            requestedProcessors = self.processors
        elif "numberOfProcessors" in kwargs:
            requestedProcessors = int(kwargs['numberOfProcessors'])
        else:
            requestedProcessors = 1

        if availableProcessors < requestedProcessors:
            return S_ERROR('Not enough slots: requested %d, available %d' % (requestedProcessors,
                                                                             availableProcessors))

        ret = getProxyInfo()
        pilotProxy = ret['Value']['path'] if ret['OK'] else None
        self.log.notice('Pilot Proxy:', pilotProxy)

        # Keyword arguments for executeJob; the submission kwargs are not forwarded
        taskKwargs = {'UseSudo': False}
        if self.useSudo:
            # Pick the first user index that no tracked task is using. When all
            # indexes are taken the last one is reused, as before.
            usersInUse = set(self.userNumberPerTask.values())
            for nUser in range(MAX_NUMBER_OF_SUDO_UNIX_USERS):
                if nUser not in usersInUse:
                    break
            taskKwargs['NUser'] = nUser
            taskKwargs['PayloadUser'] = os.environ['USER'] + 'p%s' % str(nUser).zfill(2)
            taskKwargs['UseSudo'] = True

        taskID = self.taskID
        result = self.pPool.createAndQueueTask(executeJob,
                                               args=(executableFile, proxy, taskID),
                                               kwargs=taskKwargs,
                                               taskID=taskID,
                                               usePoolCallbacks=True)
        self.taskID += 1
        # Only account for tasks that were really queued: a rejected task never
        # reaches finalizeJob, so its processors would stay allocated forever.
        if result['OK']:
            self.processorsPerTask[taskID] = requestedProcessors
            if self.useSudo:
                # Remember the user index so concurrent tasks get distinct users
                self.userNumberPerTask[taskID] = nUser

        self.pPool.processResults()

        return result

    def finalizeJob(self, taskID, result):
        """ Finalize the job by updating the process utilisation counters

        :param int taskID: local PoolCE task ID
        :param dict result: result of the job execution
        """
        # Tolerate task IDs that are not tracked instead of raising KeyError
        # from inside the pool callback
        nProc = self.processorsPerTask.pop(taskID, 0)
        self.userNumberPerTask.pop(taskID, None)
        if result['OK']:
            self.log.info('Task %d finished successfully, %d processor(s) freed' % (taskID, nProc))
        else:
            self.log.error("Task failed submission", "%d, message: %s" % (taskID, result['Message']))

    #############################################################################
    def getCEStatus(self, jobIDList=None):
        """ Method to return information on running and pending jobs.

        :param jobIDList: not used by this implementation

        :return: dictionary of numbers of jobs per status
        """
        self._preparePool()

        result = S_OK()
        result['SubmittedJobs'] = 0
        # A task counts as running when it holds at least one processor
        result['RunningJobs'] = sum(1 for nProc in self.processorsPerTask.values() if nProc > 0)
        result['WaitingJobs'] = 0

        processorsInUse = self.getProcessorsInUse()
        result['UsedProcessors'] = processorsInUse
        result['AvailableProcessors'] = self.processors - processorsInUse
        return result

    def getDescription(self):
        """ Get CE description as a list of dictionaries

        With the MultiProcessorStrategy parameter set, variants of the base
        description carrying extra "RequiredTag" entries come first; the plain
        description is always the last element.

        :return: S_OK( list of CE description dictionaries ) or the error
                 returned by the base class
        """
        result = super(PoolComputingElement, self).getDescription()
        if not result['OK']:
            return result
        ceDict = result['Value']
        ceDictList = []
        if self.ceParameters.get('MultiProcessorStrategy'):
            strategyRequiredTags = []
            if not ceDict.get("ProcessorsInUse", 0):
                # We are starting from a clean page, try to get the most demanding
                # jobs first
                strategyRequiredTags.append(['WholeNode'])
            processors = ceDict.get('NumberOfProcessors', 0)
            if processors > 1:
                # We have several processors at hand, try to use most of them
                strategyRequiredTags.append(['%dProcessors' % processors])
                # Well, at least jobs with some processors requirement
                strategyRequiredTags.append(['MultiProcessor'])

            for strat in strategyRequiredTags:
                newCEDict = dict(ceDict)
                # dict() copies shallowly: build a fresh tag list so that an
                # already present "RequiredTag" list is not shared with, and
                # accumulated across, ceDict and all the variants
                newCEDict["RequiredTag"] = list(ceDict.get("RequiredTag", [])) + strat
                ceDictList.append(newCEDict)

        # Do not require anything special if nothing else was lucky
        ceDictList.append(dict(ceDict))

        return S_OK(ceDictList)

    #############################################################################
    def monitorProxy(self, pilotProxy, payloadProxy):
        """ Monitor the payload proxy and renew as necessary.

        :param str pilotProxy: location of the pilotProxy
        :param str payloadProxy: location of the payloadProxy
        """
        return self._monitorProxy(pilotProxy, payloadProxy)
class PoolComputingElement(ComputingElement):
    """Computing element that queues executeJob tasks into a local ProcessPool.

    The number of processors granted to each queued task is kept in
    processorsPerTask so that further submissions can be refused once the
    configured number of processors is used up.
    """

    mandatoryParameters = MandatoryParameters

    #############################################################################
    def __init__(self, ceUniqueID):
        """ Standard constructor.

        :param ceUniqueID: identifier handed over to the base class constructor
        """
        super(PoolComputingElement, self).__init__(ceUniqueID)
        self.ceType = "Pool"
        self.log = gLogger.getSubLogger('Pool')
        self.submittedJobs = 0
        self.processors = 1
        self.pPool = None
        self.taskID = 0
        # taskID -> number of processors granted to that task
        self.processorsPerTask = {}
        # taskID -> index of the sudo unix user assigned to that task
        self.userNumberPerTask = {}
        self.useSudo = False

    #############################################################################
    def _addCEConfigDefaults(self):
        """Method to make sure all necessary Configuration Parameters are defined
        """
        # First assure that any global parameters are loaded
        ComputingElement._addCEConfigDefaults(self)

    def _reset(self):
        """ Update internal variables after some extra parameters are added

        :return: None
        """
        self.processors = int(self.ceParameters.get('NumberOfProcessors', self.processors))
        self.ceParameters['MaxTotalJobs'] = self.processors
        self.useSudo = self.ceParameters.get('SudoExecution', False)

    def _preparePool(self):
        """ Create the ProcessPool on first use and process its pending results,
        which triggers finalizeJob for the tasks that have finished
        """
        if self.pPool is None:
            self.pPool = ProcessPool(minSize=self.processors,
                                     maxSize=self.processors,
                                     poolCallback=self.finalizeJob)
        self.pPool.processResults()

    def getProcessorsInUse(self):
        """ Get the number of currently allocated processor cores

        :return: number of processor cores
        """
        return sum(self.processorsPerTask.values())

    #############################################################################
    def submitJob(self, executableFile, proxy, **kwargs):
        """ Method to submit job.

        The number of processors granted to the job is decided by
        _getProcessorsForJobs from the keyword arguments and written to
        pilot.cfg before the task is queued.

        :param str executableFile: location of the executable file
        :param str proxy: payload proxy

        :return: S_OK/S_ERROR of the result of the job submission
        """
        self._preparePool()

        processorsForJob = self._getProcessorsForJobs(kwargs)
        if not processorsForJob:
            return S_ERROR('Not enough processors for the job')

        # Now persisting the job limits for later use in pilot.cfg file (pilot 3 default)
        cd = ConfigurationData(loadDefaultCFG=False)
        res = cd.loadFile('pilot.cfg')
        if not res['OK']:
            self.log.error("Could not load pilot.cfg", res['Message'])
        # only NumberOfProcessors for now, but RAM (or other stuff) can also be added
        jobID = int(kwargs.get('jobDesc', {}).get('jobID', 0))
        cd.setOptionInCFG('/Resources/Computing/JobLimits/%d/NumberOfProcessors' % jobID,
                          processorsForJob)
        res = cd.dumpLocalCFGToFile('pilot.cfg')
        if not res['OK']:
            self.log.error("Could not dump cfg to pilot.cfg", res['Message'])

        ret = getProxyInfo()
        pilotProxy = ret['Value']['path'] if ret['OK'] else None
        self.log.notice('Pilot Proxy:', pilotProxy)

        # Keyword arguments for executeJob; the submission kwargs are not forwarded
        taskKwargs = {'UseSudo': False}
        if self.useSudo:
            # Pick the first user index that no tracked task is using. When all
            # indexes are taken the last one is reused, as before.
            usersInUse = set(self.userNumberPerTask.values())
            for nUser in range(MAX_NUMBER_OF_SUDO_UNIX_USERS):
                if nUser not in usersInUse:
                    break
            taskKwargs['NUser'] = nUser
            taskKwargs['PayloadUser'] = os.environ['USER'] + 'p%s' % str(nUser).zfill(2)
            taskKwargs['UseSudo'] = True

        taskID = self.taskID
        result = self.pPool.createAndQueueTask(executeJob,
                                               args=(executableFile, proxy, taskID),
                                               kwargs=taskKwargs,
                                               taskID=taskID,
                                               usePoolCallbacks=True)
        self.taskID += 1
        # Only account for tasks that were really queued: a rejected task never
        # reaches finalizeJob, so its processors would stay allocated forever.
        if result['OK']:
            self.processorsPerTask[taskID] = processorsForJob
            if self.useSudo:
                # Remember the user index so concurrent tasks get distinct users
                self.userNumberPerTask[taskID] = nUser

        self.pPool.processResults()

        return result

    def _getProcessorsForJobs(self, kwargs):
        """ Decide how many processors a job with the given submission kwargs gets.

        Keys looked at: mpTag, wholeNode, numberOfProcessors and
        maxNumberOfProcessors.

        :param dict kwargs: keyword arguments received by submitJob

        :return: number of processors to grant, 0 if the job can not be taken
        """
        processorsInUse = self.getProcessorsInUse()
        availableProcessors = self.processors - processorsInUse

        # Does this ask for MP?
        if not kwargs.get('mpTag', False):
            # Single processor job: needs one free processor (a negative
            # balance must not be mistaken for free capacity)
            return 1 if availableProcessors > 0 else 0

        # From here we assume the job is asking for MP
        if kwargs.get('wholeNode', False):
            if processorsInUse > 0:
                return 0
            requestedProcessors = self.processors
        elif "numberOfProcessors" in kwargs:
            # "elif": the whole-node request above used to be overwritten here
            requestedProcessors = int(kwargs['numberOfProcessors'])
        else:
            requestedProcessors = 1

        if availableProcessors < requestedProcessors:
            return 0

        # If there's a maximum number of processors allowed for the job, use that as maximum,
        # otherwise it will use all the remaining processors
        if kwargs.get('maxNumberOfProcessors'):
            return min(int(kwargs['maxNumberOfProcessors']), availableProcessors)
        return availableProcessors

    def finalizeJob(self, taskID, result):
        """ Finalize the job by updating the process utilisation counters

        :param int taskID: local PoolCE task ID
        :param dict result: result of the job execution
        """
        # Tolerate task IDs that are not tracked instead of raising KeyError
        # from inside the pool callback
        nProc = self.processorsPerTask.pop(taskID, 0)
        self.userNumberPerTask.pop(taskID, None)
        if result['OK']:
            self.log.info('Task %d finished successfully, %d processor(s) freed' % (taskID, nProc))
        else:
            self.log.error("Task failed submission", "%d, message: %s" % (taskID, result['Message']))

    #############################################################################
    def getCEStatus(self, jobIDList=None):
        """ Method to return information on running and pending jobs.

        :param jobIDList: not used by this implementation

        :return: dictionary of numbers of jobs per status
        """
        self._preparePool()

        result = S_OK()
        result['SubmittedJobs'] = 0
        # A task counts as running when it holds at least one processor
        result['RunningJobs'] = sum(1 for nProc in self.processorsPerTask.values() if nProc > 0)
        result['WaitingJobs'] = 0

        processorsInUse = self.getProcessorsInUse()
        result['UsedProcessors'] = processorsInUse
        result['AvailableProcessors'] = self.processors - processorsInUse
        return result

    def getDescription(self):
        """ Get a list of CEs descriptions (each is a dict)

        This is called by the JobAgent.

        With the MultiProcessorStrategy parameter set, variants of the base
        description carrying extra "RequiredTag" entries come first; the plain
        description is always the last element.

        :return: S_OK( list of CE description dictionaries ) or the error
                 returned by the base class
        """
        result = super(PoolComputingElement, self).getDescription()
        if not result['OK']:
            return result
        ceDict = result['Value']
        ceDictList = []
        if self.ceParameters.get('MultiProcessorStrategy'):
            strategyRequiredTags = []
            if not ceDict.get("ProcessorsInUse", 0):
                # We are starting from a clean page, try to get the most demanding
                # jobs first
                strategyRequiredTags.append(['WholeNode'])
            processors = ceDict.get('NumberOfProcessors', 0)
            if processors > 1:
                # We have several processors at hand, try to use most of them
                strategyRequiredTags.append(['%dProcessors' % processors])
                # Well, at least jobs with some processors requirement
                strategyRequiredTags.append(['MultiProcessor'])

            for strat in strategyRequiredTags:
                newCEDict = dict(ceDict)
                # dict() copies shallowly: build a fresh tag list so that an
                # already present "RequiredTag" list is not shared with, and
                # accumulated across, ceDict and all the variants
                newCEDict["RequiredTag"] = list(ceDict.get("RequiredTag", [])) + strat
                ceDictList.append(newCEDict)

        # Do not require anything special if nothing else was lucky
        ceDictList.append(dict(ceDict))

        return S_OK(ceDictList)

    #############################################################################
    def monitorProxy(self, pilotProxy, payloadProxy):
        """ Monitor the payload proxy and renew as necessary.

        :param str pilotProxy: location of the pilotProxy
        :param str payloadProxy: location of the payloadProxy
        """
        return self._monitorProxy(pilotProxy, payloadProxy)
# Queue one processDir task per directory of dirlist into a daemonized pool,
# then wait for all the results.
pPool = ProcessPool(10, 50, 50)
pPool.daemonize()

# NOTE(review): every dirlist assignment visible here is commented out, so
# dirlist has to be defined before this point -- confirm.
# dirlist = ['prod/ilc/mc-dbd/generated','prod/ilc/mc-dbd/ild']
# dirlist= ['prod/ilc/mc-dbd/generated/500-TDR_ws/higgs']
# dirlist= ['prod/ilc/mc-dbd/generated/250-TDR_ws/higgs','prod/ilc/mc-dbd/generated/350-TDR_ws/higgs']
#dirlist= ['prod/ilc/mc-dbd/generated/250-TDR_ws']
#dirlist= ['prod/ilc/mc-dbd/generated/250-TDR_ws/1f',
#          'prod/ilc/mc-dbd/generated/250-TDR_ws/3f',
#          'prod/ilc/mc-dbd/generated/250-TDR_ws/aa_lowpt',
#          'prod/ilc/mc-dbd/generated/250-TDR_ws/aa_minijet']
#dirlist= ['prod/ilc/mc-dbd/generated/250-TDR_ws/aa_2f',
#          'prod/ilc/mc-dbd/generated/350-TDR_ws/3f',
#          'prod/ilc/mc-dbd/generated/350-TDR_ws/1f',
#          'prod/ilc/mc-dbd/generated/350-TDR_ws/aa_minijet']

lfcHosts = ['grid-lfc.desy.de']

# "dirName" rather than "dir", which would shadow the builtin
for dirName in dirlist:
    path = "/ilc/%s" % (dirName,)
    # Single-argument print() gives the same output under Python 2 and 3
    print("Queueing user %s %s %s %s %s %s" % (dirName,
                                               pPool.getFreeSlots(),
                                               pPool.getNumWorkingProcesses(),
                                               pPool.hasPendingTasks(),
                                               pPool.getNumIdleProcesses(),
                                               lfcHosts[0]))
    result = pPool.createAndQueueTask(processDir,
                                      [path, True, lfcHosts[0]],
                                      callback=finalizeDirectory)
    if not result['OK']:
        print("Failed queueing %s" % (path,))

pPool.processAllResults()

print("LFC_to_DFC completed.")
class TaskTimeOutTests(unittest.TestCase):
    """
    .. class:: TaskTimeOutTests

    test case for ProcessPool, queueing tasks with a time out and pool callbacks
    """

    def setUp(self):
        """c'tor

        Parses the command line, then creates the sub-logger and a daemonized
        ProcessPool( 2, 4, 8 ) wired to the pool callbacks of this class.

        :param self: self reference
        """
        from DIRAC.Core.Base import Script
        Script.parseCommandLine()
        from DIRAC.FrameworkSystem.Client.Logger import gLogger
        gLogger.showHeaders(True)
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.processPool = ProcessPool(2, 4, 8,
                                       poolCallback=self.poolCallback,
                                       poolExceptionCallback=self.poolExceptionCallback)
        self.processPool.daemonize()

    def poolCallback(self, taskID, taskResult):
        """Pool callback: log the result of a task."""
        self.log.always("callback result for %s is %s" % (taskID, taskResult))

    def poolExceptionCallback(self, taskID, taskException):
        """Pool exception callback: log the exception of a task."""
        self.log.always("callback exception for %s is %s" % (taskID, taskException))

    def _queueTask(self, taskCallable, taskID, timeWait, raiseException):
        """Queue one task with timeOut = 15 and the pool callbacks enabled.

        :return: result dictionary of createAndQueueTask
        """
        return self.processPool.createAndQueueTask(taskCallable,
                                                   taskID=taskID,
                                                   args=(taskID, timeWait, raiseException),
                                                   timeOut=15,
                                                   usePoolCallbacks=True,
                                                   blocking=True)

    def testCallableClass(self):
        """ CallableClass and task time out test """
        i = 0
        while i < 16:
            # Spin until the pool reports a free slot
            if self.processPool.getFreeSlots() <= 0:
                continue
            timeWait = random.randint(0, 5) * 10
            # A zero wait time is also used as the "raise an exception" flag
            raiseException = not timeWait
            result = self._queueTask(CallableClass, i, timeWait, raiseException)
            if result["OK"]:
                self.log.always("CallableClass enqueued to task %s timeWait=%s exception=%s" %
                                (i, timeWait, raiseException))
                i += 1
        self.processPool.finalize(2)

    def testCallableFunc(self):
        """ CallableFunc and task timeout test """
        i = 0
        while i < 16:
            # Spin until the pool reports a free slot
            if self.processPool.getFreeSlots() <= 0:
                continue
            timeWait = random.randint(0, 5) * 5
            # A zero wait time is also used as the "raise an exception" flag
            raiseException = not timeWait
            result = self._queueTask(CallableFunc, i, timeWait, raiseException)
            if result["OK"]:
                self.log.always("CallableFunc enqueued to task %s timeWait=%s exception=%s" %
                                (i, timeWait, raiseException))
                i += 1
        self.processPool.finalize(2)

    def testLockedClass(self):
        """ LockedCallableClass and task time out test """
        try:
            for loop in range(2):
                self.log.always("loop %s" % loop)
                i = 0
                while i < 16:
                    # Spin until the pool reports a free slot
                    if self.processPool.getFreeSlots() <= 0:
                        continue
                    timeWait = random.randint(0, 5) * 5
                    raiseException = timeWait == 5
                    # Wait times of 20 and above go to the locked variant
                    klass = LockedCallableClass if timeWait >= 20 else CallableClass
                    result = self._queueTask(klass, i, timeWait, raiseException)
                    if result["OK"]:
                        self.log.always("%s enqueued to task %s timeWait=%s exception=%s" %
                                        (klass.__name__, i, timeWait, raiseException))
                        i += 1
                self.log.always("being idle for a while")
                # Busy wait; throw-away counters so the task counter is not reused
                for _ in range(100000):
                    for _ in range(1000):
                        pass

            self.log.always("finalizing...")
            self.processPool.finalize(10)
        finally:
            # unlock, also when the test body raised, so the lock is not left held
            gLock.release()
class ProcessPoolCallbacksTests(unittest.TestCase):
    """
    .. class:: ProcessPoolCallbacksTests

    test case for ProcessPool, with results delivered through the pool callbacks
    """

    def setUp(self):
        """c'tor

        Parses the command line, then builds the sub-logger and a daemonized
        ProcessPool( 4, 8, 8 ) using the callbacks defined on this class.

        :param self: self reference
        """
        from DIRAC.Core.Base import Script
        Script.parseCommandLine()
        from DIRAC.FrameworkSystem.Client.Logger import gLogger
        gLogger.showHeaders(True)
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        pool = ProcessPool(4, 8, 8,
                           poolCallback=self.poolCallback,
                           poolExceptionCallback=self.poolExceptionCallback)
        self.processPool = pool
        pool.daemonize()

    def poolCallback(self, taskID, taskResult):
        """Log the result reported for a task."""
        message = "callback for %s result is %s" % (taskID, taskResult)
        self.log.always(message)

    def poolExceptionCallback(self, taskID, taskException):
        """Log the exception reported for a task."""
        message = "callback for %s exception is %s" % (taskID, taskException)
        self.log.always(message)

    def testCallableClass(self):
        """ CallableClass and pool callbacks test """
        queued = 0
        while queued != 10:
            # Nothing to do until the pool has a free slot
            if not self.processPool.getFreeSlots() > 0:
                continue
            delay = random.randint(0, 5)
            shouldRaise = True if not delay else False
            outcome = self.processPool.createAndQueueTask(CallableClass,
                                                          taskID=queued,
                                                          args=(queued, delay, shouldRaise),
                                                          usePoolCallbacks=True,
                                                          blocking=True)
            if not outcome["OK"]:
                continue
            self.log.always("CallableClass enqueued to task %s" % queued)
            queued += 1
        self.processPool.finalize(2)

    def testCallableFunc(self):
        """ CallableFunc and pool callbacks test """
        queued = 0
        while queued != 10:
            # Nothing to do until the pool has a free slot
            if not self.processPool.getFreeSlots() > 0:
                continue
            delay = random.randint(0, 5)
            shouldRaise = True if not delay else False
            outcome = self.processPool.createAndQueueTask(CallableFunc,
                                                          taskID=queued,
                                                          args=(queued, delay, shouldRaise),
                                                          usePoolCallbacks=True,
                                                          blocking=True)
            if not outcome["OK"]:
                continue
            self.log.always("CallableFunc enqueued to task %s" % queued)
            queued += 1
        self.processPool.finalize(2)
class PoolComputingElement(ComputingElement):
    """Computing element that queues executeJob tasks into a local ProcessPool.

    The number of processors granted to each queued task is kept in
    processorsPerTask so that further submissions can be refused once the
    configured number of processors is used up.
    """

    mandatoryParameters = MandatoryParameters

    #############################################################################
    def __init__(self, ceUniqueID):
        """ Standard constructor.

        :param ceUniqueID: identifier handed over to ComputingElement.__init__
        """
        ComputingElement.__init__(self, ceUniqueID)
        self.ceType = "Pool"
        self.log = gLogger.getSubLogger('Pool')
        self.submittedJobs = 0
        self.processors = 1
        self.pPool = None
        self.taskID = 0
        # taskID -> number of processors granted to that task
        self.processorsPerTask = {}
        # taskID -> index of the sudo unix user assigned to that task
        self.userNumberPerTask = {}
        self.useSudo = False

    #############################################################################
    def _addCEConfigDefaults(self):
        """Method to make sure all necessary Configuration Parameters are defined
        """
        # First assure that any global parameters are loaded
        ComputingElement._addCEConfigDefaults(self)

    def _reset(self):
        """ Update internal variables after some extra parameters are added

        :return: None
        """
        self.processors = int(self.ceParameters.get('NumberOfProcessors', self.processors))
        self.ceParameters['MaxTotalJobs'] = self.processors
        self.useSudo = self.ceParameters.get('SudoExecution', False)

    def _preparePool(self):
        """ Create the ProcessPool on first use and process its pending results,
        which triggers finalizeJob for the tasks that have finished
        """
        if self.pPool is None:
            self.pPool = ProcessPool(minSize=self.processors,
                                     maxSize=self.processors,
                                     poolCallback=self.finalizeJob)
        self.pPool.processResults()

    def getProcessorsInUse(self):
        """ Get the number of currently allocated processor cores

        :return: number of processor cores
        """
        return sum(self.processorsPerTask.values())

    #############################################################################
    def submitJob(self, executableFile, proxy, **kwargs):
        """ Method to submit job.

        Recognised keyword arguments: wholeNode (request all processors, only
        granted when none is in use) and numberOfProcessors; one processor is
        requested when neither is given.

        :param str executableFile: location of the executable file
        :param str proxy: payload proxy

        :return: S_OK/S_ERROR of the result of the job submission
        """
        self._preparePool()

        processorsInUse = self.getProcessorsInUse()
        availableProcessors = self.processors - processorsInUse
        if kwargs.get('wholeNode'):
            if processorsInUse > 0:
                return S_ERROR('Can not take WholeNode job')
            requestedProcessors = self.processors
        elif "numberOfProcessors" in kwargs:
            requestedProcessors = int(kwargs['numberOfProcessors'])
        else:
            requestedProcessors = 1

        if availableProcessors < requestedProcessors:
            return S_ERROR('Not enough slots: requested %d, available %d' % (requestedProcessors,
                                                                             availableProcessors))

        ret = getProxyInfo()
        pilotProxy = ret['Value']['path'] if ret['OK'] else None
        self.log.notice('Pilot Proxy:', pilotProxy)

        # Keyword arguments for executeJob; the submission kwargs are not forwarded
        taskKwargs = {'UseSudo': False}
        if self.useSudo:
            # Pick the first user index that no tracked task is using. When all
            # indexes are taken the last one is reused, as before.
            usersInUse = set(self.userNumberPerTask.values())
            for nUser in range(MAX_NUMBER_OF_SUDO_UNIX_USERS):
                if nUser not in usersInUse:
                    break
            taskKwargs['NUser'] = nUser
            taskKwargs['PayloadUser'] = os.environ['USER'] + 'p%s' % str(nUser).zfill(2)
            taskKwargs['UseSudo'] = True

        taskID = self.taskID
        result = self.pPool.createAndQueueTask(executeJob,
                                               args=(executableFile, proxy, taskID),
                                               kwargs=taskKwargs,
                                               taskID=taskID,
                                               usePoolCallbacks=True)
        self.taskID += 1
        # Only account for tasks that were really queued: a rejected task never
        # reaches finalizeJob, so its processors would stay allocated forever.
        if result['OK']:
            self.processorsPerTask[taskID] = requestedProcessors
            if self.useSudo:
                # Remember the user index so concurrent tasks get distinct users
                self.userNumberPerTask[taskID] = nUser

        self.pPool.processResults()

        return result

    def finalizeJob(self, taskID, result):
        """ Finalize the job by updating the process utilisation counters

        :param int taskID: local PoolCE task ID
        :param dict result: result of the job execution
        """
        # Tolerate task IDs that are not tracked instead of raising KeyError
        # from inside the pool callback
        nProc = self.processorsPerTask.pop(taskID, 0)
        self.userNumberPerTask.pop(taskID, None)
        if result['OK']:
            self.log.info('Task %d finished successfully, %d processor(s) freed' % (taskID, nProc))
        else:
            self.log.error("Task failed submission", "%d, message: %s" % (taskID, result['Message']))

    #############################################################################
    def getCEStatus(self, jobIDList=None):
        """ Method to return information on running and pending jobs.

        :param jobIDList: not used by this implementation

        :return: dictionary of numbers of jobs per status
        """
        self._preparePool()

        result = S_OK()
        result['SubmittedJobs'] = 0
        # A task counts as running when it holds at least one processor
        result['RunningJobs'] = sum(1 for nProc in self.processorsPerTask.values() if nProc > 0)
        result['WaitingJobs'] = 0

        processorsInUse = self.getProcessorsInUse()
        result['UsedProcessors'] = processorsInUse
        result['AvailableProcessors'] = self.processors - processorsInUse
        return result

    #############################################################################
    def monitorProxy(self, pilotProxy, payloadProxy):
        """ Monitor the payload proxy and renew as necessary.

        :param str pilotProxy: location of the pilotProxy
        :param str payloadProxy: location of the payloadProxy
        """
        return self._monitorProxy(pilotProxy, payloadProxy)
class TaskTimeOutTests(unittest.TestCase):
    """
    .. class:: TaskTimeOutTests

    test case for ProcessPool, queueing tasks with a time out and pool callbacks
    """

    def setUp(self):
        """c'tor

        Creates the sub-logger and a daemonized ProcessPool( 2, 4, 8 ) wired to
        the pool callbacks of this class.

        :param self: self reference
        """
        gLogger.showHeaders(True)
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.processPool = ProcessPool(2, 4, 8,
                                       poolCallback=self.poolCallback,
                                       poolExceptionCallback=self.poolExceptionCallback)
        self.processPool.daemonize()

    def poolCallback(self, taskID, taskResult):
        """Pool callback: log the result of a task."""
        self.log.always("callback result for %s is %s" % (taskID, taskResult))

    def poolExceptionCallback(self, taskID, taskException):
        """Pool exception callback: log the exception of a task."""
        self.log.always("callback exception for %s is %s" % (taskID, taskException))

    def _queueTask(self, taskCallable, taskID, timeWait, raiseException):
        """Queue one task with timeOut = 15 and the pool callbacks enabled.

        :return: result dictionary of createAndQueueTask
        """
        return self.processPool.createAndQueueTask(taskCallable,
                                                   taskID=taskID,
                                                   args=(taskID, timeWait, raiseException),
                                                   timeOut=15,
                                                   usePoolCallbacks=True,
                                                   blocking=True)

    def testCallableClass(self):
        """ CallableClass and task time out test """
        i = 0
        while i < 16:
            # Spin until the pool reports a free slot
            if self.processPool.getFreeSlots() <= 0:
                continue
            timeWait = random.randint(0, 5) * 10
            # A zero wait time is also used as the "raise an exception" flag
            raiseException = not timeWait
            result = self._queueTask(CallableClass, i, timeWait, raiseException)
            if result["OK"]:
                self.log.always("CallableClass enqueued to task %s timeWait=%s exception=%s" %
                                (i, timeWait, raiseException))
                i += 1
        self.processPool.finalize(2)

    def testCallableFunc(self):
        """ CallableFunc and task timeout test """
        i = 0
        while i < 16:
            # Spin until the pool reports a free slot
            if self.processPool.getFreeSlots() <= 0:
                continue
            timeWait = random.randint(0, 5) * 5
            # A zero wait time is also used as the "raise an exception" flag
            raiseException = not timeWait
            result = self._queueTask(CallableFunc, i, timeWait, raiseException)
            if result["OK"]:
                self.log.always("CallableFunc enqueued to task %s timeWait=%s exception=%s" %
                                (i, timeWait, raiseException))
                i += 1
        self.processPool.finalize(2)

    def testLockedClass(self):
        """ LockedCallableClass and task time out test """
        try:
            for loop in range(2):
                self.log.always("loop %s" % loop)
                i = 0
                while i < 16:
                    # Spin until the pool reports a free slot
                    if self.processPool.getFreeSlots() <= 0:
                        continue
                    timeWait = random.randint(0, 5) * 5
                    raiseException = timeWait == 5
                    # Wait times of 20 and above go to the locked variant
                    klass = LockedCallableClass if timeWait >= 20 else CallableClass
                    result = self._queueTask(klass, i, timeWait, raiseException)
                    if result["OK"]:
                        self.log.always("%s enqueued to task %s timeWait=%s exception=%s" %
                                        (klass.__name__, i, timeWait, raiseException))
                        i += 1
                self.log.always("being idle for a while")
                # Busy wait between the two rounds
                for _ in range(100000):
                    for _ in range(1000):
                        pass

            self.log.always("finalizing...")
            self.processPool.finalize(10)
        finally:
            # unlock, also when the test body raised, so the lock is not left held
            gLock.release()
class ProcessPoolCallbacksTests(unittest.TestCase):
    """
    .. class:: ProcessPoolCallbacksTests

    ProcessPool test case: tasks are queued with ``usePoolCallbacks = True``
    and report through the pool-level callbacks defined on this class.
    """

    def setUp(self):
        """Build the ProcessPool used by every test and call daemonize() on it.

        :param self: self reference
        """
        gLogger.showHeaders(True)
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.processPool = ProcessPool(4, 8, 8,
                                       poolCallback=self.poolCallback,
                                       poolExceptionCallback=self.poolExceptionCallback)
        self.processPool.daemonize()

    def poolCallback(self, taskID, taskResult):
        """Pool result callback: log the task ID and its result."""
        message = "callback for %s result is %s" % (taskID, taskResult)
        self.log.always(message)

    def poolExceptionCallback(self, taskID, taskException):
        """Pool exception callback: log the task ID and its exception."""
        message = "callback for %s exception is %s" % (taskID, taskException)
        self.log.always(message)

    def _queueTasks(self, taskCallable, logTemplate):
        """Queue 10 tasks running ``taskCallable`` and log each accepted one.

        :param taskCallable: callable handed to createAndQueueTask
        :param str logTemplate: %-template taking the task ID
        """
        queued = 0
        while queued < 10:
            # spin until the pool reports a free slot
            if self.processPool.getFreeSlots() <= 0:
                continue
            timeWait = random.randint(0, 5)
            # a zero wait doubles as the "raise an exception" marker
            raiseException = not timeWait
            outcome = self.processPool.createAndQueueTask(taskCallable,
                                                          taskID=queued,
                                                          args=(queued, timeWait, raiseException),
                                                          usePoolCallbacks=True,
                                                          blocking=True)
            # a rejected task is retried under the same ID on the next pass
            if outcome["OK"]:
                self.log.always(logTemplate % queued)
                queued += 1

    def testCallableClass(self):
        """ CallableClass and pool callbacks test """
        self._queueTasks(CallableClass, "CallableClass enqueued to task %s")
        self.processPool.finalize(2)

    def testCallableFunc(self):
        """ CallableFunc and pool callbacks test """
        self._queueTasks(CallableFunc, "CallableFunc enqueued to task %s")
        self.processPool.finalize(2)
# 'lfc-lhcb-ro.in2p3.fr', # 'lfc-lhcb.grid.sara.nl', # 'lfclhcb.pic.es', # 'lhcb-lfc.gridpp.rl.ac.uk'] lfcHosts = ['prod-lfc-lhcb-ro.cern.ch'] # path = "/lhcb/LHCb" path = '/lhcb/user/c/chaen' print("Queueing task for directory", path, lfcHosts[0]) writerProc = Process(target=writer, args=('lfc_dfc.out', writerQueue, stopFlag)) writerProc.start() result = pPool.createAndQueueTask(processDir, [path, writerQueue, False, lfcHosts[0]], callback=finalizeDirectory) if not result['OK']: print("Failed queueing", path) for i in range(20): pPool.processResults() time.sleep(1) pPool.processAllResults(timeout=300) stopFlag.value = 1 writerQueue.put("Exit") writerProc.join()
class PoolComputingElement(ComputingElement):
    """Computing element that runs jobs through a ProcessPool.

    The number of cores reserved by each queued task is kept in
    ``coresPerTask`` (task ID -> cores) and released by :meth:`finalizeJob`,
    which is registered as the pool callback.
    """

    mandatoryParameters = MandatoryParameters

    #############################################################################
    def __init__(self, ceUniqueID, cores=0):
        """ Standard constructor.

        :param ceUniqueID: identifier passed on to ComputingElement.__init__
        :param int cores: number of cores to manage; when not positive,
                          getNumberOfCores() is used instead
        """
        ComputingElement.__init__(self, ceUniqueID)
        self.ceType = "Pool"
        self.submittedJobs = 0
        self.cores = cores if cores > 0 else getNumberOfCores()
        self.pPool = ProcessPool(self.cores, self.cores, poolCallback=self.finalizeJob)
        self.taskID = 0
        self.coresPerTask = {}

    #############################################################################
    def _addCEConfigDefaults(self):
        """Method to make sure all necessary Configuration Parameters are defined
        """
        # First assure that any global parameters are loaded
        ComputingElement._addCEConfigDefaults(self)

    def getCoresInUse(self):
        """ Return the total number of cores reserved by the tracked tasks.
        """
        return sum(self.coresPerTask.values())

    #############################################################################
    def submitJob(self, executableFile, proxy, **kwargs):
        """ Method to submit job.

        :param executableFile: passed to executeJob as its first argument
        :param proxy: passed to executeJob as its second argument
        :param kwargs: ``WholeNode`` (truthy: reserve every core) or
                       ``NumberOfCores`` (cores to reserve); one core otherwise
        :return: S_ERROR when the request does not fit, else the result of
                 ProcessPool.createAndQueueTask
        """
        self.pPool.processResults()

        coresInUse = self.getCoresInUse()
        if kwargs.get('WholeNode'):
            if coresInUse > 0:
                return S_ERROR('Can not take WholeNode job')
            requestedCores = self.cores
        elif "NumberOfCores" in kwargs:
            requestedCores = int(kwargs['NumberOfCores'])
        else:
            requestedCores = 1

        # One capacity check covers every branch above.
        availableCores = self.cores - coresInUse
        if availableCores < requestedCores:
            return S_ERROR('Not enough slots: requested %d, available %d' % (requestedCores, availableCores))

        ret = getProxyInfo()
        pilotProxy = ret['Value']['path'] if ret['OK'] else None
        self.log.notice('Pilot Proxy:', pilotProxy)

        # Reserve the cores under the SAME ID the task is queued with, so that
        # finalizeJob releases the right entry.
        taskID = self.taskID
        self.coresPerTask[taskID] = requestedCores
        result = self.pPool.createAndQueueTask(executeJob,
                                               [executableFile, proxy, taskID],
                                               None,
                                               taskID,
                                               usePoolCallbacks=True)
        self.taskID += 1
        if not result['OK']:
            # The task was never queued, so no callback will release its cores.
            self.coresPerTask.pop(taskID, None)

        self.pPool.processResults()

        return result

    def finalizeJob(self, taskID, result):
        """ Finalize the job: release the cores reserved for ``taskID``.

        Registered as the pool callback; an unknown task ID is ignored.
        """
        self.coresPerTask.pop(taskID, None)

    #############################################################################
    def getCEStatus(self):
        """ Method to return information on running and pending jobs.

        :return: S_OK structure carrying SubmittedJobs, RunningJobs,
                 WaitingJobs, UsedCores and AvailableCores
        """
        self.pPool.processResults()
        result = S_OK()
        result['SubmittedJobs'] = 0
        # A task counts as running when it holds at least one core.
        result['RunningJobs'] = sum(1 for cores in self.coresPerTask.values() if cores > 0)
        result['WaitingJobs'] = 0
        coresInUse = self.getCoresInUse()
        result['UsedCores'] = coresInUse
        result['AvailableCores'] = self.cores - coresInUse
        return result

    #############################################################################
    def monitorProxy(self, pilotProxy, payloadProxy):
        """ Monitor the payload proxy and renew as necessary.
        """
        return self._monitorProxy(pilotProxy, payloadProxy)
class PoolComputingElement(ComputingElement):
    """Computing element that runs jobs through a lazily created ProcessPool.

    ``processorsPerTask`` maps task ID -> processors reserved and
    ``userNumberPerTask`` maps task ID -> sudo user number in use; both are
    released by :meth:`finalizeJob`, which is registered as the pool callback.
    """

    mandatoryParameters = MandatoryParameters

    #############################################################################
    def __init__(self, ceUniqueID):
        """ Standard constructor.

        :param ceUniqueID: identifier passed on to ComputingElement.__init__
        """
        ComputingElement.__init__(self, ceUniqueID)
        self.ceType = "Pool"
        self.log = gLogger.getSubLogger('Pool')
        self.submittedJobs = 0
        self.processors = 1
        # The pool is created on the first submitJob() call.
        self.pPool = None
        self.taskID = 0
        self.processorsPerTask = {}
        self.userNumberPerTask = {}
        self.useSudo = False

    #############################################################################
    def _addCEConfigDefaults(self):
        """Method to make sure all necessary Configuration Parameters are defined
        """
        # First assure that any global parameters are loaded
        ComputingElement._addCEConfigDefaults(self)

    def _reset(self):
        """ Refresh processors, MaxTotalJobs and useSudo from ``ceParameters``.
        """
        self.processors = int(self.ceParameters.get('NumberOfProcessors', self.processors))
        self.ceParameters['MaxTotalJobs'] = self.processors
        self.useSudo = self.ceParameters.get('SudoExecution', False)

    def getProcessorsInUse(self):
        """ Return the total number of processors reserved by the tracked tasks.
        """
        return sum(self.processorsPerTask.values())

    #############################################################################
    def submitJob(self, executableFile, proxy, **kwargs):
        """ Method to submit job.

        :param executableFile: passed to executeJob as its first argument
        :param proxy: passed to executeJob as its second argument
        :param kwargs: ``wholeNode`` (truthy: reserve every processor) or
                       ``numberOfProcessors`` (processors to reserve); one
                       processor otherwise
        :return: S_ERROR when the request does not fit, else the result of
                 ProcessPool.createAndQueueTask
        """
        if self.pPool is None:
            self.pPool = ProcessPool(minSize=self.processors,
                                     maxSize=self.processors,
                                     poolCallback=self.finalizeJob)

        self.pPool.processResults()

        processorsInUse = self.getProcessorsInUse()
        if kwargs.get('wholeNode'):
            if processorsInUse > 0:
                return S_ERROR('Can not take WholeNode job')
            requestedProcessors = self.processors
        elif "numberOfProcessors" in kwargs:
            requestedProcessors = int(kwargs['numberOfProcessors'])
        else:
            requestedProcessors = 1

        # One capacity check covers every branch above.
        availableProcessors = self.processors - processorsInUse
        if availableProcessors < requestedProcessors:
            return S_ERROR('Not enough slots: requested %d, available %d' % (requestedProcessors,
                                                                             availableProcessors))

        ret = getProxyInfo()
        pilotProxy = ret['Value']['path'] if ret['OK'] else None
        self.log.notice('Pilot Proxy:', pilotProxy)

        taskID = self.taskID
        taskKwargs = {'UseSudo': False}
        if self.useSudo:
            usedNumbers = set(self.userNumberPerTask.values())
            # Pick the lowest user number no tracked task holds; when all are
            # taken the last number is reused, as before.
            for nUser in range(MAX_NUMBER_OF_SUDO_UNIX_USERS):
                if nUser not in usedNumbers:
                    break
            taskKwargs['NUser'] = nUser
            taskKwargs['PayloadUser'] = os.environ['USER'] + 'p%s' % str(nUser).zfill(2)
            taskKwargs['UseSudo'] = True
            # Remember the number so concurrent tasks get distinct users.
            self.userNumberPerTask[taskID] = nUser

        self.processorsPerTask[taskID] = requestedProcessors
        result = self.pPool.createAndQueueTask(executeJob,
                                               args=(executableFile, proxy, taskID),
                                               kwargs=taskKwargs,
                                               taskID=taskID,
                                               usePoolCallbacks=True)
        self.taskID += 1
        if not result['OK']:
            # The task was never queued, so no callback will release these.
            self.processorsPerTask.pop(taskID, None)
            self.userNumberPerTask.pop(taskID, None)

        self.pPool.processResults()

        return result

    def finalizeJob(self, taskID, result):
        """ Finalize the job: release what was reserved for ``taskID`` and log.

        Registered as the pool callback; an unknown task ID frees nothing.
        """
        nProc = self.processorsPerTask.pop(taskID, 0)
        self.userNumberPerTask.pop(taskID, None)
        if result['OK']:
            self.log.info('Task %d finished successfully, %d processor(s) freed' % (taskID, nProc))
        else:
            self.log.error("Task failed submission", "%d, message: %s" % (taskID, result['Message']))

    #############################################################################
    def getCEStatus(self, jobIDList=None):
        """ Method to return information on running and pending jobs.

        :param jobIDList: accepted but not used here
        :return: S_OK structure carrying SubmittedJobs, RunningJobs,
                 WaitingJobs, UsedProcessors and AvailableProcessors
        """
        # The pool does not exist until the first submitJob() call.
        if self.pPool is not None:
            self.pPool.processResults()
        result = S_OK()
        result['SubmittedJobs'] = 0
        # A task counts as running when it holds at least one processor.
        result['RunningJobs'] = sum(1 for nProc in self.processorsPerTask.values() if nProc > 0)
        result['WaitingJobs'] = 0
        processorsInUse = self.getProcessorsInUse()
        result['UsedProcessors'] = processorsInUse
        result['AvailableProcessors'] = self.processors - processorsInUse
        return result

    #############################################################################
    def monitorProxy(self, pilotProxy, payloadProxy):
        """ Monitor the payload proxy and renew as necessary.
        """
        return self._monitorProxy(pilotProxy, payloadProxy)
class PoolComputingElement(ComputingElement):
    """Computing element that runs jobs through a ProcessPool.

    The number of cores reserved by each queued task is kept in
    ``coresPerTask`` (task ID -> cores) and released by :meth:`finalizeJob`,
    which is registered as the pool callback.
    """

    mandatoryParameters = MandatoryParameters

    #############################################################################
    def __init__(self, ceUniqueID, cores=0):
        """ Standard constructor.

        :param ceUniqueID: identifier passed on to ComputingElement.__init__
        :param int cores: number of cores to manage; when not positive,
                          getNumberOfCores() is used instead
        """
        ComputingElement.__init__(self, ceUniqueID)
        self.ceType = "Pool"
        self.submittedJobs = 0
        self.cores = cores if cores > 0 else getNumberOfCores()
        self.pPool = ProcessPool(self.cores, self.cores, poolCallback=self.finalizeJob)
        self.taskID = 0
        self.coresPerTask = {}

    #############################################################################
    def _addCEConfigDefaults(self):
        """Method to make sure all necessary Configuration Parameters are defined
        """
        # First assure that any global parameters are loaded
        ComputingElement._addCEConfigDefaults(self)

    def getCoresInUse(self):
        """ Return the total number of cores reserved by the tracked tasks.
        """
        return sum(self.coresPerTask.values())

    #############################################################################
    def submitJob(self, executableFile, proxy, **kwargs):
        """ Method to submit job.

        :param executableFile: passed to executeJob as its first argument
        :param proxy: passed to executeJob as its second argument
        :param kwargs: ``WholeNode`` (truthy: reserve every core) or
                       ``NumberOfCores`` (cores to reserve); one core otherwise
        :return: S_ERROR when the request does not fit, else the result of
                 ProcessPool.createAndQueueTask
        """
        self.pPool.processResults()

        coresInUse = self.getCoresInUse()
        if kwargs.get('WholeNode'):
            if coresInUse > 0:
                # Report the counters this class maintains; it never sets
                # slotsInUse/slots.
                return S_ERROR('Can not take WholeNode job, %d/%d slots used' % (coresInUse, self.cores))
            requestedCores = self.cores
        elif "NumberOfCores" in kwargs:
            requestedCores = int(kwargs['NumberOfCores'])
        else:
            requestedCores = 1

        # One capacity check covers every branch above.
        availableCores = self.cores - coresInUse
        if availableCores < requestedCores:
            return S_ERROR('Not enough slots: requested %d, available %d' % (requestedCores, availableCores))

        ret = getProxyInfo()
        pilotProxy = ret['Value']['path'] if ret['OK'] else None
        self.log.notice('Pilot Proxy:', pilotProxy)

        # Reserve the cores under the SAME ID the task is queued with, so that
        # finalizeJob releases the right entry.
        taskID = self.taskID
        self.coresPerTask[taskID] = requestedCores
        result = self.pPool.createAndQueueTask(executeJob,
                                               [executableFile, proxy, taskID],
                                               None,
                                               taskID,
                                               usePoolCallbacks=True)
        self.taskID += 1
        if not result['OK']:
            # The task was never queued, so no callback will release its cores.
            self.coresPerTask.pop(taskID, None)

        self.pPool.processResults()

        return result

    def finalizeJob(self, taskID, result):
        """ Finalize the job: release the cores reserved for ``taskID``.

        Registered as the pool callback; an unknown task ID is ignored.
        """
        self.coresPerTask.pop(taskID, None)

    #############################################################################
    def getCEStatus(self):
        """ Method to return information on running and pending jobs.

        :return: S_OK structure carrying SubmittedJobs, RunningJobs,
                 WaitingJobs, UsedCores and AvailableCores
        """
        self.pPool.processResults()
        result = S_OK()
        result['SubmittedJobs'] = 0
        # A task counts as running when it holds at least one core.
        result['RunningJobs'] = sum(1 for cores in self.coresPerTask.values() if cores > 0)
        result['WaitingJobs'] = 0
        coresInUse = self.getCoresInUse()
        result['UsedCores'] = coresInUse
        result['AvailableCores'] = self.cores - coresInUse
        return result

    #############################################################################
    def monitorProxy(self, pilotProxy, payloadProxy):
        """ Monitor the payload proxy and renew as necessary.
        """
        return self._monitorProxy(pilotProxy, payloadProxy)