def start(self):
     # start base classes
     JediKnight.start(self)
     FactoryBase.initializeMods(self,self.taskBufferIF,self.ddmIF)
     # go into main loop
     while True:
         startTime = datetime.datetime.utcnow()
         try:
             # get logger
             tmpLog = MsgWrapper(logger)
             tmpLog.info('start')
             # loop over all vos
             for vo in self.vos:
                 # loop over all sourceLabels
                 for prodSourceLabel in self.prodSourceLabels:
                     # prepare tasks to be finished
                     tmpLog.info('preparing tasks to be finished for vo={0} label={1}'.format(vo,prodSourceLabel))
                     tmpRet = self.taskBufferIF.prepareTasksToBeFinished_JEDI(vo,prodSourceLabel,
                                                                              jedi_config.postprocessor.nTasks,
                                                                              pid=self.pid)
                     if tmpRet == None:
                         # failed
                         tmpLog.error('failed to prepare tasks')
                     # get tasks to be finished
                     tmpLog.info('getting tasks to be finished') 
                     tmpList = self.taskBufferIF.getTasksToBeFinished_JEDI(vo,prodSourceLabel,self.pid,
                                                                           jedi_config.postprocessor.nTasks)
                     if tmpList == None: 
                         # failed
                         tmpLog.error('failed to get tasks to be finished')
                     else:
                         tmpLog.info('got {0} tasks'.format(len(tmpList)))
                         # put to a locked list
                         taskList = ListWithLock(tmpList)
                         # make thread pool
                         threadPool = ThreadPool()
                         # make workers
                         nWorker = jedi_config.postprocessor.nWorkers
                         for iWorker in range(nWorker):
                             thr = PostProcessorThread(taskList,threadPool,
                                                       self.taskBufferIF,
                                                       self.ddmIF,
                                                       self)
                             thr.start()
                         # join
                         threadPool.join()
             tmpLog.info('done')
         except:
             errtype,errvalue = sys.exc_info()[:2]
             tmpLog.error('failed in {0}.start() with {1} {2}'.format(self.__class__.__name__,errtype.__name__,errvalue))
         # sleep if needed
         loopCycle = 60
         timeDelta = datetime.datetime.utcnow() - startTime
         sleepPeriod = loopCycle - timeDelta.seconds
         if sleepPeriod > 0:
             time.sleep(sleepPeriod)
Beispiel #2
0
 def start(self):
     # start base classes
     JediKnight.start(self)
     FactoryBase.initializeMods(self, self.taskBufferIF, self.ddmIF)
     # go into main loop
     while True:
         startTime = datetime.datetime.utcnow()
         try:
             # get logger
             tmpLog = MsgWrapper(logger)
             tmpLog.debug('start')
             # loop over all vos
             for vo in self.vos:
                 # loop over all sourceLabels
                 for prodSourceLabel in self.prodSourceLabels:
                     # get the list of tasks to refine
                     tmpList = self.taskBufferIF.getTasksToRefine_JEDI(
                         vo, prodSourceLabel)
                     if tmpList is None:
                         # failed
                         tmpLog.error(
                             'failed to get the list of tasks to refine')
                     else:
                         tmpLog.debug('got {0} tasks'.format(len(tmpList)))
                         # put to a locked list
                         taskList = ListWithLock(tmpList)
                         # make thread pool
                         threadPool = ThreadPool()
                         # get work queue mapper
                         workQueueMapper = self.taskBufferIF.getWorkQueueMap(
                         )
                         # make workers
                         nWorker = jedi_config.taskrefine.nWorkers
                         for iWorker in range(nWorker):
                             thr = TaskRefinerThread(
                                 taskList, threadPool, self.taskBufferIF,
                                 self.ddmIF, self, workQueueMapper)
                             thr.start()
                         # join
                         threadPool.join()
         except Exception:
             errtype, errvalue = sys.exc_info()[:2]
             tmpLog.error('failed in {0}.start() with {1} {2}'.format(
                 self.__class__.__name__, errtype.__name__, errvalue))
             tmpLog.error('Traceback: {0}'.format(traceback.format_exc()))
         # sleep if needed
         loopCycle = jedi_config.taskrefine.loopCycle
         timeDelta = datetime.datetime.utcnow() - startTime
         sleepPeriod = loopCycle - timeDelta.seconds
         if sleepPeriod > 0:
             time.sleep(sleepPeriod)
         # randomize cycle
         self.randomSleep(max_val=loopCycle)
Beispiel #3
0
 def start(self):
     # start base classes
     JediKnight.start(self)
     FactoryBase.initializeMods(self, self.taskBufferIF, self.ddmIF)
     # go into main loop
     while True:
         startTime = datetime.datetime.utcnow()
         try:
             # get logger
             tmpLog = MsgWrapper(logger)
             tmpLog.debug('start')
             # loop over all vos
             for vo in self.vos:
                 # loop over all sourceLabels
                 for prodSourceLabel in self.prodSourceLabels:
                     # get the list of tasks to refine
                     tmpList = self.taskBufferIF.getTasksToRefine_JEDI(vo, prodSourceLabel)
                     if tmpList == None:
                         # failed
                         tmpLog.error('failed to get the list of tasks to refine')
                     else:
                         tmpLog.debug('got {0} tasks'.format(len(tmpList)))
                         # put to a locked list
                         taskList = ListWithLock(tmpList)
                         # make thread pool
                         threadPool = ThreadPool()
                         # get work queue mapper
                         workQueueMapper = self.taskBufferIF.getWorkQueueMap()
                         # make workers
                         nWorker = jedi_config.taskrefine.nWorkers
                         for iWorker in range(nWorker):
                             thr = TaskRefinerThread(taskList, threadPool,
                                                     self.taskBufferIF,
                                                     self.ddmIF,
                                                     self, workQueueMapper)
                             thr.start()
                         # join
                         threadPool.join()
         except:
             errtype, errvalue = sys.exc_info()[:2]
             tmpLog.error('failed in {0}.start() with {1} {2}'.format(self.__class__.__name__,
                                                                      errtype.__name__, errvalue))
             tmpLog.error('Traceback: {0}'.format(traceback.format_exc()))
         # sleep if needed
         loopCycle = jedi_config.taskrefine.loopCycle
         timeDelta = datetime.datetime.utcnow() - startTime
         sleepPeriod = loopCycle - timeDelta.seconds
         if sleepPeriod > 0:
             time.sleep(sleepPeriod)
         # randomize cycle
         self.randomSleep()
Beispiel #4
0
 def start(self):
     # start base classes
     JediKnight.start(self)
     # go into main loop
     while True:
         startTime = datetime.datetime.utcnow()
         try:
             # get logger
             tmpLog = MsgWrapper(logger)
             tmpLog.debug('start')
             # loop over all vos
             for vo in self.vos:
                 # loop over all sourceLabels
                 for prodSourceLabel in self.prodSourceLabels:
                     # get the list of tasks to exec command
                     tmpList = self.taskBufferIF.getTasksToExecCommand_JEDI(
                         vo, prodSourceLabel)
                     if tmpList == None:
                         # failed
                         tmpLog.error(
                             'failed to get the task list for vo={0} label={1}'
                             .format(vo, prodSourceLabel))
                     else:
                         tmpLog.debug('got {0} tasks'.format(len(tmpList)))
                         # put to a locked list
                         taskList = ListWithLock(tmpList)
                         # make thread pool
                         threadPool = ThreadPool()
                         # make workers
                         nWorker = jedi_config.taskrefine.nWorkers
                         for iWorker in range(nWorker):
                             thr = TaskCommandoThread(
                                 taskList, threadPool, self.taskBufferIF,
                                 self.ddmIF, self.pid)
                             thr.start()
                         # join
                         threadPool.join()
             tmpLog.debug('done')
         except:
             errtype, errvalue = sys.exc_info()[:2]
             tmpLog.error('failed in {0}.start() with {1} {2}'.format(
                 self.__class__.__name__, errtype.__name__, errvalue))
         # sleep if needed
         loopCycle = jedi_config.tcommando.loopCycle
         timeDelta = datetime.datetime.utcnow() - startTime
         sleepPeriod = loopCycle - timeDelta.seconds
         if sleepPeriod > 0:
             time.sleep(sleepPeriod)
         # randomize cycle
         self.randomSleep()
Beispiel #5
0
 def start(self):
     # start base class
     JediKnight.start(self)
     # go into main loop
     while True:
         startTime = datetime.datetime.utcnow()
         try:
             # loop over all vos
             for vo in self.vos:
                 # loop over all sourceLabels
                 for prodSourceLabel in self.prodSourceLabels:
                     # get the list of datasets to feed contents to DB
                     tmpList = self.taskBufferIF.getDatasetsToFeedContents_JEDI(
                         vo, prodSourceLabel)
                     if tmpList == None:
                         # failed
                         logger.error(
                             'failed to get the list of datasets to feed contents'
                         )
                     else:
                         logger.debug('got %s datasets' % len(tmpList))
                         # put to a locked list
                         dsList = ListWithLock(tmpList)
                         # make thread pool
                         threadPool = ThreadPool()
                         # make workers
                         nWorker = jedi_config.confeeder.nWorkers
                         for iWorker in range(nWorker):
                             thr = ContentsFeederThread(
                                 dsList, threadPool, self.taskBufferIF,
                                 self.ddmIF, self.pid)
                             thr.start()
                         # join
                         threadPool.join()
         except:
             errtype, errvalue = sys.exc_info()[:2]
             logger.error(
                 'failed in %s.start() with %s %s' %
                 (self.__class__.__name__, errtype.__name__, errvalue))
         # sleep if needed
         loopCycle = jedi_config.confeeder.loopCycle
         timeDelta = datetime.datetime.utcnow() - startTime
         sleepPeriod = loopCycle - timeDelta.seconds
         if sleepPeriod > 0:
             time.sleep(sleepPeriod)
         # randomize cycle
         self.randomSleep()
Beispiel #6
0
 def start(self):
     # start base class
     JediKnight.start(self)
     # go into main loop
     while True:
         startTime = datetime.datetime.utcnow()
         try:
             # loop over all vos
             for vo in self.vos:
                 # loop over all sourceLabels
                 for prodSourceLabel in self.prodSourceLabels:
                     # get the list of datasets to feed contents to DB
                     tmpList = self.taskBufferIF.getDatasetsToFeedContents_JEDI(vo,prodSourceLabel)
                     if tmpList == None:
                         # failed
                         logger.error('failed to get the list of datasets to feed contents')
                     else:
                         logger.debug('got %s datasets' % len(tmpList))
                         # put to a locked list
                         dsList = ListWithLock(tmpList)
                         # make thread pool
                         threadPool = ThreadPool() 
                         # make workers
                         nWorker = jedi_config.confeeder.nWorkers
                         for iWorker in range(nWorker):
                             thr = ContentsFeederThread(dsList,threadPool,
                                                        self.taskBufferIF,self.ddmIF,
                                                        self.pid)
                             thr.start()
                         # join
                         threadPool.join()
         except:
             errtype,errvalue = sys.exc_info()[:2]
             logger.error('failed in %s.start() with %s %s' % (self.__class__.__name__,errtype.__name__,errvalue))
         # sleep if needed
         loopCycle = jedi_config.confeeder.loopCycle
         timeDelta = datetime.datetime.utcnow() - startTime
         sleepPeriod = loopCycle - timeDelta.seconds
         if sleepPeriod > 0:
             time.sleep(sleepPeriod)
         # randomize cycle
         self.randomSleep()
Beispiel #7
0
 def process(self, msg_obj, decoded_data=None):
     # logger
     tmp_log = logger_utils.make_logger(base_logger, method_name='process')
     # start
     tmp_log.info('start')
     # parse
     if decoded_data is None:
         # json decode
         try:
             msg_dict = json.loads(msg_obj.data)
         except Exception as e:
             err_str = 'failed to parse message json {2} , skipped. {0} : {1}'.format(e.__class__.__name__, e,
                                                                                      msg_obj.data)
             tmp_log.error(err_str)
             raise
     else:
         msg_dict = decoded_data
     # run
     try:
         tmp_log.debug('got message {0}'.format(msg_dict))
         if msg_dict['msg_type'] == 'generate_job':
             # get task to generate jobs
             jediTaskID = int(msg_dict['taskid'])
             s, taskSpec = self.tbIF.getTaskWithID_JEDI(jediTaskID)
             if not taskSpec:
                 tmp_log.debug('unknown task {}'.format(jediTaskID))
             else:
                 # get WQ
                 vo = taskSpec.vo
                 prodSourceLabel = taskSpec.prodSourceLabel
                 workQueue = self.tbIF.getWorkQueueMap().getQueueWithIDGshare(taskSpec.workQueue_ID, taskSpec.gshare)
                 # get inputs
                 tmpList = self.tbIF.getTasksToBeProcessed_JEDI(self.pid, None, workQueue, None, None, nFiles=1000,
                                                                target_tasks=[jediTaskID])
                 if tmpList:
                     inputList = ListWithLock(tmpList)
                     # create thread
                     threadPool = ThreadPool()
                     siteMapper = self.tbIF.getSiteMapper()
                     taskSetupper = TaskSetupper(vo, prodSourceLabel)
                     taskSetupper.initializeMods(self.tbIF, self.ddmIF)
                     gen = JobGeneratorThread(inputList, threadPool, self.tbIF, self.ddmIF, siteMapper,
                                              True, taskSetupper, self.pid, workQueue, 'pjmsg',
                                              None, None, None, False)
                     gen.start()
                     gen.join()
         else:
             tmp_log.debug('unknown message type : {}'.format(msg_dict['msg_type']))
     except Exception as e:
         err_str = 'failed to run, skipped. {0} : {1}'.format(e.__class__.__name__, e)
         tmp_log.error(err_str)
         raise
     # done
     tmp_log.info('done')
Beispiel #8
0
 def doUpdateDataLocality(self):
     tmpLog = MsgWrapper(logger, ' #ATM #KV doUpdateDataLocality')
     tmpLog.debug('start')
     try:
         # lock
         got_lock = self.taskBufferIF.lockProcess_JEDI(  vo=self.vo, prodSourceLabel='default',
                                                         cloud=None, workqueue_id=None, resource_name=None,
                                                         component='AtlasDataLocalityUpdaterWatchDog.doUpdateDataLocality',
                                                         pid=self.pid, timeLimit=240)
         if not got_lock:
             tmpLog.debug('locked by another process. Skipped')
             return
         tmpLog.debug('got lock')
         # get list of datasets
         datasets_list = self.get_datasets_list()
         tmpLog.debug('got {0} datasets to update'.format(len(datasets_list)))
         # make thread pool
         thread_pool = ThreadPool()
         # make workers
         n_workers = 4
         for _ in range(n_workers):
             thr = DataLocalityUpdaterThread(taskDsList=datasets_list,
                                             threadPool=thread_pool,
                                             taskbufferIF=self.taskBufferIF,
                                             ddmIF=self.ddmIF,
                                             pid=self.pid,
                                             loggerObj=tmpLog)
             thr.start()
         tmpLog.debug('started {0} updater workers'.format(n_workers))
         # join
         thread_pool.join()
         # done
         tmpLog.debug('done')
     except Exception:
         errtype, errvalue = sys.exc_info()[:2]
         tmpLog.error('failed with {0} {1} {2}'.format(errtype, errvalue, traceback.format_exc()))
Beispiel #9
0
    def start(self):
        # start base classes
        JediKnight.start(self)
        FactoryBase.initializeMods(self, self.taskBufferIF, self.ddmIF)
        # go into main loop
        while True:
            startTime = datetime.datetime.utcnow()
            try:
                # get logger
                tmpLog = MsgWrapper(logger)
                tmpLog.debug('start TaskBroker')
                # get work queue mapper
                workQueueMapper = self.taskBufferIF.getWorkQueueMap()
                resource_types = self.taskBufferIF.load_resource_types()

                # loop over all vos
                for vo in self.vos:
                    # loop over all sourceLabels
                    for prodSourceLabel in self.prodSourceLabels:
                        # loop over all work queues
                        for workQueue in workQueueMapper.getAlignedQueueList(
                                vo, prodSourceLabel):
                            for resource_type in resource_types:
                                wq_name = '_'.join(
                                    workQueue.queue_name.split(' '))
                                msgLabel = 'vo={0} label={1} queue={2} resource_type={3}: '.\
                                    format(vo, prodSourceLabel, wq_name, resource_type.resource_name)
                                tmpLog.debug(msgLabel + 'start')
                                # get the list of tasks to check
                                tmpList = self.taskBufferIF.getTasksToCheckAssignment_JEDI(
                                    vo, prodSourceLabel, workQueue,
                                    resource_type.resource_name)
                                if tmpList is None:
                                    # failed
                                    tmpLog.error(
                                        msgLabel +
                                        'failed to get the list of tasks to check'
                                    )
                                else:
                                    tmpLog.debug(msgLabel +
                                                 'got tasks_to_check={0}'.
                                                 format(len(tmpList)))
                                    # put to a locked list
                                    taskList = ListWithLock(tmpList)
                                    # make thread pool
                                    threadPool = ThreadPool()
                                    # make workers
                                    nWorker = jedi_config.taskbroker.nWorkers
                                    for iWorker in range(nWorker):
                                        thr = TaskCheckerThread(
                                            taskList, threadPool,
                                            self.taskBufferIF, self.ddmIF,
                                            self, vo, prodSourceLabel)
                                        thr.start()
                                    # join
                                    threadPool.join()
                                # get the list of tasks to assign
                                tmpList = self.taskBufferIF.getTasksToAssign_JEDI(
                                    vo, prodSourceLabel, workQueue,
                                    resource_type.resource_name)
                                if tmpList is None:
                                    # failed
                                    tmpLog.error(
                                        msgLabel +
                                        'failed to get the list of tasks to assign'
                                    )
                                else:
                                    tmpLog.debug(msgLabel +
                                                 'got tasks_to_assign={0}'.
                                                 format(len(tmpList)))
                                    # put to a locked list
                                    taskList = ListWithLock(tmpList)
                                    # make thread pool
                                    threadPool = ThreadPool()
                                    # make workers
                                    nWorker = jedi_config.taskbroker.nWorkers
                                    for iWorker in range(nWorker):
                                        thr = TaskBrokerThread(
                                            taskList, threadPool,
                                            self.taskBufferIF, self.ddmIF,
                                            self, vo, prodSourceLabel,
                                            workQueue,
                                            resource_type.resource_name)
                                        thr.start()
                                    # join
                                    threadPool.join()
                                tmpLog.debug(msgLabel + 'done')
            except Exception:
                errtype, errvalue = sys.exc_info()[:2]
                tmpLog.error('failed in {0}.start() with {1} {2}'.format(
                    self.__class__.__name__, errtype.__name__, errvalue))
            tmpLog.debug('done')
            # sleep if needed
            loopCycle = jedi_config.taskbroker.loopCycle
            timeDelta = datetime.datetime.utcnow() - startTime
            sleepPeriod = loopCycle - timeDelta.seconds
            if sleepPeriod > 0:
                time.sleep(sleepPeriod)
            # randomize cycle
            self.randomSleep(max_val=loopCycle)
 def doBrokerage(self,inputList,vo,prodSourceLabel,workQueue):
     # list with a lock
     inputListWorld = ListWithLock([])
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retFatal    = self.SC_FATAL
     retTmpError = self.SC_FAILED
     tmpLog.debug('vo={0} label={1} queue={2} nTasks={3}'.format(vo,prodSourceLabel,
                                                                 workQueue.queue_name,
                                                                 len(inputList)))
     # loop over all tasks
     allRwMap    = {}
     prioMap     = {}
     tt2Map      = {}
     expRWs      = {}
     jobSpecList = []
     for tmpJediTaskID,tmpInputList in inputList:
         for taskSpec,cloudName,inputChunk in tmpInputList:
             # collect tasks for WORLD
             if taskSpec.useWorldCloud():
                 inputListWorld.append((taskSpec,inputChunk))
                 continue
             # make JobSpec to be submitted for TaskAssigner
             jobSpec = JobSpec()
             jobSpec.taskID     = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel  = 'managed'
             jobSpec.processingType   = taskSpec.processingType
             jobSpec.workingGroup     = taskSpec.workingGroup
             jobSpec.metadata         = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority  = taskSpec.currentPriority
             jobSpec.maxDiskCount     = (taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) / 1024 / 1024
             if taskSpec.useWorldCloud():
                 # use destinationSE to trigger task brokerage in WORLD cloud
                 jobSpec.destinationSE = taskSpec.cloud
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if setProdDBlock == False and prodDBlock != None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID]  = jobSpec.processingType
             # get RW for a priority
             if not allRwMap.has_key(jobSpec.currentPriority):
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(vo,prodSourceLabel,workQueue,
                                                                    jobSpec.currentPriority) 
                 if tmpRW == None:
                     tmpLog.error('failed to calculate RW with prio={0}'.format(jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(jobSpec.jediTaskID)
             if expRW == None:
                 tmpLog.error('failed to calculate RW for jediTaskID={0}'.format(jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # for old clouds
     if jobSpecList != []:
         # get fullRWs
         fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(vo,prodSourceLabel,None,None)
         if fullRWs == None:
             tmpLog.error('failed to calculate full RW')
             return retTmpError
         # set metadata
         for jobSpec in jobSpecList:
             rwValues = allRwMap[jobSpec.currentPriority]
             jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (jobSpec.metadata,
                                                       str(rwValues),str(expRWs),
                                                       str(prioMap),str(fullRWs),
                                                       str(tt2Map))
         tmpLog.debug('run task assigner for {0} tasks'.format(len(jobSpecList)))
         nBunchTask = 0
         while nBunchTask < len(jobSpecList):
             # get a bunch
             jobsBunch = jobSpecList[nBunchTask:nBunchTask+maxBunchTask]
             strIDs = 'jediTaskID='
             for tmpJobSpec in jobsBunch:
                 strIDs += '{0},'.format(tmpJobSpec.taskID)
             strIDs = strIDs[:-1]
             tmpLog.debug(strIDs)
             # increment index
             nBunchTask += maxBunchTask
             # run task brokerge
             stS,outSs = PandaClient.runTaskAssignment(jobsBunch)
             tmpLog.debug('{0}:{1}'.format(stS,str(outSs)))
     # for WORLD
     if len(inputListWorld) > 0:
         # thread pool
         threadPool = ThreadPool()
         # get full RW for WORLD
         fullRWs = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(vo,prodSourceLabel,None,None)
         if fullRWs == None:
             tmpLog.error('failed to calculate full WORLD RW')
             return retTmpError
         # get RW per priority
         for taskSpec,inputChunk in inputListWorld:
             if not taskSpec.currentPriority in allRwMap:
                 tmpRW = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(vo,prodSourceLabel,workQueue,
                                                                         taskSpec.currentPriority)
                 if tmpRW == None:
                     tmpLog.error('failed to calculate RW with prio={0}'.format(taskSpec.currentPriority))
                     return retTmpError
                 allRwMap[taskSpec.currentPriority] = tmpRW
         # live counter for RWs
         liveCounter = MapWithLock(allRwMap)
         # make workers
         ddmIF = self.ddmIF.getInterface(vo)
         for iWorker in range(4):
             thr = AtlasProdTaskBrokerThread(inputListWorld,threadPool,
                                             self.taskBufferIF,ddmIF,
                                             fullRWs,liveCounter)
             thr.start()
         threadPool.join(60*10)
     # return
     tmpLog.debug('doBrokerage done')
     return self.SC_SUCCEEDED
Beispiel #11
0
datasetIDs = None
if len(sys.argv) > 2:
    datasetIDs = [int(sys.argv[2])]

s, taskSpec = tbIF.getTaskWithID_JEDI(jediTaskID)

cloudName = taskSpec.cloud
vo = taskSpec.vo
prodSourceLabel = taskSpec.prodSourceLabel
queueID = taskSpec.workQueue_ID
gshare_name = taskSpec.gshare

workQueue = tbIF.getWorkQueueMap().getQueueWithID(queueID, gshare_name)

threadPool = ThreadPool()

# get typical number of files
#typicalNumFilesMap = tbIF.getTypicalNumInput_JEDI(vo,prodSourceLabel,workQueue,
#                                                  useResultCache=600)

typicalNumFilesMap = {}

tmpListList = tbIF.getTasksToBeProcessed_JEDI(
    None,
    vo,
    workQueue,
    prodSourceLabel,
    cloudName,
    nFiles=10,
    simTasks=[jediTaskID],
Beispiel #12
0
 def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue,
                 resource_name):
     # list with a lock
     inputListWorld = ListWithLock([])
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retFatal = self.SC_FATAL
     retTmpError = self.SC_FAILED
     tmpLog.debug(
         'vo={0} label={1} queue={2} resource_name={3} nTasks={4}'.format(
             vo, prodSourceLabel, workQueue.queue_name, resource_name,
             len(inputList)))
     # loop over all tasks
     allRwMap = {}
     prioMap = {}
     tt2Map = {}
     expRWs = {}
     jobSpecList = []
     for tmpJediTaskID, tmpInputList in inputList:
         for taskSpec, cloudName, inputChunk in tmpInputList:
             # collect tasks for WORLD
             if taskSpec.useWorldCloud():
                 inputListWorld.append((taskSpec, inputChunk))
                 continue
             # make JobSpec to be submitted for TaskAssigner
             jobSpec = JobSpec()
             jobSpec.taskID = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel = 'managed'
             jobSpec.processingType = taskSpec.processingType
             jobSpec.workingGroup = taskSpec.workingGroup
             jobSpec.metadata = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority = taskSpec.currentPriority
             jobSpec.maxDiskCount = (
                 taskSpec.getOutDiskSize() +
                 taskSpec.getWorkDiskSize()) // 1024 // 1024
             if taskSpec.useWorldCloud():
                 # use destinationSE to trigger task brokerage in WORLD cloud
                 jobSpec.destinationSE = taskSpec.cloud
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(
                         datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if setProdDBlock is False and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID] = jobSpec.processingType
             # get RW for a priority
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     jobSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(
                 jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error(
                     'failed to calculate RW for jediTaskID={0}'.format(
                         jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # for old clouds
     if jobSpecList != []:
         # get fullRWs
         fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(
             vo, prodSourceLabel, None, None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full RW')
             return retTmpError
         # set metadata
         for jobSpec in jobSpecList:
             rwValues = allRwMap[jobSpec.currentPriority]
             jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
                 jobSpec.metadata, str(rwValues), str(expRWs), str(prioMap),
                 str(fullRWs), str(tt2Map))
         tmpLog.debug('run task assigner for {0} tasks'.format(
             len(jobSpecList)))
         nBunchTask = 0
         while nBunchTask < len(jobSpecList):
             # get a bunch
             jobsBunch = jobSpecList[nBunchTask:nBunchTask + maxBunchTask]
             strIDs = 'jediTaskID='
             for tmpJobSpec in jobsBunch:
                 strIDs += '{0},'.format(tmpJobSpec.taskID)
             strIDs = strIDs[:-1]
             tmpLog.debug(strIDs)
             # increment index
             nBunchTask += maxBunchTask
             # run task brokerge
             stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
             tmpLog.debug('{0}:{1}'.format(stS, str(outSs)))
     # for WORLD
     if len(inputListWorld) > 0:
         # thread pool
         threadPool = ThreadPool()
         # get full RW for WORLD
         fullRWs = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(
             vo, prodSourceLabel, None, None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full WORLD RW')
             return retTmpError
         # get RW per priority
         for taskSpec, inputChunk in inputListWorld:
             if taskSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     taskSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             taskSpec.currentPriority))
                     return retTmpError
                 allRwMap[taskSpec.currentPriority] = tmpRW
         # live counter for RWs
         liveCounter = MapWithLock(allRwMap)
         # make workers
         ddmIF = self.ddmIF.getInterface(vo)
         for iWorker in range(4):
             thr = AtlasProdTaskBrokerThread(inputListWorld, threadPool,
                                             self.taskBufferIF, ddmIF,
                                             fullRWs, liveCounter,
                                             workQueue)
             thr.start()
         threadPool.join(60 * 10)
     # return
     tmpLog.debug('doBrokerage done')
     return self.SC_SUCCEEDED
Beispiel #13
0
 def start(self):
     # start base classes
     JediKnight.start(self)
     FactoryBase.initializeMods(self,self.taskBufferIF,self.ddmIF)
     # go into main loop
     while True:
         startTime = datetime.datetime.utcnow()
         try:
             # get logger
             tmpLog = MsgWrapper(logger)
             tmpLog.debug('start TaskBroker')
             # get work queue mapper
             workQueueMapper = self.taskBufferIF.getWorkQueueMap()
             # loop over all vos
             for vo in self.vos:
                 # loop over all sourceLabels
                 for prodSourceLabel in self.prodSourceLabels:
                     # loop over all work queues
                     for workQueue in workQueueMapper.getQueueListWithVoType(vo,prodSourceLabel):
                         msgLabel = 'vo={0} label={1} queue={2}: '.format(vo,prodSourceLabel,workQueue.queue_name)
                         tmpLog.debug(msgLabel+'start')
                         # get the list of tasks to check
                         tmpList = self.taskBufferIF.getTasksToCheckAssignment_JEDI(vo,prodSourceLabel,workQueue)
                         if tmpList == None:
                             # failed
                             tmpLog.error(msgLabel+'failed to get the list of tasks to check')
                         else:
                             tmpLog.debug(msgLabel+'got {0} tasks to check'.format(len(tmpList)))
                             # put to a locked list
                             taskList = ListWithLock(tmpList)
                             # make thread pool
                             threadPool = ThreadPool()
                             # make workers
                             nWorker = jedi_config.taskbroker.nWorkers
                             for iWorker in range(nWorker):
                                 thr = TaskCheckerThread(taskList,threadPool,
                                                         self.taskBufferIF,
                                                         self.ddmIF,self,
                                                         vo,prodSourceLabel)
                                 thr.start()
                             # join
                             threadPool.join()
                         # get the list of tasks to assign
                         tmpList = self.taskBufferIF.getTasksToAssign_JEDI(vo,prodSourceLabel,workQueue)
                         if tmpList == None:
                             # failed
                             tmpLog.error(msgLabel+'failed to get the list of tasks to assign')
                         else:
                             tmpLog.debug(msgLabel+'got {0} tasks to assign'.format(len(tmpList)))
                             # put to a locked list
                             taskList = ListWithLock(tmpList)
                             # make thread pool
                             threadPool = ThreadPool()
                             # make workers
                             nWorker = jedi_config.taskbroker.nWorkers
                             for iWorker in range(nWorker):
                                 thr = TaskBrokerThread(taskList,threadPool,
                                                        self.taskBufferIF,
                                                        self.ddmIF,self,
                                                        vo,prodSourceLabel,
                                                        workQueue)
                                 thr.start()
                             # join
                             threadPool.join()
                         tmpLog.debug(msgLabel+'done')
         except:
             errtype,errvalue = sys.exc_info()[:2]
             tmpLog.error('failed in {0}.start() with {1} {2}'.format(self.__class__.__name__,
                                                                      errtype.__name__,errvalue))
         tmpLog.debug('done')                                
         # sleep if needed
         loopCycle = jedi_config.taskbroker.loopCycle
         timeDelta = datetime.datetime.utcnow() - startTime
         sleepPeriod = loopCycle - timeDelta.seconds
         if sleepPeriod > 0:
             time.sleep(sleepPeriod)
         # randomize cycle
         self.randomSleep()