Beispiel #1
0
 def setupInterface(self):
     """Build the per-VO/group command interfaces from the DDM config.

     ``jedi_config.ddm.modConfig`` is read as a comma-separated list of
     ``vo:maxSize:moduleName:className[:group[:off]]`` entries.  Entries
     that cannot be parsed are skipped.  Each accepted entry is stored in
     ``self.interfaceMap`` under ``self.get_dict_key(vo, group)``; the
     stored value is an initialized ``Interaction.CommandSendInterface``,
     or ``None`` when the sixth field is ``off``.
     """
     for rawEntry in jedi_config.ddm.modConfig.split(','):
         fields = rawEntry.strip().split(':')
         nFields = len(fields)
         try:
             # the four mandatory fields; a short entry fails the unpacking
             vo, sizeText, moduleName, className = fields[:4]
             maxSize = int(sizeText)
         except Exception:
             # TODO add config error message
             continue
         # optional fifth field; an empty string means "no group"
         group = (fields[4] or None) if nFields >= 5 else None
         # optional sixth field switches the entry off
         if nFields >= 6 and fields[5] == 'off':
             voIF = None
         else:
             voIF = Interaction.CommandSendInterface(
                 vo, maxSize, moduleName, className)
             voIF.initialize()
         self.interfaceMap[self.get_dict_key(vo, group)] = voIF
 def setupInterface(self):
     """Create and initialize the task-buffer command interface.

     The interface is built for VO ``'any'`` with
     ``jedi_config.db.nWorkers`` as its size and stored in
     ``self.interface``.
     """
     nWorkers = jedi_config.db.nWorkers
     # module and class share the JediTaskBuffer name
     targetClass = 'JediTaskBuffer'
     targetModule = 'pandajedi.jedicore.JediTaskBuffer'
     self.interface = Interaction.CommandSendInterface(
         'any', nWorkers, targetModule, targetClass)
     self.interface.initialize()
Beispiel #3
0
 def setupInterface(self):
     """Create and register one command interface per configured VO.

     ``jedi_config.ddm.modConfig`` is read as a comma-separated list of
     ``vo:maxSize:moduleName:className`` entries.  Entries with fewer than
     four fields or a non-integer ``maxSize`` are skipped.  Each accepted
     entry yields an initialized ``Interaction.CommandSendInterface`` that
     is stored in ``self.interfaceMap`` under its VO name.
     """
     # parse config
     for configStr in jedi_config.ddm.modConfig.split(','):
         items = configStr.strip().split(':')
         # check format
         try:
             vo = items[0]
             maxSize = int(items[1])
             moduleName = items[2]
             className = items[3]
         except (IndexError, ValueError):
             # Only the two errors a malformed entry can produce are
             # handled, so KeyboardInterrupt/SystemExit are not swallowed.
             # TODO add config error message
             continue
         # add VO interface
         voIF = Interaction.CommandSendInterface(vo, maxSize, moduleName,
                                                 className)
         voIF.initialize()
         self.interfaceMap[vo] = voIF
            self.taskSpec.splitRule = tmpStr
        else:
            tmpMatch = re.search(valName + '=(-*\d+)', self.taskSpec.splitRule)
            if tmpMatch == None:
                # append
                self.taskSpec.splitRule += ',{0}'.format(tmpStr)
            else:
                # replace
                self.taskSpec.splitRule = re.sub(valName + '=(-*\d+)', tmpStr,
                                                 self.taskSpec.splitRule)
        return

    # get parameters for event service merging
    def getParamsForEventServiceMerging(self, taskParamMap):
        """Return the event-service merge parameters as a tagged string.

        :param taskParamMap: task parameter mapping; its optional
            ``esmergeSpec`` mapping may carry ``transPath`` and
            ``jobParameters``.
        :return: ``None`` when ``self.taskSpec.useEventService()`` is false,
            otherwise the two values wrapped in ``<PANDA_ESMERGE_TRF>`` and
            ``<PANDA_ESMERGE_JOBP>`` tags.  A missing value is rendered as
            ``'UnDefined'``.
        """
        # no event service
        if not self.taskSpec.useEventService():
            return None
        # extract parameters
        transPath = 'UnDefined'
        jobParameters = 'UnDefined'
        # `in` replaces dict.has_key(), which does not exist in Python 3
        if 'esmergeSpec' in taskParamMap:
            esmergeSpec = taskParamMap['esmergeSpec']
            if 'transPath' in esmergeSpec:
                transPath = esmergeSpec['transPath']
            if 'jobParameters' in esmergeSpec:
                jobParameters = esmergeSpec['jobParameters']
        # return
        return '<PANDA_ESMERGE_TRF>' + transPath + '</PANDA_ESMERGE_TRF>' + '<PANDA_ESMERGE_JOBP>' + jobParameters + '</PANDA_ESMERGE_JOBP>'


Interaction.installSC(TaskRefinerBase)
from pandajedi.jedicore import Interaction


# base class for task setup
class TaskSetupperBase(object):
    """Common state for task setuppers: the two interfaces and a site mapper."""

    def __init__(self, taskBufferIF, ddmIF):
        """Keep both interfaces, then load the site mapper via refresh()."""
        self.ddmIF, self.taskBufferIF = ddmIF, taskBufferIF
        self.refresh()

    def refresh(self):
        """Re-read the site mapper from the task buffer interface."""
        freshMapper = self.taskBufferIF.getSiteMapper()
        self.siteMapper = freshMapper


Interaction.installSC(TaskSetupperBase)
Beispiel #6
0
        # start communication channel
        import threading
        thr = threading.Thread(target=self.startImpl)
        thr.start()

    # implementation of start()
    def startImpl(self):
        """Run ``Interaction.CommandReceiveInterface.start`` and log failures.

        Any ``Exception`` raised by the call is reported through
        ``self.logger.error`` with the exception class name and value, and
        is not re-raised.  ``SystemExit``/``KeyboardInterrupt`` are no
        longer intercepted, unlike with the previous bare ``except``.
        """
        try:
            Interaction.CommandReceiveInterface.start(self)
        except Exception as exc:
            self.logger.error('crashed in JediKnight.startImpl() with %s %s' %
                              (type(exc).__name__, exc))

    # parse init params
    def parseInit(self, par):
        """Normalize an init parameter to a list.

        :param par: a list (returned unchanged), a ``'|'``-separated string,
            or any other value.
        :return: ``par`` itself if it is a list, the pieces of
            ``par.split('|')`` if that call works, otherwise ``[par]``.
        """
        if isinstance(par, list):
            return par
        try:
            return par.split('|')
        except (AttributeError, TypeError):
            # AttributeError: par has no split(); TypeError: par is a
            # bytes-like object that cannot be split on a str separator.
            return [par]

    # sleep to avoid synchronization of loop
    def randomSleep(self, minVal=0, maxVal=30):
        """Sleep for a random whole number of seconds in [minVal, maxVal]."""
        delay = random.randint(minVal, maxVal)
        time.sleep(delay)


# install SCs
Interaction.installSC(JediKnight)
        # finish tasks when goal is reached
        tmpLog.info('finish achieved tasks for vo={0} label={1}'.format(
            vo, prodSourceLabel))
        tmpRet = self.taskBufferIF.getAchievedTasks_JEDI(
            vo, prodSourceLabel, jedi_config.watchdog.waitForAchieved)
        if tmpRet is None:
            # failed
            tmpLog.error('failed to finish')
        else:
            for jediTaskID in tmpRet:
                self.taskBufferIF.sendCommandTaskPanda(jediTaskID,
                                                       'JEDI. Goal reached',
                                                       True,
                                                       'finish',
                                                       comQualifier='soft')
            tmpLog.info('finished {0} tasks'.format(tmpRet))
        # rescue unlocked tasks with picked files
        tmpLog.info(
            'rescue unlocked tasks with picked files for vo={0} label={1}'.
            format(vo, prodSourceLabel))
        tmpRet = self.taskBufferIF.rescueUnLockedTasksWithPicked_JEDI(
            vo, prodSourceLabel, 60, pid)
        if tmpRet is None:
            # failed
            tmpLog.error('failed to rescue unlocked tasks')
        else:
            tmpLog.info('rescue unlocked {0} tasks'.format(tmpRet))


Interaction.installSC(TypicalWatchDogBase)
from pandajedi.jedicore import Interaction

# base class for task brokerage
class TaskBrokerBase(object):
    """Holds the task-buffer and DDM interfaces plus a cached site mapper."""

    def __init__(self, taskBufferIF, ddmIF):
        """Store the interfaces and fetch the initial site mapper."""
        self.ddmIF, self.taskBufferIF = ddmIF, taskBufferIF
        self.refresh()

    def refresh(self):
        """Replace ``self.siteMapper`` with the task buffer's current one."""
        mapper = self.taskBufferIF.getSiteMapper()
        self.siteMapper = mapper


Interaction.installSC(TaskBrokerBase)
Beispiel #9
0
    def get_unified_sites(self, scan_site_list):
        """Return the distinct unified names of the given sites as a tuple.

        Each name in ``scan_site_list`` is resolved through
        ``self.siteMapper.getSite``; duplicates collapse because the names
        are collected in a set, so the tuple order is not defined.
        """
        return tuple({
            self.siteMapper.getSite(siteName).get_unified_name()
            for siteName in scan_site_list
        })

    # get list of pseudo sites
    def get_pseudo_sites(self, unified_list, scan_site_list):
        """Return the scanned sites whose unified name is in ``unified_list``.

        The result is a tuple built from a set, so it holds no duplicates
        and its order is not defined.
        """
        wanted = set(unified_list)
        return tuple({
            siteName
            for siteName in scan_site_list
            if self.siteMapper.getSite(siteName).get_unified_name() in wanted
        })

    # add pseudo sites to skip
    def add_pseudo_sites_to_skip(self, unified_dict, scan_site_list,
                                 skipped_dict):
        """Copy skip entries from unified names to the matching scanned sites.

        For every site in ``scan_site_list`` whose unified name is a key of
        ``unified_dict``, ``skipped_dict[site]`` is set to that key's value.
        ``skipped_dict`` is updated in place and also returned.
        """
        for siteName in scan_site_list:
            siteSpec = self.siteMapper.getSite(siteName)
            if siteSpec.get_unified_name() not in unified_dict:
                continue
            skipped_dict[siteName] = unified_dict[siteSpec.get_unified_name()]
        return skipped_dict


Interaction.installSC(JobBrokerBase)
Beispiel #10
0
        unified_list = set()
        for tmpSiteName in scan_site_list:
            tmpSiteSpec = self.siteMapper.getSite(tmpSiteName)
            unifiedName = tmpSiteSpec.get_unified_name()
            unified_list.add(unifiedName)
        return tuple(unified_list)


    # get list of pseudo sites
    def get_pseudo_sites(self, unified_list, scan_site_list):
        """Pick the scanned sites that belong to one of the unified sites.

        A site is kept when the unified name reported by its site spec is in
        ``unified_list``.  Returns a duplicate-free tuple in no defined order.
        """
        targets = set(unified_list)
        matched = set()
        for pseudoName in scan_site_list:
            pseudoSpec = self.siteMapper.getSite(pseudoName)
            if pseudoSpec.get_unified_name() not in targets:
                continue
            matched.add(pseudoName)
        return tuple(matched)



    # add pseudo sites to skip
    def add_pseudo_sites_to_skip(self, unified_dict, scan_site_list, skipped_dict):
        """Propagate skip entries of unified sites to their scanned sites.

        ``skipped_dict`` gains one entry per site in ``scan_site_list`` whose
        unified name is a key of ``unified_dict``; the value is taken from
        ``unified_dict``.  The same dictionary object is returned.
        """
        for pseudoName in scan_site_list:
            pseudoSpec = self.siteMapper.getSite(pseudoName)
            if not (pseudoSpec.get_unified_name() in unified_dict):
                continue
            skipped_dict[pseudoName] = unified_dict[pseudoSpec.get_unified_name()]
        return skipped_dict


Interaction.installSC(JobBrokerBase)
Beispiel #11
0
        self.maxNumJobs = None
        self.minPriority = None
        self.underNqLimit = False
        self.siteMapper = self.taskBufferIF.getSiteMapper()

    # set maximum number of jobs to be submitted
    def setMaxNumJobs(self, maxNumJobs):
        """Record ``maxNumJobs`` as the submission cap in ``self.maxNumJobs``."""
        self.maxNumJobs = maxNumJobs
        return None

    # set min priority of jobs to be submitted
    def setMinPriority(self, minPriority):
        """Record ``minPriority`` as the priority floor in ``self.minPriority``."""
        self.minPriority = minPriority
        return None

    # check throttle level
    def mergeThrottled(self, thrLevel):
        """Tell whether the given throttle result applies to merge jobs.

        :param thrLevel: either a plain boolean flag or a throttle level
            that can be compared with ``THR_LEVEL5``.
        :return: the flag itself for a boolean, otherwise
            ``thrLevel > THR_LEVEL5``.
        """
        # un-leveled flag
        # Only real booleans count as flags.  The earlier membership test
        # against [True, False] also matched the integers 1 and 0, because
        # 1 == True and 0 == False, so a numeric level of 1 came back as a
        # truthy "throttled" answer instead of being compared below.
        # NOTE(review): assumes throttle levels are small integers - confirm
        # against the THR_LEVEL* constants.
        if isinstance(thrLevel, bool):
            return thrLevel
        return thrLevel > THR_LEVEL5

    # check if lack of jobs
    def lackOfJobs(self):
        """Return the ``underNqLimit`` flag of this throttler."""
        flag = self.underNqLimit
        return flag

    # not enough jobs are queued
    def notEnoughJobsQueued(self):
        """Raise the ``underNqLimit`` flag that lackOfJobs() reports."""
        self.underNqLimit = True
        return None


Interaction.installSC(JobThrottlerBase)
Beispiel #12
0
from pandajedi.jedicore import Interaction

# base class for task brokerage
class TaskBrokerBase(object):
    """Base for task brokers: keeps both interfaces and a site mapper."""

    def __init__(self, taskBufferIF, ddmIF):
        """Remember the interfaces and load the site mapper once."""
        self.ddmIF, self.taskBufferIF = ddmIF, taskBufferIF
        self.refresh()

    def refresh(self):
        """Fetch the site mapper again from ``self.taskBufferIF``."""
        currentMapper = self.taskBufferIF.getSiteMapper()
        self.siteMapper = currentMapper



Interaction.installSC(TaskBrokerBase)                        
Beispiel #13
0
        thr.start()
        

    # implementation of start()
    def startImpl(self):
        """Call ``Interaction.CommandReceiveInterface.start`` and log errors.

        An ``Exception`` raised by the call is written to
        ``self.logger.error`` with its class name and value and is then
        dropped.  ``SystemExit``/``KeyboardInterrupt`` now propagate instead
        of being caught by a bare ``except``.
        """
        try:
            Interaction.CommandReceiveInterface.start(self)
        except Exception as exc:
            self.logger.error('crashed in JediKnight.startImpl() with %s %s' % (type(exc).__name__, exc))


    # parse init params
    def parseInit(self, par):
        """Turn an init parameter into a list.

        :param par: a list, a ``'|'``-separated string, or any other value.
        :return: ``par`` unchanged when it is a list, ``par.split('|')``
            when that call works, otherwise the one-element list ``[par]``.
        """
        if isinstance(par, list):
            return par
        try:
            return par.split('|')
        except (AttributeError, TypeError):
            # no split() method, or a bytes-like value that rejects a str
            # separator: wrap the value as-is
            return [par]


    # sleep to avoid synchronization of loop
    def randomSleep(self, minVal=0, maxVal=30):
        """Pause for a random integer number of seconds in [minVal, maxVal]."""
        pauseSeconds = random.randint(minVal, maxVal)
        time.sleep(pauseSeconds)


            
# install SCs
Interaction.installSC(JediKnight)
Beispiel #14
0
from pandajedi.jedicore import Interaction

# base class for watchdog
class WatchDogBase(object):
    """Base for watchdogs: keeps both interfaces and a cached site mapper."""

    def __init__(self, taskBufferIF, ddmIF):
        """Store the interfaces, then load the site mapper via refresh()."""
        self.ddmIF, self.taskBufferIF = ddmIF, taskBufferIF
        self.refresh()

    def refresh(self):
        """Reload ``self.siteMapper`` from the task buffer interface."""
        reloaded = self.taskBufferIF.getSiteMapper()
        self.siteMapper = reloaded



Interaction.installSC(WatchDogBase)
Beispiel #15
0
from pandajedi.jedicore import Interaction

# base class for task generator
class TaskGeneratorBase(object):
    """Base for task generators: keeps both interfaces and a site mapper."""

    def __init__(self, taskBufferIF, ddmIF):
        """Keep the two interfaces and fetch the site mapper."""
        self.ddmIF, self.taskBufferIF = ddmIF, taskBufferIF
        self.refresh()

    def refresh(self):
        """Update ``self.siteMapper`` from ``self.taskBufferIF``."""
        latestMapper = self.taskBufferIF.getSiteMapper()
        self.siteMapper = latestMapper



Interaction.installSC(TaskGeneratorBase)
Beispiel #16
0
from pandajedi.jedicore import Interaction


# base class for watchdog
class WatchDogBase(object):
    """Base for watchdogs, with a pre-action hook that does nothing here."""

    def __init__(self, taskBufferIF, ddmIF):
        """Store the interfaces, then load the site mapper via refresh()."""
        self.taskBufferIF, self.ddmIF = taskBufferIF, ddmIF
        self.refresh()

    def refresh(self):
        """Reload ``self.siteMapper`` from the task buffer interface."""
        reloaded = self.taskBufferIF.getSiteMapper()
        self.siteMapper = reloaded

    def pre_action(self, tmpLog, vo, prodSourceLabel, pid, *args, **kwargs):
        """Hook with an empty body in this class; returns None."""
        return None


Interaction.installSC(WatchDogBase)
Beispiel #17
0
                                              maxNumEventRanges=maxNumEventRanges,
                                              multiplicity=multiplicity,
                                              splitByFields=splitByFields,
                                              tmpLog=tmpLog,
                                              useDirectIO=useDirectIO,
                                              maxDiskSize=maxDiskSize)
            if subChunk == None:
                break
            if subChunk != []:
                # append
                subChunks.append(subChunk)
            iSubChunks += 1
        # append to return map if remain
        if subChunks != []:
            # get site names for parallel execution
            if taskSpec.getNumSitesPerJob() > 1 and not inputChunk.isMerging:
                siteName = inputChunk.getParallelSites(taskSpec.getNumSitesPerJob(),
                                                       nSubChunks,[siteName])
            returnList.append({'siteName':siteName,
                               'subChunks':subChunks,
                               'siteCandidate':siteCandidate,
                               })
            tmpLog.debug('split to %s subchunks' % len(subChunks))
        # return
        tmpLog.debug('done')
        return self.SC_SUCCEEDED,returnList



Interaction.installSC(JobSplitter)
Beispiel #18
0
            if allow_chunk_size_limit and strict_chunkSize and len(
                    subChunks) < nSubChunks:
                tmpLog.debug(
                    'skip splitting since chunk size {} is less than chunk size limit {} at {}'
                    .format(len(subChunks), nSubChunks, siteName))
                inputChunk.rollback_file_usage()
                isSkipped = True
            else:
                # get site names for parallel execution
                if taskSpec.getNumSitesPerJob(
                ) > 1 and not inputChunk.isMerging:
                    siteName = inputChunk.getParallelSites(
                        taskSpec.getNumSitesPerJob(), nSubChunks, [siteName])
                returnList.append({
                    'siteName': siteName,
                    'subChunks': subChunks,
                    'siteCandidate': siteCandidate,
                })
                try:
                    gshare = taskSpec.gshare.replace(' ', '_')
                except Exception:
                    gshare = None
                tmpLog.info('split to nJobs=%s at site=%s gshare=%s' %
                            (len(subChunks), siteName, gshare))
        # return
        tmpLog.debug('done')
        return self.SC_SUCCEEDED, returnList, isSkipped


Interaction.installSC(JobSplitter)
Beispiel #19
0
        # check goal only
        if checkGoal:
            # no goal
            if taskSpec.goal != None and taskCompleteness >= taskGoal:
                return True
            return False
        # return status
        return status

    # pre-check
    def doPreCheck(self, taskSpec, tmpLog):
        """Move the task to ``exhausted`` when all pre-check conditions hold.

        The conditions are tested in this order, stopping at the first one
        that fails: the task uses the exhausted state, its status is not
        ``passed``, its final status is ``finished``, its final status
        without the parent check is not ``done``, and the goal check is
        false.

        :return: True when the task was set to ``exhausted`` (task updated
            and child tasks kicked), False otherwise.
        """
        if not taskSpec.useExhausted():
            return False
        if taskSpec.status in ['passed']:
            return False
        if self.getFinalTaskStatus(taskSpec) not in ['finished']:
            return False
        if self.getFinalTaskStatus(taskSpec, checkParent=False) in ['done']:
            return False
        if self.getFinalTaskStatus(taskSpec, checkGoal=True):
            return False
        # send task to exhausted and release its lock
        taskSpec.status = 'exhausted'
        taskSpec.lockedBy = None
        taskSpec.lockedTime = None
        # update task
        tmpLog.info('set task.status={0}'.format(taskSpec.status))
        criteria = {'jediTaskID': taskSpec.jediTaskID}
        self.taskBufferIF.updateTask_JEDI(taskSpec, criteria, updateDEFT=True)
        # kick child tasks
        self.taskBufferIF.kickChildTasks_JEDI(taskSpec.jediTaskID)
        return True


Interaction.installSC(PostProcessorBase)
Beispiel #20
0
from pandajedi.jedicore import Interaction

# base class for job throttle
class JobThrottlerBase(object):
    """Base for job throttlers: canned return tuples, limits, site mapper.

    ``SC_FAILED`` and ``SC_SUCCEEDED`` are read from the instance and are
    not defined in this class body.
    """

    def __init__(self, taskBufferIF):
        """Store the interface, build the return tuples and reset limits."""
        self.taskBufferIF = taskBufferIF
        # returns: (status code, throttled flag)
        self.retTmpError = (self.SC_FAILED, True)
        self.retThrottled = (self.SC_SUCCEEDED, True)
        self.retUnThrottled = (self.SC_SUCCEEDED, False)
        # limits start unset
        self.maxNumJobs = self.minPriority = None
        self.refresh()

    def refresh(self):
        """Reload ``self.siteMapper`` from the task buffer interface."""
        mapper = self.taskBufferIF.getSiteMapper()
        self.siteMapper = mapper

    def setMaxNumJobs(self, maxNumJobs):
        """Set the maximum number of jobs to be submitted."""
        self.maxNumJobs = maxNumJobs

    def setMinPriority(self, minPriority):
        """Set the minimum priority of jobs to be submitted."""
        self.minPriority = minPriority


Interaction.installSC(JobThrottlerBase)
Beispiel #21
0
                # append
                self.taskSpec.splitRule += ',{0}'.format(tmpStr)
            else:
                # replace
                self.taskSpec.splitRule = re.sub(valName+'=(-*\d+)',
                                                 tmpStr,
                                                 self.taskSpec.splitRule)
        return    



    # get parameters for event service merging
    def getParamsForEventServiceMerging(self, taskParamMap):
        """Build the tagged parameter string for event service merging.

        :param taskParamMap: task parameter mapping; ``transPath`` and
            ``jobParameters`` are read from its optional ``esmergeSpec``
            mapping.
        :return: ``None`` if ``self.taskSpec.useEventService()`` is false,
            otherwise the two values wrapped in ``<PANDA_ESMERGE_TRF>`` and
            ``<PANDA_ESMERGE_JOBP>`` tags, with ``'UnDefined'`` standing in
            for a missing value.
        """
        # no event service
        if not self.taskSpec.useEventService():
            return None
        # extract parameters
        transPath = 'UnDefined'
        jobParameters = 'UnDefined'
        # membership tests instead of dict.has_key(), removed in Python 3
        if 'esmergeSpec' in taskParamMap:
            esmergeSpec = taskParamMap['esmergeSpec']
            if 'transPath' in esmergeSpec:
                transPath = esmergeSpec['transPath']
            if 'jobParameters' in esmergeSpec:
                jobParameters = esmergeSpec['jobParameters']
        # return
        return '<PANDA_ESMERGE_TRF>'+transPath+'</PANDA_ESMERGE_TRF>'+'<PANDA_ESMERGE_JOBP>'+jobParameters+'</PANDA_ESMERGE_JOBP>'

        
    
Interaction.installSC(TaskRefinerBase)
Beispiel #22
0
from pandajedi.jedicore import Interaction

# base class for task setup
class TaskSetupperBase(object):
    """Base for task setuppers: keeps both interfaces and a site mapper."""

    def __init__(self, taskBufferIF, ddmIF):
        """Keep the interfaces and populate the site mapper."""
        self.ddmIF, self.taskBufferIF = ddmIF, taskBufferIF
        self.refresh()

    def refresh(self):
        """Fetch the site mapper from the task buffer interface again."""
        newMapper = self.taskBufferIF.getSiteMapper()
        self.siteMapper = newMapper



Interaction.installSC(TaskSetupperBase)
            except:
                errType,errValue = sys.exc_info()[:2]
                if iTry+1 < nTry:
                    # sleep for retry
                    tmpLog.debug("sleep {0} due to {1}:{2}".format(iTry,errType,errValue))
                    time.sleep(30)
                else:
                    tmpLog.error("failed to send notification with {0}:{1}".format(errType,errValue))
                    if fileBackUp:
                        # write to file which is processed in add.py
                        mailFile = '{0}/jmail_{1}_{2}' % (panda_config.logdir,jediTaskID,commands.getoutput('uuidgen'))
                        oMail = open(mailFile,"w")
                        oMail.write(str(jediTaskID)+'\n'+toAdd+'\n'+msgBody)
                        oMail.close()
                break
        try:
            smtplib.stderr = org_smtpstderr
        except:
            pass



    # return email sender
    def senderAddress(self):
        """Return the sender address configured as ``panda_config.emailSender``."""
        sender = panda_config.emailSender
        return sender



    
Interaction.installSC(PostProcessorBase)
Beispiel #24
0
 def doBrokerage(self, taskSpec, cloudName, inputChunk, taskParamMap):
     """Select candidate sites for ``inputChunk`` and attach them to it.

     The candidate list is narrowed step by step (site status and PandaSite,
     scratch disk, free space in SE, walltime, memory, pilot activity).  The
     survivors are weighted with job statistics, capped to a fixed number,
     and added to the chunk through ``inputChunk.addSiteCandidate``.

     :param taskSpec: task whose site, cloud and resource settings are read.
         Its ``cloud`` may be filled from the task parameters, and its error
         diagnostics are set when the method fails.
     :param cloudName: not referenced in this method.
     :param inputChunk: supplies datasets, sizes and the preassigned site,
         and receives the selected site candidates.
     :param taskParamMap: task parameter mapping; when falsy it is loaded
         through ``taskBufferIF.getTaskParamsWithID_JEDI``.
     :return: ``(self.SC_SUCCEEDED, inputChunk)`` on success, or
         ``(self.SC_FAILED, inputChunk)`` when a step leaves no candidates or
         a lookup fails.
     """
     # make logger
     tmpLog = MsgWrapper(logger,
                         '<jediTaskID={0}>'.format(taskSpec.jediTaskID))
     tmpLog.debug('start')
     # return for failure
     # NOTE(review): retFatal is assigned but never returned below
     retFatal = self.SC_FATAL, inputChunk
     retTmpError = self.SC_FAILED, inputChunk
     # set cloud
     # Best effort: any failure while loading the task parameters is ignored.
     # NOTE(review): taskParamMap can then still be falsy; if it is None the
     # later "'PandaSite' in taskParamMap" test would raise - confirm callers.
     try:
         if not taskParamMap:
             taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(
                 taskSpec.jediTaskID)
             taskParamMap = RefinerUtils.decodeJSON(taskParam)
         if not taskSpec.cloud and 'cloud' in taskParamMap:
             taskSpec.cloud = taskParamMap['cloud']
     except Exception:
         pass
     # get sites in the cloud
     site_preassigned = True
     if taskSpec.site not in ['', None]:
         tmpLog.debug('site={0} is pre-assigned'.format(taskSpec.site))
         if self.siteMapper.checkSite(taskSpec.site):
             scanSiteList = [taskSpec.site]
         else:
             # not a known site name: use taskSpec.site as a regular
             # expression against the site names of the cloud
             scanSiteList = []
             for tmpSite in self.siteMapper.getCloud(
                     taskSpec.cloud)['sites']:
                 if re.search(taskSpec.site, tmpSite):
                     scanSiteList.append(tmpSite)
             if not scanSiteList:
                 tmpLog.error('unknown site={}'.format(taskSpec.site))
                 taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
                 return retTmpError
     elif inputChunk.getPreassignedSite() is not None:
         scanSiteList = [inputChunk.getPreassignedSite()]
         tmpLog.debug('site={0} is pre-assigned in masterDS'.format(
             inputChunk.getPreassignedSite()))
     else:
         site_preassigned = False
         scanSiteList = self.siteMapper.getCloud(taskSpec.cloud)['sites']
         # remove NA
         # NOTE(review): remove() mutates the list object returned by
         # getCloud(); confirm it is not shared state of the site mapper
         if 'NA' in scanSiteList:
             scanSiteList.remove('NA')
         tmpLog.debug('cloud=%s has %s candidates' %
                      (taskSpec.cloud, len(scanSiteList)))
     tmpLog.debug('initial {0} candidates'.format(len(scanSiteList)))
     ######################################
     # selection for status and PandaSite
     newScanSiteList = []
     for tmpSiteName in scanSiteList:
         tmpSiteSpec = self.siteMapper.getSite(tmpSiteName)
         # check site status
         # (a pre-assigned site is kept even when it is not online)
         if tmpSiteSpec.status != 'online' and not site_preassigned:
             tmpLog.debug('  skip %s due to status=%s' %
                          (tmpSiteName, tmpSiteSpec.status))
             continue
         # check PandaSite
         if 'PandaSite' in taskParamMap and taskParamMap['PandaSite']:
             if tmpSiteSpec.pandasite != taskParamMap['PandaSite']:
                 tmpLog.debug('  skip %s due to wrong PandaSite=%s <> %s' %
                              (tmpSiteName, tmpSiteSpec.pandasite,
                               taskParamMap['PandaSite']))
                 continue
         newScanSiteList.append(tmpSiteName)
     scanSiteList = newScanSiteList
     tmpLog.debug('{0} candidates passed site status check'.format(
         len(scanSiteList)))
     if scanSiteList == []:
         tmpLog.error('no candidates')
         taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
         return retTmpError
     ######################################
     # selection for scratch disk
     # minDiskCountS includes the largest input atom; both values are
     # integer-divided by 1024 twice (presumably bytes -> MB; TODO confirm
     # the unit of maxwdir)
     minDiskCountS = taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize(
     ) + inputChunk.getMaxAtomSize()
     minDiskCountS = minDiskCountS // 1024 // 1024
     # size for direct IO sites
     if taskSpec.useLocalIO():
         minDiskCountR = minDiskCountS
     else:
         minDiskCountR = taskSpec.getOutDiskSize(
         ) + taskSpec.getWorkDiskSize()
         minDiskCountR = minDiskCountR // 1024 // 1024
     newScanSiteList = []
     for tmpSiteName in scanSiteList:
         tmpSiteSpec = self.siteMapper.getSite(tmpSiteName)
         # check at the site
         # (a falsy maxwdir skips the check for that site)
         if tmpSiteSpec.maxwdir:
             if JediCoreUtils.use_direct_io_for_job(taskSpec, tmpSiteSpec,
                                                    inputChunk):
                 minDiskCount = minDiskCountR
             else:
                 minDiskCount = minDiskCountS
             if minDiskCount > tmpSiteSpec.maxwdir:
                 tmpLog.debug(
                     '  skip {0} due to small scratch disk={1} < {2}'.
                     format(tmpSiteName, tmpSiteSpec.maxwdir, minDiskCount))
                 continue
         newScanSiteList.append(tmpSiteName)
     scanSiteList = newScanSiteList
     tmpLog.debug('{0} candidates passed scratch disk check'.format(
         len(scanSiteList)))
     if scanSiteList == []:
         tmpLog.error('no candidates')
         taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
         return retTmpError
     ######################################
     # selection for available space in SE
     newScanSiteList = []
     for tmpSiteName in scanSiteList:
         # check at the site
         tmpSiteSpec = self.siteMapper.getSite(tmpSiteName)
         # free space must be >= 200GB
         # (a falsy space value bypasses the check)
         diskThreshold = 200
         tmpSpaceSize = tmpSiteSpec.space
         if tmpSiteSpec.space and tmpSpaceSize < diskThreshold:
             tmpLog.debug(
                 '  skip {0} due to disk shortage in SE = {1} < {2}GB'.
                 format(tmpSiteName, tmpSiteSpec.space, diskThreshold))
             continue
         newScanSiteList.append(tmpSiteName)
     scanSiteList = newScanSiteList
     tmpLog.debug('{0} candidates passed SE space check'.format(
         len(scanSiteList)))
     if scanSiteList == []:
         tmpLog.error('no candidates')
         taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
         return retTmpError
     ######################################
     # selection for walltime
     # (skipped when the task walltime is 0 or None; a site maxtime/mintime
     # of 0 disables that side of the check)
     minWalltime = taskSpec.walltime
     if minWalltime not in [0, None]:
         newScanSiteList = []
         for tmpSiteName in scanSiteList:
             tmpSiteSpec = self.siteMapper.getSite(tmpSiteName)
             # check at the site
             if tmpSiteSpec.maxtime != 0 and minWalltime > tmpSiteSpec.maxtime:
                 tmpLog.debug(
                     '  skip {0} due to short site walltime={1}(site upper limit) < {2}'
                     .format(tmpSiteName, tmpSiteSpec.maxtime, minWalltime))
                 continue
             if tmpSiteSpec.mintime != 0 and minWalltime < tmpSiteSpec.mintime:
                 tmpLog.debug(
                     '  skip {0} due to short job walltime={1}(site lower limit) > {2}'
                     .format(tmpSiteName, tmpSiteSpec.mintime, minWalltime))
                 continue
             newScanSiteList.append(tmpSiteName)
         scanSiteList = newScanSiteList
         tmpLog.debug('{0} candidates passed walltime check ={1}{2}'.format(
             len(scanSiteList), minWalltime, taskSpec.walltimeUnit))
         if scanSiteList == []:
             tmpLog.error('no candidates')
             taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
             return retTmpError
     ######################################
     # selection for memory
     # (skipped for pre-assigned sites and when the chunk reports no RAM
     # requirement)
     origMinRamCount = inputChunk.getMaxRamCount()
     if not site_preassigned and origMinRamCount:
         newScanSiteList = []
         for tmpSiteName in scanSiteList:
             tmpSiteSpec = self.siteMapper.getSite(tmpSiteName)
             # job memory requirement
             # (per-core tasks scale by the site core count and add the
             # task base RAM)
             if taskSpec.ramPerCore():
                 minRamCount = origMinRamCount * (
                     tmpSiteSpec.coreCount if tmpSiteSpec.coreCount else 1)
                 minRamCount += (taskSpec.baseRamCount
                                 if taskSpec.baseRamCount else 0)
             else:
                 minRamCount = origMinRamCount
             # site max memory requirement
             site_maxmemory = tmpSiteSpec.maxrss if tmpSiteSpec.maxrss else 0
             # check at the site
             if site_maxmemory and minRamCount and minRamCount > site_maxmemory:
                 tmpMsg = '  skip site={0} due to site RAM shortage {1}(site upper limit) less than {2} '.format(
                     tmpSiteName, site_maxmemory, minRamCount)
                 tmpLog.debug(tmpMsg)
                 continue
             # site min memory requirement
             site_minmemory = tmpSiteSpec.minrss if tmpSiteSpec.minrss else 0
             if site_minmemory and minRamCount and minRamCount < site_minmemory:
                 tmpMsg = '  skip site={0} due to job RAM shortage {1}(site lower limit) greater than {2} '.format(
                     tmpSiteName, site_minmemory, minRamCount)
                 tmpLog.info(tmpMsg)
                 continue
             newScanSiteList.append(tmpSiteName)
         scanSiteList = newScanSiteList
         tmpLog.debug('{0} candidates passed memory check'.format(
             len(scanSiteList)))
         if scanSiteList == []:
             tmpLog.error('no candidates')
             taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
             return retTmpError
     ######################################
     # selection for nPilot
     nWNmap = self.taskBufferIF.getCurrentSiteData()
     newScanSiteList = []
     for tmpSiteName in scanSiteList:
         # check at the site
         nPilot = 0
         if tmpSiteName in nWNmap:
             nPilot = nWNmap[tmpSiteName]['getJob'] + nWNmap[tmpSiteName][
                 'updateJob']
         # NOTE: the 'continue' below is commented out, so a site without
         # pilots is only logged here and still stays in the list
         if nPilot == 0 and taskSpec.prodSourceLabel not in ['test']:
             tmpLog.debug('  skip %s due to no pilot' % tmpSiteName)
             #continue
         newScanSiteList.append(tmpSiteName)
     scanSiteList = newScanSiteList
     tmpLog.debug('{0} candidates passed pilot activity check'.format(
         len(scanSiteList)))
     if scanSiteList == []:
         tmpLog.error('no candidates')
         taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
         return retTmpError
     ######################################
     # sites already used by task
     tmpSt, sitesUsedByTask = self.taskBufferIF.getSitesUsedByTask_JEDI(
         taskSpec.jediTaskID)
     if not tmpSt:
         tmpLog.error('failed to get sites which already used by task')
         taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
         return retTmpError
     ######################################
     # get list of available files
     availableFileMap = {}
     for datasetSpec in inputChunk.getDatasets():
         try:
             # get list of site to be scanned
             tmpLog.debug(
                 'getting the list of available files for {0}'.format(
                     datasetSpec.datasetName))
             # unified site names, without duplicates, in scan order
             fileScanSiteList = []
             for tmpPseudoSiteName in scanSiteList:
                 tmpSiteSpec = self.siteMapper.getSite(tmpPseudoSiteName)
                 tmpSiteName = tmpSiteSpec.get_unified_name()
                 if tmpSiteName in fileScanSiteList:
                     continue
                 fileScanSiteList.append(tmpSiteName)
             # mapping between sites and input storage endpoints
             siteStorageEP = AtlasBrokerUtils.getSiteInputStorageEndpointMap(
                 fileScanSiteList, self.siteMapper,
                 taskSpec.prodSourceLabel, None)
             # disable file lookup for merge jobs
             if inputChunk.isMerging:
                 checkCompleteness = False
             else:
                 checkCompleteness = True
             # complete_only is requested for non-master datasets only
             if not datasetSpec.isMaster():
                 useCompleteOnly = True
             else:
                 useCompleteOnly = False
             # get available files per site/endpoint
             tmpAvFileMap = self.ddmIF.getAvailableFiles(
                 datasetSpec,
                 siteStorageEP,
                 self.siteMapper,
                 check_completeness=checkCompleteness,
                 file_scan_in_container=False,
                 complete_only=useCompleteOnly)
             if tmpAvFileMap is None:
                 raise Interaction.JEDITemporaryError(
                     'ddmIF.getAvailableFiles failed')
             availableFileMap[datasetSpec.datasetName] = tmpAvFileMap
         except Exception as e:
             tmpLog.error('failed to get available files with {}'.format(e))
             taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
             return retTmpError
     ######################################
     # calculate weight
     tmpSt, jobStatPrioMap = self.taskBufferIF.getJobStatisticsByGlobalShare(
         taskSpec.vo)
     if not tmpSt:
         tmpLog.error('failed to get job statistics with priority')
         taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
         return retTmpError
     ######################################
     # final procedure
     tmpLog.debug('final {0} candidates'.format(len(scanSiteList)))
     weightMap = {}
     candidateSpecList = []
     preSiteCandidateSpec = None
     for tmpSiteName in scanSiteList:
         # get number of jobs in each job status. Using workQueueID=None to include non-JEDI jobs
         nRunning = AtlasBrokerUtils.getNumJobs(jobStatPrioMap, tmpSiteName,
                                                'running', None, None)
         nAssigned = AtlasBrokerUtils.getNumJobs(jobStatPrioMap,
                                                 tmpSiteName, 'defined',
                                                 None, None)
         nActivated = AtlasBrokerUtils.getNumJobs(jobStatPrioMap,
                                                  tmpSiteName, 'activated',
                                                  None, None)
         # more running jobs raise the weight; activated and defined jobs
         # lower it (the +1 terms avoid division by zero)
         weight = float(nRunning + 1) / float(nActivated + nAssigned +
                                              1) / float(nAssigned + 1)
         # make candidate
         siteCandidateSpec = SiteCandidate(tmpSiteName)
         # set weight
         siteCandidateSpec.weight = weight
         # files
         for tmpDatasetName, availableFiles in six.iteritems(
                 availableFileMap):
             if tmpSiteName in availableFiles:
                 siteCandidateSpec.add_local_disk_files(
                     availableFiles[tmpSiteName]['localdisk'])
         # append
         # sites already used by the task bypass the weight ranking and go
         # straight into the candidate list
         if tmpSiteName in sitesUsedByTask:
             candidateSpecList.append(siteCandidateSpec)
         else:
             if weight not in weightMap:
                 weightMap[weight] = []
             weightMap[weight].append(siteCandidateSpec)
     # limit the number of sites
     # fill the remaining slots from the highest weight down; sites sharing
     # a weight are shuffled before being taken
     maxNumSites = 5
     weightList = list(weightMap.keys())
     weightList.sort()
     weightList.reverse()
     for weightVal in weightList:
         if len(candidateSpecList) >= maxNumSites:
             break
         sitesWithWeight = weightMap[weightVal]
         random.shuffle(sitesWithWeight)
         candidateSpecList += sitesWithWeight[:(maxNumSites -
                                                len(candidateSpecList))]
     # collect site names
     # NOTE(review): this list is overwritten by newScanSiteList below
     scanSiteList = []
     for siteCandidateSpec in candidateSpecList:
         scanSiteList.append(siteCandidateSpec.siteName)
     # append candidates
     newScanSiteList = []
     for siteCandidateSpec in candidateSpecList:
         # append
         inputChunk.addSiteCandidate(siteCandidateSpec)
         newScanSiteList.append(siteCandidateSpec.siteName)
         tmpLog.debug('  use {} with weight={} nFiles={}'.format(
             siteCandidateSpec.siteName, siteCandidateSpec.weight,
             len(siteCandidateSpec.localDiskFiles)))
     scanSiteList = newScanSiteList
     if scanSiteList == []:
         tmpLog.error('no candidates')
         taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
         return retTmpError
     # return
     tmpLog.debug('done')
     return self.SC_SUCCEEDED, inputChunk