Code Example #1
 def doPreProRefine(self,taskParamMap):
     # no preprocessing
     if not taskParamMap.has_key('preproSpec'):
         return None,taskParamMap
     # already preprocessed
     if self.taskSpec.checkPreProcessed():
         # get replaced task params
         tmpStat,tmpJsonStr = self.taskBufferIF.getPreprocessMetadata_JEDI(self.taskSpec.jediTaskID)
         try:
             # replace placeholders 
             replaceParams = RefinerUtils.decodeJSON(tmpJsonStr)
             self.tmpLog.debug("replace placeholders with "+str(replaceParams))
             for tmpKey,tmpVal in replaceParams.iteritems():
                 self.replacePlaceHolders(taskParamMap,tmpKey,tmpVal)
         except:
             errtype,errvalue = sys.exc_info()[:2]
             self.tmpLog.error('{0} failed to get additional task params with {1}:{2}'.format(self.__class__.__name__,
                                                                                              errtype.__name__,errvalue))
             return False,taskParamMap
         # succeeded
         self.updatedTaskParams = taskParamMap
         return None,taskParamMap
     # make dummy dataset to keep track of preprocessing
     datasetSpec = JediDatasetSpec()
     datasetSpec.datasetName = 'panda.pp.in.{0}.{1}'.format(uuid.uuid4(),self.taskSpec.jediTaskID)
     datasetSpec.jediTaskID = self.taskSpec.jediTaskID
     datasetSpec.type = 'pp_input'
     datasetSpec.vo = self.taskSpec.vo
     datasetSpec.nFiles = 1
     datasetSpec.nFilesUsed = 0
     datasetSpec.nFilesToBeUsed = 1
     datasetSpec.nFilesFinished = 0
     datasetSpec.nFilesFailed = 0
     datasetSpec.nFilesOnHold = 0
     datasetSpec.status = 'ready'
     self.inMasterDatasetSpec.append(datasetSpec)
     # make file 
     fileSpec = JediFileSpec()
     fileSpec.jediTaskID   = datasetSpec.jediTaskID
     fileSpec.type         = datasetSpec.type
     fileSpec.status       = 'ready'            
     fileSpec.lfn          = 'pseudo_lfn'
     fileSpec.attemptNr    = 0
     fileSpec.maxAttempt   = 3
     fileSpec.keepTrack    = 1
     datasetSpec.addFile(fileSpec)
     # make log dataset
     logDatasetSpec = JediDatasetSpec()
     logDatasetSpec.datasetName = 'panda.pp.log.{0}.{1}'.format(uuid.uuid4(),self.taskSpec.jediTaskID)
     logDatasetSpec.jediTaskID = self.taskSpec.jediTaskID
     logDatasetSpec.type = 'tmpl_pp_log'
     logDatasetSpec.streamName = 'PP_LOG'
     logDatasetSpec.vo = self.taskSpec.vo
     logDatasetSpec.nFiles = 0
     logDatasetSpec.nFilesUsed = 0
     logDatasetSpec.nFilesToBeUsed = 0
     logDatasetSpec.nFilesFinished = 0
     logDatasetSpec.nFilesFailed = 0
     logDatasetSpec.nFilesOnHold = 0
     logDatasetSpec.status = 'defined'
     self.outDatasetSpecList.append(logDatasetSpec)
     # make output template for log
     outTemplateMap = {'jediTaskID' : self.taskSpec.jediTaskID,
                       'serialNr' : 1,
                       'streamName' : logDatasetSpec.streamName,
                       'filenameTemplate' : "{0}._${{SN}}.log.tgz".format(logDatasetSpec.datasetName),
                       'outtype' : re.sub('^tmpl_','',logDatasetSpec.type),
                       }
     self.outputTemplateMap[logDatasetSpec.outputMapKey()] = [outTemplateMap]
     # set split rule to use preprocessing
     self.taskSpec.setPrePro()
     # set task status
     self.taskSpec.status = 'topreprocess'
     # return
     return True,taskParamMap
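
The already-preprocessed branch above substitutes placeholders in the task parameter map with values returned by the preprocessing step. A minimal standalone sketch of that idea, assuming placeholders appear as ${KEY} inside string values (replace_placeholders is a hypothetical helper, not the actual replacePlaceHolders implementation):

def replace_placeholders(node, key, value):
    # walk dicts, lists, and strings recursively, substituting in place
    if isinstance(node, dict):
        for k in node:
            node[k] = replace_placeholders(node[k], key, value)
        return node
    if isinstance(node, list):
        return [replace_placeholders(x, key, value) for x in node]
    if isinstance(node, str):
        return node.replace('${' + key + '}', str(value))
    return node

taskParams = {'jobParameters': [{'value': 'run --seed ${SEED}'}]}
replace_placeholders(taskParams, 'SEED', 42)
# taskParams is now {'jobParameters': [{'value': 'run --seed 42'}]}
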
Code Example #2
 def doPreProRefine(self, taskParamMap):
     # no preprocessing
     if not taskParamMap.has_key('preproSpec'):
         return None, taskParamMap
     # already preprocessed
     if self.taskSpec.checkPreProcessed():
         # get replaced task params
         tmpStat, tmpJsonStr = self.taskBufferIF.getPreprocessMetadata_JEDI(
             self.taskSpec.jediTaskID)
         try:
             # replace placeholders
             replaceParams = RefinerUtils.decodeJSON(tmpJsonStr)
             self.tmpLog.debug("replace placeholders with " +
                               str(replaceParams))
             for tmpKey, tmpVal in replaceParams.iteritems():
                 self.replacePlaceHolders(taskParamMap, tmpKey, tmpVal)
         except:
             errtype, errvalue = sys.exc_info()[:2]
             self.tmpLog.error('{0} failed to get additional task params with {1}:{2}'.format(
                 self.__class__.__name__, errtype.__name__, errvalue))
             return False, taskParamMap
         # succeeded
         self.updatedTaskParams = taskParamMap
         return None, taskParamMap
     # make dummy dataset to keep track of preprocessing
     datasetSpec = JediDatasetSpec()
     datasetSpec.datasetName = 'panda.pp.in.{0}.{1}'.format(
         uuid.uuid4(), self.taskSpec.jediTaskID)
     datasetSpec.jediTaskID = self.taskSpec.jediTaskID
     datasetSpec.type = 'pp_input'
     datasetSpec.vo = self.taskSpec.vo
     datasetSpec.nFiles = 1
     datasetSpec.nFilesUsed = 0
     datasetSpec.nFilesToBeUsed = 1
     datasetSpec.nFilesFinished = 0
     datasetSpec.nFilesFailed = 0
     datasetSpec.nFilesOnHold = 0
     datasetSpec.status = 'ready'
     self.inMasterDatasetSpec.append(datasetSpec)
     # make file
     fileSpec = JediFileSpec()
     fileSpec.jediTaskID = datasetSpec.jediTaskID
     fileSpec.type = datasetSpec.type
     fileSpec.status = 'ready'
     fileSpec.lfn = 'pseudo_lfn'
     fileSpec.attemptNr = 0
     fileSpec.maxAttempt = 3
     fileSpec.keepTrack = 1
     datasetSpec.addFile(fileSpec)
     # make log dataset
     logDatasetSpec = JediDatasetSpec()
     logDatasetSpec.datasetName = 'panda.pp.log.{0}.{1}'.format(
         uuid.uuid4(), self.taskSpec.jediTaskID)
     logDatasetSpec.jediTaskID = self.taskSpec.jediTaskID
     logDatasetSpec.type = 'tmpl_pp_log'
     logDatasetSpec.streamName = 'PP_LOG'
     logDatasetSpec.vo = self.taskSpec.vo
     logDatasetSpec.nFiles = 0
     logDatasetSpec.nFilesUsed = 0
     logDatasetSpec.nFilesToBeUsed = 0
     logDatasetSpec.nFilesFinished = 0
     logDatasetSpec.nFilesFailed = 0
     logDatasetSpec.nFilesOnHold = 0
     logDatasetSpec.status = 'defined'
     self.outDatasetSpecList.append(logDatasetSpec)
     # make output template for log
     outTemplateMap = {
         'jediTaskID': self.taskSpec.jediTaskID,
         'serialNr': 1,
         'streamName': logDatasetSpec.streamName,
         'filenameTemplate': "{0}._${{SN}}.log.tgz".format(logDatasetSpec.datasetName),
         'outtype': re.sub('^tmpl_', '', logDatasetSpec.type),
     }
     self.outputTemplateMap[logDatasetSpec.outputMapKey()] = [outTemplateMap]
     # set split rule to use preprocessing
     self.taskSpec.setPrePro()
     # set task status
     self.taskSpec.status = 'topreprocess'
     # return
     return True, taskParamMap
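
Note the doubled braces in the log filename template above: str.format treats {{ and }} as literal braces, so the ${SN} serial-number token survives formatting and is only resolved later when JEDI assigns serial numbers. A quick check:

datasetName = 'panda.pp.log.1234.567'
print("{0}._${{SN}}.log.tgz".format(datasetName))
# prints: panda.pp.log.1234.567._${SN}.log.tgz
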
Code Example #3
 def doBasicRefine(self,taskParamMap):
     # get input/output/log dataset specs
     nIn  = 0
     nOutMap = {}
     if isinstance(taskParamMap['log'],dict):
         itemList = taskParamMap['jobParameters'] + [taskParamMap['log']]
     else:
         itemList = taskParamMap['jobParameters'] + taskParamMap['log']
     # pseudo input
     if taskParamMap.has_key('noInput') and taskParamMap['noInput'] == True:
         tmpItem = {}
         tmpItem['type']       = 'template'
         tmpItem['value']      = ''
         tmpItem['dataset']    = 'pseudo_dataset'
         tmpItem['param_type'] = 'pseudo_input'
         itemList = [tmpItem] + itemList
     # random seed
     if RefinerUtils.useRandomSeed(taskParamMap):
         tmpItem = {}
         tmpItem['type']       = 'template'
         tmpItem['value']      = ''
         tmpItem['dataset']    = 'RNDMSEED'
         tmpItem['param_type'] = 'random_seed'
         itemList.append(tmpItem)
     # loop over all items
     allDsList = []   
     for tmpItem in itemList:
         # look for datasets
         if tmpItem['type'] == 'template' and tmpItem.has_key('dataset'):
             # avoid duplication
             if not tmpItem['dataset'] in allDsList:
                 allDsList.append(tmpItem['dataset'])
             else:
                 continue
             datasetSpec = JediDatasetSpec()
             datasetSpec.datasetName = tmpItem['dataset']
             datasetSpec.jediTaskID = self.taskSpec.jediTaskID
             datasetSpec.type = tmpItem['param_type']
             if tmpItem.has_key('container'):
                 datasetSpec.containerName = tmpItem['container']
             if tmpItem.has_key('token'):
                 datasetSpec.storageToken = tmpItem['token']
             if tmpItem.has_key('destination'):
                 datasetSpec.destination = tmpItem['destination']
             if tmpItem.has_key('attributes'):
                 datasetSpec.setDatasetAttribute(tmpItem['attributes'])
             if tmpItem.has_key('ratio'):
                 datasetSpec.setDatasetAttribute('ratio={0}'.format(tmpItem['ratio']))
             if tmpItem.has_key('check'):
                 datasetSpec.setDatasetAttribute('cc')
             if tmpItem.has_key('usedup'):
                 datasetSpec.setDatasetAttribute('ud')
             if tmpItem.has_key('random'):
                 datasetSpec.setDatasetAttribute('rd')
             if tmpItem.has_key('reusable'):
                 datasetSpec.setDatasetAttribute('ru')
             if tmpItem.has_key('offset'):
                 datasetSpec.setOffset(tmpItem['offset'])
             if tmpItem.has_key('allowNoOutput'):
                 datasetSpec.allowNoOutput()
             if tmpItem.has_key('nFilesPerJob'):
                 datasetSpec.setNumFilesPerJob(tmpItem['nFilesPerJob'])
             if tmpItem.has_key('num_records'):
                 datasetSpec.setNumRecords(tmpItem['num_records'])
             if 'transient' in tmpItem:
                 datasetSpec.setTransient(tmpItem['transient'])
             datasetSpec.vo = self.taskSpec.vo
             datasetSpec.nFiles = 0
             datasetSpec.nFilesUsed = 0
             datasetSpec.nFilesFinished = 0
             datasetSpec.nFilesFailed = 0
             datasetSpec.nFilesOnHold = 0
             datasetSpec.nEvents = 0
             datasetSpec.nEventsUsed = 0
             datasetSpec.nEventsToBeUsed = 0
             datasetSpec.status = 'defined'
             if datasetSpec.type in JediDatasetSpec.getInputTypes() + ['random_seed']:
                 datasetSpec.streamName = RefinerUtils.extractStreamName(tmpItem['value'])
                 if not tmpItem.has_key('expandedList'):
                     tmpItem['expandedList'] = []
                 # dataset names could be comma-concatenated
                 datasetNameList = datasetSpec.datasetName.split(',')
                 # datasets could be added by incexec
                 incexecDS = 'dsFor{0}'.format(datasetSpec.streamName)
                 # remove /XYZ
                 incexecDS = incexecDS.split('/')[0]
                 if taskParamMap.has_key(incexecDS):
                     for tmpDatasetName in taskParamMap[incexecDS].split(','):
                         if not tmpDatasetName in datasetNameList:
                             datasetNameList.append(tmpDatasetName)
                 # loop over all dataset names
                 inDatasetSpecList = []
                 for datasetName in datasetNameList:
                     # skip empty
                     if datasetName == '':
                         continue
                     # expand
                     if datasetSpec.isPseudo() or datasetSpec.type in ['random_seed'] or datasetName == 'DBR_LATEST':
                         # pseudo input
                         tmpDatasetNameList = [datasetName]
                     elif tmpItem.has_key('expand') and tmpItem['expand'] == True:
                         # expand dataset container
                         tmpDatasetNameList = self.ddmIF.getInterface(self.taskSpec.vo).expandContainer(datasetName)
                     else:
                         # normal dataset name
                         tmpDatasetNameList = self.ddmIF.getInterface(self.taskSpec.vo).listDatasets(datasetName)
                     for elementDatasetName in tmpDatasetNameList:
                         if not elementDatasetName in tmpItem['expandedList']:
                             tmpItem['expandedList'].append(elementDatasetName)
                             inDatasetSpec = copy.copy(datasetSpec)
                             inDatasetSpec.datasetName = elementDatasetName
                             inDatasetSpec.containerName = datasetName
                             inDatasetSpecList.append(inDatasetSpec)
                 # empty input
                 if inDatasetSpecList == [] and self.oldTaskStatus != 'rerefine':
                     errStr = 'doBasicRefine : unknown input dataset "{0}"'.format(datasetSpec.datasetName)
                     self.taskSpec.setErrDiag(errStr)
                     if not datasetSpec.datasetName in self.unknownDatasetList:
                         self.unknownDatasetList.append(datasetSpec.datasetName)
                     raise JediException.UnknownDatasetError,errStr
                 # set master flag
                 for inDatasetSpec in inDatasetSpecList:    
                     if nIn == 0:
                         # master
                         self.inMasterDatasetSpec.append(inDatasetSpec)
                     else:
                         # secondary
                         self.inSecDatasetSpecList.append(inDatasetSpec)
                 nIn += 1    
                 continue
             if datasetSpec.type in ['output','log']:
                 if not nOutMap.has_key(datasetSpec.type):
                     nOutMap[datasetSpec.type] = 0
                 # make stream name
                 datasetSpec.streamName = "{0}{1}".format(datasetSpec.type.upper(),nOutMap[datasetSpec.type])
                 nOutMap[datasetSpec.type] += 1
                 # set attribute for event service
                 if self.taskSpec.useEventService() and taskParamMap.has_key('objectStore') and datasetSpec.type in ['output']:
                     datasetSpec.setObjectStore(taskParamMap['objectStore'])
                 # extract output filename template and change the value field
                 outFileTemplate,tmpItem['value'] = RefinerUtils.extractReplaceOutFileTemplate(tmpItem['value'],
                                                                                               datasetSpec.streamName)
                 # make output template
                 if outFileTemplate != None:
                     if tmpItem.has_key('offset'):
                         offsetVal = 1 + tmpItem['offset']
                     else:
                         offsetVal = 1
                     outTemplateMap = {'jediTaskID' : self.taskSpec.jediTaskID,
                                       'serialNr' : offsetVal,
                                       'streamName' : datasetSpec.streamName,
                                       'filenameTemplate' : outFileTemplate,
                                       'outtype' : datasetSpec.type,
                                       }
                     if self.outputTemplateMap.has_key(datasetSpec.outputMapKey()):
                         # multiple files are associated with the same output dataset
                         self.outputTemplateMap[datasetSpec.outputMapKey()].append(outTemplateMap)
                         # don't insert the same output dataset
                         continue
                     self.outputTemplateMap[datasetSpec.outputMapKey()] = [outTemplateMap]
                 # append
                 self.outDatasetSpecList.append(datasetSpec)
                 # make unmerged dataset
                 if taskParamMap.has_key('mergeOutput') and taskParamMap['mergeOutput'] == True:
                     umDatasetSpec = JediDatasetSpec()
                     umDatasetSpec.datasetName = 'panda.um.' + datasetSpec.datasetName
                     umDatasetSpec.jediTaskID = self.taskSpec.jediTaskID
                     umDatasetSpec.storageToken = 'TOMERGE'
                     umDatasetSpec.vo = datasetSpec.vo
                     umDatasetSpec.type = "tmpl_trn_" + datasetSpec.type
                     umDatasetSpec.nFiles = 0
                     umDatasetSpec.nFilesUsed = 0
                     umDatasetSpec.nFilesToBeUsed = 0
                     umDatasetSpec.nFilesFinished = 0
                     umDatasetSpec.nFilesFailed = 0
                     umDatasetSpec.nFilesOnHold = 0
                     umDatasetSpec.status = 'defined'
                     umDatasetSpec.streamName = datasetSpec.streamName
                     if datasetSpec.isAllowedNoOutput():
                         umDatasetSpec.allowNoOutput()
                     # make unmerged output template 
                     if outFileTemplate != None:
                         umOutTemplateMap = {'jediTaskID' : self.taskSpec.jediTaskID,
                                             'serialNr' : 1,
                                             'streamName' : umDatasetSpec.streamName,
                                             'outtype' : datasetSpec.type,
                                             }
                         # append temporary name
                         if taskParamMap.has_key('umNameAtEnd') and taskParamMap['umNameAtEnd'] == True:
                             # append temporary name at the end
                             umOutTemplateMap['filenameTemplate'] = outFileTemplate + '.panda.um'
                         else:
                             umOutTemplateMap['filenameTemplate'] = 'panda.um.' + outFileTemplate
                         if self.outputTemplateMap.has_key(umDatasetSpec.outputMapKey()):
                             # multiple files are associated with the same output dataset
                             self.outputTemplateMap[umDatasetSpec.outputMapKey()].append(umOutTemplateMap)
                             # don't insert the same output dataset
                             continue
                         self.outputTemplateMap[umDatasetSpec.outputMapKey()] = [umOutTemplateMap]
                     # use log as master for merging
                     if datasetSpec.type == 'log':
                         self.unmergeMasterDatasetSpec[datasetSpec.outputMapKey()] = umDatasetSpec
                     else:
                         # append
                         self.unmergeDatasetSpecMap[datasetSpec.outputMapKey()] = umDatasetSpec
     # set attributes for merging
     if taskParamMap.has_key('mergeOutput') and taskParamMap['mergeOutput'] == True:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['mergeOutput'])
     # make job parameters
     rndmSeedOffset = None
     firstEventOffset = None
     jobParameters = ''
     for tmpItem in taskParamMap['jobParameters']:
         if tmpItem.has_key('value'):
             # hidden parameter
             if tmpItem.has_key('hidden') and tmpItem['hidden'] == True:
                 continue
             # add tags for ES-only parameters
             esOnly = False
             if 'es_only' in tmpItem and tmpItem['es_only'] == True:
                 esOnly = True
             if esOnly:
                 jobParameters += '<PANDA_ES_ONLY>'
             jobParameters += '{0}'.format(tmpItem['value'])
             if esOnly:
                 jobParameters += '</PANDA_ES_ONLY>'
             # padding
             if tmpItem.has_key('padding') and tmpItem['padding'] == False:
                 pass
             else:
                 jobParameters += ' '
             # get offset for random seed and first event
             if tmpItem['type'] == 'template' and tmpItem['param_type'] == 'number':
                 if '${RNDMSEED}' in tmpItem['value']:
                     if tmpItem.has_key('offset'):
                         rndmSeedOffset = tmpItem['offset']
                     else:
                         rndmSeedOffset = 0
                 elif '${FIRSTEVENT}' in tmpItem['value']:
                     if tmpItem.has_key('offset'):
                         firstEventOffset = tmpItem['offset']    
     jobParameters = jobParameters[:-1]
     # append parameters for event service merging if necessary
     esmergeParams = self.getParamsForEventServiceMerging(taskParamMap)
     if esmergeParams != None:
         jobParameters += esmergeParams
     self.setJobParamsTemplate(jobParameters)
     # set random seed offset
     if rndmSeedOffset != None:
         self.setSplitRule(None,rndmSeedOffset,JediTaskSpec.splitRuleToken['randomSeed'])
     if firstEventOffset != None:
         self.setSplitRule(None,firstEventOffset,JediTaskSpec.splitRuleToken['firstEvent'])
     # return
     return
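
doBasicRefine accepts comma-concatenated dataset names and also merges in names passed later by incexec as dsFor<streamName> task parameters, de-duplicating as it goes. A condensed, self-contained sketch of just that merging step (function and sample names are illustrative):

def collect_dataset_names(datasetName, streamName, taskParamMap):
    # dataset names could be comma-concatenated
    names = [n for n in datasetName.split(',') if n != '']
    # datasets could be added by incexec as e.g. taskParamMap['dsForIN'];
    # a /XYZ suffix on the stream name is dropped first
    incexecDS = 'dsFor{0}'.format(streamName).split('/')[0]
    for extra in taskParamMap.get(incexecDS, '').split(','):
        if extra != '' and extra not in names:
            names.append(extra)
    return names

print(collect_dataset_names('dsA,dsB', 'IN', {'dsForIN': 'dsB,dsC'}))
# -> ['dsA', 'dsB', 'dsC']
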
Code Example #4
 def doBasicRefine(self, taskParamMap):
     # get input/output/log dataset specs
     nIn = 0
     nOutMap = {}
     if isinstance(taskParamMap['log'], dict):
         itemList = taskParamMap['jobParameters'] + [taskParamMap['log']]
     else:
         itemList = taskParamMap['jobParameters'] + taskParamMap['log']
     # pseudo input
     if taskParamMap.has_key('noInput') and taskParamMap['noInput'] == True:
         tmpItem = {}
         tmpItem['type'] = 'template'
         tmpItem['value'] = ''
         tmpItem['dataset'] = 'pseudo_dataset'
         tmpItem['param_type'] = 'pseudo_input'
         itemList = [tmpItem] + itemList
     # random seed
     if RefinerUtils.useRandomSeed(taskParamMap):
         tmpItem = {}
         tmpItem['type'] = 'template'
         tmpItem['value'] = ''
         tmpItem['dataset'] = 'RNDMSEED'
         tmpItem['param_type'] = 'random_seed'
         itemList.append(tmpItem)
     # loop over all items
     allDsList = []
     for tmpItem in itemList:
         # look for datasets
         if tmpItem['type'] == 'template' and tmpItem.has_key('dataset'):
             # avoid duplication
             if not tmpItem['dataset'] in allDsList:
                 allDsList.append(tmpItem['dataset'])
             else:
                 continue
             datasetSpec = JediDatasetSpec()
             datasetSpec.datasetName = tmpItem['dataset']
             datasetSpec.jediTaskID = self.taskSpec.jediTaskID
             datasetSpec.type = tmpItem['param_type']
             if tmpItem.has_key('container'):
                 datasetSpec.containerName = tmpItem['container']
             if tmpItem.has_key('token'):
                 datasetSpec.storageToken = tmpItem['token']
             if tmpItem.has_key('destination'):
                 datasetSpec.destination = tmpItem['destination']
             if tmpItem.has_key('attributes'):
                 datasetSpec.setDatasetAttribute(tmpItem['attributes'])
             if tmpItem.has_key('ratio'):
                 datasetSpec.setDatasetAttribute('ratio={0}'.format(tmpItem['ratio']))
             if tmpItem.has_key('eventRatio'):
                 datasetSpec.setEventRatio(tmpItem['eventRatio'])
             if tmpItem.has_key('check'):
                 datasetSpec.setDatasetAttribute('cc')
             if tmpItem.has_key('usedup'):
                 datasetSpec.setDatasetAttribute('ud')
             if tmpItem.has_key('random'):
                 datasetSpec.setDatasetAttribute('rd')
             if tmpItem.has_key('reusable'):
                 datasetSpec.setDatasetAttribute('ru')
             if tmpItem.has_key('offset'):
                 datasetSpec.setOffset(tmpItem['offset'])
             if tmpItem.has_key('allowNoOutput'):
                 datasetSpec.allowNoOutput()
             if tmpItem.has_key('nFilesPerJob'):
                 datasetSpec.setNumFilesPerJob(tmpItem['nFilesPerJob'])
             if tmpItem.has_key('num_records'):
                 datasetSpec.setNumRecords(tmpItem['num_records'])
             if 'transient' in tmpItem:
                 datasetSpec.setTransient(tmpItem['transient'])
             datasetSpec.vo = self.taskSpec.vo
             datasetSpec.nFiles = 0
             datasetSpec.nFilesUsed = 0
             datasetSpec.nFilesFinished = 0
             datasetSpec.nFilesFailed = 0
             datasetSpec.nFilesOnHold = 0
             datasetSpec.nEvents = 0
             datasetSpec.nEventsUsed = 0
             datasetSpec.nEventsToBeUsed = 0
             datasetSpec.status = 'defined'
             if datasetSpec.type in JediDatasetSpec.getInputTypes() + ['random_seed']:
                 datasetSpec.streamName = RefinerUtils.extractStreamName(tmpItem['value'])
                 if not tmpItem.has_key('expandedList'):
                     tmpItem['expandedList'] = []
                 # dataset names could be comma-concatenated
                 datasetNameList = datasetSpec.datasetName.split(',')
                 # datasets could be added by incexec
                 incexecDS = 'dsFor{0}'.format(datasetSpec.streamName)
                 # remove /XYZ
                 incexecDS = incexecDS.split('/')[0]
                 if taskParamMap.has_key(incexecDS):
                     for tmpDatasetName in taskParamMap[incexecDS].split(','):
                         if not tmpDatasetName in datasetNameList:
                             datasetNameList.append(tmpDatasetName)
                 # loop over all dataset names
                 inDatasetSpecList = []
                 for datasetName in datasetNameList:
                     # skip empty
                     if datasetName == '':
                         continue
                     # expand
                     if datasetSpec.isPseudo() or datasetSpec.type in ['random_seed'] or datasetName == 'DBR_LATEST':
                         # pseudo input
                         tmpDatasetNameList = [datasetName]
                     elif tmpItem.has_key('expand') and tmpItem['expand'] == True:
                         # expand dataset container
                         tmpDatasetNameList = self.ddmIF.getInterface(
                             self.taskSpec.vo).expandContainer(datasetName)
                     else:
                         # normal dataset name
                         tmpDatasetNameList = self.ddmIF.getInterface(
                             self.taskSpec.vo).listDatasets(datasetName)
                     for elementDatasetName in tmpDatasetNameList:
                         if nIn > 0 or elementDatasetName not in tmpItem['expandedList']:
                             tmpItem['expandedList'].append(elementDatasetName)
                             inDatasetSpec = copy.copy(datasetSpec)
                             inDatasetSpec.datasetName = elementDatasetName
                             inDatasetSpec.containerName = datasetName
                             inDatasetSpecList.append(inDatasetSpec)
                 # empty input
                 if inDatasetSpecList == [] and self.oldTaskStatus != 'rerefine':
                     errStr = 'doBasicRefine : unknown input dataset "{0}"'.format(datasetSpec.datasetName)
                     self.taskSpec.setErrDiag(errStr)
                     if datasetSpec.datasetName not in self.unknownDatasetList:
                         self.unknownDatasetList.append(datasetSpec.datasetName)
                     raise JediException.UnknownDatasetError, errStr
                 # set master flag
                 for inDatasetSpec in inDatasetSpecList:
                     if nIn == 0:
                         # master
                         self.inMasterDatasetSpec.append(inDatasetSpec)
                     else:
                         # secondary
                         self.inSecDatasetSpecList.append(inDatasetSpec)
                 nIn += 1
                 continue
             if datasetSpec.type in ['output', 'log']:
                 if not nOutMap.has_key(datasetSpec.type):
                     nOutMap[datasetSpec.type] = 0
                 # make stream name
                 datasetSpec.streamName = "{0}{1}".format(
                     datasetSpec.type.upper(), nOutMap[datasetSpec.type])
                 nOutMap[datasetSpec.type] += 1
                 # set attribute for event service
                 if self.taskSpec.useEventService() and taskParamMap.has_key('objectStore') and datasetSpec.type in ['output']:
                     datasetSpec.setObjectStore(taskParamMap['objectStore'])
                 # extract output filename template and change the value field
                 outFileTemplate, tmpItem['value'] = RefinerUtils.extractReplaceOutFileTemplate(
                     tmpItem['value'], datasetSpec.streamName)
                 # make output template
                 if outFileTemplate != None:
                     if tmpItem.has_key('offset'):
                         offsetVal = 1 + tmpItem['offset']
                     else:
                         offsetVal = 1
                     outTemplateMap = {
                         'jediTaskID': self.taskSpec.jediTaskID,
                         'serialNr': offsetVal,
                         'streamName': datasetSpec.streamName,
                         'filenameTemplate': outFileTemplate,
                         'outtype': datasetSpec.type,
                     }
                     if self.outputTemplateMap.has_key(datasetSpec.outputMapKey()):
                         # multiple files are associated with the same output dataset
                         self.outputTemplateMap[datasetSpec.outputMapKey()].append(outTemplateMap)
                         # don't insert the same output dataset
                         continue
                     self.outputTemplateMap[datasetSpec.outputMapKey()] = [outTemplateMap]
                 # append
                 self.outDatasetSpecList.append(datasetSpec)
                 # make unmerged dataset
                 if taskParamMap.has_key('mergeOutput') and taskParamMap['mergeOutput'] == True:
                     umDatasetSpec = JediDatasetSpec()
                     umDatasetSpec.datasetName = 'panda.um.' + datasetSpec.datasetName
                     umDatasetSpec.jediTaskID = self.taskSpec.jediTaskID
                     umDatasetSpec.storageToken = 'TOMERGE'
                     umDatasetSpec.vo = datasetSpec.vo
                     umDatasetSpec.type = "tmpl_trn_" + datasetSpec.type
                     umDatasetSpec.nFiles = 0
                     umDatasetSpec.nFilesUsed = 0
                     umDatasetSpec.nFilesToBeUsed = 0
                     umDatasetSpec.nFilesFinished = 0
                     umDatasetSpec.nFilesFailed = 0
                     umDatasetSpec.nFilesOnHold = 0
                     umDatasetSpec.status = 'defined'
                     umDatasetSpec.streamName = datasetSpec.streamName
                     if datasetSpec.isAllowedNoOutput():
                         umDatasetSpec.allowNoOutput()
                     # ratio
                     if datasetSpec.getRatioToMaster() > 1:
                         umDatasetSpec.setDatasetAttribute('ratio={0}'.format(datasetSpec.getRatioToMaster()))
                     # make unmerged output template
                     if outFileTemplate != None:
                         umOutTemplateMap = {
                             'jediTaskID': self.taskSpec.jediTaskID,
                             'serialNr': 1,
                             'streamName': umDatasetSpec.streamName,
                             'outtype': datasetSpec.type,
                         }
                         # append temporary name
                         if taskParamMap.has_key('umNameAtEnd') and taskParamMap['umNameAtEnd'] == True:
                             # append temporary name at the end
                             umOutTemplateMap['filenameTemplate'] = outFileTemplate + '.panda.um'
                         else:
                             umOutTemplateMap['filenameTemplate'] = 'panda.um.' + outFileTemplate
                         if self.outputTemplateMap.has_key(umDatasetSpec.outputMapKey()):
                             # multiple files are associated with the same output dataset
                             self.outputTemplateMap[umDatasetSpec.outputMapKey()].append(umOutTemplateMap)
                             # don't insert the same output dataset
                             continue
                         self.outputTemplateMap[umDatasetSpec.outputMapKey()] = [umOutTemplateMap]
                     # use log as master for merging
                     if datasetSpec.type == 'log':
                         self.unmergeMasterDatasetSpec[datasetSpec.outputMapKey()] = umDatasetSpec
                     else:
                         # append
                         self.unmergeDatasetSpecMap[datasetSpec.outputMapKey()] = umDatasetSpec
     # set attributes for merging
     if taskParamMap.has_key('mergeOutput') and taskParamMap['mergeOutput'] == True:
         self.setSplitRule(None, 1,
                           JediTaskSpec.splitRuleToken['mergeOutput'])
     # make job parameters
     rndmSeedOffset = None
     firstEventOffset = None
     jobParameters = ''
     for tmpItem in taskParamMap['jobParameters']:
         if tmpItem.has_key('value'):
             # hidden parameter
             if tmpItem.has_key('hidden') and tmpItem['hidden'] == True:
                 continue
             # add tags for ES-only parameters
             esOnly = False
             if 'es_only' in tmpItem and tmpItem['es_only'] == True:
                 esOnly = True
             if esOnly:
                 jobParameters += '<PANDA_ES_ONLY>'
             jobParameters += '{0}'.format(tmpItem['value'])
             if esOnly:
                 jobParameters += '</PANDA_ES_ONLY>'
             # padding
             if tmpItem.has_key('padding') and tmpItem['padding'] == False:
                 pass
             else:
                 jobParameters += ' '
             # get offset for random seed and first event
             if tmpItem['type'] == 'template' and tmpItem['param_type'] == 'number':
                 if '${RNDMSEED}' in tmpItem['value']:
                     if tmpItem.has_key('offset'):
                         rndmSeedOffset = tmpItem['offset']
                     else:
                         rndmSeedOffset = 0
                 elif '${FIRSTEVENT}' in tmpItem['value']:
                     if tmpItem.has_key('offset'):
                         firstEventOffset = tmpItem['offset']
     jobParameters = jobParameters[:-1]
     # append parameters for event service merging if necessary
     esmergeParams = self.getParamsForEventServiceMerging(taskParamMap)
     if esmergeParams != None:
         jobParameters += esmergeParams
     self.setJobParamsTemplate(jobParameters)
     # set random seed offset
     if rndmSeedOffset != None:
         self.setSplitRule(None, rndmSeedOffset,
                           JediTaskSpec.splitRuleToken['randomSeed'])
     if firstEventOffset != None:
         self.setSplitRule(None, firstEventOffset,
                           JediTaskSpec.splitRuleToken['firstEvent'])
     # return
     return
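
Output and log stream names are generated per dataset type with a running counter, which is what yields OUTPUT0, OUTPUT1, ..., LOG0 in the job parameter template. The counting idiom in isolation:

nOutMap = {}
for dsType in ['output', 'output', 'log']:
    nOutMap.setdefault(dsType, 0)
    print("{0}{1}".format(dsType.upper(), nOutMap[dsType]))
    nOutMap[dsType] += 1
# prints OUTPUT0, OUTPUT1, LOG0
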
Code Example #5
task.userName = '******'
task.vo = 'atlas'
task.prodSourceLabel = 'managed'
task.taskPriority = 100
task.currentPriority = task.taskPriority
task.architecture = 'i686-slc5-gcc43-opt'
task.transUses = 'Atlas-17.2.7'
task.transHome = 'AtlasProduction-17.2.8.10'
task.transPath = 'Reco_trf.py'
task.workQueue_ID = 3

tbIF.insertTask_JEDI(task)

from pandajedi.jedicore.JediDatasetSpec import JediDatasetSpec

ds = JediDatasetSpec()
ds.jediTaskID = task.jediTaskID
if len(sys.argv) > 2:
    ds.datasetName = sys.argv[2]
else:
    ds.datasetName = 'data12_8TeV.00214651.physics_Egamma.merge.AOD.f489_m1261'
ds.type = 'input'
ds.vo = task.vo
ds.cloud = 'US'
ds.streamName = 'IN'
ds.status = 'defined'
ds.nFiles = 0
ds.nFilesUsed = 0
ds.nFilesFinished = 0
ds.nFilesFailed = 0
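
This snippet is a fragment of a test script: task, tbIF, and sys are defined earlier in the file. The omitted preamble would look roughly like the following sketch (the task buffer setup and attribute values are assumptions, not the script's actual code):

import sys
from pandajedi.jedicore.JediTaskSpec import JediTaskSpec

# tbIF: a JEDI task buffer interface instance, created earlier in the script
task = JediTaskSpec()
task.taskName = 'test.task.1'  # illustrative value
task.status = 'defined'
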
Code Example #6
File: FileRecovery.py Project: PanDAWMS/panda-jedi
 def doRefine(self,jediTaskID,taskParamMap):
     try:
         # make logger
         tmpLog = self.tmpLog
         tmpLog.debug('start jediTaskID={0}'.format(jediTaskID))
         # old dataset name
         oldDatasetName = taskParamMap['oldDatasetName']
         # accompany datasets
         if taskParamMap.has_key('oldAccompanyDatasetNames'):
             oldAccDatasetNames = taskParamMap['oldAccompanyDatasetNames']
         else:
             oldAccDatasetNames = None
         # use first file to get task and dataset info
         lostFileName = taskParamMap['lostFiles'][0]
         # get old jediTaskID and datasetIDs
         tmpStat,oldIDs = self.taskBufferIF.getIDsWithFileDataset_JEDI(oldDatasetName,lostFileName,'output')
         if tmpStat != True or oldIDs == None:
             tmpLog.error('failed to get jediTaskID and DatasetID for {0}:{1}'.format(oldDatasetName,
                                                                                      lostFileName))
             return self.SC_FAILED
         # get task
         oldJediTaskID = oldIDs['jediTaskID']
         oldDatasetID  = oldIDs['datasetID']
         tmpStat,oldTaskSpec = self.taskBufferIF.getTaskWithID_JEDI(oldJediTaskID,True)
         if tmpStat != True:
             tmpLog.error('failed to get TaskSpec for old jediTaskId={0}'.format(oldJediTaskID))
             return self.SC_FAILED
         # make task spec
         taskSpec = JediTaskSpec()
         taskSpec.copyAttributes(oldTaskSpec)
         # reset attributes
         taskSpec.jediTaskID   = jediTaskID
         taskSpec.taskType     = taskParamMap['taskType']
         taskSpec.taskPriority = taskParamMap['taskPriority']
         self.taskSpec = taskSpec
         # get datasets
         tmpStat,datasetSpecList = self.taskBufferIF.getDatasetsWithJediTaskID_JEDI(oldJediTaskID)
         if tmpStat != True:
             tmpLog.error('failed to get datasetSpecs')
             return self.SC_FAILED
         # loop over all datasets
         provenanceID = None
         dummyStreams = []
         outDatasetSpec = None
         datasetNameSpecMap = {} 
         for datasetSpec in datasetSpecList:
             # for output datasets
             if not datasetSpec.type in JediDatasetSpec.getInputTypes():
                 # collect output with the same provenanceID
                 if provenanceID != None and datasetSpec.provenanceID != provenanceID:
                     continue
                 # set provenanceID if undefined
                 if provenanceID == None and datasetSpec.provenanceID != None:
                     provenanceID = datasetSpec.provenanceID
                 # collect dummy streams
                 if datasetSpec.type != 'log' and (datasetSpec.datasetID != oldDatasetID and \
                                                       not self.checkDatasetNameMatching(datasetSpec.datasetName,oldAccDatasetNames)):
                     if not datasetSpec.streamName in dummyStreams:
                         dummyStreams.append(datasetSpec.streamName)
                     continue
             # reset attributes
             datasetSpec.status = 'defined'
             datasetSpec.datasetID  = None
             datasetSpec.jediTaskID = jediTaskID
             datasetSpec.nFiles = 0
             datasetSpec.nFilesUsed = 0
             datasetSpec.nFilesToBeUsed = 0
             datasetSpec.nFilesFinished = 0
             datasetSpec.nFilesFailed   = 0
             datasetSpec.nFilesOnHold   = 0
             # remove nosplit and repeat since a separate entry is made for each boundaryID, even for the same file
             datasetSpec.remNoSplit()
             datasetSpec.remRepeat()
             # append to map
             datasetNameSpecMap[datasetSpec.datasetName] = datasetSpec
             # set master and secondary for input
             if datasetSpec.type in JediDatasetSpec.getInputTypes():
                 if datasetSpec.isMaster():
                     # master
                     self.inMasterDatasetSpec = datasetSpec
                 else:
                     # secondary
                     self.inSecDatasetSpecList.append(datasetSpec)
             elif datasetSpec.type == 'log':
                 # set new attributes
                 tmpItem = taskParamMap['log']
                 datasetSpec.datasetName = tmpItem['dataset']
                 if tmpItem.has_key('container'):
                     datasetSpec.containerName = tmpItem['container']
                 if tmpItem.has_key('token'):
                     datasetSpec.storageToken = tmpItem['token']
                 if tmpItem.has_key('destination'):
                     datasetSpec.destination = tmpItem['destination']
                 # extract output filename template and change the value field
                 outFileTemplate,tmpItem['value'] = RefinerUtils.extractReplaceOutFileTemplate(tmpItem['value'],
                                                                                               datasetSpec.streamName)
                 # make output template
                 if outFileTemplate != None:
                     if tmpItem.has_key('offset'):
                         offsetVal = 1 + tmpItem['offset']
                     else:
                         offsetVal = 1
                     outTemplateMap = {'jediTaskID' : self.taskSpec.jediTaskID,
                                       'serialNr' : offsetVal,
                                       'streamName' : datasetSpec.streamName,
                                       'filenameTemplate' : outFileTemplate,
                                       'outtype' : datasetSpec.type,
                                       }
                     self.outputTemplateMap[datasetSpec.outputMapKey()] = [outTemplateMap]
                 # append
                 self.outDatasetSpecList.append(datasetSpec)
             else:
                 # output dataset to make copies later
                 outDatasetSpec = datasetSpec
         # replace redundant output streams with dummy files
         for dummyStream in dummyStreams:
             self.taskSpec.jobParamsTemplate = self.taskSpec.jobParamsTemplate.replace('${'+dummyStream+'}',
                                                                                       dummyStream.lower()+'.tmp')
         self.setJobParamsTemplate(self.taskSpec.jobParamsTemplate)    
         # loop over all lost files
         datasetIDSpecMap = {}
         for lostFileName in taskParamMap['lostFiles']:
             # get FileID
             tmpStat,tmpIDs = self.taskBufferIF.getIDsWithFileDataset_JEDI(oldDatasetName,lostFileName,'output')
             if tmpStat != True or tmpIDs == None:
                 tmpLog.error('failed to get FileID for {0}:{1}'.format(oldDatasetName,
                                                                        lostFileName))
                 return self.SC_FAILED
             # get PandaID
             tmpStat,pandaID = self.taskBufferIF.getPandaIDWithFileID_JEDI(tmpIDs['jediTaskID'],
                                                                           tmpIDs['datasetID'],
                                                                           tmpIDs['fileID'])
             if tmpStat != True or pandaID == None:
                 tmpLog.error('failed to get PandaID for {0}'.format(str(tmpIDs)))
                 return self.SC_FAILED
             # get files
             tmpStat,fileSpecList = self.taskBufferIF.getFilesWithPandaID_JEDI(pandaID)
             if tmpStat != True or fileSpecList == []:
                 tmpLog.error('failed to get files for PandaID={0}'.format(pandaID))
                 return self.SC_FAILED
             # append
             for fileSpec in fileSpecList:
                 # only input types
                 if not fileSpec.type in JediDatasetSpec.getInputTypes():
                     continue
                 # get original datasetSpec
                 if not datasetIDSpecMap.has_key(fileSpec.datasetID):
                     tmpStat,tmpDatasetSpec = self.taskBufferIF.getDatasetWithID_JEDI(fileSpec.jediTaskID,fileSpec.datasetID)
                     if tmpStat != True or tmpDatasetSpec == None:
                         tmpLog.error('failed to get dataset for jediTaskID={0} datasetID={1}'.format(fileSpec.jediTaskID,
                                                                                                      fileSpec.datasetID))
                         return self.SC_FAILED
                     datasetIDSpecMap[fileSpec.datasetID] = tmpDatasetSpec
                 origDatasetSpec = datasetIDSpecMap[fileSpec.datasetID]
                 if not datasetNameSpecMap.has_key(origDatasetSpec.datasetName):
                     tmpLog.error('datasetName={0} is missing in new datasets'.format(origDatasetSpec.datasetName))
                     return self.SC_FAILED
                 # not target or accompany datasets
                 if origDatasetSpec.datasetID != oldDatasetID and \
                         not self.checkDatasetNameMatching(origDatasetSpec.datasetName,oldAccDatasetNames):
                     continue
                 newDatasetSpec = datasetNameSpecMap[origDatasetSpec.datasetName]
                 # set new attributes
                 fileSpec.fileID = None
                 fileSpec.datasetID = None
                 fileSpec.jediTaskID = None
                 fileSpec.boundaryID = pandaID
                 fileSpec.keepTrack = 1
                 fileSpec.attemptNr = 1
                 fileSpec.status = 'ready'
                 # append
                 newDatasetSpec.addFile(fileSpec)
             # make one output dataset per file
             datasetSpec = copy.copy(outDatasetSpec)
             # set new attributes
             tmpItem = taskParamMap['output']
             datasetSpec.datasetName = tmpItem['dataset']
             if tmpItem.has_key('container'):
                 datasetSpec.containerName = tmpItem['container']
             if tmpItem.has_key('token'):
                 datasetSpec.storageToken = tmpItem['token']
             if tmpItem.has_key('destination'):
                 datasetSpec.destination = tmpItem['destination']
             # use PandaID of original job as provenanceID
             datasetSpec.provenanceID = pandaID
             # append                                                                                          
             self.outDatasetSpecList.append(datasetSpec)
             # extract attempt number from original filename
             tmpMatch = re.search('\.(\d+)$',lostFileName)
             if tmpMatch == None:
                 offsetVal = 1
             else:
                 offsetVal = 1 + int(tmpMatch.group(1))
             # filename without attempt number
             baseFileName = re.sub('\.(\d+)$','',lostFileName)    
             # make output template
             outTemplateMap = {'jediTaskID' : self.taskSpec.jediTaskID,
                               'serialNr' : offsetVal,
                               'streamName' : datasetSpec.streamName,
                               'filenameTemplate' : baseFileName + '.${SN:d}',
                               'outtype' : datasetSpec.type,
                               }
             self.outputTemplateMap[datasetSpec.outputMapKey()] = [outTemplateMap]
         # append datasets to task parameters    
         for datasetSpec in datasetNameSpecMap.values():
             if datasetSpec.Files == []:
                 continue
             fileList = []
             for fileSpec in datasetSpec.Files:
                 fileList.append({'lfn':fileSpec.lfn,
                                  'firstEvent':fileSpec.firstEvent,
                                  'startEvent':fileSpec.startEvent,
                                  'endEvent':fileSpec.endEvent,
                                  'keepTrack':fileSpec.keepTrack,
                                  'boundaryID':fileSpec.boundaryID,
                                  })
             taskParamMap = RefinerUtils.appendDataset(taskParamMap,datasetSpec,fileList)
             self.updatedTaskParams = taskParamMap
         # grouping with boundaryID
         self.setSplitRule(None,4,JediTaskSpec.splitRuleToken['groupBoundaryID'])
     except:
         errtype,errvalue = sys.exc_info()[:2]
         tmpLog.error('doRefine failed with {0}:{1}'.format(errtype.__name__,errvalue))
         return self.SC_FAILED
     tmpLog.debug('done')
     return self.SC_SUCCEEDED
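
The serial-number offset for regenerated files is derived from the attempt suffix of the lost file, so new outputs continue the numbering instead of colliding with earlier attempts. The extraction in isolation:

import re

lostFileName = 'EVNT.01234._000123.pool.root.2'
tmpMatch = re.search(r'\.(\d+)$', lostFileName)
offsetVal = 1 if tmpMatch is None else 1 + int(tmpMatch.group(1))
baseFileName = re.sub(r'\.(\d+)$', '', lostFileName)
print(offsetVal, baseFileName + '.${SN:d}')
# -> 3 EVNT.01234._000123.pool.root.${SN:d}
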
Code Example #7
File: FileRecovery.py Project: PanDAWMS/panda-jedi
 def doRefine(self, jediTaskID, taskParamMap):
     try:
         # make logger
         tmpLog = self.tmpLog
         tmpLog.debug('start jediTaskID={0}'.format(jediTaskID))
         # old dataset name
         oldDatasetName = taskParamMap['oldDatasetName']
         # accompany datasets
         if 'oldAccompanyDatasetNames' in taskParamMap:
             oldAccDatasetNames = taskParamMap['oldAccompanyDatasetNames']
         else:
             oldAccDatasetNames = None
         # use first file to get task and dataset info
         lostFileName = taskParamMap['lostFiles'][0]
         # get old jediTaskID and datasetIDs
         tmpStat, oldIDs = self.taskBufferIF.getIDsWithFileDataset_JEDI(
             oldDatasetName, lostFileName, 'output')
         if tmpStat is not True or oldIDs is None:
             tmpLog.error('failed to get jediTaskID and DatasetID for {0}:{1}'.format(
                 oldDatasetName, lostFileName))
             return self.SC_FAILED
         # get task
         oldJediTaskID = oldIDs['jediTaskID']
         oldDatasetID = oldIDs['datasetID']
         tmpStat, oldTaskSpec = self.taskBufferIF.getTaskWithID_JEDI(
             oldJediTaskID, True)
         if tmpStat is not True:
             tmpLog.error('failed to get TaskSpec for old jediTaskId={0}'.format(oldJediTaskID))
             return self.SC_FAILED
         # make task spec
         taskSpec = JediTaskSpec()
         taskSpec.copyAttributes(oldTaskSpec)
         # reset attributes
         taskSpec.jediTaskID = jediTaskID
         taskSpec.taskType = taskParamMap['taskType']
         taskSpec.taskPriority = taskParamMap['taskPriority']
         self.taskSpec = taskSpec
         # get datasets
         tmpStat, datasetSpecList = self.taskBufferIF.getDatasetsWithJediTaskID_JEDI(
             oldJediTaskID)
         if tmpStat is not True:
             tmpLog.error('failed to get datasetSpecs')
             return self.SC_FAILED
         # loop over all datasets
         provenanceID = None
         dummyStreams = []
         outDatasetSpec = None
         datasetNameSpecMap = {}
         for datasetSpec in datasetSpecList:
             # for output datasets
             if datasetSpec.type not in JediDatasetSpec.getInputTypes():
                 # collect output with the same provenanceID
                 if provenanceID is not None and datasetSpec.provenanceID != provenanceID:
                     continue
                 # set provenanceID if undefined
                 if provenanceID is None and datasetSpec.provenanceID is not None:
                     provenanceID = datasetSpec.provenanceID
                 # collect dummy streams
                 if datasetSpec.type != 'log' and (datasetSpec.datasetID != oldDatasetID and \
                                                       not self.checkDatasetNameMatching(datasetSpec.datasetName,oldAccDatasetNames)):
                     if datasetSpec.streamName not in dummyStreams:
                         dummyStreams.append(datasetSpec.streamName)
                     continue
             # reset attributes
             datasetSpec.status = 'defined'
             datasetSpec.datasetID = None
             datasetSpec.jediTaskID = jediTaskID
             datasetSpec.nFiles = 0
             datasetSpec.nFilesUsed = 0
             datasetSpec.nFilesToBeUsed = 0
             datasetSpec.nFilesFinished = 0
             datasetSpec.nFilesFailed = 0
             datasetSpec.nFilesOnHold = 0
             # remove nosplit and repeat since a separate entry is made for each boundaryID, even for the same file
             datasetSpec.remNoSplit()
             datasetSpec.remRepeat()
             # append to map
             datasetNameSpecMap[datasetSpec.datasetName] = datasetSpec
             # set master and secondary for input
             if datasetSpec.type in JediDatasetSpec.getInputTypes():
                 if datasetSpec.isMaster():
                     # master
                     self.inMasterDatasetSpec = datasetSpec
                 else:
                     # secondary
                     self.inSecDatasetSpecList.append(datasetSpec)
             elif datasetSpec.type == 'log':
                 # set new attributes
                 tmpItem = taskParamMap['log']
                 datasetSpec.datasetName = tmpItem['dataset']
                 if 'container' in tmpItem:
                     datasetSpec.containerName = tmpItem['container']
                 if 'token' in tmpItem:
                     datasetSpec.storageToken = tmpItem['token']
                 if 'destination' in tmpItem:
                     datasetSpec.destination = tmpItem['destination']
                 # extract output filename template and change the value field
                 outFileTemplate, tmpItem['value'] = RefinerUtils.extractReplaceOutFileTemplate(
                     tmpItem['value'], datasetSpec.streamName)
                 # make output template
                 if outFileTemplate is not None:
                     if 'offset' in tmpItem:
                         offsetVal = 1 + tmpItem['offset']
                     else:
                         offsetVal = 1
                     outTemplateMap = {
                         'jediTaskID': self.taskSpec.jediTaskID,
                         'serialNr': offsetVal,
                         'streamName': datasetSpec.streamName,
                         'filenameTemplate': outFileTemplate,
                         'outtype': datasetSpec.type,
                     }
                     self.outputTemplateMap[datasetSpec.outputMapKey()] = [outTemplateMap]
                 # append
                 self.outDatasetSpecList.append(datasetSpec)
             else:
                 # output dataset to make copies later
                 outDatasetSpec = datasetSpec
         # replace redundant output streams with dummy files
         for dummyStream in dummyStreams:
             self.taskSpec.jobParamsTemplate = self.taskSpec.jobParamsTemplate.replace(
                 '${' + dummyStream + '}',
                 dummyStream.lower() + '.tmp')
         self.setJobParamsTemplate(self.taskSpec.jobParamsTemplate)
         # loop over all lost files
         datasetIDSpecMap = {}
         for lostFileName in taskParamMap['lostFiles']:
             # get FileID
             tmpStat, tmpIDs = self.taskBufferIF.getIDsWithFileDataset_JEDI(
                 oldDatasetName, lostFileName, 'output')
             if tmpStat is not True or tmpIDs is None:
                 tmpLog.error('failed to get FileID for {0}:{1}'.format(
                     oldDatasetName, lostFileName))
                 return self.SC_FAILED
             # get PandaID
             tmpStat, pandaID = self.taskBufferIF.getPandaIDWithFileID_JEDI(
                 tmpIDs['jediTaskID'], tmpIDs['datasetID'],
                 tmpIDs['fileID'])
             if tmpStat is not True or pandaID is None:
                 tmpLog.error('failed to get PandaID for {0}'.format(
                     str(tmpIDs)))
                 return self.SC_FAILED
             # get files
             tmpStat, fileSpecList = self.taskBufferIF.getFilesWithPandaID_JEDI(
                 pandaID)
             if tmpStat is not True or fileSpecList == []:
                 tmpLog.error(
                     'failed to get files for PandaID={0}'.format(pandaID))
                 return self.SC_FAILED
             # append
             for fileSpec in fileSpecList:
                 # only input types
                 if fileSpec.type not in JediDatasetSpec.getInputTypes():
                     continue
                 # get original datasetSpec
                 if fileSpec.datasetID not in datasetIDSpecMap:
                     tmpStat, tmpDatasetSpec = self.taskBufferIF.getDatasetWithID_JEDI(
                         fileSpec.jediTaskID, fileSpec.datasetID)
                     if tmpStat is not True or tmpDatasetSpec is None:
                         tmpLog.error(
                             'failed to get dataset for jediTaskID={0} datasetID={1}'
                             .format(fileSpec.jediTaskID,
                                     fileSpec.datasetID))
                         return self.SC_FAILED
                     datasetIDSpecMap[fileSpec.datasetID] = tmpDatasetSpec
                 origDatasetSpec = datasetIDSpecMap[fileSpec.datasetID]
                 if origDatasetSpec.datasetName not in datasetNameSpecMap:
                     tmpLog.error('datasetName={0} is missing in new datasets'.format(
                         origDatasetSpec.datasetName))
                     return self.SC_FAILED
                 # not target or accompany datasets
                 if origDatasetSpec.datasetID != oldDatasetID and \
                         not self.checkDatasetNameMatching(origDatasetSpec.datasetName,oldAccDatasetNames):
                     continue
                 newDatasetSpec = datasetNameSpecMap[origDatasetSpec.datasetName]
                 # set new attributes
                 fileSpec.fileID = None
                 fileSpec.datasetID = None
                 fileSpec.jediTaskID = None
                 fileSpec.boundaryID = pandaID
                 fileSpec.keepTrack = 1
                 fileSpec.attemptNr = 1
                 fileSpec.status = 'ready'
                 # append
                 newDatasetSpec.addFile(fileSpec)
             # make one output dataset per file
             datasetSpec = copy.copy(outDatasetSpec)
             # set new attributes
             tmpItem = taskParamMap['output']
             datasetSpec.datasetName = tmpItem['dataset']
             if 'container' in tmpItem:
                 datasetSpec.containerName = tmpItem['container']
             if 'token' in tmpItem:
                 datasetSpec.storageToken = tmpItem['token']
             if 'destination' in tmpItem:
                 datasetSpec.destination = tmpItem['destination']
             # use PandaID of original job as provenanceID
             datasetSpec.provenanceID = pandaID
             # append
             self.outDatasetSpecList.append(datasetSpec)
             # extract attempt number from original filename
             tmpMatch = re.search('\.(\d+)$', lostFileName)
             if tmpMatch is None:
                 offsetVal = 1
             else:
                 offsetVal = 1 + int(tmpMatch.group(1))
             # filename without attempt number
             baseFileName = re.sub('\.(\d+)$', '', lostFileName)
             # make output template
             outTemplateMap = {
                 'jediTaskID': self.taskSpec.jediTaskID,
                 'serialNr': offsetVal,
                 'streamName': datasetSpec.streamName,
                 'filenameTemplate': baseFileName + '.${SN:d}',
                 'outtype': datasetSpec.type,
             }
             self.outputTemplateMap[datasetSpec.outputMapKey()] = [outTemplateMap]
         # append datasets to task parameters
         for datasetSpec in datasetNameSpecMap.values():
             if datasetSpec.Files == []:
                 continue
             fileList = []
             for fileSpec in datasetSpec.Files:
                 fileList.append({
                     'lfn': fileSpec.lfn,
                     'firstEvent': fileSpec.firstEvent,
                     'startEvent': fileSpec.startEvent,
                     'endEvent': fileSpec.endEvent,
                     'keepTrack': fileSpec.keepTrack,
                     'boundaryID': fileSpec.boundaryID,
                 })
             taskParamMap = RefinerUtils.appendDataset(
                 taskParamMap, datasetSpec, fileList)
             self.updatedTaskParams = taskParamMap
         # grouping with boundaryID
         self.setSplitRule(None, 4,
                           JediTaskSpec.splitRuleToken['groupBoundaryID'])
     except Exception:
         errtype, errvalue = sys.exc_info()[:2]
         tmpLog.error('doRefine failed with {0}:{1}'.format(
             errtype.__name__, errvalue))
         return self.SC_FAILED
     tmpLog.debug('done')
     return self.SC_SUCCEEDED
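
One output dataset spec is cloned per lost file with copy.copy, i.e. a shallow copy: reassigning attributes on the clone (datasetName, provenanceID, ...) leaves the template spec untouched, which is what the loop relies on. Illustrated on a plain object:

import copy

class Spec(object):
    pass

template = Spec()
template.datasetName = 'template.name'
clone = copy.copy(template)      # shallow copy of the instance
clone.datasetName = 'per.file.name'
print(template.datasetName)      # still 'template.name'
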
Code Example #8
File: addTestTask.py Project: PanDAWMS/panda-jedi
task.status = 'defined'
task.userName = '******'
task.vo = 'atlas'
task.prodSourceLabel = 'managed'
task.taskPriority = 100
task.currentPriority = task.taskPriority
task.architecture = 'i686-slc5-gcc43-opt'
task.transUses = 'Atlas-17.2.7'
task.transHome = 'AtlasProduction-17.2.8.10'
task.transPath = 'Reco_trf.py'
task.workQueue_ID = 3

tbIF.insertTask_JEDI(task) 

from pandajedi.jedicore.JediDatasetSpec import JediDatasetSpec
ds = JediDatasetSpec()
ds.jediTaskID = task.jediTaskID
if len(sys.argv) > 2:
    ds.datasetName = sys.argv[2]
else:
    ds.datasetName = 'data12_8TeV.00214651.physics_Egamma.merge.AOD.f489_m1261'    
ds.type = 'input'
ds.vo = task.vo
ds.cloud = 'US'
ds.streamName = 'IN'
ds.status = 'defined'
ds.nFiles = 0
ds.nFilesUsed = 0
ds.nFilesFinished = 0
ds.nFilesFailed = 0
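
Like Code Example #5, this is a fragment: task, tbIF, and sys come from earlier in addTestTask.py, and the dataset spec built here still has to be persisted. A plausible continuation (that the script calls insertDataset_JEDI at this point is an assumption; the excerpt ends before any such call):

# assumed continuation: persist the dataset spec through the task buffer
tbIF.insertDataset_JEDI(ds)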