def doPreProRefine(self,taskParamMap):
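     # tri-state return: (None, map) when no preprocessing is required or
     # it has already run, (False, map) on failure, (True, map) once the
     # task has been set up for preprocessing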
     # no preprocessing
     if not taskParamMap.has_key('preproSpec'):
         return None,taskParamMap
     # already preprocessed
     if self.taskSpec.checkPreProcessed():
         # get replaced task params
         tmpStat,tmpJsonStr = self.taskBufferIF.getPreprocessMetadata_JEDI(self.taskSpec.jediTaskID)
         try:
             # replace placeholders 
             replaceParams = RefinerUtils.decodeJSON(tmpJsonStr)
             self.tmpLog.debug("replace placeholders with "+str(replaceParams))
             for tmpKey,tmpVal in replaceParams.iteritems():
                 self.replacePlaceHolders(taskParamMap,tmpKey,tmpVal)
         except:
             errtype,errvalue = sys.exc_info()[:2]
             self.tmpLog.error('{0} failed to get additional task params with {1}:{2}'.format(self.__class__.__name__,
                                                                                              errtype.__name__,errvalue))
             return False,taskParamMap
         # succeeded
         self.updatedTaskParams = taskParamMap
         return None,taskParamMap
     # make dummy dataset to keep track of preprocessing
     datasetSpec = JediDatasetSpec()
     datasetSpec.datasetName = 'panda.pp.in.{0}.{1}'.format(uuid.uuid4(),self.taskSpec.jediTaskID)
     datasetSpec.jediTaskID = self.taskSpec.jediTaskID
     datasetSpec.type = 'pp_input'
     datasetSpec.vo = self.taskSpec.vo
     datasetSpec.nFiles = 1
     datasetSpec.nFilesUsed = 0
     datasetSpec.nFilesToBeUsed = 1
     datasetSpec.nFilesFinished = 0
     datasetSpec.nFilesFailed = 0
     datasetSpec.nFilesOnHold = 0
     datasetSpec.status = 'ready'
     self.inMasterDatasetSpec.append(datasetSpec)
     # make file 
     fileSpec = JediFileSpec()
     fileSpec.jediTaskID   = datasetSpec.jediTaskID
     fileSpec.type         = datasetSpec.type
     fileSpec.status       = 'ready'            
     fileSpec.lfn          = 'pseudo_lfn'
     fileSpec.attemptNr    = 0
     fileSpec.maxAttempt   = 3
     fileSpec.keepTrack    = 1
     datasetSpec.addFile(fileSpec)
     # make log dataset
     logDatasetSpec = JediDatasetSpec()
     logDatasetSpec.datasetName = 'panda.pp.log.{0}.{1}'.format(uuid.uuid4(),self.taskSpec.jediTaskID)
     logDatasetSpec.jediTaskID = self.taskSpec.jediTaskID
     logDatasetSpec.type = 'tmpl_pp_log'
     logDatasetSpec.streamName = 'PP_LOG'
     logDatasetSpec.vo = self.taskSpec.vo
     logDatasetSpec.nFiles = 0
     logDatasetSpec.nFilesUsed = 0
     logDatasetSpec.nFilesToBeUsed = 0
     logDatasetSpec.nFilesFinished = 0
     logDatasetSpec.nFilesFailed = 0
     logDatasetSpec.nFilesOnHold = 0
     logDatasetSpec.status = 'defined'
     self.outDatasetSpecList.append(logDatasetSpec)
     # make output template for log
     outTemplateMap = {'jediTaskID' : self.taskSpec.jediTaskID,
                       'serialNr' : 1,
                       'streamName' : logDatasetSpec.streamName,
                       'filenameTemplate' : "{0}._${{SN}}.log.tgz".format(logDatasetSpec.datasetName),
                       'outtype' : re.sub('^tmpl_','',logDatasetSpec.type),
                       }
     self.outputTemplateMap[logDatasetSpec.outputMapKey()] = [outTemplateMap]
     # set split rule to use preprocessing
     self.taskSpec.setPrePro()
     # set task status
     self.taskSpec.status = 'topreprocess'
     # return
     return True,taskParamMap
def doBasicRefine(self,taskParamMap):
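     # build input/output/log dataset specs and the job parameter
     # template from the task parameter map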
     # get input/output/log dataset specs
     nIn  = 0
     nOutMap = {}
     if isinstance(taskParamMap['log'],dict):
         itemList = taskParamMap['jobParameters'] + [taskParamMap['log']]
     else:
         itemList = taskParamMap['jobParameters'] + taskParamMap['log']
     # pseudo input
     if taskParamMap.has_key('noInput') and taskParamMap['noInput'] == True:
         tmpItem = {}
         tmpItem['type']       = 'template'
         tmpItem['value']      = ''
         tmpItem['dataset']    = 'pseudo_dataset'
         tmpItem['param_type'] = 'pseudo_input'
         itemList = [tmpItem] + itemList
     # random seed
     if RefinerUtils.useRandomSeed(taskParamMap):
         tmpItem = {}
         tmpItem['type']       = 'template'
         tmpItem['value']      = ''
         tmpItem['dataset']    = 'RNDMSEED'
         tmpItem['param_type'] = 'random_seed'
         itemList.append(tmpItem)
     # loop over all items
     allDsList = []   
     for tmpItem in itemList:
         # look for datasets
         if tmpItem['type'] == 'template' and tmpItem.has_key('dataset'):
             # avoid duplication
             if not tmpItem['dataset'] in allDsList:
                 allDsList.append(tmpItem['dataset'])
             else:
                 continue
             datasetSpec = JediDatasetSpec()
             datasetSpec.datasetName = tmpItem['dataset']
             datasetSpec.jediTaskID = self.taskSpec.jediTaskID
             datasetSpec.type = tmpItem['param_type']
             if tmpItem.has_key('container'):
                 datasetSpec.containerName = tmpItem['container']
             if tmpItem.has_key('token'):
                 datasetSpec.storageToken = tmpItem['token']
             if tmpItem.has_key('destination'):
                 datasetSpec.destination = tmpItem['destination']
             if tmpItem.has_key('attributes'):
                 datasetSpec.setDatasetAttribute(tmpItem['attributes'])
              if tmpItem.has_key('ratio'):
                  datasetSpec.setDatasetAttribute('ratio={0}'.format(tmpItem['ratio']))
              if tmpItem.has_key('eventRatio'):
                  datasetSpec.setEventRatio(tmpItem['eventRatio'])
             if tmpItem.has_key('check'):
                 datasetSpec.setDatasetAttribute('cc')
             if tmpItem.has_key('usedup'):
                 datasetSpec.setDatasetAttribute('ud')
             if tmpItem.has_key('random'):
                 datasetSpec.setDatasetAttribute('rd')
             if tmpItem.has_key('reusable'):
                 datasetSpec.setDatasetAttribute('ru')
             if tmpItem.has_key('offset'):
                 datasetSpec.setOffset(tmpItem['offset'])
             if tmpItem.has_key('allowNoOutput'):
                 datasetSpec.allowNoOutput()
             if tmpItem.has_key('nFilesPerJob'):
                 datasetSpec.setNumFilesPerJob(tmpItem['nFilesPerJob'])
             if tmpItem.has_key('num_records'):
                 datasetSpec.setNumRecords(tmpItem['num_records'])
             if 'transient' in tmpItem:
                 datasetSpec.setTransient(tmpItem['transient'])
             datasetSpec.vo = self.taskSpec.vo
             datasetSpec.nFiles = 0
             datasetSpec.nFilesUsed = 0
             datasetSpec.nFilesFinished = 0
             datasetSpec.nFilesFailed = 0
             datasetSpec.nFilesOnHold = 0
             datasetSpec.nEvents = 0
             datasetSpec.nEventsUsed = 0
             datasetSpec.nEventsToBeUsed = 0
             datasetSpec.status = 'defined'
             if datasetSpec.type in JediDatasetSpec.getInputTypes() + ['random_seed']:
                 datasetSpec.streamName = RefinerUtils.extractStreamName(tmpItem['value'])
                 if not tmpItem.has_key('expandedList'):
                     tmpItem['expandedList'] = []
                 # dataset names could be comma-concatenated
                 datasetNameList = datasetSpec.datasetName.split(',')
                 # datasets could be added by incexec
                 incexecDS = 'dsFor{0}'.format(datasetSpec.streamName)
                 # remove /XYZ
                 incexecDS = incexecDS.split('/')[0]
                 if taskParamMap.has_key(incexecDS):
                     for tmpDatasetName in taskParamMap[incexecDS].split(','):
                         if not tmpDatasetName in datasetNameList:
                             datasetNameList.append(tmpDatasetName)
                 # loop over all dataset names
                 inDatasetSpecList = []
                 for datasetName in datasetNameList:
                     # skip empty
                     if datasetName == '':
                         continue
                     # expand
                     if datasetSpec.isPseudo() or datasetSpec.type in ['random_seed'] or datasetName == 'DBR_LATEST':
                         # pseudo input
                         tmpDatasetNameList = [datasetName]
                     elif tmpItem.has_key('expand') and tmpItem['expand'] == True:
                         # expand dataset container
                         tmpDatasetNameList = self.ddmIF.getInterface(self.taskSpec.vo).expandContainer(datasetName)
                     else:
                         # normal dataset name
                         tmpDatasetNameList = self.ddmIF.getInterface(self.taskSpec.vo).listDatasets(datasetName)
                     for elementDatasetName in tmpDatasetNameList:
                         if nIn > 0 or not elementDatasetName in tmpItem['expandedList']:
                             tmpItem['expandedList'].append(elementDatasetName)
                             inDatasetSpec = copy.copy(datasetSpec)
                             inDatasetSpec.datasetName = elementDatasetName
                             inDatasetSpec.containerName = datasetName
                             inDatasetSpecList.append(inDatasetSpec)
                 # empty input
                 if inDatasetSpecList == [] and self.oldTaskStatus != 'rerefine':
                     errStr = 'doBasicRefine : unknown input dataset "{0}"'.format(datasetSpec.datasetName)
                     self.taskSpec.setErrDiag(errStr)
                     if not datasetSpec.datasetName in self.unknownDatasetList:
                         self.unknownDatasetList.append(datasetSpec.datasetName)
                     raise JediException.UnknownDatasetError,errStr
                 # set master flag
                 for inDatasetSpec in inDatasetSpecList:    
                     if nIn == 0:
                         # master
                         self.inMasterDatasetSpec.append(inDatasetSpec)
                     else:
                         # secondary
                         self.inSecDatasetSpecList.append(inDatasetSpec)
                 nIn += 1    
                 continue
             if datasetSpec.type in ['output','log']:
                 if not nOutMap.has_key(datasetSpec.type):
                     nOutMap[datasetSpec.type] = 0
                 # make stream name
                 datasetSpec.streamName = "{0}{1}".format(datasetSpec.type.upper(),nOutMap[datasetSpec.type])
                 nOutMap[datasetSpec.type] += 1
                 # set attribute for event service
                 if self.taskSpec.useEventService() and taskParamMap.has_key('objectStore') and datasetSpec.type in ['output']:
                     datasetSpec.setObjectStore(taskParamMap['objectStore'])
                 # extract output filename template and change the value field
                 outFileTemplate,tmpItem['value'] = RefinerUtils.extractReplaceOutFileTemplate(tmpItem['value'],
                                                                                               datasetSpec.streamName)
                 # make output template
                 if outFileTemplate != None:
                     if tmpItem.has_key('offset'):
                         offsetVal = 1 + tmpItem['offset']
                     else:
                         offsetVal = 1
                     outTemplateMap = {'jediTaskID' : self.taskSpec.jediTaskID,
                                       'serialNr' : offsetVal,
                                       'streamName' : datasetSpec.streamName,
                                       'filenameTemplate' : outFileTemplate,
                                       'outtype' : datasetSpec.type,
                                       }
                     if self.outputTemplateMap.has_key(datasetSpec.outputMapKey()):
                         # multiple files are associated to the same output datasets
                         self.outputTemplateMap[datasetSpec.outputMapKey()].append(outTemplateMap)
                         # don't insert the same output dataset
                         continue
                     self.outputTemplateMap[datasetSpec.outputMapKey()] = [outTemplateMap]
                 # append
                 self.outDatasetSpecList.append(datasetSpec)
                 # make unmerged dataset
                 if taskParamMap.has_key('mergeOutput') and taskParamMap['mergeOutput'] == True:
                     umDatasetSpec = JediDatasetSpec()
                     umDatasetSpec.datasetName = 'panda.um.' + datasetSpec.datasetName
                     umDatasetSpec.jediTaskID = self.taskSpec.jediTaskID
                     umDatasetSpec.storageToken = 'TOMERGE'
                     umDatasetSpec.vo = datasetSpec.vo
                     umDatasetSpec.type = "tmpl_trn_" + datasetSpec.type
                     umDatasetSpec.nFiles = 0
                     umDatasetSpec.nFilesUsed = 0
                     umDatasetSpec.nFilesToBeUsed = 0
                     umDatasetSpec.nFilesFinished = 0
                     umDatasetSpec.nFilesFailed = 0
                     umDatasetSpec.nFilesOnHold = 0
                     umDatasetSpec.status = 'defined'
                     umDatasetSpec.streamName = datasetSpec.streamName
                     if datasetSpec.isAllowedNoOutput():
                         umDatasetSpec.allowNoOutput()
                     # ratio
                     if datasetSpec.getRatioToMaster() > 1:
                         umDatasetSpec.setDatasetAttribute('ratio={0}'.format(datasetSpec.getRatioToMaster()))
                     # make unmerged output template 
                     if outFileTemplate != None:
                         umOutTemplateMap = {'jediTaskID' : self.taskSpec.jediTaskID,
                                             'serialNr' : 1,
                                             'streamName' : umDatasetSpec.streamName,
                                             'outtype' : datasetSpec.type,
                                             }
                         # append temporary name
                         if taskParamMap.has_key('umNameAtEnd') and taskParamMap['umNameAtEnd'] == True:
                             # append temporary name at the end
                             umOutTemplateMap['filenameTemplate'] = outFileTemplate + '.panda.um'
                         else:
                             umOutTemplateMap['filenameTemplate'] = 'panda.um.' + outFileTemplate
                         if self.outputTemplateMap.has_key(umDatasetSpec.outputMapKey()):
                             # multiple files are associated to the same output datasets
                             self.outputTemplateMap[umDatasetSpec.outputMapKey()].append(umOutTemplateMap)
                             # don't insert the same output dataset
                             continue
                         self.outputTemplateMap[umDatasetSpec.outputMapKey()] = [umOutTemplateMap]
                     # use log as master for merging
                     if datasetSpec.type == 'log':
                         self.unmergeMasterDatasetSpec[datasetSpec.outputMapKey()] = umDatasetSpec
                     else:
                         # append
                         self.unmergeDatasetSpecMap[datasetSpec.outputMapKey()] = umDatasetSpec
     # set attributes for merging
     if taskParamMap.has_key('mergeOutput') and taskParamMap['mergeOutput'] == True:
         self.setSplitRule(None,1,JediTaskSpec.splitRuleToken['mergeOutput'])
     # make job parameters
     rndmSeedOffset = None
     firstEventOffset = None
     jobParameters = ''
     for tmpItem in taskParamMap['jobParameters']:
         if tmpItem.has_key('value'):
             # hidden parameter
             if tmpItem.has_key('hidden') and tmpItem['hidden'] == True:
                 continue
             # add tags for ES-only parameters
             esOnly = False
             if 'es_only' in tmpItem and tmpItem['es_only'] == True:
                 esOnly = True
             if esOnly:
                 jobParameters += '<PANDA_ES_ONLY>'
             jobParameters += '{0}'.format(tmpItem['value'])
             if esOnly:
                 jobParameters += '</PANDA_ES_ONLY>'
             # padding
             if tmpItem.has_key('padding') and tmpItem['padding'] == False:
                 pass
             else:
                 jobParameters += ' '
             # get offset for random seed and first event
             if tmpItem['type'] == 'template' and tmpItem['param_type'] == 'number':
                 if '${RNDMSEED}' in tmpItem['value']:
                     if tmpItem.has_key('offset'):
                         rndmSeedOffset = tmpItem['offset']
                     else:
                         rndmSeedOffset = 0
                 elif '${FIRSTEVENT}' in tmpItem['value']:
                     if tmpItem.has_key('offset'):
                         firstEventOffset = tmpItem['offset']    
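     # drop the trailing blank appended by the padding logic above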
     jobParameters = jobParameters[:-1]
     # append parameters for event service merging if necessary
     esmergeParams = self.getParamsForEventServiceMerging(taskParamMap)
     if esmergeParams != None:
         jobParameters += esmergeParams
     self.setJobParamsTemplate(jobParameters)
     # set random seed offset
     if rndmSeedOffset != None:
         self.setSplitRule(None,rndmSeedOffset,JediTaskSpec.splitRuleToken['randomSeed'])
     if firstEventOffset != None:
         self.setSplitRule(None,firstEventOffset,JediTaskSpec.splitRuleToken['firstEvent'])
     # return
     return
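
A minimal driver sketch (not part of the source): hand-building a task parameter map and passing it to doBasicRefine. The refiner instance, dataset names, and placeholder strings such as ${IN/T} and ${SN} are illustrative assumptions; in production the map comes from the PanDA server and the refiner is a fully initialized TaskRefinerBase subclass.

# illustrative sketch -- `refiner` is assumed to be a TaskRefinerBase
# subclass with taskSpec, ddmIF and taskBufferIF already wired up
taskParamMap = {
    'jobParameters': [
        # master input: the 'dataset' key triggers dataset-spec creation
        {'type': 'template', 'param_type': 'input',
         'value': '-i "${IN/T}"',
         'dataset': 'data12_8TeV.00214651.physics_Egamma.merge.AOD.f489_m1261'},
        # output: the filename template is extracted from 'value'
        {'type': 'template', 'param_type': 'output',
         'value': 'outputAODFile=user.jdoe.test01.AOD.${SN}.pool.root',
         'dataset': 'user.jdoe.test01.AOD'},
    ],
    # a plain dict here is wrapped in a list by doBasicRefine
    'log': {'type': 'template', 'param_type': 'log',
            'value': 'user.jdoe.test01.log.tgz',
            'dataset': 'user.jdoe.test01.log'},
}
refiner.doBasicRefine(taskParamMap)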
Example #5
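import sys

# NOTE: this excerpt starts mid-script; `task` is assumed to be a
# JediTaskSpec already populated with the remaining task attributes,
# and `tbIF` an initialized JEDI task-buffer interface
task.transUses = 'Atlas-17.2.7'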
task.transHome = 'AtlasProduction-17.2.8.10'
task.transPath = 'Reco_trf.py'
task.workQueue_ID = 3

tbIF.insertTask_JEDI(task)

from pandajedi.jedicore.JediDatasetSpec import JediDatasetSpec

ds = JediDatasetSpec()
ds.jediTaskID = task.jediTaskID
if len(sys.argv) > 2:
    ds.datasetName = sys.argv[2]
else:
    ds.datasetName = 'data12_8TeV.00214651.physics_Egamma.merge.AOD.f489_m1261'
ds.type = 'input'
ds.vo = task.vo
ds.cloud = 'US'
ds.streamName = 'IN'
ds.status = 'defined'
ds.nFiles = 0
ds.nFilesUsed = 0
ds.nFilesFinished = 0
ds.nFilesFailed = 0

st, datasetID = tbIF.insertDataset_JEDI(ds)

ds = JediDatasetSpec()
ds.jediTaskID = task.jediTaskID
ds.datasetName = 'ddo.000001.Atlas.Ideal.DBRelease.v220701'
ds.type = 'input'
ds.vo = task.vo