def doBasicRefine(self, taskParamMap):
    """Build dataset specs and the job parameter template from taskParamMap.

    Fills self.inMasterDatasetSpec / self.inSecDatasetSpecList with input
    dataset specs, self.outDatasetSpecList / self.outputTemplateMap with
    output and log dataset specs plus filename templates, and the unmerge
    maps when output merging is requested. Raises
    JediException.UnknownDatasetError when an input dataset cannot be resolved.
    """
    # get input/output/log dataset specs
    nIn = 0
    nOutMap = {}
    if isinstance(taskParamMap['log'], dict):
        itemList = taskParamMap['jobParameters'] + [taskParamMap['log']]
    else:
        itemList = taskParamMap['jobParameters'] + taskParamMap['log']
    # pseudo input
    if taskParamMap.has_key('noInput') and taskParamMap['noInput'] == True:
        tmpItem = {}
        tmpItem['type'] = 'template'
        tmpItem['value'] = ''
        tmpItem['dataset'] = 'pseudo_dataset'
        tmpItem['param_type'] = 'pseudo_input'
        itemList = [tmpItem] + itemList
    # random seed
    if RefinerUtils.useRandomSeed(taskParamMap):
        tmpItem = {}
        tmpItem['type'] = 'template'
        tmpItem['value'] = ''
        tmpItem['dataset'] = 'RNDMSEED'
        tmpItem['param_type'] = 'random_seed'
        itemList.append(tmpItem)
    # loop over all items
    allDsList = []
    for tmpItem in itemList:
        # look for datasets
        if tmpItem['type'] == 'template' and tmpItem.has_key('dataset'):
            # avoid duplication
            if not tmpItem['dataset'] in allDsList:
                allDsList.append(tmpItem['dataset'])
            else:
                continue
            datasetSpec = JediDatasetSpec()
            datasetSpec.datasetName = tmpItem['dataset']
            datasetSpec.jediTaskID = self.taskSpec.jediTaskID
            datasetSpec.type = tmpItem['param_type']
            if tmpItem.has_key('container'):
                datasetSpec.containerName = tmpItem['container']
            if tmpItem.has_key('token'):
                datasetSpec.storageToken = tmpItem['token']
            if tmpItem.has_key('destination'):
                datasetSpec.destination = tmpItem['destination']
            if tmpItem.has_key('attributes'):
                datasetSpec.setDatasetAttribute(tmpItem['attributes'])
            if tmpItem.has_key('ratio'):
                datasetSpec.setDatasetAttribute('ratio={0}'.format(tmpItem['ratio']))
            if tmpItem.has_key('eventRatio'):
                datasetSpec.setEventRatio(tmpItem['eventRatio'])
            if tmpItem.has_key('check'):
                datasetSpec.setDatasetAttribute('cc')
            if tmpItem.has_key('usedup'):
                datasetSpec.setDatasetAttribute('ud')
            if tmpItem.has_key('random'):
                datasetSpec.setDatasetAttribute('rd')
            if tmpItem.has_key('reusable'):
                datasetSpec.setDatasetAttribute('ru')
            if tmpItem.has_key('offset'):
                datasetSpec.setOffset(tmpItem['offset'])
            if tmpItem.has_key('allowNoOutput'):
                datasetSpec.allowNoOutput()
            if tmpItem.has_key('nFilesPerJob'):
                datasetSpec.setNumFilesPerJob(tmpItem['nFilesPerJob'])
            if tmpItem.has_key('num_records'):
                datasetSpec.setNumRecords(tmpItem['num_records'])
            if 'transient' in tmpItem:
                datasetSpec.setTransient(tmpItem['transient'])
            datasetSpec.vo = self.taskSpec.vo
            datasetSpec.nFiles = 0
            datasetSpec.nFilesUsed = 0
            datasetSpec.nFilesFinished = 0
            datasetSpec.nFilesFailed = 0
            datasetSpec.nFilesOnHold = 0
            datasetSpec.nEvents = 0
            datasetSpec.nEventsUsed = 0
            datasetSpec.nEventsToBeUsed = 0
            datasetSpec.status = 'defined'
            if datasetSpec.type in JediDatasetSpec.getInputTypes() + ['random_seed']:
                datasetSpec.streamName = RefinerUtils.extractStreamName(tmpItem['value'])
                if not tmpItem.has_key('expandedList'):
                    tmpItem['expandedList'] = []
                # dataset names could be comma-concatenated
                datasetNameList = datasetSpec.datasetName.split(',')
                # datasets could be added by incexec
                incexecDS = 'dsFor{0}'.format(datasetSpec.streamName)
                # remove /XYZ
                incexecDS = incexecDS.split('/')[0]
                if taskParamMap.has_key(incexecDS):
                    for tmpDatasetName in taskParamMap[incexecDS].split(','):
                        if not tmpDatasetName in datasetNameList:
                            datasetNameList.append(tmpDatasetName)
                # loop over all dataset names
                inDatasetSpecList = []
                for datasetName in datasetNameList:
                    # skip empty
                    if datasetName == '':
                        continue
                    # expand
                    if datasetSpec.isPseudo() or datasetSpec.type in ['random_seed'] or datasetName == 'DBR_LATEST':
                        # pseudo input
                        tmpDatasetNameList = [datasetName]
                    elif tmpItem.has_key('expand') and tmpItem['expand'] == True:
                        # expand dataset container
                        tmpDatasetNameList = self.ddmIF.getInterface(self.taskSpec.vo).expandContainer(datasetName)
                    else:
                        # normal dataset name
                        tmpDatasetNameList = self.ddmIF.getInterface(self.taskSpec.vo).listDatasets(datasetName)
                    for elementDatasetName in tmpDatasetNameList:
                        if nIn > 0 or not elementDatasetName in tmpItem['expandedList']:
                            tmpItem['expandedList'].append(elementDatasetName)
                            inDatasetSpec = copy.copy(datasetSpec)
                            inDatasetSpec.datasetName = elementDatasetName
                            inDatasetSpec.containerName = datasetName
                            inDatasetSpecList.append(inDatasetSpec)
                # empty input
                if inDatasetSpecList == [] and self.oldTaskStatus != 'rerefine':
                    errStr = 'doBasicRefine : unknown input dataset "{0}"'.format(datasetSpec.datasetName)
                    self.taskSpec.setErrDiag(errStr)
                    if not datasetSpec.datasetName in self.unknownDatasetList:
                        self.unknownDatasetList.append(datasetSpec.datasetName)
                    raise JediException.UnknownDatasetError, errStr
                # set master flag
                for inDatasetSpec in inDatasetSpecList:
                    if nIn == 0:
                        # master
                        self.inMasterDatasetSpec.append(inDatasetSpec)
                    else:
                        # secondary
                        self.inSecDatasetSpecList.append(inDatasetSpec)
                nIn += 1
                continue
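            # output/log items: each dataset gets a generated stream name
            # (OUTPUT0, LOG0, ...) and a filename template recorded in
            # self.outputTemplateMap, keyed by datasetSpec.outputMapKey()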
            if datasetSpec.type in ['output', 'log']:
                if not nOutMap.has_key(datasetSpec.type):
                    nOutMap[datasetSpec.type] = 0
                # make stream name
                datasetSpec.streamName = "{0}{1}".format(datasetSpec.type.upper(), nOutMap[datasetSpec.type])
                nOutMap[datasetSpec.type] += 1
                # set attribute for event service
                if self.taskSpec.useEventService() and taskParamMap.has_key('objectStore') \
                        and datasetSpec.type in ['output']:
                    datasetSpec.setObjectStore(taskParamMap['objectStore'])
                # extract output filename template and change the value field
                outFileTemplate, tmpItem['value'] = RefinerUtils.extractReplaceOutFileTemplate(tmpItem['value'],
                                                                                               datasetSpec.streamName)
                # make output template
                if outFileTemplate != None:
                    if tmpItem.has_key('offset'):
                        offsetVal = 1 + tmpItem['offset']
                    else:
                        offsetVal = 1
                    outTemplateMap = {'jediTaskID' : self.taskSpec.jediTaskID,
                                      'serialNr' : offsetVal,
                                      'streamName' : datasetSpec.streamName,
                                      'filenameTemplate' : outFileTemplate,
                                      'outtype' : datasetSpec.type,
                                      }
                    if self.outputTemplateMap.has_key(datasetSpec.outputMapKey()):
                        # multiple files are associated to the same output datasets
                        self.outputTemplateMap[datasetSpec.outputMapKey()].append(outTemplateMap)
                        # don't insert the same output dataset
                        continue
                    self.outputTemplateMap[datasetSpec.outputMapKey()] = [outTemplateMap]
                # append
                self.outDatasetSpecList.append(datasetSpec)
                # make unmerged dataset
                if taskParamMap.has_key('mergeOutput') and taskParamMap['mergeOutput'] == True:
                    umDatasetSpec = JediDatasetSpec()
                    umDatasetSpec.datasetName = 'panda.um.' + datasetSpec.datasetName
                    umDatasetSpec.jediTaskID = self.taskSpec.jediTaskID
                    umDatasetSpec.storageToken = 'TOMERGE'
                    umDatasetSpec.vo = datasetSpec.vo
                    umDatasetSpec.type = "tmpl_trn_" + datasetSpec.type
                    umDatasetSpec.nFiles = 0
                    umDatasetSpec.nFilesUsed = 0
                    umDatasetSpec.nFilesToBeUsed = 0
                    umDatasetSpec.nFilesFinished = 0
                    umDatasetSpec.nFilesFailed = 0
                    umDatasetSpec.nFilesOnHold = 0
                    umDatasetSpec.status = 'defined'
                    umDatasetSpec.streamName = datasetSpec.streamName
                    if datasetSpec.isAllowedNoOutput():
                        umDatasetSpec.allowNoOutput()
                    # ratio
                    if datasetSpec.getRatioToMaster() > 1:
                        umDatasetSpec.setDatasetAttribute('ratio={0}'.format(datasetSpec.getRatioToMaster()))
                    # make unmerged output template
                    if outFileTemplate != None:
                        umOutTemplateMap = {'jediTaskID' : self.taskSpec.jediTaskID,
                                            'serialNr' : 1,
                                            'streamName' : umDatasetSpec.streamName,
                                            'outtype' : datasetSpec.type,
                                            }
                        # append temporary name
                        if taskParamMap.has_key('umNameAtEnd') and taskParamMap['umNameAtEnd'] == True:
                            # append temporary name at the end
                            umOutTemplateMap['filenameTemplate'] = outFileTemplate + '.panda.um'
                        else:
                            umOutTemplateMap['filenameTemplate'] = 'panda.um.' + outFileTemplate
                        if self.outputTemplateMap.has_key(umDatasetSpec.outputMapKey()):
                            # multiple files are associated to the same output datasets
                            self.outputTemplateMap[umDatasetSpec.outputMapKey()].append(umOutTemplateMap)
                            # don't insert the same output dataset
                            continue
                        self.outputTemplateMap[umDatasetSpec.outputMapKey()] = [umOutTemplateMap]
                    # use log as master for merging
                    if datasetSpec.type == 'log':
                        self.unmergeMasterDatasetSpec[datasetSpec.outputMapKey()] = umDatasetSpec
                    else:
                        # append
                        self.unmergeDatasetSpecMap[datasetSpec.outputMapKey()] = umDatasetSpec
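    # all dataset specs are collected at this point; what remains is to set
    # the merge split rule and flatten taskParamMap['jobParameters'] into a
    # single job parameter template string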
    # set attributes for merging
    if taskParamMap.has_key('mergeOutput') and taskParamMap['mergeOutput'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['mergeOutput'])
    # make job parameters
    rndmSeedOffset = None
    firstEventOffset = None
    jobParameters = ''
    for tmpItem in taskParamMap['jobParameters']:
        if tmpItem.has_key('value'):
            # hidden parameter
            if tmpItem.has_key('hidden') and tmpItem['hidden'] == True:
                continue
            # add tags for ES-only parameters
            esOnly = False
            if 'es_only' in tmpItem and tmpItem['es_only'] == True:
                esOnly = True
            if esOnly:
                jobParameters += '<PANDA_ES_ONLY>'
            jobParameters += '{0}'.format(tmpItem['value'])
            if esOnly:
                jobParameters += '</PANDA_ES_ONLY>'
            # padding
            if tmpItem.has_key('padding') and tmpItem['padding'] == False:
                pass
            else:
                jobParameters += ' '
            # get offset for random seed and first event
            if tmpItem['type'] == 'template' and tmpItem['param_type'] == 'number':
                if '${RNDMSEED}' in tmpItem['value']:
                    if tmpItem.has_key('offset'):
                        rndmSeedOffset = tmpItem['offset']
                    else:
                        rndmSeedOffset = 0
                elif '${FIRSTEVENT}' in tmpItem['value']:
                    if tmpItem.has_key('offset'):
                        firstEventOffset = tmpItem['offset']
    # strip the trailing padding space
    jobParameters = jobParameters[:-1]
    # append parameters for event service merging if necessary
    esmergeParams = self.getParamsForEventServiceMerging(taskParamMap)
    if esmergeParams != None:
        jobParameters += esmergeParams
    self.setJobParamsTemplate(jobParameters)
    # set random seed offset
    if rndmSeedOffset != None:
        self.setSplitRule(None, rndmSeedOffset, JediTaskSpec.splitRuleToken['randomSeed'])
    if firstEventOffset != None:
        self.setSplitRule(None, firstEventOffset, JediTaskSpec.splitRuleToken['firstEvent'])
    # return
    return
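
# A minimal sketch of the taskParamMap layout that doBasicRefine consumes,
# inferred from the key checks above. Dataset names and parameter values are
# purely illustrative, and the surrounding refiner instance (with taskSpec,
# ddmIF, etc. already initialized) is assumed to exist elsewhere:
#
#   taskParamMap = {
#       'jobParameters': [
#           {'type': 'constant', 'value': 'Reco_tf.py'},
#           {'type': 'template', 'param_type': 'input',
#            'value': '--inputFile=${IN}', 'dataset': 'some.input.dataset/',
#            'expand': True, 'nFilesPerJob': 5},
#           {'type': 'template', 'param_type': 'output',
#            'value': '--outputFile=out.${SN}.pool.root',
#            'dataset': 'some.output.dataset/'},
#       ],
#       'log': {'type': 'template', 'param_type': 'log',
#               'value': 'log.${SN}.tgz', 'dataset': 'some.log.dataset/'},
#       'mergeOutput': True,
#   }
#
# After refiner.doBasicRefine(taskParamMap), input specs end up in
# refiner.inMasterDatasetSpec / refiner.inSecDatasetSpecList, output and log
# specs in refiner.outDatasetSpecList, and the filename templates in
# refiner.outputTemplateMap.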