def doPreProRefine(self, taskParamMap):
    """Set up (or finish) the preprocessing step for a task.

    Behaviour depends on the task state:
      * no 'preproSpec' key in taskParamMap -> nothing to do, no-op.
      * task already preprocessed -> fetch the replacement values stored
        by the preprocessing job and substitute the placeholders in the
        task parameters.
      * otherwise -> create a dummy input dataset with one pseudo file
        plus a log dataset template, flag the task's split rule for
        preprocessing, and move the task to 'topreprocess'.

    :param taskParamMap: dict of task parameters (mutated in place when
                         placeholders are replaced)
    :return: (status, taskParamMap) where status is
             None  = nothing (more) to do for preprocessing,
             True  = preprocessing was set up successfully,
             False = placeholder replacement failed.
    """
    # no preprocessing requested for this task
    # NOTE: 'in' replaces dict.has_key, which was removed in Python 3
    if 'preproSpec' not in taskParamMap:
        return None, taskParamMap
    # already preprocessed -> substitute the placeholder values
    if self.taskSpec.checkPreProcessed():
        # get replaced task params produced by the preprocessing job
        tmpStat, tmpJsonStr = self.taskBufferIF.getPreprocessMetadata_JEDI(self.taskSpec.jediTaskID)
        try:
            # replace placeholders
            replaceParams = RefinerUtils.decodeJSON(tmpJsonStr)
            self.tmpLog.debug("replace placeholders with " + str(replaceParams))
            for tmpKey, tmpVal in replaceParams.items():
                self.replacePlaceHolders(taskParamMap, tmpKey, tmpVal)
        except Exception:
            # narrow catch: a bare except would also swallow SystemExit/KeyboardInterrupt
            errtype, errvalue = sys.exc_info()[:2]
            self.tmpLog.error('{0} failed to get additional task params with {1}:{2}'.format(self.__class__.__name__,
                                                                                             errtype.__name__, errvalue))
            return False, taskParamMap
        # succeeded
        self.updatedTaskParams = taskParamMap
        return None, taskParamMap
    # make dummy dataset to keep track of preprocessing
    datasetSpec = JediDatasetSpec()
    datasetSpec.datasetName = 'panda.pp.in.{0}.{1}'.format(uuid.uuid4(), self.taskSpec.jediTaskID)
    datasetSpec.jediTaskID = self.taskSpec.jediTaskID
    datasetSpec.type = 'pp_input'
    datasetSpec.vo = self.taskSpec.vo
    datasetSpec.nFiles = 1
    datasetSpec.nFilesUsed = 0
    datasetSpec.nFilesToBeUsed = 1
    datasetSpec.nFilesFinished = 0
    datasetSpec.nFilesFailed = 0
    datasetSpec.nFilesOnHold = 0
    datasetSpec.status = 'ready'
    self.inMasterDatasetSpec.append(datasetSpec)
    # make a single pseudo input file so the dataset has something to track
    fileSpec = JediFileSpec()
    fileSpec.jediTaskID = datasetSpec.jediTaskID
    fileSpec.type = datasetSpec.type
    fileSpec.status = 'ready'
    fileSpec.lfn = 'pseudo_lfn'
    fileSpec.attemptNr = 0
    fileSpec.maxAttempt = 3
    fileSpec.keepTrack = 1
    datasetSpec.addFile(fileSpec)
    # make log dataset template for the preprocessing job
    logDatasetSpec = JediDatasetSpec()
    logDatasetSpec.datasetName = 'panda.pp.log.{0}.{1}'.format(uuid.uuid4(), self.taskSpec.jediTaskID)
    logDatasetSpec.jediTaskID = self.taskSpec.jediTaskID
    logDatasetSpec.type = 'tmpl_pp_log'
    logDatasetSpec.streamName = 'PP_LOG'
    logDatasetSpec.vo = self.taskSpec.vo
    logDatasetSpec.nFiles = 0
    logDatasetSpec.nFilesUsed = 0
    logDatasetSpec.nFilesToBeUsed = 0
    logDatasetSpec.nFilesFinished = 0
    logDatasetSpec.nFilesFailed = 0
    logDatasetSpec.nFilesOnHold = 0
    logDatasetSpec.status = 'defined'
    self.outDatasetSpecList.append(logDatasetSpec)
    # make output filename template for the log; ${SN} is filled with the serial number
    outTemplateMap = {'jediTaskID': self.taskSpec.jediTaskID,
                      'serialNr': 1,
                      'streamName': logDatasetSpec.streamName,
                      'filenameTemplate': "{0}._${{SN}}.log.tgz".format(logDatasetSpec.datasetName),
                      'outtype': re.sub('^tmpl_', '', logDatasetSpec.type),
                      }
    self.outputTemplateMap[logDatasetSpec.outputMapKey()] = [outTemplateMap]
    # set split rule to use preprocessing
    self.taskSpec.setPrePro()
    # set task status so the next cycle generates the preprocessing job
    self.taskSpec.status = 'topreprocess'
    return True, taskParamMap
def doPreProRefine(self, taskParamMap):
    """Prepare the preprocessing stage of a task.

    Flow:
      1. If taskParamMap has no 'preproSpec' entry, preprocessing is not
         used -> return immediately.
      2. If the task was already preprocessed, fetch the replacement
         values recorded by the preprocessing job and substitute the
         placeholders in taskParamMap.
      3. Otherwise build the bookkeeping objects for preprocessing: a
         dummy input dataset holding one pseudo file, a log dataset
         template with its output filename template, set the split rule,
         and put the task into 'topreprocess' status.

    :param taskParamMap: task parameter dict; placeholder values are
                         written into it in step 2
    :return: (status, taskParamMap) -- status is None when there is
             nothing (more) to do, True when preprocessing was set up,
             False when placeholder replacement failed.
    """
    # no preprocessing ('in' instead of has_key, which Python 3 removed)
    if 'preproSpec' not in taskParamMap:
        return None, taskParamMap
    # already preprocessed
    if self.taskSpec.checkPreProcessed():
        # get replaced task params
        tmpStat, tmpJsonStr = self.taskBufferIF.getPreprocessMetadata_JEDI(
            self.taskSpec.jediTaskID)
        try:
            # replace placeholders with the stored values
            replaceParams = RefinerUtils.decodeJSON(tmpJsonStr)
            self.tmpLog.debug("replace placeholders with " + str(replaceParams))
            for tmpKey, tmpVal in replaceParams.items():
                self.replacePlaceHolders(taskParamMap, tmpKey, tmpVal)
        except Exception:
            # keep SystemExit/KeyboardInterrupt propagating; log everything else
            errtype, errvalue = sys.exc_info()[:2]
            self.tmpLog.error(
                '{0} failed to get additional task params with {1}:{2}'.format(
                    self.__class__.__name__, errtype.__name__, errvalue))
            return False, taskParamMap
        # succeeded
        self.updatedTaskParams = taskParamMap
        return None, taskParamMap
    # make dummy dataset to keep track of preprocessing
    datasetSpec = JediDatasetSpec()
    datasetSpec.datasetName = 'panda.pp.in.{0}.{1}'.format(
        uuid.uuid4(), self.taskSpec.jediTaskID)
    datasetSpec.jediTaskID = self.taskSpec.jediTaskID
    datasetSpec.type = 'pp_input'
    datasetSpec.vo = self.taskSpec.vo
    datasetSpec.nFiles = 1
    datasetSpec.nFilesUsed = 0
    datasetSpec.nFilesToBeUsed = 1
    datasetSpec.nFilesFinished = 0
    datasetSpec.nFilesFailed = 0
    datasetSpec.nFilesOnHold = 0
    datasetSpec.status = 'ready'
    self.inMasterDatasetSpec.append(datasetSpec)
    # single pseudo file inside the dummy dataset
    fileSpec = JediFileSpec()
    fileSpec.jediTaskID = datasetSpec.jediTaskID
    fileSpec.type = datasetSpec.type
    fileSpec.status = 'ready'
    fileSpec.lfn = 'pseudo_lfn'
    fileSpec.attemptNr = 0
    fileSpec.maxAttempt = 3
    fileSpec.keepTrack = 1
    datasetSpec.addFile(fileSpec)
    # log dataset template for the preprocessing job
    logDatasetSpec = JediDatasetSpec()
    logDatasetSpec.datasetName = 'panda.pp.log.{0}.{1}'.format(
        uuid.uuid4(), self.taskSpec.jediTaskID)
    logDatasetSpec.jediTaskID = self.taskSpec.jediTaskID
    logDatasetSpec.type = 'tmpl_pp_log'
    logDatasetSpec.streamName = 'PP_LOG'
    logDatasetSpec.vo = self.taskSpec.vo
    logDatasetSpec.nFiles = 0
    logDatasetSpec.nFilesUsed = 0
    logDatasetSpec.nFilesToBeUsed = 0
    logDatasetSpec.nFilesFinished = 0
    logDatasetSpec.nFilesFailed = 0
    logDatasetSpec.nFilesOnHold = 0
    logDatasetSpec.status = 'defined'
    self.outDatasetSpecList.append(logDatasetSpec)
    # output filename template for the log stream; ${SN} = serial number
    outTemplateMap = {
        'jediTaskID': self.taskSpec.jediTaskID,
        'serialNr': 1,
        'streamName': logDatasetSpec.streamName,
        'filenameTemplate': "{0}._${{SN}}.log.tgz".format(logDatasetSpec.datasetName),
        'outtype': re.sub('^tmpl_', '', logDatasetSpec.type),
    }
    self.outputTemplateMap[logDatasetSpec.outputMapKey()] = [outTemplateMap]
    # set split rule to use preprocessing
    self.taskSpec.setPrePro()
    # set task status so the preprocessing job gets generated
    self.taskSpec.status = 'topreprocess'
    # return
    return True, taskParamMap