def makeStep(self, step, overrides):
    """Return ``step`` with the workflow ``overrides`` merged into it.

    Parameters:
        step: the step definition to specialise.
        overrides: the overrides to apply; may be empty or None.

    Returns:
        The very same ``step`` object when there is nothing to apply,
        otherwise the result of ``merge([overrides, step])``.
    """
    # An empty (or missing) set of overrides leaves the step untouched.
    # Truthiness is used instead of len() so that None is accepted too.
    if not overrides:
        return step

    # Imported lazily: the helper is only needed when something is merged.
    from Configuration.PyReleaseValidation.relval_steps import merge

    # The overrides are placed in front of the step in the merged list.
    copyStep = merge([overrides] + [step])
    return copyStep
def makeStep(self,step,overrides):
    """Apply ``overrides`` on top of ``step`` and return the resulting step.

    When ``overrides`` is empty or None the original ``step`` object is
    returned unchanged; otherwise ``merge`` from relval_steps is called with
    the overrides listed before the step.
    """
    if overrides:
        # Only pull in the merge helper when there is something to merge.
        from Configuration.PyReleaseValidation.relval_steps import merge
        return merge([overrides, step])
    # Nothing to override (truthiness also covers overrides=None).
    return step
def merge_additional_command(workflow_step, command):
    """
    Combine the arguments of a workflow step with the extra command
    given by the user and return the merged result
    """
    user_options = split_command_to_dict(command)

    # The long spellings are re-keyed to the short ones before merging
    for long_name, short_name in (('--step', '-s'), ('--number', '-n')):
        if long_name in user_options:
            user_options[short_name] = user_options.pop(long_name)

    print('Merging user commands %s' % (user_options))
    print('Merging to %s' % (workflow_step))

    # User options are listed first, followed by the workflow step
    merge_order = [user_options, workflow_step]
    return steps_module.merge(merge_order)
class MatrixReader(object):
    """Read the relval matrix modules and build the commands of each workflow.

    The result of readMatrix() is stored in ``self.workFlowSteps``, keyed by
    ``(workflow number, file prefix)``.
    """

    def __init__(self, opt):
        """Initialise the reader from an options object.

        ``opt`` must provide the attributes ``what``, ``wmcontrol``,
        ``command``, ``workflow`` and ``overWrite``.
        """
        self.reset(opt.what)

        self.wm = opt.wmcontrol
        self.addCommand = opt.command
        self.commandLineWf = opt.workflow
        self.overWrite = opt.overWrite

    def reset(self, what='all'):
        """Reset all bookkeeping containers and remember ``what``."""
        self.what = what

        # a bunch of information, but not yet the WorkFlow object
        self.workFlowSteps = {}
        # the actual WorkFlow objects
        self.workFlows = []
        self.nameList = {}

        # prefix associated with each known matrix file
        self.filesPrefMap = {
            'relval_standard': 'std-',
            'relval_highstats': 'hi-',
            'relval_pileup': 'PU-',
            'relval_generator': 'gen-',
            'relval_production': 'prod-',
            'relval_ged': 'ged-',
            'relval_identity': 'id-',
        }

        self.files = [
            'relval_standard',
            'relval_highstats',
            'relval_pileup',
            'relval_generator',
            'relval_production',
            'relval_ged',
            'relval_identity',
        ]

        self.relvalModule = None

    def makeCmd(self, step):
        """Turn a step dictionary into ``(cfg, input, options string)``.

        Keys containing 'no_exec' are dropped, the 'cfg' and 'input' keys
        (case-insensitive) are returned separately, and every other
        key/value pair is appended to the option string as ' key value'.
        """
        cmd = ''
        cfg = None
        inputInfo = None  # local renamed so the builtin input() is not shadowed
        for k, v in step.items():
            if 'no_exec' in k:
                continue  # we want to really run it ...
            key = k.lower()
            if key == 'cfg':
                cfg = v
                continue  # do not append to cmd, return separately
            if key == 'input':
                inputInfo = v
                continue  # do not append to cmd, return separately
            cmd += ' ' + k + ' ' + str(v)
        return cfg, inputInfo, cmd

    def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
        """Import one matrix module and record the commands of its workflows.

        Parameters:
            fileNameIn: name of the matrix module; must be a key of
                ``self.filesPrefMap`` (KeyError otherwise).
            useInput: optional iterable of strings, each either 'N',
                'N:level', 'all' or 'all:level', selecting the workflows
                for which an ...INPUT step is looked up.
            refRel: optional release name, or comma separated list with one
                entry per element of the module's ``baseDataSetRelease``.
            fromScratch: optional iterable of workflow numbers removed again
                from the ``useInput`` selection.

        Returns None. On import failure an error is printed and nothing is
        recorded. Raises MatrixException when a step defines both a cfg and
        an input.
        """
        prefix = self.filesPrefMap[fileNameIn]

        # Single pre-formatted argument: same output with the Python 2 print
        # statement and the Python 3 print function.
        print("processing  %s" % (fileNameIn,))

        moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
        try:
            __import__(moduleName)
            self.relvalModule = sys.modules[moduleName]
        except Exception as e:
            print("ERROR importing file  %s %s" % (fileNameIn, str(e)))
            return

        print("request for INPUT for  %s" % (useInput,))

        # workflow number -> deepest step index that may be replaced by INPUT
        fromInput = {}

        if useInput:
            for i in useInput:
                if ':' in i:
                    (ik, il) = i.split(':')
                    if ik == 'all':
                        for k in self.relvalModule.workflows.keys():
                            fromInput[float(k)] = int(il)
                    else:
                        fromInput[float(ik)] = int(il)
                else:
                    if i == 'all':
                        for k in self.relvalModule.workflows.keys():
                            fromInput[float(k)] = 0
                    else:
                        fromInput[float(i)] = 0

        if fromScratch:
            for num in map(float, fromScratch):
                fromInput.pop(num, None)

        # overwrite steps
        if self.overWrite:
            for p in self.overWrite:
                self.relvalModule.steps.overwrite(p)

        # change the origin of dataset on the fly
        if refRel:
            if ',' in refRel:
                refRels = refRel.split(',')
                if len(refRels) != len(self.relvalModule.baseDataSetRelease):
                    return
                # A real list is passed so the pairs can be traversed more
                # than once (zip is a one-shot iterator on Python 3).
                self.relvalModule.changeRefRelease(
                    self.relvalModule.steps,
                    list(zip(self.relvalModule.baseDataSetRelease, refRels))
                )
            else:
                self.relvalModule.changeRefRelease(
                    self.relvalModule.steps,
                    [(x, refRel) for x in self.relvalModule.baseDataSetRelease]
                )

        for num, wfInfo in self.relvalModule.workflows.items():
            commands = []
            wfName = wfInfo[0]
            stepList = wfInfo[1]
            # if no explicit name given for the workflow, use the name of step1
            if wfName.strip() == '':
                wfName = stepList[0]
            # option to specialize the wf as the third item in the WF list
            addTo = None
            addCom = None
            if len(wfInfo) >= 3:
                addCom = wfInfo[2]
                if not isinstance(addCom, list):
                    addCom = [addCom]
            if len(wfInfo) >= 4:
                addTo = wfInfo[3]
                # pad with 0; '<' (not '!=') so an over-long list cannot
                # make this loop run forever
                while len(addTo) < len(stepList):
                    addTo.append(0)

            name = wfName
            stepIndex = 0
            ranStepList = []

            # first resolve INPUT possibilities
            if num in fromInput:
                ilevel = fromInput[num]
                # walk from the last step down to the first one
                for stepI in range(len(stepList) - 1, -1, -1):
                    if stepI > ilevel:
                        continue
                    # for stepI == 0 this is simply '<step>INPUT'
                    testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
                    if testName in self.relvalModule.steps.keys():
                        stepList[stepI] = testName
                        # drop, in place, the steps the INPUT step replaces
                        del stepList[:stepI]
                        break

            for step in stepList:
                stepName = step
                if self.wm:
                    # cannot put a certain number of things in wm
                    if stepName in ['HARVEST', 'HARVESTD', 'HARVESTDreHLT',
                                    'RECODFROMRAWRECO', 'SKIMD', 'SKIMCOSD',
                                    'SKIMDreHLT']:
                        continue

                if name:
                    name += '+'
                name += stepName

                if addCom and (not addTo or addTo[stepIndex] == 1):
                    from Configuration.PyReleaseValidation.relval_steps import merge
                    copyStep = merge(addCom + [self.relvalModule.steps[stepName]])
                    cfg, inputInfo, opts = self.makeCmd(copyStep)
                else:
                    cfg, inputInfo, opts = self.makeCmd(self.relvalModule.steps[stepName])

                if inputInfo and cfg:
                    msg = "FATAL ERROR: found both cfg and input for workflow " + str(num) + ' step ' + stepName
                    raise MatrixException(msg)

                if inputInfo:
                    # an input step is recorded as-is instead of a command line
                    cmd = inputInfo
                else:
                    if cfg:
                        cmd = 'cmsDriver.py ' + cfg + ' ' + opts
                    else:
                        cmd = 'cmsDriver.py step' + str(stepIndex + 1) + ' ' + opts
                    if self.wm:
                        cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                    if self.addCommand:
                        cmd += ' ' + self.addCommand
                    if self.wm:
                        cmd = cmd.replace('DQMROOT', 'DQM')
                        cmd = cmd.replace('--filetype DQM', '')
                commands.append(cmd)
                ranStepList.append(stepName)
                stepIndex += 1

            self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)

        return
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Import one matrix module and record the commands of its workflows.

    Parameters:
        fileNameIn: name of the matrix module; must be a key of
            ``self.filesPrefMap`` (KeyError otherwise).
        useInput: optional iterable of strings, each either 'N', 'N:level',
            'all' or 'all:level', selecting the workflows for which an
            ...INPUT step is looked up.
        refRel: optional release name, or comma separated list with one
            entry per element of the module's ``baseDataSetRelease``.
        fromScratch: optional iterable of workflow numbers removed again
            from the ``useInput`` selection.

    Returns None; results are stored in ``self.workFlowSteps`` under the key
    ``(workflow number, prefix)`` as ``(num, name, commands, ranStepList)``.
    On import failure an error is printed and nothing is recorded.
    Raises MatrixException when a step defines both a cfg and an input.
    """
    prefix = self.filesPrefMap[fileNameIn]

    print("processing", fileNameIn)

    moduleName = 'Configuration.PyReleaseValidation.' + fileNameIn
    try:
        __import__(moduleName)
        self.relvalModule = sys.modules[moduleName]
    except Exception as e:
        print("ERROR importing file ", fileNameIn, str(e))
        return

    if useInput is not None:
        print("request for INPUT for ", useInput)

    # workflow number -> deepest step index that may be replaced by INPUT
    fromInput = {}

    if useInput:
        for i in useInput:
            if ':' in i:
                (ik, il) = i.split(':')
                if ik == 'all':
                    for k in self.relvalModule.workflows.keys():
                        fromInput[float(k)] = int(il)
                else:
                    fromInput[float(ik)] = int(il)
            else:
                if i == 'all':
                    for k in self.relvalModule.workflows.keys():
                        fromInput[float(k)] = 0
                else:
                    fromInput[float(i)] = 0

    if fromScratch:
        for num in map(float, fromScratch):
            fromInput.pop(num, None)

    # overwrite steps
    if self.overWrite:
        for p in self.overWrite:
            self.relvalModule.steps.overwrite(p)

    # change the origin of dataset on the fly
    if refRel:
        if ',' in refRel:
            refRels = refRel.split(',')
            if len(refRels) != len(self.relvalModule.baseDataSetRelease):
                return
            self.relvalModule.changeRefRelease(
                self.relvalModule.steps,
                list(zip(self.relvalModule.baseDataSetRelease, refRels)))
        else:
            self.relvalModule.changeRefRelease(
                self.relvalModule.steps,
                [(x, refRel) for x in self.relvalModule.baseDataSetRelease])

    for num, wfInfo in self.relvalModule.workflows.items():
        commands = []
        wfName = wfInfo[0]
        stepList = wfInfo[1]
        stepOverrides = wfInfo.overrides
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '':
            wfName = stepList[0]
        # option to specialize the wf as the third item in the WF list
        addTo = None
        addCom = None
        if len(wfInfo) >= 3:
            addCom = wfInfo[2]
            if not isinstance(addCom, list):
                addCom = [addCom]
        if len(wfInfo) >= 4:
            addTo = wfInfo[3]
            # pad with 0; '<' (not '!=') so an over-long list cannot make
            # this loop run forever
            while len(addTo) < len(stepList):
                addTo.append(0)

        name = wfName
        stepIndex = 0
        ranStepList = []

        # first resolve INPUT possibilities
        if num in fromInput:
            ilevel = fromInput[num]
            for (stepIr, step) in enumerate(reversed(stepList)):
                stepI = (len(stepList) - stepIr) - 1
                if stepI > ilevel:
                    continue
                if stepI != 0:
                    testName = '__'.join(stepList[0:stepI + 1]) + 'INPUT'
                else:
                    testName = step + 'INPUT'
                if testName in self.relvalModule.steps.keys():
                    stepList[stepI] = testName
                    # pop the steps that the INPUT step replaces
                    for p in range(stepI):
                        stepList.pop(0)
                    break

        for step in stepList:
            stepName = step
            # a step defined as None is skipped and does not consume an index
            if self.relvalModule.steps[stepName] is None:
                continue
            if self.wm:
                # cannot put a certain number of things in wm
                if stepName in [
                        #'HARVEST','HARVESTD','HARVESTDreHLT',
                        'RECODFROMRAWRECO', 'SKIMD', 'SKIMCOSD', 'SKIMDreHLT'
                ]:
                    continue

            if name:
                name += '+'
            name += stepName

            # workflow overrides are applied to the step first
            stepDef = self.makeStep(self.relvalModule.steps[stepName],
                                    stepOverrides)
            if addCom and (not addTo or addTo[stepIndex] == 1):
                from Configuration.PyReleaseValidation.relval_steps import merge
                stepDef = merge(addCom + [stepDef])
            cfg, inputInfo, opts = self.makeCmd(stepDef)

            if inputInfo and cfg:
                msg = "FATAL ERROR: found both cfg and input for workflow " + str(
                    num) + ' step ' + stepName
                raise MatrixException(msg)

            if inputInfo:
                # an input step is recorded as-is instead of a command line
                cmd = inputInfo
                if self.noRun:
                    cmd.run = []
            else:
                if cfg:
                    cmd = 'cmsDriver.py ' + cfg + ' ' + opts
                else:
                    cmd = 'cmsDriver.py step' + str(stepIndex + 1) + ' ' + opts
                if self.wm:
                    cmd += ' --io %s.io --python %s.py' % (stepName, stepName)
                if self.addCommand:
                    # with self.apply set, only the listed steps (by index or
                    # by name) receive the additional command
                    if self.apply:
                        if stepIndex in self.apply or stepName in self.apply:
                            cmd += ' ' + self.addCommand
                    else:
                        cmd += ' ' + self.addCommand
                if self.wm and self.revertDqmio == 'yes':
                    cmd = cmd.replace('DQMIO', 'DQM')
                    cmd = cmd.replace('--filetype DQM', '')
            commands.append(cmd)
            ranStepList.append(stepName)
            stepIndex += 1

        self.workFlowSteps[(num, prefix)] = (num, name, commands, ranStepList)

    return
def readMatrix(self, fileNameIn, useInput=None, refRel=None, fromScratch=None):
    """Read the matrix module ``fileNameIn`` and fill ``self.workFlowSteps``.

    ``fileNameIn`` must be a key of ``self.filesPrefMap``. ``useInput`` is an
    optional iterable of 'N', 'N:level', 'all' or 'all:level' strings naming
    the workflows for which an ...INPUT step is looked up; ``fromScratch``
    lists workflow numbers taken out of that selection again. ``refRel`` is
    an optional release name (or comma separated list of them) forwarded to
    the module's ``changeRefRelease``.

    Nothing is returned: each workflow is stored under the key
    ``(num, prefix)`` as ``(num, name, commands, ranStepList)``. If the module
    cannot be imported an error is printed and the method returns early.
    MatrixException is raised when a step carries both a cfg and an input.
    """
    prefix = self.filesPrefMap[fileNameIn]

    # Messages are pre-formatted into one argument so that the output is the
    # same with the Python 2 print statement and the Python 3 print function.
    print("processing %s" % (fileNameIn,))

    try:
        __import__('Configuration.PyReleaseValidation.'+fileNameIn)
        self.relvalModule = sys.modules['Configuration.PyReleaseValidation.'+fileNameIn]
    except Exception as e:
        print("ERROR importing file  %s %s" % (fileNameIn, str(e)))
        return

    if useInput is not None:
        print("request for INPUT for  %s" % (useInput,))

    # workflow number -> deepest step index that may be replaced by INPUT
    fromInput={}

    if useInput:
        for i in useInput:
            if ':' in i:
                (ik,il)=i.split(':')
                if ik=='all':
                    for k in self.relvalModule.workflows.keys():
                        fromInput[float(k)]=int(il)
                else:
                    fromInput[float(ik)]=int(il)
            else:
                if i=='all':
                    for k in self.relvalModule.workflows.keys():
                        fromInput[float(k)]=0
                else:
                    fromInput[float(i)]=0

    if fromScratch:
        fromScratch=[float(x) for x in fromScratch]
        for num in fromScratch:
            if num in fromInput:
                fromInput.pop(num)

    #overwrite steps
    if self.overWrite:
        for p in self.overWrite:
            self.relvalModule.steps.overwrite(p)

    #change the origin of dataset on the fly
    if refRel:
        if ',' in refRel:
            refRels=refRel.split(',')
            if len(refRels)!=len(self.relvalModule.baseDataSetRelease):
                return
            self.relvalModule.changeRefRelease(
                self.relvalModule.steps,
                list(zip(self.relvalModule.baseDataSetRelease,refRels))
                )
        else:
            self.relvalModule.changeRefRelease(
                self.relvalModule.steps,
                [(x,refRel) for x in self.relvalModule.baseDataSetRelease]
                )

    for num, wfInfo in self.relvalModule.workflows.items():
        commands=[]
        wfName = wfInfo[0]
        stepList = wfInfo[1]
        stepOverrides=wfInfo.overrides
        # if no explicit name given for the workflow, use the name of step1
        if wfName.strip() == '':
            wfName = stepList[0]
        # option to specialize the wf as the third item in the WF list
        addTo=None
        addCom=None
        if len(wfInfo)>=3:
            addCom=wfInfo[2]
            if not isinstance(addCom, list):
                addCom=[addCom]
        if len(wfInfo)>=4:
            addTo=wfInfo[3]
            #pad with 0 ('<' rather than '!=': a list that is already longer
            #than stepList must not keep the loop running forever)
            while len(addTo)<len(stepList):
                addTo.append(0)

        name=wfName
        stepIndex=0
        ranStepList=[]

        #first resolve INPUT possibilities
        if num in fromInput:
            ilevel=fromInput[num]
            for (stepIr,step) in enumerate(reversed(stepList)):
                stepI=(len(stepList)-stepIr)-1
                if stepI>ilevel:
                    continue
                if stepI!=0:
                    testName='__'.join(stepList[0:stepI+1])+'INPUT'
                else:
                    testName=step+'INPUT'
                if testName in self.relvalModule.steps.keys():
                    stepList[stepI]=testName
                    #pop the steps replaced by the INPUT step
                    for p in range(stepI):
                        stepList.pop(0)
                    break

        for step in stepList:
            stepName=step
            if self.wm:
                #cannot put a certain number of things in wm
                if stepName in [
                    #'HARVEST','HARVESTD','HARVESTDreHLT',
                    'RECODFROMRAWRECO','SKIMD','SKIMCOSD','SKIMDreHLT'
                    ]:
                    continue

            if name:
                name += '+'
            name += stepName

            if addCom and (not addTo or addTo[stepIndex]==1):
                from Configuration.PyReleaseValidation.relval_steps import merge
                copyStep=merge(addCom+[self.makeStep(self.relvalModule.steps[stepName],stepOverrides)])
                cfg, inputInfo, opts = self.makeCmd(copyStep)
            else:
                cfg, inputInfo, opts = self.makeCmd(self.makeStep(self.relvalModule.steps[stepName],stepOverrides))

            if inputInfo and cfg:
                msg = "FATAL ERROR: found both cfg and input for workflow "+str(num)+' step '+stepName
                raise MatrixException(msg)

            if inputInfo:
                #an input step is recorded as-is instead of a command line
                cmd = inputInfo
                if self.noRun:
                    cmd.run=[]
            else:
                if cfg:
                    cmd = 'cmsDriver.py '+cfg+' '+opts
                else:
                    cmd = 'cmsDriver.py step'+str(stepIndex+1)+' '+opts
                if self.wm:
                    cmd+=' --io %s.io --python %s.py'%(stepName,stepName)
                if self.addCommand:
                    #with self.apply set only the listed steps (index or name)
                    #get the additional command
                    if self.apply:
                        if stepIndex in self.apply or stepName in self.apply:
                            cmd +=' '+self.addCommand
                    else:
                        cmd +=' '+self.addCommand
                if self.wm and self.revertDqmio=='yes':
                    cmd=cmd.replace('DQMIO','DQM')
                    cmd=cmd.replace('--filetype DQM','')
            commands.append(cmd)
            ranStepList.append(stepName)
            stepIndex+=1

        self.workFlowSteps[(num,prefix)] = (num, name, commands, ranStepList)

    return
def main():
    """
    Main

    Parse the command line, resolve every requested workflow of the chosen
    matrix into its list of steps (with overrides and the optional user
    command merged in), print the result as JSON and, when an output file
    name is given, write the same JSON to that file.
    Exits with status 1 when a workflow id or a step name cannot be found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-l',
                        '--list',
                        dest='workflow_ids',
                        help='Comma separated list of workflow ids')
    parser.add_argument('-w',
                        '--what',
                        dest='matrix_name',
                        help='RelVal workflows file: standard, upgrade, ...')
    parser.add_argument('-c',
                        '--command',
                        dest='command',
                        help='Additional command to add to each cmsDriver')
    parser.add_argument(
        '-cs',
        '--command_steps',
        dest='command_steps',
        help='Specify which RelVal steps should have additional command applied',
        default='')
    parser.add_argument('-o',
                        '--output',
                        dest='output_file',
                        help='Output file name')
    parser.add_argument('-r',
                        '--recycle_gs',
                        dest='recycle_gs',
                        action='store_true',
                        help='Recycle GS')

    opt = parser.parse_args()

    # Without -l there is nothing to process; report it as a usage error
    # instead of failing on None.split() below.
    if not opt.workflow_ids:
        parser.error('No workflow ids given, use -l/--list')

    # De-duplicate and sort the ids; empty items (e.g. a trailing comma)
    # are ignored rather than passed to float().
    workflow_ids = sorted(
        {float(x) for x in opt.workflow_ids.split(',') if x.strip()})
    print('Given workflow ids (%s): %s' % (len(workflow_ids), workflow_ids))
    print('Workflows file: %s' % (opt.matrix_name))
    print('User given command: %s (%s)' % (opt.command, opt.command_steps))
    print('Output file: %s' % (opt.output_file))
    print('Recycle GS: %s' % (opt.recycle_gs))

    workflows_module = get_workflows_module(opt.matrix_name)
    command_steps = set(clean_split(opt.command_steps))
    # wmsplit is a dictionary with LumisPerJob values
    wmsplit = get_wmsplit()
    workflows = {}
    for workflow_id in workflow_ids:
        print('Getting %s workflow' % (workflow_id))
        # workflow_matrix is a list where first element is the name of workflow
        # and second element is list of step names
        # if workflow name is not present, first step name is used
        if workflow_id not in workflows_module.workflows:
            print('Can\'t find %s in %s matrix' %
                  (workflow_id, opt.matrix_name),
                  file=sys.stderr)
            sys.exit(1)

        workflow_matrix = workflows_module.workflows[workflow_id]
        print('Matrix: %s' % (workflow_matrix))
        workflows[workflow_id] = {
            'steps': [],
            'workflow_name': get_workflow_name(workflow_matrix)
        }
        if workflow_matrix.overrides:
            print('Overrides: %s' % (workflow_matrix.overrides))

        # Go through steps and get the arguments
        for workflow_step_index, workflow_step_name in enumerate(
                workflow_matrix[1]):
            print('\nStep %s. %s' %
                  (workflow_step_index + 1, workflow_step_name))
            if workflow_step_index == 0 and opt.recycle_gs:
                # Add INPUT to step name to recycle GS
                workflow_step_name += 'INPUT'
                print('Step name changed to %s to recycle input' %
                      (workflow_step_name))

            if workflow_step_name not in steps_module.steps:
                print('Could not find %s in steps module' %
                      (workflow_step_name),
                      file=sys.stderr)
                sys.exit(1)

            # Merge user command, workflow and overrides
            workflow_step = steps_module.steps[workflow_step_name]
            if workflow_step is None:
                print('Workflow step %s is none, skipping it' %
                      (workflow_step_name))
                continue

            print('Step: %s' % (workflow_step))
            # Overrides go first because first item in the list has
            # highest priority
            workflow_step = steps_module.merge(
                [workflow_matrix.overrides, workflow_step])
            if opt.command and should_apply_additional_command(
                    workflow_step, command_steps):
                workflow_step = merge_additional_command(
                    workflow_step, opt.command)

            workflows[workflow_id]['steps'].append(
                make_relval_step(workflow_step, workflow_step_name, wmsplit))

        # Additional newline inbetween each workflow
        print('\n')

    print('All workflows:')
    print(json.dumps(workflows, indent=2, sort_keys=True))
    if opt.output_file:
        with open(opt.output_file, 'w') as workflows_file:
            json.dump(workflows, workflows_file)