# Generic pipeline framework code (LOFAR). The import paths below are
# assumptions based on the names used in this module; adjust them to the
# actual lofarpipe installation if they differ. GenericPipelineParsetValidation
# and the plugin loader are expected to be provided alongside this module.
import os
import re
import sys
import copy
import errno
from collections import OrderedDict

from lofarpipe.support.control import control
import lofarpipe.support.lofaringredient as ingredient
from lofarpipe.support.parset import Parset
from lofarpipe.support.data_map import (DataMap, DataProduct,
                                        validate_data_maps, align_data_maps)
from lofarpipe.support.pipelinelogging import getSearchingLogger
from lofarpipe.support.utilities import create_directory
from lofarpipe.support.loggingdecorators import duration
from lofarpipe.support.remotecommand import ComputeJob
import loader


def _create_mapfile_from_parset(parset, identifier):
    """Build a DataMap from the DataProducts entries of the given parset."""
    pars = Parset()
    pars.adoptFile(parset)
    dps = pars.makeSubset(pars.fullModuleName('DataProducts') + '.')
    # locations are 'host:directory' strings; combine them with the filenames
    # and the skip flags into (host, file, skip) tuples
    datamap = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
        for location, filename, skip in zip(
            dps.getStringVector(identifier + '.locations'),
            dps.getStringVector(identifier + '.filenames'),
            dps.getBoolVector(identifier + '.skip'))
    ])
    return datamap
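# Illustrative example (not part of the framework): _create_mapfile_from_parset
# expects three parallel DataProducts vectors in the parset; the key prefixes
# and values below are hypothetical.
#
#   ObsSW.Observation.DataProducts.Input_Correlated.locations = [node001:/data/L123456]
#   ObsSW.Observation.DataProducts.Input_Correlated.filenames = [L123456_SB000_uv.MS]
#   ObsSW.Observation.DataProducts.Input_Correlated.skip      = [0]
#
# Each 'host:directory' location is joined with its filename and split on the
# colon again, yielding one (host, file, skip) DataMap entry:
#
#   datamap = _create_mapfile_from_parset('observation.parset', 'Input_Correlated')
#   for item in datamap:
#       print(item.host, item.file, item.skip)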
class GenericPipeline(control):

    inputs = {
        'loglevel': ingredient.StringField(
            '--loglevel',
            help="loglevel",
            default='INFO',
            optional=True
        )
    }

    def __init__(self):
        control.__init__(self)
        self.parset = Parset()
        self.input_data = {}
        self.output_data = {}
        self.parset_feedback_file = None
        self.name = ''

    def usage(self):
        """
        Display usage
        """
        print("Usage: %s [options] <parset-file>" % sys.argv[0], file=sys.stderr)
        print("Parset structure should look like:\nNYI", file=sys.stderr)

    def go(self):
        """
        Read the parset-file that was given as input argument, and set the
        jobname before calling the base-class's `go()` method.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        # Set job-name to basename of parset-file w/o extension, if it's not
        # set on the command-line with '-j' or '--job-name'
        if 'job_name' not in self.inputs:
            self.inputs['job_name'] = (
                os.path.splitext(os.path.basename(parset_file))[0])
        self.name = self.inputs['job_name']
        try:
            self.logger
        except AttributeError:
            self.logger = getSearchingLogger(self.name)
            self.logger.setLevel(self.inputs['loglevel'])
        # Call the base-class's `go()` method.
        return super(GenericPipeline, self).go()

    def pipeline_logic(self):
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print("Error: Parset file not found!", file=sys.stderr)
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        # set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we dont need a subset but just a steplist;
        # at the moment only a list of stepnames is given for the
        # pipeline.steps parameter, e.g.
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')
        pipeline_steps = self.parset.makeSubset(
            self.parset.fullModuleName('steps') + '.')

        # *********************************************************************
        # forward declaration of things, just for better overview of what is
        # in here. some of this might be removed in upcoming iterations, or
        # stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        # extend the step name list if there were pipeline.steps.<subset> keys
        for item in pipeline_steps.keys():
            if item in step_name_list:
                loc = step_name_list.index(item)
                step_name_list[loc:loc] = pipeline_steps.getStringVector(item)
                step_name_list.remove(item)
        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict,
                              step_parset_files, step_parset_obj, parset_dir)
        # initial parameters to be saved in resultdicts so that recipes have
        # access to this step0. double init values: 'input' should be
        # considered deprecated; self.name would be consistent to use in
        # subpipelines
        input_dictionary = {
            'parset': parset_file,
            'parsetobj': self.parset,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir}
        resultdicts = {}
        for section in self.config.sections():
            tmp_dict = {}
            for entry in self.config.items(section):
                input_dictionary[entry[0]] = entry[1]
                tmp_dict[entry[0]] = entry[1]
            resultdicts.update({section: copy.deepcopy(tmp_dict)})
        resultdicts.update({'input': input_dictionary})
        resultdicts.update({self.name: input_dictionary})
        if 'pipeline.mapfile' in self.parset.keywords():
            resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced
        # with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            self.logger.info("Beginning step %s" % (stepname,))
            step = step_control_dict[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except Exception:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except Exception:
                typeval = ''
            adds = None
            if stepname in step_parset_obj:
                adds = self._construct_step_parset(
                    inputdict, step_parset_obj[stepname], resultdicts,
                    step_parset_files[stepname], stepname)
            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                    inputdict['stepname'] = stepname
                    if adds:
                        inputdict.update(adds)

                self._construct_cmdline(inputargs, step, resultdicts)

                if stepname in step_parset_files:
                    inputdict['parset'] = step_parset_files[stepname]

                self._construct_input(inputdict, step, resultdicts)
                # hack: popping 'type' is necessary because 'kind' was
                # already deleted from the parsets
                inputdict.pop('type', None)
                inputdict.pop('kind', None)
                # \hack

                # more hacks. The framework's DictField is not properly
                # implemented, so construct your own dict from the input;
                # python builtin functions can't handle the string returned
                # from the parset class.
                if 'environment' in inputdict.keys():
                    val = inputdict['environment'].rstrip('}').lstrip('{').replace(' ', '')
                    splitval = str(val).split(',')
                    valdict = {}
                    for item in splitval:
                        valdict[item.split(':')[0]] = item.split(':')[1]
                    inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load another existing pipeline parset and add it to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector('pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keywords():
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({os.path.splitext(os.path.basename(typeval))[0]: {
                    'parset': typeval,
                    'mapfile': submapfile,
                }})
                # todo: take care of pluginpaths and everything other than
                # individual steps. make a pipeline parse method that returns
                # everything needed, maybe as dicts to combine them into one.
                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keywords():
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in self._keys(subpipeline_parset):
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)

                # *************************************************************
                # the master parset did not handle formatting and comments in
                # the parset. proper format only after use of
                # parset.makeSubset; then it is a different object from a
                # different super class, which also explains the use of both
                # parset.keys and parset.keys(). take the parset from the
                # subpipeline and add it to the master parset.
                # UPDATE: do not use .keys on the master parset; use
                # .keywords(), then comments are filtered.
                # *************************************************************
                # replace names of steps with the subpipeline stepname to
                # create a unique identifier. replacement values starting
                # with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline; this only works if the ! value
                # is already in the subpipeline.
                for k in self._keys(subpipeline_parset):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!') and not str(k).startswith('pipeline.replace.'):
                        for item in checklist:
                            if item + ".output" in str(val):
                                val = str(val).replace(item, stepname + '-' + item)
                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        # remove replacement strings to prevent loading the
                        # same key twice
                        if k in self._keys(self.parset):
                            self.parset.remove(k)
                        self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self._keys(self.parset):
                        if (str(k).startswith('!') and item == str(k).strip("! ") or
                                str(k).startswith('pipeline.replace.') and
                                item == str(k)[17:].strip()):
                            self.parset.remove(k)
                            self.parset.add('! ' + item,
                                            str(step_parset_obj[stepname][item]))
                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict,
                                      step_parset_files, step_parset_obj, parset_dir)
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)

            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                if activeloop[0] != stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')
                # break at max iteration or when another step sets the break variable
                if counter == step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict,
                                          step_parset_files, step_parset_obj,
                                          parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(
                        typeval,
                        inputargs,
                        **inputdict
                    )

            # plugins
            if kind_of_step == 'plugin':
                recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
                pluginpath = recipe_dir_str.rstrip(']').lstrip('[').split(',')
                for i, item in enumerate(pluginpath):
                    pluginpath[i] = os.path.join(item, 'plugins')
                if 'pluginpath' in pipeline_args.keys():
                    pluginpath.append(pipeline_args.getString('pluginpath'))
                with duration(self, stepname):
                    resultdict = loader.call_plugin(typeval, pluginpath,
                                                    inputargs, **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loopstep:
            # if the step has the keyword for loopbreaks, assign the value
            if (activeloop[0] in resultdicts and resultdict is not None
                    and 'break' in resultdict):
                resultdicts[activeloop[0]]['break'] = resultdict['break']

    # *********************************************************************
    # build the inputs for the master recipes.
    def _construct_input(self, inoutdict, controlparset, resdicts):
        # intermediate backward compatibility for opts subparset
        if controlparset.fullModuleName('opts'):
            argsparset = controlparset.makeSubset(
                controlparset.fullModuleName('opts') + '.')
        # hack
        elif 'loopcount' not in controlparset.keys():
            argsparset = controlparset
        else:
            argsparset = controlparset.makeSubset(
                controlparset.fullModuleName('imaginary') + '.')
        # \hack
        self._replace_output_keyword(inoutdict, argsparset,
                                     argsparset.keys(), resdicts)

    def _construct_cmdline(self, inoutargs, controlparset, resdicts):
        inoutdict = {}
        argsparset = controlparset.makeSubset(
            controlparset.fullModuleName('cmdline') + '.')
        self._replace_output_keyword(inoutdict, argsparset,
                                     argsparset.keys(), resdicts)
        for k in inoutdict.keys():
            inoutargs.append(inoutdict[k])
        for k in controlparset.keys():
            if 'cmdline' in k:
                controlparset.remove(k)

    def _construct_steps(self, step_name_list, step_control_dict,
                         step_parset_files, step_parset_obj, parset_dir):
        step_list_copy = copy.deepcopy(step_name_list)
        counter = 0
        while step_list_copy:
            counter -= 1
            stepname = step_list_copy.pop(-1)
            fullparset = self.parset.makeSubset(
                self.parset.fullModuleName(str(stepname)) + '.')
            subparset = fullparset.makeSubset(
                fullparset.fullModuleName('control') + '.')
            # disambiguate steps that appear more than once in the list
            number = 0
            for item in step_list_copy:
                if item == stepname:
                    number += 1
            if number != 0:
                stepname += str(number)
            step_name_list[counter] = stepname
            step_control_dict[stepname] = subparset
            if fullparset.fullModuleName('argument'):
                stepparset = fullparset.makeSubset(
                    fullparset.fullModuleName('argument') + '.')
                # *************************************************************
                # save parsets: either a filename is given in the main parset
                # or files will be created from subsets with stepname.parset
                # as filenames
                # parset from step
                try:
                    file_parset = Parset(stepparset.getString('parset'))
                    for k in file_parset.keywords():
                        if k not in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    stepparset.remove('parset')
                except Exception:
                    pass
                # parset from task.cfg
                try:
                    file_parset = Parset(self.task_definitions.get(
                        str(subparset['type']), 'parset'))
                    for k in file_parset.keywords():
                        if k not in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                except Exception:
                    pass
                # for parset in control section
                try:
                    file_parset = Parset(subparset.getString('parset'))
                    for k in file_parset.keywords():
                        if k not in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    subparset.remove('parset')
                except Exception:
                    pass
                step_parset = os.path.join(parset_dir, stepname + '.parset')
                stepparset.writeFile(step_parset)
                step_parset_files[stepname] = step_parset
                step_parset_obj[stepname] = stepparset

    def _replace_output_keyword(self, inoutdict, argsparset, keyorder, resdicts):
        addvals = {'inputkeys': [], 'mapfiles_in': [], 'arguments': []}
        regobj = re.compile(r'([\w\+_-]+)\.output\.([\w\+._-]+)')
        for k in keyorder:
            keystring = argsparset.getString(k)
            hitlist = regobj.findall(keystring)
            if hitlist:
                # substitute <step>.output.<key> references with the values
                # stored in the result dicts of earlier steps
                for hit in hitlist:
                    keystring = regobj.sub(str(resdicts[hit[0]][hit[1]]), keystring, 1)
                    if 'mapfile' in hit[1] and 'mapfile' not in k:
                        addvals['inputkeys'].append(resdicts[hit[0]][hit[1]])
                        addvals['mapfiles_in'].append(resdicts[hit[0]][hit[1]])
                inoutdict[k] = keystring
            else:
                inoutdict[k] = argsparset.getString(k)
            if k == 'flags':
                addvals['arguments'] = keystring
            if 'outputkey' in keystring:
                addvals['outputkey'] = 'outputkey'
        return addvals

    def _construct_step_parset(self, inoutdict, argsparset, resdicts,
                               filename, stepname):
        tmp_keys = argsparset.keys()
        ordered_keys = []
        parsetdict = {}
        # keep the argument order of the original parset
        for orig in self._keys(self.parset):
            for item in tmp_keys:
                if (stepname + '.') in orig and (
                        'argument.' + item in orig and
                        'argument.' + item + '.' not in orig):
                    ordered_keys.append(item)
                    continue
        # add keys from parset files that were not in the original list
        for item in argsparset.keys():
            if item not in ordered_keys:
                ordered_keys.append(item)
        additional = self._replace_output_keyword(parsetdict, argsparset,
                                                  ordered_keys, resdicts)
        for k in argsparset.keys():
            argsparset.replace(k, parsetdict[k])
            if k == 'flags':
                argsparset.remove(k)
        argsparset.writeFile(filename)
        return additional

    def _keys(self, inparset):
        # intersection of .keys (raw, includes comments) and .keywords()
        # (comments filtered); see the note in pipeline_logic
        outlist = []
        for k in inparset.keys:
            for kw in inparset.keywords():
                if k == kw:
                    outlist.append(kw)
        return outlist

    def _get_parset_dicts(self):
        return {}

    def show_tasks(self):
        for item in self.task_definitions.sections():
            print(item)

    def show_task(self, task):
        task_parset = Parset()
        if self.task_definitions.has_option(task, 'parset'):
            task_parset.adoptFile(self.task_definitions.get(task, 'parset'))
            print('possible arguments: key = value')
            for k in task_parset.keywords():
                print('   ', k, '   =   ', task_parset[k])

    def _add_step(self):
        steplist = []

    def _replace_values(self):
        replacedict = OrderedDict()
        for check in self._keys(self.parset):
            if str(check).startswith('!'):
                replacedict[str(check).lstrip('!').lstrip(' ')] = str(self.parset[check])
            if str(check).startswith('pipeline.replace.'):
                replacedict[str(check).replace('pipeline.replace.', '').lstrip(' ')] = \
                    str(self.parset[check])
        # expand environment variables
        for k, v in replacedict.items():
            replacedict[k] = os.path.expandvars(v)
        for check in self._keys(self.parset):
            for k, v in reversed(list(replacedict.items())):
                if '{{ ' + k + ' }}' in str(self.parset[check]):
                    replacestring = str(self.parset[check]).replace('{{ ' + k + ' }}', v)
                    self.parset.replace(check, replacestring)
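# A minimal parset for GenericPipeline, sketched from the parsing logic above
# (step and executable names are hypothetical). The main loop resolves
# 'pipeline.steps', reads each step's 'control' subset for kind/type, dumps
# the 'argument' subset to <stepname>.parset, and _replace_values() expands
# '{{ name }}' placeholders from '!' (or 'pipeline.replace.') definitions:
#
#   ! data_dir                = /data/run1
#   pipeline.steps            = [createmap, ndppp]
#
#   createmap.control.kind    = plugin
#   createmap.control.type    = createMapfile
#   createmap.argument.folder = {{ data_dir }}
#
#   ndppp.control.kind        = recipe
#   ndppp.control.type        = executable_args
#   ndppp.argument.executable = /usr/bin/NDPPP
#   ndppp.argument.msin       = createmap.output.mapfile
#
# 'createmap.output.mapfile' uses the <step>.output.<key> syntax matched by
# the regex in _replace_output_keyword, which substitutes values from earlier
# steps' result dicts at runtime.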
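# The task definitions used by show_task/show_tasks and by pipeline_logic's
# 'recipe' branch come from a ConfigParser file (the task definition file);
# a section might look like this (paths are hypothetical):
#
#   [ndppp]
#   recipe = executable_args
#   parset = /opt/lofar/share/pipeline/tasks/ndppp.parset
#
# task_definitions.get(<type>, 'recipe') selects the master recipe for a
# step's 'type', and the optional 'parset' option points at a file with
# default arguments that _construct_steps merges into each step's parset.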
# Master side of the executable_args recipe. This go() is a method of the
# executable_args recipe class; the class definition and its remaining
# methods are not part of this excerpt.
def go(self):
    # default to an empty string so the log line below cannot fail when no
    # executable was supplied
    executable = ''
    if 'executable' in self.inputs:
        executable = self.inputs['executable']

    if self.inputs['nthreads']:
        self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

    if 'environment' in self.inputs:
        self.environment.update(self.inputs['environment'])

    self.logger.info("Starting %s run" % executable)
    super(executable_args, self).go()

    # args format stuff
    args_format = {
        'args_format': self.inputs['args_format'],
        'args_format_argument': self.inputs['args_format_argument'],
        'args_format_option': self.inputs['args_format_option'],
        'args_formatlongoption': self.inputs['args_format_longoption'],
        'args_format_option_argument': self.inputs['args_format_option_argument']}
    mapfile_dir = os.path.join(self.config.get("layout", "job_directory"),
                               "mapfiles")
    work_dir = os.path.join(self.inputs['working_directory'],
                            self.inputs['job_name'])

    # *********************************************************************
    # try loading the input/output data files; validate output vs the input
    # location if output locations are provided
    try:
        inputmapfiles = []
        inlist = []
        if self.inputs['mapfile_in']:
            inlist.append(self.inputs['mapfile_in'])
        if self.inputs['mapfiles_in']:
            for item in self.inputs['mapfiles_in']:
                inlist.append(item)
            self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]
        for item in inlist:
            inputmapfiles.append(DataMap.load(item))
    except Exception:
        self.logger.error('Could not load input Mapfile %s' % inlist)
        return 1

    outputmapfiles = []
    if self.inputs['mapfile_out']:
        try:
            outdata = DataMap.load(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)
        except Exception:
            self.logger.error('Could not load output Mapfile %s' %
                              self.inputs['mapfile_out'])
            return 1
        # sync skip fields in the mapfiles
        align_data_maps(inputmapfiles[0], outputmapfiles[0])
    elif self.inputs['mapfiles_out']:
        for item in self.inputs['mapfiles_out']:
            outputmapfiles.append(DataMap.load(item))
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]
    else:
        # output will be directed to the working directory if no output
        # mapfile is specified
        outdata = copy.deepcopy(inputmapfiles[0])
        if not self.inputs['inplace']:
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    os.path.splitext(os.path.basename(item.file))[0] + '.' +
                    self.inputs['stepname']
                )
            self.inputs['mapfile_out'] = os.path.join(
                mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        else:
            self.inputs['mapfile_out'] = self.inputs['mapfile_in']
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        outputmapfiles.append(outdata)

    if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
        self.logger.error("Validation of data mapfiles failed!")
        return 1

    if self.inputs['outputsuffixes']:
        # Handle multiple output files
        for name in self.inputs['outputsuffixes']:
            outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
            self.inputs['mapfiles_out'].append(os.path.join(
                mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
            for item in outputmapfiles[-1]:
                item.file = os.path.join(
                    work_dir,
                    os.path.splitext(os.path.basename(item.file))[0] + '.' +
                    self.inputs['stepname'] + name
                )
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    # prepare arguments
    arglist = self.inputs['arguments']
    parsetdict = {}
    if 'parset' in self.inputs:
        parset = Parset()
        parset.adoptFile(self.inputs['parset'])
        for k in parset.keys:
            parsetdict[k] = str(parset[k])

    # construct multiple input data
    if self.inputs['inputkey'] and self.inputs['inputkey'] not in self.inputs['inputkeys']:
        self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

    if not self.inputs['outputkeys'] and self.inputs['outputkey']:
        self.inputs['outputkeys'].append(self.inputs['outputkey'])

    if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
        self.logger.error("Number of input mapfiles %d and input keys %d "
                          "have to match." %
                          (len(inputmapfiles), len(self.inputs['inputkeys'])))
        return 1

    filedict = {}
    if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
        for key, filemap, mapname in zip(self.inputs['inputkeys'],
                                         inputmapfiles, inlist):
            if mapname not in self.inputs['mapfiles_as_string']:
                filedict[key] = []
                for inp in filemap:
                    filedict[key].append(inp.file)
            else:
                if key != mapname:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(mapname)

    if self.inputs['outputkey']:
        filedict[self.inputs['outputkey']] = []
        for item in outputmapfiles[0]:
            filedict[self.inputs['outputkey']].append(item.file)

    # ********************************************************************
    # Call the node side of the recipe:
    # create and schedule the compute jobs
    recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
    recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
    pylist = os.getenv('PYTHONPATH', '').split(':')
    command = None
    for pl in pylist:
        if os.path.isfile(os.path.join(
                pl, 'lofarpipe/recipes/nodes/' + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(
                pl, 'lofarpipe/recipes/nodes/' + self.inputs['nodescript'] + '.py')
    for pl in recipe_directories:
        if os.path.isfile(os.path.join(
                pl, 'nodes/' + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(
                pl, 'nodes/' + self.inputs['nodescript'] + '.py')
    inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator

    jobs = []
    for i, (outp, inp,) in enumerate(zip(outputmapfiles[0], inputmapfiles[0])):
        arglist_copy = copy.deepcopy(arglist)
        parsetdict_copy = copy.deepcopy(parsetdict)

        if filedict:
            for name, value in filedict.items():
                replaced = False
                if arglist_copy:
                    for arg in arglist:
                        if name == arg:
                            ind = arglist_copy.index(arg)
                            arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                            replaced = True
                if parsetdict_copy:
                    if name in list(parsetdict_copy.values()):
                        for k, v in parsetdict_copy.items():
                            if v == name:
                                parsetdict_copy[k] = value[i]
                    else:
                        if not replaced:
                            parsetdict_copy[name] = value[i]

        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    executable,
                    arglist_copy,
                    parsetdict_copy,
                    work_dir,
                    self.inputs['parsetasfile'],
                    args_format,
                    self.environment
                ],
                resources={
                    "cores": self.inputs['nthreads']
                }
            )
        )
    max_per_node = self.inputs['max_per_node']
    self._schedule_jobs(jobs, max_per_node)

    jobresultdict = {}
    resultmap = {}
    for job, outp in zip(jobs, outputmapfiles[0]):
        if job.results['returncode'] != 0:
            outp.skip = True
            if not self.inputs['error_tolerance']:
                self.logger.error("A job has failed with returncode %d and "
                                  "error_tolerance is not set. Bailing out!" %
                                  job.results['returncode'])
                return 1
        for k, v in list(job.results.items()):
            if k not in jobresultdict:
                jobresultdict[k] = []
            jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
            if k == 'break':
                self.outputs.update({'break': v})

    # temp solution: write all output dict entries to a mapfile.
    # check the directory for stand-alone mode
    if not os.path.isdir(mapfile_dir):
        try:
            os.mkdir(mapfile_dir)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                pass
            else:
                raise
    for k, v in list(jobresultdict.items()):
        dmap = DataMap(v)
        dmap.save(os.path.join(mapfile_dir,
                               self.inputs['stepname'] + '.' + k + '.mapfile'))
        resultmap[k + '.mapfile'] = os.path.join(
            mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
    self.outputs.update(resultmap)

    # *********************************************************************
    # Check job results, and create the output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warning("Some jobs failed, continuing with succeeded runs")
    mapdict = {}
    for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
        self.logger.debug("Writing data map file: %s" % name)
        item.save(name)
        mapdict[os.path.basename(name)] = name
    self.outputs['mapfile'] = self.inputs['mapfile_out']
    if self.inputs['outputsuffixes']:
        self.outputs.update(mapdict)

    return 0
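# For reference: a mapfile as loaded and saved by DataMap above is a text
# file holding a Python literal, roughly of this shape (illustrative values):
#
#   [{'host': 'node001', 'file': '/data/L123456_SB000_uv.MS', 'skip': False},
#    {'host': 'node002', 'file': '/data/L123456_SB001_uv.MS', 'skip': False}]
#
# Setting DataMap.SkipIterator on the input and output maps (as done before
# job creation) makes iteration yield only entries whose skip flag is False,
# so flagged files never become ComputeJobs.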
class GenericPipeline(control): inputs = { 'loglevel': ingredient.StringField('--loglevel', help="loglevel", default='INFO', optional=True) } def __init__(self): control.__init__(self) self.parset = Parset() self.input_data = {} self.output_data = {} self.parset_feedback_file = None #self.logger = None#logging.RootLogger('DEBUG') self.name = '' #if not overwrite: # self.inputs['job_name'] = 'generic-pipeline' # if not self.inputs.has_key("start_time"): # import datetime # self.inputs["start_time"] = datetime.datetime.utcnow().replace(microsecond=0).isoformat() # if not hasattr(self, "config"): # self.config = self._read_config() # #self._read_config() # # ...and task files, if applicable # if not self.inputs.has_key("task_files"): # try: # self.inputs["task_files"] = utilities.string_to_list( # self.config.get('DEFAULT', "task_files") # ) # except NoOptionError: # self.inputs["task_files"] = [] # self.task_definitions = ConfigParser(self.config.defaults()) # print >> sys.stderr, "Reading task definition file(s): %s" % \ # ",".join(self.inputs["task_files"]) # self.task_definitions.read(self.inputs["task_files"]) # self.go() def usage(self): """ Display usage """ print >> sys.stderr, "Usage: %s [options] <parset-file>" % sys.argv[0] print >> sys.stderr, "Parset structure should look like:\n" \ "NYI" #return 1 def go(self): #""" #Read the parset-file that was given as input argument, and set the #jobname before calling the base-class's `go()` method. #""" try: parset_file = os.path.abspath(self.inputs['args'][0]) except IndexError: #return self.usage() self.usage() # Set job-name to basename of parset-file w/o extension, if it's not # set on the command-line with '-j' or '--job-name' if not 'job_name' in self.inputs: self.inputs['job_name'] = (os.path.splitext( os.path.basename(parset_file))[0]) self.name = self.inputs['job_name'] try: self.logger except: self.logger = getSearchingLogger(self.name) self.logger.setLevel(self.inputs['loglevel']) # Call the base-class's `go()` method. return super(GenericPipeline, self).go() # def pipeline_logic(self): # print 'Dummy because of stupid wrapping inside the framework' # if overwrite: # self.execute_pipeline() #def execute_pipeline(self): def pipeline_logic(self): try: parset_file = os.path.abspath(self.inputs['args'][0]) except IndexError: return self.usage() try: if self.parset.keys == []: self.parset.adoptFile(parset_file) self.parset_feedback_file = parset_file + "_feedback" except RuntimeError: print >> sys.stderr, "Error: Parset file not found!" return self.usage() self._replace_values() # just a reminder that this has to be implemented validator = GenericPipelineParsetValidation(self.parset) if not validator.validate_pipeline(): self.usage() exit(1) if not validator.validate_steps(): self.usage() exit(1) #set up directories job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # ********************************************************************* # maybe we dont need a subset but just a steplist # at the moment only a list with stepnames is given for the pipeline.steps parameter # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....] 
# the names will be the prefix for parset subsets pipeline_args = self.parset.makeSubset( self.parset.fullModuleName('pipeline') + '.') # ********************************************************************* # forward declaration of things. just for better overview and understanding whats in here. # some of this might be removed in upcoming iterations, or stuff gets added. step_name_list = pipeline_args.getStringVector('steps') step_control_dict = {} step_parset_files = {} step_parset_obj = {} activeloop = [''] # construct the list of step names and controls self._construct_steps(step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir) # initial parameters to be saved in resultsdict so that recipes have access to this step0 # double init values. 'input' should be considered deprecated # self.name would be consistent to use in subpipelines resultdicts = { 'input': { 'parset': parset_file, 'parsetobj': self.parset, 'job_dir': job_dir, 'parset_dir': parset_dir, 'mapfile_dir': mapfile_dir } } resultdicts.update({ self.name: { 'parset': parset_file, 'parsetobj': self.parset, 'job_dir': job_dir, 'parset_dir': parset_dir, 'mapfile_dir': mapfile_dir } }) if 'pipeline.mapfile' in self.parset.keys: resultdicts['input']['mapfile'] = str( self.parset['pipeline.mapfile']) resultdicts[self.name]['mapfile'] = str( self.parset['pipeline.mapfile']) # ********************************************************************* # main loop # there is a distinction between recipes and plugins for user scripts. # plugins are not used at the moment and might better be replaced with master recipes while step_name_list: stepname = step_name_list.pop(0) step = step_control_dict[stepname] #step_parset = step_parset_obj[stepname] inputdict = {} inputargs = [] resultdict = {} # default kind_of_step to recipe. try: kind_of_step = step.getString('kind') except: kind_of_step = 'recipe' try: typeval = step.getString('type') except: typeval = '' #self._construct_cmdline(inputargs, step, resultdicts) additional_input = {} if stepname in step_parset_obj: additional_input = self._construct_step_parset( step_parset_obj[stepname], resultdicts, step_parset_files[stepname], stepname) # stepname not a valid input for old recipes if kind_of_step == 'recipe': if self.task_definitions.get(typeval, 'recipe') == 'executable_args': inputdict = {'stepname': stepname} inputdict.update(additional_input) self._construct_cmdline(inputargs, step, resultdicts) if stepname in step_parset_files: inputdict['parset'] = step_parset_files[stepname] self._construct_input(inputdict, step, resultdicts) # hack, popping 'type' is necessary, why? because you deleted kind already in parsets try: inputdict.pop('type') except: pass try: inputdict.pop('kind') except: pass # \hack # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input. # python buildin functions cant handle the string returned from parset class. if 'environment' in inputdict.keys(): val = inputdict['environment'].rstrip('}').lstrip('{').replace( ' ', '') splitval = str(val).split(',') valdict = {} for item in splitval: valdict[item.split(':')[0]] = item.split(':')[1] inputdict['environment'] = valdict # subpipeline. goal is to specify a pipeline within a pipeline. # load other existing pipeline parset and add them to your own. 
if kind_of_step == 'pipeline': subpipeline_parset = Parset() subpipeline_parset.adoptFile(typeval) submapfile = '' subpipeline_steplist = subpipeline_parset.getStringVector( 'pipeline.steps') if 'pipeline.mapfile' in subpipeline_parset.keys: submapfile = subpipeline_parset['pipeline.mapfile'] subpipeline_parset.remove('pipeline.mapfile') if 'mapfile_in' in inputdict.keys(): submapfile = inputdict.pop('mapfile_in') resultdicts.update({ os.path.splitext(os.path.basename(typeval))[0]: { 'parset': typeval, 'mapfile': submapfile, } }) #todo: take care of pluginpathes and everything other then individual steps # make a pipeline parse methods that returns everything needed. # maybe as dicts to combine them to one subpipeline_parset.remove('pipeline.steps') if 'pipeline.pluginpath' in subpipeline_parset.keys: subpipeline_parset.remove('pipeline.pluginpath') checklist = copy.deepcopy(subpipeline_steplist) for k in subpipeline_parset.keys: if 'loopsteps' in k: for item in subpipeline_parset.getStringVector(k): checklist.append(item) # ********************************************************************* # master parset did not handle formatting and comments in the parset. # proper format only after use of parset.makesubset. then it is a different object # from a different super class :(. this also explains use of parset.keys and parset.keys() # take the parset from subpipeline and add it to the master parset. # ********************************************************************* # replace names of steps with the subpipeline stepname to create a unique identifier. # replacement values starting with ! will be taken from the master parset and overwrite # the ones in the subpipeline. only works if the ! value is already in the subpipeline for k in subpipeline_parset.keys: if not str(k).startswith('#'): val = subpipeline_parset[k] if not str(k).startswith('!'): for item in checklist: if item in str(val): val = str(val).replace( item, stepname + '-' + item) self.parset.add(stepname + '-' + k, str(val)) else: self.parset.add(k, str(val)) for i, item in enumerate(subpipeline_steplist): subpipeline_steplist[i] = stepname + '-' + item for item in step_parset_obj[stepname].keys(): for k in self.parset.keys: if str(k).startswith('!') and item in k: self.parset.remove(k) self.parset.add( '! 
' + item, str(step_parset_obj[stepname][item])) self._replace_values() self._construct_steps(subpipeline_steplist, step_control_dict, step_parset_files, step_parset_obj, parset_dir) for j in reversed(subpipeline_steplist): name = j step_control_dict[name] = step_control_dict[j] step_name_list.insert(0, name) # remove replacements strings to prevent loading the same key twice for k in copy.deepcopy(self.parset.keys): if str(k).startswith('!'): self.parset.remove(k) # loop if kind_of_step == 'loop': # remember what loop is running to stop it from a conditional step if activeloop[0] is not stepname: activeloop.insert(0, stepname) # prepare counter = 0 breakloop = False if stepname in resultdicts: counter = int(resultdicts[stepname]['counter']) + 1 breakloop = resultdicts[stepname]['break'] loopsteps = step.getStringVector('loopsteps') # break at max iteration or when other step sets break variable if counter is step.getInt('loopcount'): breakloop = True if not breakloop: # add loop steps to the pipeline including the loop itself step_name_list.insert(0, stepname) self._construct_steps(loopsteps, step_control_dict, step_parset_files, step_parset_obj, parset_dir) for j in reversed(loopsteps): name = j step_control_dict[name] = step_control_dict[j] step_name_list.insert(0, name) # results for other steps to check and write states resultdict = {'counter': counter, 'break': breakloop} else: # reset values for second use of the loop (but why would you do that?) resultdict = {'counter': -1, 'break': False} activeloop.pop(0) # recipes if kind_of_step == 'recipe': with duration(self, stepname): resultdict = self.run_task(typeval, inputargs, **inputdict) # plugins if kind_of_step == 'plugin': with duration(self, stepname): resultdict = loader.call_plugin( typeval, pipeline_args.getString('pluginpath'), inputargs, **inputdict) resultdicts[stepname] = resultdict # breaking the loopstep # if the step has the keyword for loopbreaks assign the value if resultdict is not None and 'break' in resultdict: if resultdict['break']: resultdicts[activeloop[0]]['break'] = resultdict['break'] # ********************************************************************* # build the inputs for the master recipes. 
def _construct_input(self, inoutdict, controlparset, resdicts):
    # intermediate backward compatibility for the opts subparset
    if controlparset.fullModuleName('opts'):
        argsparset = controlparset.makeSubset(
            controlparset.fullModuleName('opts') + '.')
    # hack
    elif 'loopcount' not in controlparset.keys():
        argsparset = controlparset
    else:
        argsparset = controlparset.makeSubset(
            controlparset.fullModuleName('imaginary') + '.')
    # \hack
    self._replace_output_keyword(inoutdict, argsparset, resdicts)

def _construct_cmdline(self, inoutargs, controlparset, resdicts):
    argsparset = controlparset.makeSubset(
        controlparset.fullModuleName('cmdline') + '.')
    for k in argsparset.keys():
        if '.output.' in argsparset.getString(k):
            step, outvar = argsparset.getString(k).split('.output.')
            inoutargs.append(resdicts[step][outvar])
        else:
            inoutargs.append(argsparset.getString(k))
    try:
        controlparset.remove('cmdline.inmap')
    except:
        pass

def _construct_steps(self, step_name_list, step_control_dict,
                     step_parset_files, step_parset_obj, parset_dir):
    step_list_copy = copy.deepcopy(step_name_list)
    # walk the list from the back; the negative counter indexes the same
    # position in step_name_list, so duplicate names can be renamed in place
    counter = 0
    while step_list_copy:
        counter -= 1
        stepname = step_list_copy.pop(-1)
        fullparset = self.parset.makeSubset(
            self.parset.fullModuleName(str(stepname)) + '.')
        subparset = fullparset.makeSubset(
            fullparset.fullModuleName('control') + '.')
        number = 0
        for item in step_list_copy:
            if item == stepname:
                number += 1
        if number != 0:
            stepname += str(number)
        step_name_list[counter] = stepname
        step_control_dict[stepname] = subparset
        # double implementation for intermediate backward compatibility
        if fullparset.fullModuleName('parsetarg') or \
           fullparset.fullModuleName('argument'):
            if fullparset.fullModuleName('parsetarg'):
                stepparset = fullparset.makeSubset(
                    fullparset.fullModuleName('parsetarg') + '.')
            if fullparset.fullModuleName('argument'):
                stepparset = fullparset.makeSubset(
                    fullparset.fullModuleName('argument') + '.')
            # *****************************************************************
            # save parsets: either a filename is given in the main parset,
            # or files are created from subsets with <stepname>.parset as filename
            #for name, parset in step_parset_dict.iteritems():
            try:
                file_parset = Parset(stepparset.getString('parset'))
                for k in file_parset.keys:
                    if k not in stepparset.keys():
                        stepparset.add(k, str(file_parset[k]))
                stepparset.remove('parset')
            except:
                pass
            # parset from task.cfg
            try:
                file_parset = Parset(self.task_definitions.get(
                    str(subparset['type']), 'parset'))
                for k in file_parset.keys:
                    if k not in stepparset.keys():
                        stepparset.add(k, str(file_parset[k]))
            except:
                pass
            # parset in the control section
            try:
                file_parset = Parset(subparset.getString('parset'))
                for k in file_parset.keys:
                    if k not in stepparset.keys():
                        stepparset.add(k, str(file_parset[k]))
                subparset.remove('parset')
            except:
                pass
            step_parset = os.path.join(parset_dir, stepname + '.parset')
            stepparset.writeFile(step_parset)
            step_parset_files[stepname] = step_parset
            step_parset_obj[stepname] = stepparset

def _replace_output_keyword(self, inoutdict, argsparset, resdicts):
    for k in argsparset.keys():
        keystring = argsparset.getString(k)
        if '.output.' in keystring:
            if ',' in keystring:
                keystring = keystring.rstrip(']').lstrip('[')
                vec = []
                for item in keystring.split(','):
                    if '.output.' in item:
                        step, outvar = item.split('.output.')
                        vec.append(resdicts[step][outvar])
                    else:
                        vec.append(item)
                inoutdict[k] = vec
            else:
                step, outvar = argsparset.getString(k).split('.output.')
                if '+' in outvar:
                    tmplist = str(outvar).split('+')
                    inoutdict[k] = resdicts[step][tmplist[0]] + tmplist[1]
                else:
                    inoutdict[k] = resdicts[step][outvar]
        else:
            inoutdict[k] = argsparset.getString(k)

def _construct_step_parset(self, argsparset, resdicts, filename, stepname):
    addvals = {'inputkeys': [], 'mapfiles_in': [], 'arguments': []}
    # hack to restore the original order of the arguments
    tmp_keys = argsparset.keys()
    ordered_keys = []
    for orig in self.parset.keys:
        for item in tmp_keys:
            if (stepname + '.') in orig and (
                    'argument.' + item in orig and
                    not 'argument.' + item + '.' in orig):
                ordered_keys.append(item)
                continue
    # \hack
    for k in ordered_keys:
        valuestring = argsparset.getString(k)
        if '.output.' in valuestring:
            if ',' in valuestring:
                valuestring = valuestring.rstrip(']').lstrip('[')
                vec = []
                for item in valuestring.split(','):
                    if '.output.' in item:
                        step, outvar = item.split('.output.')
                        vec.append(resdicts[step][outvar])
                        if 'mapfile' in str(outvar):
                            addvals['inputkeys'].append(resdicts[step][outvar])
                            addvals['mapfiles_in'].append(resdicts[step][outvar])
                    else:
                        vec.append(item)
                argsparset.replace(k, str(vec))
                if k == 'flags':
                    addvals['arguments'] = vec
                    argsparset.remove(k)
            else:
                step, outvar = argsparset.getString(k).split('.output.')
                # more ugly hacks... this really needs a clearly structured
                # replacement method...
                if '+' in outvar:
                    tmplist = str(outvar).split('+')
                    argsparset.replace(
                        k, str(resdicts[step][tmplist[0]]) + tmplist[1])
                else:
                    argsparset.replace(k, str(resdicts[step][outvar]))
                #if isinstance(resdicts[step][outvar], str):
                if 'mapfile' in str(outvar):
                    addvals['inputkeys'].append(resdicts[step][outvar])
                    addvals['mapfiles_in'].append(resdicts[step][outvar])
                if k == 'flags':
                    addvals['arguments'] = str(argsparset[k])
                    argsparset.remove(k)
        else:
            if k == 'flags':
                addvals['arguments'] = str(argsparset[k])
                argsparset.remove(k)
        # direct usage of outputkey
        if 'outputkey' in valuestring:
            addvals['outputkey'] = 'outputkey'
    argsparset.writeFile(filename)
    return addvals

def _get_parset_dicts(self):
    return {}

def show_tasks(self):
    tasklist = self.task_definitions.sections()
    for item in tasklist:
        print item
    #return tasklist

def show_task(self, task):
    task_parset = Parset()
    if self.task_definitions.has_option(task, 'parset'):
        task_parset.adoptFile(self.task_definitions.get(task, 'parset'))
        print 'possible arguments: key = value'
        for k in task_parset.keys:
            print '   ', k, '=', task_parset[k]

def _add_step(self):
    # stub, not implemented yet
    steplist = []

def _replace_values(self):
    replacedict = {}
    try:
        import imp
        plugin = imp.load_source('main', str(self.parset['prepare']))
        replacedict = plugin.main()
    except:
        pass
    # keys starting with '!' define replacement variables
    for check in self.parset.keys:
        if str(check).startswith('!'):
            replacedict[str(check).lstrip('!').lstrip(' ')] = str(
                self.parset[check])
    #print 'REPLACEDICT: ', replacedict
    for check in self.parset.keys:
        if not str(check).startswith('#'):
            for k, v in replacedict.iteritems():
                if '{{ ' + k + ' }}' in str(self.parset[check]):
                    replacestring = str(self.parset[check]).replace(
                        '{{ ' + k + ' }}', v)
                    self.parset.replace(check, replacestring)
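# -----------------------------------------------------------------------------
# Illustration only, not part of the framework: a minimal standalone sketch of
# the '!variable' / '{{ variable }}' substitution rule that _replace_values()
# applies to the master parset. All keys and values below are invented.
def _example_replace_values_sketch():
    parset_items = {
        '!obsid': 'L123456',
        '!outdir': '/data/run1',
        'ndppp1.argument.msout': '{{ outdir }}/{{ obsid }}.MS',
    }
    # keys starting with '!' define the replacement variables
    replacedict = {}
    for key, value in parset_items.iteritems():
        if key.startswith('!'):
            replacedict[key.lstrip('!').lstrip(' ')] = value
    # substitute every '{{ name }}' occurrence in the remaining values
    for key, value in parset_items.items():
        for name, replacement in replacedict.iteritems():
            if '{{ ' + name + ' }}' in value:
                parset_items[key] = value.replace('{{ ' + name + ' }}',
                                                  replacement)
                value = parset_items[key]
    print parset_items['ndppp1.argument.msout']
    # -> /data/run1/L123456.MS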
def pipeline_logic(self):
    try:
        parset_file = os.path.abspath(self.inputs['args'][0])
    except IndexError:
        return self.usage()
    try:
        if self.parset.keys == []:
            self.parset.adoptFile(parset_file)
            self.parset_feedback_file = parset_file + "_feedback"
    except RuntimeError:
        print >> sys.stderr, "Error: Parset file not found!"
        return self.usage()
    self._replace_values()
    # just a reminder that this has to be implemented
    validator = GenericPipelineParsetValidation(self.parset)
    if not validator.validate_pipeline():
        self.usage()
        exit(1)
    if not validator.validate_steps():
        self.usage()
        exit(1)

    # set up directories
    job_dir = self.config.get("layout", "job_directory")
    parset_dir = os.path.join(job_dir, "parsets")
    mapfile_dir = os.path.join(job_dir, "mapfiles")
    # create directories for temporary parset and map files
    create_directory(parset_dir)
    create_directory(mapfile_dir)

    # *************************************************************************
    # maybe we don't need a subset but just a steplist.
    # at the moment only a list of step names is given for the pipeline.steps
    # parameter, e.g. pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
    # the names will be the prefix for parset subsets
    pipeline_args = self.parset.makeSubset(
        self.parset.fullModuleName('pipeline') + '.')

    # *************************************************************************
    # forward declarations, just for a better overview of what lives in here.
    # some of this might be removed in upcoming iterations, or stuff gets added.
    step_name_list = pipeline_args.getStringVector('steps')
    step_control_dict = {}
    step_parset_files = {}
    step_parset_obj = {}
    activeloop = ['']

    # construct the list of step names and controls
    self._construct_steps(step_name_list, step_control_dict,
                          step_parset_files, step_parset_obj, parset_dir)
    # initial parameters are saved in resultdicts so that recipes have access
    # to this step 0. the values are stored twice: 'input' should be
    # considered deprecated; self.name is consistent to use in subpipelines.
    resultdicts = {
        'input': {
            'parset': parset_file,
            'parsetobj': self.parset,
            'job_dir': job_dir,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir
        }
    }
    resultdicts.update({
        self.name: {
            'parset': parset_file,
            'parsetobj': self.parset,
            'job_dir': job_dir,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir
        }
    })
    if 'pipeline.mapfile' in self.parset.keys:
        resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile'])
        resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile'])

    # *************************************************************************
    # main loop.
    # there is a distinction between recipes and plugins for user scripts.
    # plugins are not used at the moment and might better be replaced with
    # master recipes.
    while step_name_list:
        stepname = step_name_list.pop(0)
        step = step_control_dict[stepname]
        #step_parset = step_parset_obj[stepname]
        inputdict = {}
        inputargs = []
        resultdict = {}
        # default kind_of_step to recipe.
        try:
            kind_of_step = step.getString('kind')
        except:
            kind_of_step = 'recipe'
        try:
            typeval = step.getString('type')
        except:
            typeval = ''
        #self._construct_cmdline(inputargs, step, resultdicts)
        additional_input = {}
        if stepname in step_parset_obj:
            additional_input = self._construct_step_parset(
                step_parset_obj[stepname], resultdicts,
                step_parset_files[stepname], stepname)
        # stepname is not a valid input for old recipes
        if kind_of_step == 'recipe':
            if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                inputdict = {'stepname': stepname}
                inputdict.update(additional_input)

        self._construct_cmdline(inputargs, step, resultdicts)

        if stepname in step_parset_files:
            inputdict['parset'] = step_parset_files[stepname]

        self._construct_input(inputdict, step, resultdicts)
        # hack: popping 'type' is necessary. why? because 'kind' was already
        # deleted from the parsets
        try:
            inputdict.pop('type')
        except:
            pass
        try:
            inputdict.pop('kind')
        except:
            pass
        # \hack
        # more hacks. the framework's DictField is not properly implemented,
        # so construct your own dict from the input: Python's built-in
        # functions can't handle the string returned by the parset class.
        if 'environment' in inputdict.keys():
            val = inputdict['environment'].rstrip('}').lstrip('{').replace(
                ' ', '')
            splitval = str(val).split(',')
            valdict = {}
            for item in splitval:
                valdict[item.split(':')[0]] = item.split(':')[1]
            inputdict['environment'] = valdict

        # subpipeline. the goal is to specify a pipeline within a pipeline:
        # load another existing pipeline parset and add it to your own.
        if kind_of_step == 'pipeline':
            subpipeline_parset = Parset()
            subpipeline_parset.adoptFile(typeval)
            submapfile = ''
            subpipeline_steplist = subpipeline_parset.getStringVector(
                'pipeline.steps')
            if 'pipeline.mapfile' in subpipeline_parset.keys:
                submapfile = subpipeline_parset['pipeline.mapfile']
                subpipeline_parset.remove('pipeline.mapfile')
            if 'mapfile_in' in inputdict.keys():
                submapfile = inputdict.pop('mapfile_in')
            resultdicts.update({
                os.path.splitext(os.path.basename(typeval))[0]: {
                    'parset': typeval,
                    'mapfile': submapfile,
                }
            })
            #todo: take care of plugin paths and everything other than
            # individual steps. make a pipeline parse method that returns
            # everything needed, maybe as dicts, to combine them into one.
            subpipeline_parset.remove('pipeline.steps')
            if 'pipeline.pluginpath' in subpipeline_parset.keys:
                subpipeline_parset.remove('pipeline.pluginpath')
            checklist = copy.deepcopy(subpipeline_steplist)
            for k in subpipeline_parset.keys:
                if 'loopsteps' in k:
                    for item in subpipeline_parset.getStringVector(k):
                        checklist.append(item)
            # *****************************************************************
            # the master parset did not handle formatting and comments in the
            # parset. proper format only after use of parset.makeSubset: then
            # it is a different object from a different super class :(. this
            # also explains the mixed use of parset.keys and parset.keys().
            # take the parset from the subpipeline and add it to the master parset.
            # *****************************************************************
            # replace names of steps with the subpipeline step name to create a
            # unique identifier. replacement values starting with ! will be
            # taken from the master parset and overwrite the ones in the
            # subpipeline. this only works if the ! value is already in the
            # subpipeline.
            for k in subpipeline_parset.keys:
                if not str(k).startswith('#'):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!'):
                        for item in checklist:
                            if item in str(val):
                                val = str(val).replace(
                                    item, stepname + '-' + item)
                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        self.parset.add(k, str(val))
            for i, item in enumerate(subpipeline_steplist):
                subpipeline_steplist[i] = stepname + '-' + item
            for item in step_parset_obj[stepname].keys():
                for k in self.parset.keys:
                    if str(k).startswith('!') and item in k:
                        self.parset.remove(k)
                        self.parset.add('! ' + item,
                                        str(step_parset_obj[stepname][item]))
            self._replace_values()

            self._construct_steps(subpipeline_steplist, step_control_dict,
                                  step_parset_files, step_parset_obj,
                                  parset_dir)
            for j in reversed(subpipeline_steplist):
                name = j
                step_control_dict[name] = step_control_dict[j]
                step_name_list.insert(0, name)
            # remove replacement strings to prevent loading the same key twice
            for k in copy.deepcopy(self.parset.keys):
                if str(k).startswith('!'):
                    self.parset.remove(k)

        # loop
        if kind_of_step == 'loop':
            # remember which loop is running so a conditional step can stop it
            if activeloop[0] != stepname:
                activeloop.insert(0, stepname)
            # prepare
            counter = 0
            breakloop = False
            if stepname in resultdicts:
                counter = int(resultdicts[stepname]['counter']) + 1
                breakloop = resultdicts[stepname]['break']
            loopsteps = step.getStringVector('loopsteps')
            # break at max iteration or when another step sets the break variable
            if counter == step.getInt('loopcount'):
                breakloop = True
            if not breakloop:
                # add the loop steps to the pipeline, including the loop itself
                step_name_list.insert(0, stepname)
                self._construct_steps(loopsteps, step_control_dict,
                                      step_parset_files, step_parset_obj,
                                      parset_dir)
                for j in reversed(loopsteps):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)
                # results for other steps to check and write states
                resultdict = {'counter': counter, 'break': breakloop}
            else:
                # reset values for a second use of the loop
                # (but why would you do that?)
                resultdict = {'counter': -1, 'break': False}
                activeloop.pop(0)

        # recipes
        if kind_of_step == 'recipe':
            with duration(self, stepname):
                resultdict = self.run_task(typeval, inputargs, **inputdict)

        # plugins
        if kind_of_step == 'plugin':
            with duration(self, stepname):
                resultdict = loader.call_plugin(
                    typeval, pipeline_args.getString('pluginpath'),
                    inputargs, **inputdict)
        resultdicts[stepname] = resultdict

        # breaking the loop step:
        # if the step has the keyword for loop breaks, assign the value
        if resultdict is not None and 'break' in resultdict:
            if resultdict['break']:
                resultdicts[activeloop[0]]['break'] = resultdict['break']
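# -----------------------------------------------------------------------------
# Illustration only: a standalone sketch (step names invented) of the
# re-queueing that the 'loop' branch above performs. The loop step re-inserts
# itself plus its loopsteps at the front of the work list until the counter
# reaches loopcount (the 'break' reported by a loopstep is omitted here).
def _example_loop_scheduling_sketch():
    step_name_list = ['selfcal', 'imaging']   # 'selfcal' is the loop step
    loopsteps = ['calibrate', 'subtract']
    loopcount = 3
    resultdicts = {}
    while step_name_list:
        stepname = step_name_list.pop(0)
        if stepname == 'selfcal':
            counter = 0
            if stepname in resultdicts:
                counter = int(resultdicts[stepname]['counter']) + 1
            if counter == loopcount:
                resultdicts[stepname] = {'counter': -1, 'break': False}
                continue
            # schedule another iteration: loop steps first, then the loop itself
            step_name_list.insert(0, stepname)
            for j in reversed(loopsteps):
                step_name_list.insert(0, j)
            resultdicts[stepname] = {'counter': counter, 'break': False}
        else:
            print 'running', stepname
    # prints 'calibrate' and 'subtract' three times each, then 'imaging'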
def plugin_main(*args, **kwargs):
    # merge two parsets and write the result as a feedback file
    parset = Parset(kwargs['first_parset'])
    parset.adoptFile(kwargs['second_parset'])
    parset.writeFile(kwargs['result_parset'] + '_feedback_file')
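# -----------------------------------------------------------------------------
# Illustration only: a direct invocation of the plugin above, roughly what the
# main loop's loader.call_plugin() dispatch amounts to for a kind=plugin step.
# All file names here are invented.
#plugin_main(first_parset='/data/run1/parsets/stepA.parset',
#            second_parset='/data/run1/parsets/stepB.parset',
#            result_parset='/data/run1/parsets/combined')
# writes the merged parset to /data/run1/parsets/combined_feedback_file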
def go(self):
    if 'executable' in self.inputs:
        executable = self.inputs['executable']

    if 'environment' in self.inputs:
        self.environment.update(self.inputs['environment'])

    self.logger.info("Starting %s run" % executable)
    super(executable_args, self).go()

    # args format stuff
    args_format = {
        'args_format': self.inputs['args_format'],
        'args_format_argument': self.inputs['args_format_argument'],
        'args_format_option': self.inputs['args_format_option'],
        'args_formatlongoption': self.inputs['args_format_longoption'],
        'args_format_option_argument': self.inputs['args_format_option_argument']
    }

    # *************************************************************************
    # try loading the input/output data files; validate output against the
    # input location if output locations are provided
    try:
        inputmapfiles = []
        inlist = []
        if self.inputs['mapfile_in']:
            inlist.append(self.inputs['mapfile_in'])

        if self.inputs['mapfiles_in']:
            for item in self.inputs['mapfiles_in']:
                inlist.append(item)
            self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

        for item in inlist:
            inputmapfiles.append(DataMap.load(item))
    except Exception:
        self.logger.error('Could not load input Mapfile %s' % inlist)
        return 1

    outputmapfiles = []
    prefix = os.path.join(self.inputs['working_directory'],
                          self.inputs['job_name'])
    if self.inputs['mapfile_out']:
        try:
            outdata = DataMap.load(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)
        except Exception:
            self.logger.error('Could not load output Mapfile %s' %
                              self.inputs['mapfile_out'])
            return 1
        # sync skip fields in the mapfiles
        align_data_maps(inputmapfiles[0], outputmapfiles[0])
    elif self.inputs['mapfiles_out']:
        for item in self.inputs['mapfiles_out']:
            outputmapfiles.append(DataMap.load(item))
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]
    else:
        # output will be directed to the working directory if no output
        # mapfile is specified
        outdata = copy.deepcopy(inputmapfiles[0])
        if not self.inputs['inplace']:
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                    os.path.splitext(os.path.basename(item.file))[0] + '.' +
                    self.inputs['stepname']
                )
            self.inputs['mapfile_out'] = os.path.join(
                prefix, self.inputs['stepname'] + '.mapfile')
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        else:
            self.inputs['mapfile_out'] = self.inputs['mapfile_in']
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        outputmapfiles.append(outdata)

    if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
        self.logger.error("Validation of data mapfiles failed!")
        return 1

    if self.inputs['outputsuffixes']:
        # handle multiple output files
        for name in self.inputs['outputsuffixes']:
            outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
            self.inputs['mapfiles_out'].append(os.path.join(
                prefix, self.inputs['stepname'] + name + '.mapfile'))
            for item in outputmapfiles[-1]:
                item.file = os.path.join(
                    prefix,
                    os.path.splitext(os.path.basename(item.file))[0] + '.' +
                    self.inputs['stepname'] + name
                )
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    # prepare arguments
    arglist = self.inputs['arguments']
    parsetdict = {}
    if 'parset' in self.inputs:
        parset = Parset()
        parset.adoptFile(self.inputs['parset'])
        for k in parset.keys:
            parsetdict[k] = str(parset[k])

    # construct multiple input data
    if not self.inputs['inputkeys'] and self.inputs['inputkey']:
        self.inputs['inputkeys'].append(self.inputs['inputkey'])

    if not self.inputs['outputkeys'] and self.inputs['outputkey']:
        self.inputs['outputkeys'].append(self.inputs['outputkey'])

    if not self.inputs['skip_infile'] and \
       len(self.inputs['inputkeys']) != len(inputmapfiles):
        self.logger.error(
            "Number of input mapfiles %d and input keys %d have to match." %
            (len(inputmapfiles), len(self.inputs['inputkeys'])))
        return 1

    filedict = {}
    if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
        for key, filemap in zip(self.inputs['inputkeys'], inputmapfiles):
            filedict[key] = []
            for inp in filemap:
                filedict[key].append(inp.file)

    if self.inputs['outputkey']:
        filedict[self.inputs['outputkey']] = []
        for item in outputmapfiles[0]:
            filedict[self.inputs['outputkey']].append(item.file)

    # *************************************************************************
    # call the node side of the recipe: create and schedule the compute jobs
    command = "python %s" % (self.__file__.replace('master', 'nodes')
                             ).replace('executable_args',
                                       self.inputs['nodescript'])
    inputmapfiles[0].iterator = outputmapfiles[0].iterator = \
        DataMap.SkipIterator
    jobs = []
    for i, (outp, inp,) in enumerate(zip(outputmapfiles[0],
                                         inputmapfiles[0])):
        arglist_copy = copy.deepcopy(arglist)
        parsetdict_copy = copy.deepcopy(parsetdict)

        #if keylist:
        #    for name, value in zip(keylist, inputlist):
        if filedict:
            for name, value in filedict.iteritems():
                if arglist_copy and name in arglist_copy:
                    ind = arglist_copy.index(name)
                    arglist_copy[ind] = value[i]
                elif name in parsetdict_copy.values():
                    for k, v in parsetdict_copy.iteritems():
                        if v == name:
                            parsetdict_copy[k] = value[i]
                else:
                    parsetdict_copy[name] = value[i]

        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    executable,
                    arglist_copy,
                    parsetdict_copy,
                    prefix,
                    self.inputs['parsetasfile'],
                    args_format,
                    #self.inputs['working_directory'],
                    self.environment
                ]
            )
        )
    max_per_node = self.inputs['max_per_node']
    self._schedule_jobs(jobs, max_per_node)
    jobresultdict = {}
    resultmap = {}
    for job, outp in zip(jobs, outputmapfiles[0]):
        if job.results['returncode'] != 0:
            outp.skip = True
        for k, v in job.results.items():
            if k not in jobresultdict:
                jobresultdict[k] = []
            jobresultdict[k].append(
                DataProduct(job.host, job.results[k], outp.skip))
            if k == 'break':
                self.outputs.update({'break': v})

    # temporary solution: write all output dict entries to a mapfile
    mapfile_dir = os.path.join(self.config.get("layout", "job_directory"),
                               "mapfiles")
    for k, v in jobresultdict.items():
        dmap = DataMap(v)
        dmap.save(os.path.join(mapfile_dir, k + '.mapfile'))
        resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, k + '.mapfile')
    self.outputs.update(resultmap)

    # *************************************************************************
    # check job results and create the output data map file
    if self.error.isSet():
        # abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs")
    mapdict = {}
    for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
        self.logger.debug("Writing data map file: %s" % name)
        item.save(name)
        mapdict[os.path.basename(name)] = name
    self.outputs['mapfile'] = self.inputs['mapfile_out']
    if self.inputs['outputsuffixes']:
        self.outputs.update(mapdict)
    return 0
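# -----------------------------------------------------------------------------
# Illustration only: a standalone sketch (keys and files invented) of the
# per-job substitution in the scheduling loop above. For job i, a placeholder
# key found in the argument list or among the parset values is replaced with
# the i-th file of the corresponding data map; unknown keys are added to the
# parset dict directly.
def _example_filedict_substitution_sketch():
    filedict = {'inputkey': ['a0.MS', 'a1.MS'],
                'outputkey': ['a0.out', 'a1.out']}
    arglist = ['--in', 'inputkey']
    parsetdict = {'msout': 'outputkey'}
    for i in range(2):
        arglist_copy = list(arglist)
        parsetdict_copy = dict(parsetdict)
        for name, value in filedict.iteritems():
            if name in arglist_copy:
                arglist_copy[arglist_copy.index(name)] = value[i]
            elif name in parsetdict_copy.values():
                for k, v in parsetdict_copy.iteritems():
                    if v == name:
                        parsetdict_copy[k] = value[i]
            else:
                parsetdict_copy[name] = value[i]
        print i, arglist_copy, parsetdict_copy
    # 0 ['--in', 'a0.MS'] {'msout': 'a0.out'}
    # 1 ['--in', 'a1.MS'] {'msout': 'a1.out'}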