예제 #1
0
 def show_task(self, task):
     """
     Print the possible 'key = value' arguments of *task*, read from the
     parset file registered for it in the task definitions.
     """
     task_parset = Parset()
     # Only tasks that declare a 'parset' option have inspectable arguments.
     if self.task_definitions.has_option(task,'parset'):
         task_parset.adoptFile(self.task_definitions.get(task,'parset'))
         print 'possible arguments: key    =    value'
         for k in task_parset.keywords():
             print '                   ',k,'    ','=','    ',task_parset[k]
예제 #2
0
 def show_task(self, task):
     """
     Print the possible 'key = value' arguments of *task*, read from the
     parset file registered for it in the task definitions.
     """
     task_parset = Parset()
     # Only tasks that declare a 'parset' option have inspectable arguments.
     if self.task_definitions.has_option(task, 'parset'):
         task_parset.adoptFile(self.task_definitions.get(task, 'parset'))
         print 'possible arguments: key    =    value'
         # NOTE(review): '.keys' without a call -- on some parset flavours
         # this is an attribute rather than the dict method; confirm it is
         # iterable for this Parset type.
         for k in task_parset.keys:
             print '                   ', k, '    ', '=', '    ', task_parset[
                 k]
예제 #3
0
def _create_mapfile_from_parset(parset, identifier):
    """
    Build a DataMap from the DataProducts section of a parset file.

    Reads the '<identifier>.locations', '<identifier>.filenames' and
    '<identifier>.skip' vectors, joins location and filename into a
    'host:path' entry, and returns the resulting DataMap.
    """
    source = Parset()
    source.adoptFile(parset)
    products = source.makeSubset(source.fullModuleName('DataProducts') + '.')
    locations = products.getStringVector(identifier + '.locations')
    filenames = products.getStringVector(identifier + '.filenames')
    skip_flags = products.getBoolVector(identifier + '.skip')
    entries = []
    for location, filename, skip in zip(locations, filenames, skip_flags):
        # Each location is 'host:directory'; splitting on ':' yields the
        # (host, path) pair expected by DataMap.
        host_and_path = tuple(os.path.join(location, filename).split(':'))
        entries.append(host_and_path + (skip,))
    return DataMap(entries)
예제 #4
0
def _create_mapfile_from_parset(parset, identifier):
    """
    Build a DataMap from the DataProducts section of a parset file.

    Reads the '<identifier>.locations', '<identifier>.filenames' and
    '<identifier>.skip' vectors, joins location and filename into a
    'host:path' entry, and returns the resulting DataMap.
    """
    ps = Parset()
    ps.adoptFile(parset)
    dp_subset = ps.makeSubset(ps.fullModuleName('DataProducts') + '.')
    triples = zip(
        dp_subset.getStringVector(identifier + '.locations'),
        dp_subset.getStringVector(identifier + '.filenames'),
        dp_subset.getBoolVector(identifier + '.skip'))
    # Each location is 'host:directory'; splitting on ':' gives the
    # (host, path) pair expected by DataMap, with the skip flag appended.
    return DataMap([
        tuple(os.path.join(loc, fname).split(':')) + (skip,)
        for loc, fname, skip in triples
    ])
예제 #5
0
    def go(self):
        """
        Run the wrapped executable over every entry of the input data map(s).

        Loads and validates input/output mapfiles, builds per-entry argument
        lists and parset dictionaries from ``self.inputs``, schedules one
        ComputeJob per unskipped map entry, gathers the job results into
        result mapfiles, and saves the output data maps.

        Returns:
            0 on success, 1 on a fatal error (unreadable mapfiles, key/map
            count mismatch, or a failed job without error_tolerance).
        """
        if 'executable' in self.inputs:
            executable = self.inputs['executable']
        # NOTE(review): if 'executable' is absent from self.inputs the local
        # name stays unbound and the log call below raises NameError --
        # presumably the ingredient declaration guarantees it; confirm.

        if self.inputs['nthreads']:
            self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff
        # NOTE(review): key 'args_formatlongoption' lacks an underscore while
        # its source input is 'args_format_longoption' -- verify the node-side
        # recipe expects this exact spelling.
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        # *********************************************************************
        # try loading input/output data file, validate output vs the input location if
        #    output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # ouput will be directed in the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                # Derive output filenames from the input basenames plus the
                # step name, placed under the working directory.
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                # In-place processing: output map is simply the input map.
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        work_dir,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            # '.keys' is an attribute on this parset flavour, not the dict
            # method (see the keys vs keys() remark in GenericPipeline).
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if self.inputs['inputkey'] and not self.inputs['inputkey'] in self.inputs['inputkeys']:
            self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        # NOTE(review): 'is not' compares int identity, not equality; this
        # only behaves like '!=' for CPython's cached small integers.
        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) is not len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
                if not mapname in self.inputs['mapfiles_as_string']:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(inp.file)
                else:
                    if key != mapname:
                        # NOTE(review): appends the mapfile name once per map
                        # entry (not inp.file) -- presumably intentional for
                        # 'mapfiles_as_string'; confirm.
                        filedict[key] = []
                        for inp in filemap:
                            filedict[key].append(mapname)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        #command = "python %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
        recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
        pylist = os.getenv('PYTHONPATH').split(':')
        command = None
        # Last match wins: recipe_directories take precedence over PYTHONPATH.
        for pl in pylist:
            if os.path.isfile(os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')):
                command = "python %s" % os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')
        for pl in recipe_directories:
            if os.path.isfile(os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')):
                command = "python %s" % os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')

        # Iterate entries with skip=True filtered out on both maps.
        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            if filedict:
                # Substitute the per-entry file for every occurrence of the
                # input/output key in the argument list and parset dict.
                for name, value in filedict.iteritems():
                    replaced = False
                    if arglist_copy:
                        # NOTE(review): iterates 'arglist' while indexing into
                        # 'arglist_copy'; works because the copy starts out
                        # identical, but replacements are looked up by the
                        # original values.
                        for arg in arglist:
                            if name == arg:
                                ind = arglist_copy.index(arg)
                                arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                                replaced = True
                    if parsetdict_copy:
                        if name in parsetdict_copy.values():
                            for k, v in parsetdict_copy.iteritems():
                                if v == name:
                                    parsetdict_copy[k] = value[i]
                        else:
                            if not replaced:
                                parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        work_dir,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                # Mark the corresponding output entry as skipped so later
                # steps ignore it.
                outp.skip = True
                if not self.inputs['error_tolerance']:
                    self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                    return 1
            for k, v in job.results.items():
                if not k in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # temp solution. write all output dict entries to a mapfile
        #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        #check directory for stand alone mode
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir, )
            except OSError as exc:  # Python >2.5
                # Tolerate a concurrent mkdir of the same directory.
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in jobresultdict.items():
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
예제 #6
0
class GenericPipeline(control):
    """Pipeline driver that reads a parset describing steps and executes them."""

    # Recipe ingredient declarations; merged with those of the 'control'
    # base class by the framework.
    inputs = {
        'loglevel': ingredient.StringField(
            '--loglevel',
            help="loglevel",
            default='INFO',
            optional=True
        )
    }

    def __init__(self):
        """Initialise the base control structure and empty parset state."""
        control.__init__(self)
        # Master parset for the pipeline; filled later via adoptFile().
        self.parset = Parset()
        # Bookkeeping containers for pipeline data products.
        self.input_data = {}
        self.output_data = {}
        # Path of the feedback file derived from the parset; set when the
        # parset is adopted in pipeline_logic().
        self.parset_feedback_file = None
        # Job name; derived from the parset filename in go() when not given
        # on the command line.
        self.name = ''

    def usage(self):
        """
        Display usage
        """
        # Python 2 'print >>' syntax: writes the message to stderr.
        print >> sys.stderr, "Usage: %s [options] <parset-file>" % sys.argv[0]
        print >> sys.stderr, "Parset structure should look like:\n" \
                             "NYI"
        #return 1

    def go(self):
        """
        Read the parset-file given as first positional argument, derive the
        job name from it when none was supplied, make sure a logger exists,
        and delegate to the base-class ``go()``.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            # No parset given on the command line: show usage.
            # NOTE(review): execution continues afterwards with 'parset_file'
            # unbound, so the job-name fallback below would raise NameError --
            # presumably 'job_name' is always set in that case; confirm.
            self.usage()

        # Set job-name to basename of parset-file w/o extension, if it's not
        # set on the command-line with '-j' or '--job-name'
        if not 'job_name' in self.inputs:
            self.inputs['job_name'] = (
                os.path.splitext(os.path.basename(parset_file))[0])
            self.name = self.inputs['job_name']
        try:
            self.logger
        except AttributeError:
            # Was a bare 'except:', which would also swallow SystemExit and
            # KeyboardInterrupt; only a missing 'logger' attribute is the
            # expected condition here.
            self.logger = getSearchingLogger(self.name)
            self.logger.setLevel(self.inputs['loglevel'])
        # Call the base-class's `go()` method.
        return super(GenericPipeline, self).go()

#    def pipeline_logic(self):
#        print 'Dummy because of wrapping inside the framework'
#        if overwrite:
#            self.execute_pipeline()

    #def execute_pipeline(self):
    def pipeline_logic(self):
        """
        Parse the pipeline parset, expand the step list (including
        subpipelines and loops), and execute each step as a recipe or
        plugin, collecting the per-step results in ``resultdicts``.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            # '.keys' is an attribute on the master parset flavour (see the
            # keys vs keys() remark below); an empty list means the parset
            # has not been adopted yet.
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we dont need a subset but just a steplist
        # at the moment only a list with stepnames is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')
        pipeline_steps = self.parset.makeSubset(
            self.parset.fullModuleName('steps') + '.')
        # *********************************************************************
        # forward declaration of things. just for better overview and understanding whats in here.
        # some of this might be removed in upcoming iterations, or stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        # construct the step name list if there were pipeline.steps.<subset>
        for item in pipeline_steps.keys():
            if item in step_name_list:
                loc = step_name_list.index(item)
                step_name_list[loc:loc] = pipeline_steps.getStringVector(item)
                step_name_list.remove(item)

        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        # Stack of running loop-step names; activeloop[0] is the innermost.
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
        # initial parameters to be saved in resultsdict so that recipes have access to this step0
        # double init values. 'input' should be considered deprecated
        # self.name would be consistent to use in subpipelines
        input_dictionary = {
            'parset': parset_file,
            'parsetobj': self.parset,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir}

        resultdicts = {}
        for section in self.config.sections():
            tmp_dict = {}
            for entry in self.config.items(section):
                input_dictionary[entry[0]] = entry[1]
                tmp_dict[entry[0]] = entry[1]
            resultdicts.update({section: copy.deepcopy(tmp_dict)})

        resultdicts.update({'input': input_dictionary})
        resultdicts.update({self.name: input_dictionary})

        if 'pipeline.mapfile' in self.parset.keywords():
            resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            self.logger.info("Beginning step %s" % (stepname,))
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            # NOTE(review): the bare 'except' blocks here are deliberate
            # best-effort lookups, but they also hide unexpected errors.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            adds = None
            if stepname in step_parset_obj:
                adds = self._construct_step_parset(inputdict,
                                             step_parset_obj[stepname],
                                             resultdicts,
                                             step_parset_files[stepname],
                                             stepname)
            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                    inputdict['stepname'] = stepname
                    if adds:
                        inputdict.update(adds)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]


            self._construct_input(inputdict, step, resultdicts)
            # hack, popping 'type' is necessary, why? because you deleted kind already in parsets
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input.
            # python buildin functions cant handle the string returned from parset class.
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector('pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keywords():
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({os.path.splitext(os.path.basename(typeval))[0]: {
                    'parset': typeval,
                    'mapfile': submapfile,
                }})
                #todo: take care of pluginpathes and everything other then individual steps
                # make a pipeline parse methods that returns everything needed.
                # maybe as dicts to combine them to one

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keywords():
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in self._keys(subpipeline_parset):
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *********************************************************************
                # master parset did not handle formatting and comments in the parset.
                # proper format only after use of parset.makesubset. then it is a different object
                # from a different super class :(. this also explains use of parset.keys and parset.keys()
                # take the parset from subpipeline and add it to the master parset.
                # UPDATE: do not use .keys on master parset. use .keywords(), then comments are filtered.
                # *********************************************************************
                # replace names of steps with the subpipeline stepname to create a unique identifier.
                # replacement values starting with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline. only works if the ! value is already in the subpipeline
                for k in self._keys(subpipeline_parset):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!') and not str(k).startswith('pipeline.replace.'):
                        for item in checklist:
                            if item+".output" in str(val):
                                val = str(val).replace(item, stepname + '-' + item)

                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        # remove replacements strings to prevent loading the same key twice
                        if k in self._keys(self.parset):
                            self.parset.remove(k)
                        self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self._keys(self.parset):
                        if str(k).startswith('!') and item == str(k).strip("! ") or str(k).startswith('pipeline.replace.') and item == str(k)[17:].strip():
                            self.parset.remove(k)
                            self.parset.add('! ' + item, str(step_parset_obj[stepname][item]))
                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)


            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                # NOTE(review): 'is not' compares object identity; it works
                # because the very same string object is re-inserted below,
                # but '!=' would state the intent.
                if activeloop[0] is not stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                # NOTE(review): 'is' on ints relies on CPython small-int
                # caching; loop counts above 256 would never match -- should
                # be '=='.
                if counter is step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(
                        typeval,
                        inputargs,
                        **inputdict
                    )

            # plugins
            if kind_of_step == 'plugin':
                bla = str(self.config.get('DEFAULT', 'recipe_directories'))
                pluginpath = bla.rstrip(']').lstrip('[').split(',')
                for i, item in enumerate(pluginpath):
                    pluginpath[i] = os.path.join(item, 'plugins')
                if 'pluginpath' in pipeline_args.keys():
                    pluginpath.append(pipeline_args.getString('pluginpath'))
                with duration(self, stepname):
                    resultdict = loader.call_plugin(typeval, pluginpath,
                                                    inputargs,
                                                    **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loopstep
            # if the step has the keyword for loopbreaks assign the value
            if activeloop[0] in resultdicts and resultdict is not None and 'break' in resultdict:
                resultdicts[activeloop[0]]['break'] = resultdict['break']

    # *********************************************************************
    # build the inputs for the master recipes.
    def _construct_input(self, inoutdict, controlparset, resdicts):
        """
        Select the argument subset of *controlparset* and expand any
        step.output references into *inoutdict* via _replace_output_keyword.
        """
        opts_module = controlparset.fullModuleName('opts')
        if opts_module:
            # Backward compatibility: arguments used to live in an 'opts'
            # subparset.
            source = controlparset.makeSubset(opts_module + '.')
        elif 'loopcount' not in controlparset.keys():
            # Ordinary step: the control parset itself holds the arguments.
            source = controlparset
        else:
            # Loop step: hide the loop bookkeeping behind a (non-existent)
            # 'imaginary' subset, yielding an empty argument set.
            source = controlparset.makeSubset(controlparset.fullModuleName('imaginary') + '.')
        self._replace_output_keyword(inoutdict, source, source.keys(), resdicts)

    def _construct_cmdline(self, inoutargs, controlparset, resdicts):
        """
        Collect the 'cmdline' subset of *controlparset*, resolve step.output
        references, append the resulting values to *inoutargs*, and strip the
        cmdline keys from the control parset.
        """
        cmdline_parset = controlparset.makeSubset(
            controlparset.fullModuleName('cmdline') + '.')
        resolved = {}
        self._replace_output_keyword(resolved, cmdline_parset,
                                     cmdline_parset.keys(), resdicts)
        for key in resolved.keys():
            inoutargs.append(resolved[key])
        # Drop the cmdline entries so they are not treated as recipe inputs.
        for key in [k for k in controlparset.keys() if 'cmdline' in k]:
            controlparset.remove(key)

    def _construct_steps(self, step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir):
        """
        Resolve every step name into its control subparset and argument
        parset file.

        Mutates all four collection arguments in place: duplicate step names
        are disambiguated with a numeric suffix, each step's 'control' subset
        is stored in step_control_dict, and its 'argument' subset is written
        to <parset_dir>/<stepname>.parset and recorded in step_parset_files
        and step_parset_obj.
        """
        step_list_copy = (copy.deepcopy(step_name_list))
        counter = 0
        while step_list_copy:
            # Walk back-to-front; 'counter' goes -1, -2, ... so it addresses
            # the same position in the original step_name_list.
            counter -= 1
            stepname = step_list_copy.pop(-1)
            fullparset = self.parset.makeSubset(self.parset.fullModuleName(str(stepname)) + '.')
            subparset = fullparset.makeSubset(fullparset.fullModuleName('control') + '.')
            number = 0
            # Count remaining (earlier) occurrences of the same name so that
            # repeated steps get unique identifiers (name, name1, name2, ...).
            for item in step_list_copy:
                if item == stepname:
                    number += 1
            if number != 0:
                stepname += str(number)
            step_name_list[counter] = stepname
            step_control_dict[stepname] = subparset
            if fullparset.fullModuleName('argument'):
                stepparset = fullparset.makeSubset(fullparset.fullModuleName('argument') + '.')
                # *********************************************************************
                # save parsets
                # either a filename is given in the main parset
                # or files will be created from subsets with stepnames.parset as filenames
                # for name, parset in step_parset_dict.iteritems():
                # NOTE(review): the bare 'except' blocks below are deliberate
                # best-effort merges of optional parset sources, but they also
                # silence real errors.
                try:
                    file_parset = Parset(stepparset.getString('parset'))
                    for k in file_parset.keywords():
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    stepparset.remove('parset')
                except:
                    pass
                # parset from task.cfg
                try:
                    file_parset = Parset(self.task_definitions.get(str(subparset['type']), 'parset'))
                    for k in file_parset.keywords():
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                except:
                    pass
                # for parset in control section
                try:
                    file_parset = Parset(subparset.getString('parset'))
                    for k in file_parset.keywords():
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    subparset.remove('parset')
                except:
                    pass
                step_parset = os.path.join(parset_dir, stepname + '.parset')
                stepparset.writeFile(step_parset)
                step_parset_files[stepname] = step_parset
                step_parset_obj[stepname] = stepparset

    def _replace_output_keyword(self, inoutdict, argsparset, keyorder, resdicts):
        """Resolve '<step>.output.<key>' references in parset values.

        For every key in *keyorder*, substitutes references to earlier step
        results (looked up in *resdicts*) into the value and stores the
        resolved string in *inoutdict*. Returns a bookkeeping dict with the
        mapfiles used as input, the 'flags' value (as 'arguments') and an
        'outputkey' marker when a value mentions one.
        """
        extras = {'inputkeys': [], 'mapfiles_in': [], 'arguments': []}
        pattern = re.compile('([\w\+_-]+)\.output\.([\w\+._-]+)')
        for key in keyorder:
            value = argsparset.getString(key)
            for stepref, outname in pattern.findall(value):
                resolved = resdicts[stepref][outname]
                value = pattern.sub(str(resolved), value, 1)
                # a referenced mapfile becomes an extra input of this step,
                # unless the key itself is already a mapfile key
                if 'mapfile' in outname and 'mapfile' not in key:
                    extras['inputkeys'].append(resolved)
                    extras['mapfiles_in'].append(resolved)
            inoutdict[key] = value
            if key == 'flags':
                extras['arguments'] = value
            if 'outputkey' in value:
                extras['outputkey'] = 'outputkey'
        return extras

    def _construct_step_parset(self, inoutdict, argsparset, resdicts, filename, stepname):
        """Write the step's argument parset to *filename* with outputs resolved.

        Orders the argument keys as they appear in the master parset (so the
        written file matches the user's ordering), substitutes
        '<step>.output.<key>' references from *resdicts*, drops the special
        'flags' key, and returns the extra input/output bookkeeping dict from
        _replace_output_keyword. *inoutdict* is currently unused (see the
        commented-out update at the end).
        """
        tmp_keys = argsparset.keys()
        ordered_keys = []
        parsetdict = {}
        for orig in self._keys(self.parset):
            for item in tmp_keys:
                if (stepname + '.') in orig and ('argument.'+item in orig and not 'argument.'+item+'.' in orig):
                    ordered_keys.append(item)
                    # NOTE(review): this 'continue' only advances the inner
                    # loop (it is the last statement anyway); a duplicated
                    # master-parset line would append the same key twice —
                    # presumably harmless, verify.
                    continue
        # add keys from parset files that were not in the original list
        for item in argsparset.keys():
            if not item in ordered_keys:
                ordered_keys.append(item)
        additional = self._replace_output_keyword(parsetdict, argsparset, ordered_keys, resdicts)
        for k in argsparset.keys():
            argsparset.replace(k, parsetdict[k])
            if k == 'flags':
                argsparset.remove(k)
        argsparset.writeFile(filename)
        return additional
        #inoutdict.update(additional)

    def _keys(self, inparset):
        """Return the entries of ``inparset.keys`` that are real keywords.

        The master parset keeps comments in its ``.keys`` list attribute;
        ``keywords()`` yields only genuine keys. The result preserves the
        order (and multiplicity) of ``inparset.keys``.
        """
        filtered = []
        for candidate in inparset.keys:
            filtered.extend(kw for kw in inparset.keywords() if kw == candidate)
        return filtered

    def _get_parset_dicts(self):
        """Return the parset dictionaries; currently always an empty dict."""
        return dict()

    def show_tasks(self):
        """Print the name of every task defined in the task-definition file."""
        tasklist = []
        tasklist = self.task_definitions.sections()
        for item in tasklist:
            print item
        #return tasklist

    def show_task(self, task):
        """Print the possible key = value arguments of *task*.

        Only prints when the task definition has a 'parset' option pointing
        at a default-parset file; otherwise this is a silent no-op.
        """
        task_parset = Parset()
        if self.task_definitions.has_option(task,'parset'):
            task_parset.adoptFile(self.task_definitions.get(task,'parset'))
            print 'possible arguments: key    =    value'
            for k in task_parset.keywords():
                print '                   ',k,'    ','=','    ',task_parset[k]

    def _add_step(self):
        """Placeholder for programmatic step insertion; not implemented.

        Currently a no-op returning None; kept so the interface is stable.
        """
        # NOTE(review): the original body only created an unused local list
        # ('steplist = []'); removed as dead code.
        pass

    def _replace_values(self):
        """Apply '{{ name }}' template substitution across the master parset.

        Replacement names/values come from parset keys starting with '!' or
        with 'pipeline.replace.'; environment variables inside the values are
        expanded first. The master parset is modified in place.
        """
        replacedict = OrderedDict()
        for check in self._keys(self.parset):
            if str(check).startswith('!'):
                replacedict[str(check).lstrip('!').lstrip(' ')] = str(self.parset[check])
            if str(check).startswith('pipeline.replace.'):
                replacedict[str(check).replace('pipeline.replace.', '').lstrip(' ')] = str(self.parset[check])
        #expand environment variables
        for k, v in replacedict.items():
            replacedict[k] = os.path.expandvars(v)

        # iterate replacements in reverse insertion order so later
        # definitions take effect first — TODO confirm this is intentional
        for check in self._keys(self.parset):
            for k, v in reversed(replacedict.items()):
                if '{{ '+k+' }}' in str(self.parset[check]):
                    replacestring = str(self.parset[check]).replace('{{ '+k+' }}',v)
                    self.parset.replace(check,replacestring)
# ----- scraped-source separator: 예제 #7 (Example #7), score 0 -----
    def pipeline_logic(self):
        """Top-level pipeline driver: parse the parset and run all steps.

        Reads the parset file named on the command line, validates it,
        expands the step list, and executes each step in order. A step can
        be a recipe, a plugin, a loop, or a nested (sub)pipeline; the result
        dict of every finished step is kept in ``resultdicts`` so later steps
        can reference '<step>.output.<key>' values. Returns None on success
        or the result of usage() on a bad invocation.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            # NOTE: '.keys' is the raw list attribute of the master parset
            # (comments included), not the keys() method — see the comment
            # block in the subpipeline section below.
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we dont need a subset but just a steplist
        # at the moment only a list with stepnames is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')
        pipeline_steps = self.parset.makeSubset(
            self.parset.fullModuleName('steps') + '.')
        # *********************************************************************
        # forward declaration of things. just for better overview and understanding whats in here.
        # some of this might be removed in upcoming iterations, or stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        # construct the step name list if there were pipeline.steps.<subset>
        for item in pipeline_steps.keys():
            if item in step_name_list:
                loc = step_name_list.index(item)
                step_name_list[loc:loc] = pipeline_steps.getStringVector(item)
                step_name_list.remove(item)

        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
        # initial parameters to be saved in resultsdict so that recipes have access to this step0
        # double init values. 'input' should be considered deprecated
        # self.name would be consistent to use in subpipelines
        input_dictionary = {
            'parset': parset_file,
            'parsetobj': self.parset,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir}

        resultdicts = {}
        for section in self.config.sections():
            tmp_dict = {}
            for entry in self.config.items(section):
                input_dictionary[entry[0]] = entry[1]
                tmp_dict[entry[0]] = entry[1]
            resultdicts.update({section: copy.deepcopy(tmp_dict)})

        resultdicts.update({'input': input_dictionary})
        resultdicts.update({self.name: input_dictionary})

        if 'pipeline.mapfile' in self.parset.keywords():
            resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            self.logger.info("Beginning step %s" % (stepname,))
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            adds = None
            if stepname in step_parset_obj:
                adds = self._construct_step_parset(inputdict,
                                             step_parset_obj[stepname],
                                             resultdicts,
                                             step_parset_files[stepname],
                                             stepname)
            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                    inputdict['stepname'] = stepname
                    if adds:
                        inputdict.update(adds)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]


            self._construct_input(inputdict, step, resultdicts)
            # hack, popping 'type' is necessary, why? because you deleted kind already in parsets
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input.
            # python buildin functions cant handle the string returned from parset class.
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector('pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keywords():
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({os.path.splitext(os.path.basename(typeval))[0]: {
                    'parset': typeval,
                    'mapfile': submapfile,
                }})
                #todo: take care of pluginpathes and everything other then individual steps
                # make a pipeline parse methods that returns everything needed.
                # maybe as dicts to combine them to one

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keywords():
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in self._keys(subpipeline_parset):
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *********************************************************************
                # master parset did not handle formatting and comments in the parset.
                # proper format only after use of parset.makesubset. then it is a different object
                # from a different super class :(. this also explains use of parset.keys and parset.keys()
                # take the parset from subpipeline and add it to the master parset.
                # UPDATE: do not use .keys on master parset. use .keywords(), then comments are filtered.
                # *********************************************************************
                # replace names of steps with the subpipeline stepname to create a unique identifier.
                # replacement values starting with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline. only works if the ! value is already in the subpipeline
                for k in self._keys(subpipeline_parset):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!') and not str(k).startswith('pipeline.replace.'):
                        for item in checklist:
                            if item+".output" in str(val):
                                val = str(val).replace(item, stepname + '-' + item)

                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        # remove replacements strings to prevent loading the same key twice
                        if k in self._keys(self.parset):
                            self.parset.remove(k)
                        self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self._keys(self.parset):
                        if str(k).startswith('!') and item == str(k).strip("! ") or str(k).startswith('pipeline.replace.') and item == str(k)[17:].strip():
                            self.parset.remove(k)
                            self.parset.add('! ' + item, str(step_parset_obj[stepname][item]))
                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)


            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                # NOTE(review): 'is not' compares identity, not equality; for
                # strings this usually works via interning, but '!=' would be
                # the correct comparison here.
                if activeloop[0] is not stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                # NOTE(review): 'is' identity-compares two ints and only holds
                # within CPython's small-int cache (roughly -5..256); loop
                # counts above that range would never match — should be '=='.
                if counter is step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(
                        typeval,
                        inputargs,
                        **inputdict
                    )

            # plugins
            if kind_of_step == 'plugin':
                bla = str(self.config.get('DEFAULT', 'recipe_directories'))
                pluginpath = bla.rstrip(']').lstrip('[').split(',')
                for i, item in enumerate(pluginpath):
                    pluginpath[i] = os.path.join(item, 'plugins')
                if 'pluginpath' in pipeline_args.keys():
                    pluginpath.append(pipeline_args.getString('pluginpath'))
                with duration(self, stepname):
                    resultdict = loader.call_plugin(typeval, pluginpath,
                                                    inputargs,
                                                    **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loopstep
            # if the step has the keyword for loopbreaks assign the value
            if activeloop[0] in resultdicts and resultdict is not None and 'break' in resultdict:
                resultdicts[activeloop[0]]['break'] = resultdict['break']
# ----- scraped-source separator: 예제 #8 (Example #8), score 0 -----
    def go(self):
        """Run the configured executable over all entries of the input map.

        Loads the input/output data maps, derives per-file output names,
        locates the node script, schedules one ComputeJob per enabled map
        entry, and gathers the node results into mapfiles published via
        self.outputs. Returns 0 on success and 1 on a fatal error.
        """
        if 'executable' in self.inputs:
            executable = self.inputs['executable']
        # NOTE(review): 'executable' is used unconditionally below (logging,
        # job arguments); if the key is absent this raises NameError.
        # Presumably the recipe's ingredient declaration always supplies it —
        # TODO confirm.

        if self.inputs['nthreads']:
            self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff
        # NOTE(review): 'args_formatlongoption' lacks an underscore, unlike
        # the other keys — the node script presumably expects this exact
        # spelling; verify before renaming.
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        # *********************************************************************
        # try loading input/output data file, validate output vs the input location if
        #    output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # ouput will be directed in the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        work_dir,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            # NOTE: '.keys' is the raw list attribute of this parset class,
            # not the keys() method.
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if self.inputs['inputkey'] and not self.inputs['inputkey'] in self.inputs['inputkeys']:
            self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        # NOTE(review): 'is not' identity-compares two ints; it only behaves
        # like '!=' inside CPython's small-int cache — should be '!='.
        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) is not len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
                if not mapname in self.inputs['mapfiles_as_string']:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(inp.file)
                else:
                    if key != mapname:
                        filedict[key] = []
                        for inp in filemap:
                            filedict[key].append(mapname)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        #command = "python3 %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
        recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
        pylist = os.getenv('PYTHONPATH').split(':')
        command = None
        # search PYTHONPATH first, then the configured recipe directories;
        # the last match wins
        for pl in pylist:
            if os.path.isfile(os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')
        for pl in recipe_directories:
            if os.path.isfile(os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')

        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            # substitute per-file values into the per-job copies of the
            # argument list and parset dictionary
            if filedict:
                for name, value in filedict.items():
                    replaced = False
                    if arglist_copy:
                        for arg in arglist:
                            if name == arg:
                                ind = arglist_copy.index(arg)
                                arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                                replaced = True
                    if parsetdict_copy:
                        if name in list(parsetdict_copy.values()):
                            for k, v in parsetdict_copy.items():
                                if v == name:
                                    parsetdict_copy[k] = value[i]
                        else:
                            if not replaced:
                                parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        work_dir,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
                if not self.inputs['error_tolerance']:
                    self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                    return 1
            for k, v in list(job.results.items()):
                if not k in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # temp solution. write all output dict entries to a mapfile
        #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        #check directory for stand alone mode
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in list(jobresultdict.items()):
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
# ----- scraped-source separator: 예제 #9 (Example #9), score 0 -----
class GenericPipeline(control):
    """Generic, parset-driven pipeline runner (Python 2 codebase).

    Reads a pipeline parset (first positional argument), builds the step
    list from ``pipeline.steps`` and executes each step as a recipe,
    plugin, loop, or nested sub-pipeline.  Per-step results are kept in a
    ``resultdicts`` mapping so that later steps can reference earlier
    outputs via ``<step>.output.<key>`` expressions in their arguments.

    NOTE(review): two different Parset flavours are in play here — on
    some objects ``keys`` is an attribute, on subsets it is a method;
    see the comment block inside ``pipeline_logic`` for details.
    """

    # Extra command-line ingredients on top of those the `control`
    # base class already defines.
    inputs = {
        'loglevel':
        ingredient.StringField('--loglevel',
                               help="loglevel",
                               default='INFO',
                               optional=True)
    }

    def __init__(self):
        """Initialise empty state; the parset itself is loaded later in go()."""
        control.__init__(self)
        self.parset = Parset()
        self.input_data = {}
        self.output_data = {}
        self.parset_feedback_file = None
        #self.logger = None#logging.RootLogger('DEBUG')
        self.name = ''

        #if not overwrite:
        #    self.inputs['job_name'] = 'generic-pipeline'
        # if not self.inputs.has_key("start_time"):
        #     import datetime
        #     self.inputs["start_time"] = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
        # if not hasattr(self, "config"):
        #     self.config = self._read_config()
        # #self._read_config()
        # # ...and task files, if applicable
        # if not self.inputs.has_key("task_files"):
        #     try:
        #         self.inputs["task_files"] = utilities.string_to_list(
        #             self.config.get('DEFAULT', "task_files")
        #         )
        #     except NoOptionError:
        #         self.inputs["task_files"] = []
        # self.task_definitions = ConfigParser(self.config.defaults())
        # print >> sys.stderr, "Reading task definition file(s): %s" % \
        #                      ",".join(self.inputs["task_files"])
        # self.task_definitions.read(self.inputs["task_files"])
        #    self.go()

    def usage(self):
        """
        Display usage
        """
        print >> sys.stderr, "Usage: %s [options] <parset-file>" % sys.argv[0]
        print >> sys.stderr, "Parset structure should look like:\n" \
                             "NYI"
        #return 1

    def go(self):
        """Derive the job name from the parset path, then run the base go()."""
        #"""
        #Read the parset-file that was given as input argument, and set the
        #jobname before calling the base-class's `go()` method.
        #"""
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            #return self.usage()
            # NOTE(review): usage() is called but not returned, so control
            # falls through and `parset_file` is unbound below (NameError).
            # pipeline_logic() uses `return self.usage()` instead — confirm
            # whether the same was intended here.
            self.usage()

        # Set job-name to basename of parset-file w/o extension, if it's not
        # set on the command-line with '-j' or '--job-name'
        if not 'job_name' in self.inputs:
            self.inputs['job_name'] = (os.path.splitext(
                os.path.basename(parset_file))[0])
            self.name = self.inputs['job_name']
        try:
            self.logger
        except:
            # No logger was set up by the framework yet; create one named
            # after this job so log lines are attributable.
            self.logger = getSearchingLogger(self.name)
            self.logger.setLevel(self.inputs['loglevel'])
        # Call the base-class's `go()` method.
        return super(GenericPipeline, self).go()


#    def pipeline_logic(self):
#        print 'Dummy because of stupid wrapping inside the framework'
#        if overwrite:
#            self.execute_pipeline()

#def execute_pipeline(self):

    def pipeline_logic(self):
        """Load the parset, expand steps, and run the main step loop.

        Supported step kinds: 'recipe' (default), 'plugin', 'loop' and
        'pipeline' (a nested sub-pipeline whose parset is merged into
        this one with step names prefixed for uniqueness).
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            # `keys` is an attribute on this Parset flavour (see the long
            # comment further down); empty means not yet adopted.
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we dont need a subset but just a steplist
        # at the moment only a list with stepnames is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')

        # *********************************************************************
        # forward declaration of things. just for better overview and understanding whats in here.
        # some of this might be removed in upcoming iterations, or stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        # Stack of currently-running loop step names; index 0 is the
        # innermost active loop ('' is the sentinel for "no loop").
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict,
                              step_parset_files, step_parset_obj, parset_dir)
        # initial parameters to be saved in resultsdict so that recipes have access to this step0
        # double init values. 'input' should be considered deprecated
        # self.name would be consistent to use in subpipelines
        resultdicts = {
            'input': {
                'parset': parset_file,
                'parsetobj': self.parset,
                'job_dir': job_dir,
                'parset_dir': parset_dir,
                'mapfile_dir': mapfile_dir
            }
        }

        resultdicts.update({
            self.name: {
                'parset': parset_file,
                'parsetobj': self.parset,
                'job_dir': job_dir,
                'parset_dir': parset_dir,
                'mapfile_dir': mapfile_dir
            }
        })

        if 'pipeline.mapfile' in self.parset.keys:
            resultdicts['input']['mapfile'] = str(
                self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(
                self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            #self._construct_cmdline(inputargs, step, resultdicts)

            additional_input = {}

            if stepname in step_parset_obj:
                additional_input = self._construct_step_parset(
                    step_parset_obj[stepname], resultdicts,
                    step_parset_files[stepname], stepname)

            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval,
                                             'recipe') == 'executable_args':
                    inputdict = {'stepname': stepname}
                    inputdict.update(additional_input)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]

            self._construct_input(inputdict, step, resultdicts)

            # hack, popping 'type' is necessary, why? because you deleted kind already in parsets
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input.
            # python buildin functions cant handle the string returned from parset class.
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(
                    ' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector(
                    'pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keys:
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                # An explicit mapfile_in on the step overrides the one
                # declared inside the sub-pipeline's own parset.
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({
                    os.path.splitext(os.path.basename(typeval))[0]: {
                        'parset': typeval,
                        'mapfile': submapfile,
                    }
                })
                #todo: take care of pluginpathes and everything other then individual steps
                # make a pipeline parse methods that returns everything needed.
                # maybe as dicts to combine them to one

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keys:
                    subpipeline_parset.remove('pipeline.pluginpath')
                # checklist = every step name (incl. loop steps) of the
                # sub-pipeline; occurrences of these names inside values
                # get the unique "<stepname>-" prefix below.
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in subpipeline_parset.keys:
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *********************************************************************
                # master parset did not handle formatting and comments in the parset.
                # proper format only after use of parset.makesubset. then it is a different object
                # from a different super class :(. this also explains use of parset.keys and parset.keys()
                # take the parset from subpipeline and add it to the master parset.
                # *********************************************************************
                # replace names of steps with the subpipeline stepname to create a unique identifier.
                # replacement values starting with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline. only works if the ! value is already in the subpipeline
                for k in subpipeline_parset.keys:
                    if not str(k).startswith('#'):
                        val = subpipeline_parset[k]
                        if not str(k).startswith('!'):
                            for item in checklist:
                                if item in str(val):
                                    val = str(val).replace(
                                        item, stepname + '-' + item)

                            self.parset.add(stepname + '-' + k, str(val))
                        else:
                            self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                # Arguments given on the 'pipeline' step itself become
                # '!' replacement values for the sub-pipeline.
                for item in step_parset_obj[stepname].keys():
                    for k in self.parset.keys:
                        if str(k).startswith('!') and item in k:
                            self.parset.remove(k)
                            self.parset.add(
                                '! ' + item,
                                str(step_parset_obj[stepname][item]))

                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict,
                                      step_parset_files, step_parset_obj,
                                      parset_dir)
                # Prepend the sub-pipeline's steps so they run next,
                # preserving their original order.
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)

                # remove replacements strings to prevent loading the same key twice
                for k in copy.deepcopy(self.parset.keys):
                    if str(k).startswith('!'):
                        self.parset.remove(k)

            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                # NOTE(review): `is not` compares string identity, not
                # equality; this relies on CPython string interning.
                # `!=` is presumably what was meant — confirm.
                if activeloop[0] is not stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                # NOTE(review): `is` compares int identity and only works
                # for CPython's small-int cache; `==` is presumably
                # intended — confirm (breaks for loopcount > 256).
                if counter is step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict,
                                          step_parset_files, step_parset_obj,
                                          parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(typeval, inputargs, **inputdict)

            # plugins
            if kind_of_step == 'plugin':
                with duration(self, stepname):
                    resultdict = loader.call_plugin(
                        typeval, pipeline_args.getString('pluginpath'),
                        inputargs, **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loopstep
            # if the step has the keyword for loopbreaks assign the value
            if resultdict is not None and 'break' in resultdict:
                if resultdict['break']:
                    resultdicts[activeloop[0]]['break'] = resultdict['break']

    # *********************************************************************
    # build the inputs for the master recipes.
    def _construct_input(self, inoutdict, controlparset, resdicts):
        """Fill `inoutdict` with the step's arguments, resolving
        '<step>.output.<key>' references against `resdicts`."""
        # intermediate backward compatibility for opts subparset
        if controlparset.fullModuleName('opts'):
            argsparset = controlparset.makeSubset(
                controlparset.fullModuleName('opts') + '.')
        # hack
        elif 'loopcount' not in controlparset.keys():
            argsparset = controlparset
        else:
            argsparset = controlparset.makeSubset(
                controlparset.fullModuleName('imaginary') + '.')
        # \hack

        self._replace_output_keyword(inoutdict, argsparset, resdicts)

    def _construct_cmdline(self, inoutargs, controlparset, resdicts):
        """Append the step's 'cmdline.*' values (output references
        resolved) to the positional-argument list `inoutargs`."""
        argsparset = controlparset.makeSubset(
            controlparset.fullModuleName('cmdline') + '.')
        for k in argsparset.keys():
            if argsparset.getString(k).__contains__('.output.'):
                step, outvar = argsparset.getString(k).split('.output.')
                inoutargs.append(resdicts[step][outvar])
            else:
                inoutargs.append(argsparset.getString(k))
        try:
            controlparset.remove('cmdline.inmap')
        except:
            pass

    def _construct_steps(self, step_name_list, step_control_dict,
                         step_parset_files, step_parset_obj, parset_dir):
        """Resolve each step name into its control subset and (optionally)
        an argument parset written to `parset_dir`.

        Mutates all four dict/list arguments in place.  Duplicate step
        names get a numeric suffix so each occurrence stays unique.
        """
        step_list_copy = (copy.deepcopy(step_name_list))
        # Walk backwards so duplicates further down the list are
        # renamed before earlier occurrences; `counter` indexes
        # step_name_list from the end for in-place renaming.
        counter = 0
        while step_list_copy:
            counter -= 1
            stepname = step_list_copy.pop(-1)
            fullparset = self.parset.makeSubset(
                self.parset.fullModuleName(str(stepname)) + '.')
            subparset = fullparset.makeSubset(
                fullparset.fullModuleName('control') + '.')
            number = 0
            for item in step_list_copy:
                if item == stepname:
                    number += 1
            if number != 0:
                stepname += str(number)
            step_name_list[counter] = stepname
            step_control_dict[stepname] = subparset
            # double implementation for intermediate backward compatibility
            if fullparset.fullModuleName(
                    'parsetarg') or fullparset.fullModuleName('argument'):
                if fullparset.fullModuleName('parsetarg'):
                    stepparset = fullparset.makeSubset(
                        fullparset.fullModuleName('parsetarg') + '.')
                if fullparset.fullModuleName('argument'):
                    stepparset = fullparset.makeSubset(
                        fullparset.fullModuleName('argument') + '.')
                # *********************************************************************
                # save parsets
                # either a filename is given in the main parset
                # or files will be created from subsets with stepnames.parset as filenames
                # for name, parset in step_parset_dict.iteritems():
                try:
                    file_parset = Parset(stepparset.getString('parset'))
                    for k in file_parset.keys:
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    stepparset.remove('parset')
                except:
                    pass
                # parset from task.cfg
                try:
                    file_parset = Parset(
                        self.task_definitions.get(str(subparset['type']),
                                                  'parset'))
                    for k in file_parset.keys:
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                except:
                    pass
                # for parset in control section
                try:
                    file_parset = Parset(subparset.getString('parset'))
                    for k in file_parset.keys:
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    subparset.remove('parset')
                except:
                    pass
                step_parset = os.path.join(parset_dir, stepname + '.parset')
                stepparset.writeFile(step_parset)
                step_parset_files[stepname] = step_parset
                step_parset_obj[stepname] = stepparset

    def _replace_output_keyword(self, inoutdict, argsparset, resdicts):
        """Copy `argsparset` entries into `inoutdict`, replacing any
        '<step>.output.<key>' tokens (scalar, list, or '<key>+suffix')
        with the corresponding values from `resdicts`."""
        for k in argsparset.keys():
            keystring = argsparset.getString(k)
            if keystring.__contains__('.output.'):
                if keystring.__contains__(','):
                    # List-valued argument: resolve each element.
                    keystring = keystring.rstrip(']')
                    keystring = keystring.lstrip('[')
                    vec = []
                    for item in keystring.split(','):
                        if item.__contains__('.output.'):
                            step, outvar = item.split('.output.')
                            vec.append(resdicts[step][outvar])
                        else:
                            vec.append(item)
                    inoutdict[k] = vec
                else:
                    step, outvar = argsparset.getString(k).split('.output.')
                    if '+' in outvar:
                        # '<key>+suffix' concatenates a literal suffix to
                        # the referenced output value.
                        tmplist = str(outvar).split('+')
                        inoutdict[k] = resdicts[step][tmplist[0]] + tmplist[1]
                    else:
                        inoutdict[k] = resdicts[step][outvar]
            else:
                inoutdict[k] = argsparset.getString(k)

    def _construct_step_parset(self, argsparset, resdicts, filename, stepname):
        """Resolve output references inside a step's argument parset,
        write it to `filename`, and return extra recipe inputs
        (inputkeys / mapfiles_in / arguments / outputkey)."""
        addvals = {'inputkeys': [], 'mapfiles_in': [], 'arguments': []}
        # hack for original order of args
        tmp_keys = argsparset.keys()
        ordered_keys = []
        # Recover the argument order from the master parset, since the
        # subset object does not preserve it.
        for orig in self.parset.keys:
            for item in tmp_keys:
                if (stepname + '.') in orig and (
                        'argument.' + item in orig
                        and not 'argument.' + item + '.' in orig):
                    ordered_keys.append(item)
                    continue
        # \hack
        for k in ordered_keys:
            valuestring = argsparset.getString(k)
            if valuestring.__contains__('.output.'):
                if valuestring.__contains__(','):
                    valuestring = valuestring.rstrip(']')
                    valuestring = valuestring.lstrip('[')
                    vec = []
                    for item in valuestring.split(','):
                        if item.__contains__('.output.'):
                            step, outvar = item.split('.output.')
                            vec.append(resdicts[step][outvar])
                            if 'mapfile' in str(outvar):
                                addvals['inputkeys'].append(
                                    resdicts[step][outvar])
                                addvals['mapfiles_in'].append(
                                    resdicts[step][outvar])
                        else:
                            vec.append(item)
                    argsparset.replace(k, str(vec))
                    if k == 'flags':
                        addvals['arguments'] = vec
                        argsparset.remove(k)
                else:
                    step, outvar = argsparset.getString(k).split('.output.')
                    #more ugly hacks... really needs clearly structured replacement method...
                    if '+' in outvar:
                        tmplist = str(outvar).split('+')
                        argsparset.replace(
                            k,
                            str(resdicts[step][tmplist[0]]) + tmplist[1])
                    else:
                        argsparset.replace(k, str(resdicts[step][outvar]))
                    #if isinstance(resdicts[step][outvar], str):
                    if 'mapfile' in str(outvar):
                        addvals['inputkeys'].append(resdicts[step][outvar])
                        addvals['mapfiles_in'].append(resdicts[step][outvar])
                    if k == 'flags':
                        addvals['arguments'] = str(argsparset[k])
                        argsparset.remove(k)
            else:
                if k == 'flags':
                    addvals['arguments'] = str(argsparset[k])
                    argsparset.remove(k)

            #direct usage of outputkey
            if valuestring.__contains__('outputkey'):
                addvals['outputkey'] = 'outputkey'

        argsparset.writeFile(filename)
        return addvals

    def _get_parset_dicts(self):
        """Placeholder; currently returns an empty dict."""
        return {}

    def show_tasks(self):
        """Print every task (section) known from the task definition files."""
        tasklist = []
        tasklist = self.task_definitions.sections()
        for item in tasklist:
            print item
        #return tasklist

    def show_task(self, task):
        """Print the possible key=value arguments of `task`, read from
        the parset file referenced in its task definition."""
        task_parset = Parset()
        if self.task_definitions.has_option(task, 'parset'):
            task_parset.adoptFile(self.task_definitions.get(task, 'parset'))
            print 'possible arguments: key    =    value'
            for k in task_parset.keys:
                print '                   ', k, '    ', '=', '    ', task_parset[
                    k]

    def _add_step(self):
        # NOTE(review): unused stub — `steplist` is created and dropped.
        steplist = []

    def _replace_values(self):
        """Substitute '{{ key }}' templates in parset values.

        Replacement values come from an optional 'prepare' plugin module
        (its main() returns a dict) and from '!key' entries in the
        parset itself; '!' entries win because they are read second.
        """
        replacedict = {}
        try:
            import imp
            plugin = imp.load_source('main', str(self.parset['prepare']))
            replacedict = plugin.main()
        except:
            # No 'prepare' key or the module failed to load; best-effort.
            pass
        for check in self.parset.keys:
            if str(check).startswith('!'):
                replacedict[str(check).lstrip('!').lstrip(' ')] = str(
                    self.parset[check])
        #print 'REPLACEDICT: ',replacedict
        for check in self.parset.keys:
            if not str(check).startswith('#'):
                for k, v in replacedict.iteritems():
                    if '{{ ' + k + ' }}' in str(self.parset[check]):
                        replacestring = str(self.parset[check]).replace(
                            '{{ ' + k + ' }}', v)
                        self.parset.replace(check, replacestring)
예제 #10
0
    def pipeline_logic(self):
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we dont need a subset but just a steplist
        # at the moment only a list with stepnames is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')

        # *********************************************************************
        # forward declaration of things. just for better overview and understanding whats in here.
        # some of this might be removed in upcoming iterations, or stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict,
                              step_parset_files, step_parset_obj, parset_dir)
        # initial parameters to be saved in resultsdict so that recipes have access to this step0
        # double init values. 'input' should be considered deprecated
        # self.name would be consistent to use in subpipelines
        resultdicts = {
            'input': {
                'parset': parset_file,
                'parsetobj': self.parset,
                'job_dir': job_dir,
                'parset_dir': parset_dir,
                'mapfile_dir': mapfile_dir
            }
        }

        resultdicts.update({
            self.name: {
                'parset': parset_file,
                'parsetobj': self.parset,
                'job_dir': job_dir,
                'parset_dir': parset_dir,
                'mapfile_dir': mapfile_dir
            }
        })

        if 'pipeline.mapfile' in self.parset.keys:
            resultdicts['input']['mapfile'] = str(
                self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(
                self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            #self._construct_cmdline(inputargs, step, resultdicts)

            additional_input = {}

            if stepname in step_parset_obj:
                additional_input = self._construct_step_parset(
                    step_parset_obj[stepname], resultdicts,
                    step_parset_files[stepname], stepname)

            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval,
                                             'recipe') == 'executable_args':
                    inputdict = {'stepname': stepname}
                    inputdict.update(additional_input)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]

            self._construct_input(inputdict, step, resultdicts)

            # hack, popping 'type' is necessary, why? because you deleted kind already in parsets
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input.
            # python buildin functions cant handle the string returned from parset class.
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(
                    ' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector(
                    'pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keys:
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({
                    os.path.splitext(os.path.basename(typeval))[0]: {
                        'parset': typeval,
                        'mapfile': submapfile,
                    }
                })
                #todo: take care of pluginpathes and everything other then individual steps
                # make a pipeline parse methods that returns everything needed.
                # maybe as dicts to combine them to one

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keys:
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in subpipeline_parset.keys:
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *********************************************************************
                # master parset did not handle formatting and comments in the parset.
                # proper format only after use of parset.makesubset. then it is a different object
                # from a different super class :(. this also explains use of parset.keys and parset.keys()
                # take the parset from subpipeline and add it to the master parset.
                # *********************************************************************
                # replace names of steps with the subpipeline stepname to create a unique identifier.
                # replacement values starting with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline. only works if the ! value is already in the subpipeline
                for k in subpipeline_parset.keys:
                    if not str(k).startswith('#'):
                        val = subpipeline_parset[k]
                        if not str(k).startswith('!'):
                            for item in checklist:
                                if item in str(val):
                                    val = str(val).replace(
                                        item, stepname + '-' + item)

                            self.parset.add(stepname + '-' + k, str(val))
                        else:
                            self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self.parset.keys:
                        if str(k).startswith('!') and item in k:
                            self.parset.remove(k)
                            self.parset.add(
                                '! ' + item,
                                str(step_parset_obj[stepname][item]))

                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict,
                                      step_parset_files, step_parset_obj,
                                      parset_dir)
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)

                # remove replacements strings to prevent loading the same key twice
                for k in copy.deepcopy(self.parset.keys):
                    if str(k).startswith('!'):
                        self.parset.remove(k)

            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                if activeloop[0] is not stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                if counter is step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict,
                                          step_parset_files, step_parset_obj,
                                          parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(typeval, inputargs, **inputdict)

            # plugins
            if kind_of_step == 'plugin':
                with duration(self, stepname):
                    resultdict = loader.call_plugin(
                        typeval, pipeline_args.getString('pluginpath'),
                        inputargs, **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loopstep
            # if the step has the keyword for loopbreaks assign the value
            if resultdict is not None and 'break' in resultdict:
                if resultdict['break']:
                    resultdicts[activeloop[0]]['break'] = resultdict['break']
def plugin_main(*args, **kwargs):
    """
    Merge two parsets and write the result as a feedback file.

    Keyword arguments (all required):
        first_parset  -- path used to initialise the base Parset.
        second_parset -- path whose key/value pairs are adopted into it.
        result_parset -- output path prefix; '_feedback_file' is appended.
    """
    merged = Parset(kwargs['first_parset'])
    merged.adoptFile(kwargs['second_parset'])
    merged.writeFile(kwargs['result_parset'] + '_feedback_file')
예제 #12
0
    def go(self):
        """
        Master side of the executable_args recipe.

        Loads the input/output data maps, validates them against each
        other, builds a per-job argument list and parset dictionary,
        schedules one compute job per (input, output) file pair on the
        node hosts, and finally writes the resulting output mapfiles.

        Returns 0 on success, 1 on fatal error (unloadable mapfiles,
        mismatched input keys/mapfiles, or all jobs failing).
        """
        if 'executable' in self.inputs:
            executable = self.inputs['executable']

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        # NOTE(review): if 'executable' was not supplied, the line below
        # raises NameError — the input is effectively mandatory here.
        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # How the node script should format command-line arguments.
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}

        # *********************************************************************
        # try loading input/output data file, validate output vs the input location if
        #    output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        prefix = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be directed into the working directory if no
            # output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(prefix, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                # in-place: the outputs are the input files themselves
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles: one extra output map per suffix,
            # each derived from the first input map.
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(prefix, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        prefix,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data: promote the singular key inputs
        # to the plural lists if only the singular form was given
        if not self.inputs['inputkeys'] and self.inputs['inputkey']:
            self.inputs['inputkeys'].append(self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        # BUGFIX: this used "is not" (identity test on ints, which only
        # appears to work via CPython small-int caching), and the log call
        # passed its lengths outside the %-tuple, which raised TypeError
        # whenever the error path was actually taken.
        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        # filedict maps each input/output key to the ordered list of data
        # files it should be substituted with, one entry per job index.
        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap in zip(self.inputs['inputkeys'], inputmapfiles):
                filedict[key] = []
                for inp in filemap:
                    filedict[key].append(inp.file)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        command = "python %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            # Substitute the i-th data file for each key: either replace a
            # positional argument placeholder, replace a matching parset
            # value, or add a fresh parset entry.
            if filedict:
                for name, value in filedict.iteritems():
                    if arglist_copy and name in arglist_copy:
                        ind = arglist_copy.index(name)
                        arglist_copy[ind] = value[i]
                    elif name in parsetdict_copy.values():
                        for k, v in parsetdict_copy.iteritems():
                            if v == name:
                                parsetdict_copy[k] = value[i]
                    else:
                        parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        prefix,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ]
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)

        # Collect per-job results; a non-zero returncode marks that output
        # entry as skipped for downstream steps.
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
            for k, v in job.results.items():
                if k not in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # temp solution. write all output dict entries to a mapfile
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        for k, v in jobresultdict.items():
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, k + '.mapfile')
        self.outputs.update(resultmap)

        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort only if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0