예제 #1
0
    def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
        """
        Load a Python script as a module and call its ``main`` function.

        The script is run with ``work_dir`` as current working directory. The
        previous working directory is restored afterwards, also when the
        script raises.

        infile       -- input file name; used for logging and to name the
                        parset file
        executable   -- path of the Python script to load
        args         -- list of positional arguments for ``main``; modified in
                        place when ``parsetasfile`` is set
        kwargs       -- dict of keyword arguments for ``main``
        work_dir     -- directory to run in; created when it does not exist
        parsetasfile -- when true, ``kwargs`` is also written to
                        ``<work_dir>/<basename of infile>.parset`` and that
                        path becomes the first positional argument
        args_format  -- not used by this method
        environment  -- mapping merged into ``self.environment``

        Returns 1 when the script does not exist or raises an exception.
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Script %s not found" % executable)
                return 1

            # Several processes on one filesystem may try to create the
            # directory at the same time: losing that race is not an error.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:
                    if not (exc.errno == errno.EEXIST and os.path.isdir(work_dir)):
                        raise

            if parsetasfile:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                args.insert(0, parsetname)

            try:
                # ************************************************************
                # Run
                # Change to working directory for the script
                pipedir = os.getcwd()
                os.chdir(work_dir)
                outdict = {}
                try:
                    plugin = imp.load_source('main', executable)
                    outdict = plugin.main(*args, **kwargs)
                finally:
                    # Always go back, otherwise a failing script leaves the
                    # whole process inside work_dir.
                    os.chdir(pipedir)

            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1
예제 #2
0
class GenericPipeline(control):

    inputs = {
        'loglevel': ingredient.StringField(
            '--loglevel',
            help="loglevel",
            default='INFO',
            optional=True
        )
    }

    def __init__(self):
        """
        Create a pipeline with an empty parset, no data and no name.
        """
        control.__init__(self)

        # Identification; `go()` may fill in the name from the parset file.
        self.name = ''

        # Parset of the pipeline and the derived feedback file name, both
        # populated in `pipeline_logic()`.
        self.parset = Parset()
        self.parset_feedback_file = None

        self.input_data = {}
        self.output_data = {}

        # NOTE: no logger, job name, start time, configuration or task
        # definitions are set up here.

    def usage(self):
        """
        Display usage

        Writes a short usage message to standard error. Returns nothing.
        """
        # sys.stderr.write() instead of the Python 2-only `print >>` statement;
        # the text that ends up on stderr is the same.
        sys.stderr.write("Usage: %s [options] <parset-file>\n" % sys.argv[0])
        sys.stderr.write("Parset structure should look like:\n"
                         "NYI\n")

    def go(self):
        """
        Derive the job name from the parset file given as first argument, make
        sure a logger exists and call the base-class's `go()` method.

        When no parset file was given the usage message is shown and the
        base-class's `go()` is still called, without deriving a job name.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            self.usage()
            # Without this the job-name code below would die with a NameError.
            parset_file = None

        # Set job-name to basename of parset-file w/o extension, if it's not
        # set on the command-line with '-j' or '--job-name'
        # NOTE(review): self.name is only set in this branch, so it stays ''
        # when the job name comes from the command line -- confirm intended.
        if parset_file is not None and 'job_name' not in self.inputs:
            self.inputs['job_name'] = (
                os.path.splitext(os.path.basename(parset_file))[0])
            self.name = self.inputs['job_name']
        # Only create a logger when none has been set yet.
        try:
            self.logger
        except AttributeError:
            self.logger = getSearchingLogger(self.name)
            self.logger.setLevel(self.inputs['loglevel'])
        # Call the base-class's `go()` method.
        return super(GenericPipeline, self).go()

#    def pipeline_logic(self):
#        print 'Dummy because of wrapping inside the framework'
#        if overwrite:
#            self.execute_pipeline()

    #def execute_pipeline(self):
    def pipeline_logic(self):
        """
        Read the pipeline parset and run the steps it lists, in order.

        Every step has a ``<step>.control`` sub-parset; its ``kind`` (default
        ``recipe``) selects how the step is handled:

        * ``recipe``   -- run through ``self.run_task``
        * ``plugin``   -- run through ``loader.call_plugin``
        * ``pipeline`` -- add the keys of another parset file to this parset
          and queue its steps, prefixed with the name of this step
        * ``loop``     -- queue its ``loopsteps`` followed by the loop itself,
          until ``loopcount`` is reached or a step reports ``break``

        The result of each step is stored under the step name in a dictionary
        that is handed to the input construction of the following steps.

        Returns the result of ``self.usage()`` when no parset file was given
        or the file could not be read. Exits with status 1 when the parset
        does not validate.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            # `keys` is an attribute of the master parset, not a method
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            sys.stderr.write("Error: Parset file not found!\n")
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            sys.exit(1)
        if not validator.validate_steps():
            self.usage()
            sys.exit(1)

        # set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we dont need a subset but just a steplist
        # at the moment only a list with stepnames is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')
        pipeline_steps = self.parset.makeSubset(
            self.parset.fullModuleName('steps') + '.')
        # *********************************************************************
        # forward declaration of things. just for better overview and understanding whats in here.
        # some of this might be removed in upcoming iterations, or stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        # construct the step name list if there were pipeline.steps.<subset>
        for item in pipeline_steps.keys():
            if item in step_name_list:
                loc = step_name_list.index(item)
                step_name_list[loc:loc] = pipeline_steps.getStringVector(item)
                step_name_list.remove(item)

        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        # stack of running loops, innermost first; '' means "no loop"
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
        # initial parameters to be saved in resultsdict so that recipes have access to this step0
        # double init values. 'input' should be considered deprecated
        # self.name would be consistent to use in subpipelines
        input_dictionary = {
            'parset': parset_file,
            'parsetobj': self.parset,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir}

        resultdicts = {}
        for section in self.config.sections():
            tmp_dict = {}
            for entry in self.config.items(section):
                input_dictionary[entry[0]] = entry[1]
                tmp_dict[entry[0]] = entry[1]
            resultdicts.update({section: copy.deepcopy(tmp_dict)})

        # both names refer to the same dictionary object
        resultdicts.update({'input': input_dictionary})
        resultdicts.update({self.name: input_dictionary})

        if 'pipeline.mapfile' in self.parset.keywords():
            resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            self.logger.info("Beginning step %s" % (stepname,))
            step = step_control_dict[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except Exception:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except Exception:
                typeval = ''
            adds = None
            if stepname in step_parset_obj:
                adds = self._construct_step_parset(inputdict,
                                                   step_parset_obj[stepname],
                                                   resultdicts,
                                                   step_parset_files[stepname],
                                                   stepname)
            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                    inputdict['stepname'] = stepname
                    if adds:
                        inputdict.update(adds)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]

            self._construct_input(inputdict, step, resultdicts)
            # hack, popping 'type' is necessary, why? because you deleted kind already in parsets
            inputdict.pop('type', None)
            inputdict.pop('kind', None)
            # \hack
            # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input.
            # python buildin functions cant handle the string returned from parset class.
            if 'environment' in inputdict:
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(' ', '')
                valdict = {}
                for item in str(val).split(','):
                    if not item:
                        # empty dict ("{}") or stray comma
                        continue
                    # split on the first ':' only, the value may contain
                    # colons itself (e.g. a search path)
                    pair = item.split(':', 1)
                    valdict[pair[0]] = pair[1]
                inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector('pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keywords():
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict:
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({os.path.splitext(os.path.basename(typeval))[0]: {
                    'parset': typeval,
                    'mapfile': submapfile,
                }})
                # todo: take care of pluginpathes and everything other then individual steps
                # make a pipeline parse methods that returns everything needed.
                # maybe as dicts to combine them to one

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keywords():
                    subpipeline_parset.remove('pipeline.pluginpath')
                # names of all steps of the subpipeline, including loop steps
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in self._keys(subpipeline_parset):
                    if 'loopsteps' in k:
                        checklist.extend(subpipeline_parset.getStringVector(k))
                # *********************************************************************
                # master parset did not handle formatting and comments in the parset.
                # proper format only after use of parset.makesubset. then it is a different object
                # from a different super class :(. this also explains use of parset.keys and parset.keys()
                # take the parset from subpipeline and add it to the master parset.
                # UPDATE: do not use .keys on master parset. use .keywords(), then comments are filtered.
                # *********************************************************************
                # replace names of steps with the subpipeline stepname to create a unique identifier.
                # replacement values starting with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline. only works if the ! value is already in the subpipeline
                for k in self._keys(subpipeline_parset):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!') and not str(k).startswith('pipeline.replace.'):
                        for item in checklist:
                            if item+".output" in str(val):
                                val = str(val).replace(item, stepname + '-' + item)

                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        # remove replacements strings to prevent loading the same key twice
                        if k in self._keys(self.parset):
                            self.parset.remove(k)
                        self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self._keys(self.parset):
                        if str(k).startswith('!') and item == str(k).strip("! ") or str(k).startswith('pipeline.replace.') and item == str(k)[17:].strip():
                            self.parset.remove(k)
                            self.parset.add('! ' + item, str(step_parset_obj[stepname][item]))
                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                # run the steps of the subpipeline next, in their own order
                step_name_list[0:0] = subpipeline_steplist

            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                # (compare by value: identity of equal strings is not guaranteed)
                if activeloop[0] != stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                # (compare by value: `is` only works for small cached integers)
                if counter == step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                    step_name_list[0:0] = loopsteps
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(
                        typeval,
                        inputargs,
                        **inputdict
                    )

            # plugins
            if kind_of_step == 'plugin':
                recipe_dirs = str(self.config.get('DEFAULT', 'recipe_directories'))
                pluginpath = recipe_dirs.rstrip(']').lstrip('[').split(',')
                for i, item in enumerate(pluginpath):
                    pluginpath[i] = os.path.join(item, 'plugins')
                if 'pluginpath' in pipeline_args.keys():
                    pluginpath.append(pipeline_args.getString('pluginpath'))
                with duration(self, stepname):
                    resultdict = loader.call_plugin(typeval, pluginpath,
                                                    inputargs,
                                                    **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loopstep
            # if the step has the keyword for loopbreaks assign the value
            if activeloop[0] in resultdicts and resultdict is not None and 'break' in resultdict:
                resultdicts[activeloop[0]]['break'] = resultdict['break']

    # *********************************************************************
    # build the inputs for the master recipes.
    def _construct_input(self, inoutdict, controlparset, resdicts):
        # intermediate backward compatibility for opts subparset
        if controlparset.fullModuleName('opts'):
            argsparset = controlparset.makeSubset(controlparset.fullModuleName('opts') + '.')
        # hack
        elif 'loopcount' not in controlparset.keys():
            argsparset = controlparset
        else:
            argsparset = controlparset.makeSubset(controlparset.fullModuleName('imaginary') + '.')
        # \hack
        self._replace_output_keyword(inoutdict, argsparset, argsparset.keys(), resdicts)

    def _construct_cmdline(self, inoutargs, controlparset, resdicts):
        inoutdict = {}
        argsparset = controlparset.makeSubset(controlparset.fullModuleName('cmdline') + '.')
        self._replace_output_keyword(inoutdict, argsparset, argsparset.keys(), resdicts)
        for k in inoutdict.keys():
            inoutargs.append(inoutdict[k])
        for k in controlparset.keys():
            if 'cmdline' in k:
                controlparset.remove(k)

    def _construct_steps(self, step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir):
        """
        Collect the control and argument parsets of the given steps.

        The list is processed from the last step to the first. A step name
        that also occurs earlier in the list gets the number of earlier
        occurrences appended; `step_name_list` is updated in place with these
        names. For every step

        * `step_control_dict[name]` is set to the '<step>.control' subset;
        * when the step has an 'argument' subset, it is completed with keys
          from up to three parset files (keys that are already present win),
          written to '<parset_dir>/<name>.parset' and registered in
          `step_parset_files` (file name) and `step_parset_obj` (parset).
        """
        def merge_missing_keys(target, filename):
            # Add the keys of the parset file that `target` does not have yet.
            file_parset = Parset(filename)
            for k in file_parset.keywords():
                if k not in target.keys():
                    target.add(k, str(file_parset[k]))

        step_list_copy = list(step_name_list)
        counter = 0
        while step_list_copy:
            counter -= 1
            stepname = step_list_copy.pop(-1)
            # the subsets are looked up under the name as written in the parset
            fullparset = self.parset.makeSubset(self.parset.fullModuleName(str(stepname)) + '.')
            subparset = fullparset.makeSubset(fullparset.fullModuleName('control') + '.')
            # make the name unique when the step occurs earlier in the list
            number = step_list_copy.count(stepname)
            if number != 0:
                stepname += str(number)
            step_name_list[counter] = stepname
            step_control_dict[stepname] = subparset
            if fullparset.fullModuleName('argument'):
                stepparset = fullparset.makeSubset(fullparset.fullModuleName('argument') + '.')
                # *********************************************************************
                # save parsets
                # either a filename is given in the main parset
                # or files will be created from subsets with stepnames.parset as filenames
                #
                # Each of the three sources is optional: a missing key or an
                # unreadable file is skipped on purpose. Only Exception is
                # caught, so that Ctrl-C and sys.exit() are not swallowed.
                try:
                    merge_missing_keys(stepparset, stepparset.getString('parset'))
                    stepparset.remove('parset')
                except Exception:
                    pass
                # parset from task.cfg
                try:
                    merge_missing_keys(
                        stepparset,
                        self.task_definitions.get(str(subparset['type']), 'parset'))
                except Exception:
                    pass
                # for parset in control section
                try:
                    merge_missing_keys(stepparset, subparset.getString('parset'))
                    subparset.remove('parset')
                except Exception:
                    pass
                step_parset = os.path.join(parset_dir, stepname + '.parset')
                stepparset.writeFile(step_parset)
                step_parset_files[stepname] = step_parset
                step_parset_obj[stepname] = stepparset

    def _replace_output_keyword(self, inoutdict, argsparset, keyorder, resdicts):
        addvals = {'inputkeys': [], 'mapfiles_in': [], 'arguments': []}
        regobj = re.compile('([\w\+_-]+)\.output\.([\w\+._-]+)')
        for k in keyorder:
            keystring = argsparset.getString(k)
            hitlist = regobj.findall(keystring)
            if hitlist:
                for hit in hitlist:
                    keystring = regobj.sub(str(resdicts[hit[0]][hit[1]]), keystring, 1)
                    if 'mapfile' in hit[1] and not 'mapfile' in k:
                        addvals['inputkeys'].append(resdicts[hit[0]][hit[1]])
                        addvals['mapfiles_in'].append(resdicts[hit[0]][hit[1]])
                inoutdict[k] = keystring
            else:
                inoutdict[k] = argsparset.getString(k)
            if k == 'flags':
                addvals['arguments'] = keystring
            if 'outputkey' in keystring:
                addvals['outputkey'] = 'outputkey'
        return addvals

    def _construct_step_parset(self, inoutdict, argsparset, resdicts, filename, stepname):
        tmp_keys = argsparset.keys()
        ordered_keys = []
        parsetdict = {}
        for orig in self._keys(self.parset):
            for item in tmp_keys:
                if (stepname + '.') in orig and ('argument.'+item in orig and not 'argument.'+item+'.' in orig):
                    ordered_keys.append(item)
                    continue
        # add keys from parset files that were not in the original list
        for item in argsparset.keys():
            if not item in ordered_keys:
                ordered_keys.append(item)
        additional = self._replace_output_keyword(parsetdict, argsparset, ordered_keys, resdicts)
        for k in argsparset.keys():
            argsparset.replace(k, parsetdict[k])
            if k == 'flags':
                argsparset.remove(k)
        argsparset.writeFile(filename)
        return additional
        #inoutdict.update(additional)

    def _keys(self, inparset):
        outlist = []
        for k in inparset.keys:
            for l in inparset.keywords():
                if k == l:
                    outlist.append(l)
        return outlist

    def _get_parset_dicts(self):
        return {}

    def show_tasks(self):
        """
        Print the section names of the task definitions, one per line.
        """
        # print() with a single argument gives the same output with the
        # Python 2 print statement and the Python 3 print function.
        for item in self.task_definitions.sections():
            print(item)

    def show_task(self, task):
        """
        Print the keys and values of the parset file that belongs to a task.

        Nothing is printed when the task definition has no 'parset' option.
        """
        task_parset = Parset()
        if self.task_definitions.has_option(task, 'parset'):
            task_parset.adoptFile(self.task_definitions.get(task, 'parset'))
            print('possible arguments: key    =    value')
            for k in task_parset.keywords():
                # One pre-joined string: prints the same text as the Python 2
                # statement `print a, b, ...` and also works as a function call.
                print(' '.join(['                   ', str(k), '    ', '=',
                                '    ', str(task_parset[k])]))

    def _add_step(self):
        steplist = []

    def _replace_values(self):
        replacedict = OrderedDict()
        for check in self._keys(self.parset):
            if str(check).startswith('!'):
                replacedict[str(check).lstrip('!').lstrip(' ')] = str(self.parset[check])
            if str(check).startswith('pipeline.replace.'):
                replacedict[str(check).replace('pipeline.replace.', '').lstrip(' ')] = str(self.parset[check])
        #expand environment variables
        for k, v in replacedict.items():
            replacedict[k] = os.path.expandvars(v)

        for check in self._keys(self.parset):
            for k, v in reversed(replacedict.items()):
                if '{{ '+k+' }}' in str(self.parset[check]):
                    replacestring = str(self.parset[check]).replace('{{ '+k+' }}',v)
                    self.parset.replace(check,replacestring)
예제 #3
0
    def run(self, executable, initscript, infile, key, db_name, db_user,
            db_host):
        #                           executable: path to KernelControl executable
        #                           initscript:             path to lofarinit.sh
        #                               infile:    MeasurementSet for processing
        #       key, db_name, db_user, db_host:   database connection parameters
        # ----------------------------------------------------------------------
        with log_time(self.logger):
            if os.path.exists(infile):
                self.logger.info("Processing %s" % (infile))
            else:
                self.logger.error("Dataset %s does not exist" % (infile))
                return 1

            #        Build a configuration parset specifying database parameters
            #                                                     for the kernel
            # ------------------------------------------------------------------
            self.logger.debug("Setting up kernel parset")
            filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
            fd, parset_filename = mkstemp()
            kernel_parset = Parset()
            for key, value in {
                    "ObservationPart.Filesystem": filesystem,
                    "ObservationPart.Path": infile,
                    "BBDB.Key": key,
                    "BBDB.Name": db_name,
                    "BBDB.User": db_user,
                    "BBDB.Host": db_host,
                    "ParmLog": "",
                    "ParmLoglevel": "",
                    "ParmDB.Sky": infile + ".sky",
                    "ParmDB.Instrument": infile + ".instrument"
            }.iteritems():
                kernel_parset.add(key, value)
            kernel_parset.writeFile(parset_filename)
            os.close(fd)
            self.logger.debug("Parset written to %s" % (parset_filename, ))

            #                                                     Run the kernel
            #               Catch & log output from the kernel logger and stdout
            # ------------------------------------------------------------------
            working_dir = mkdtemp()
            env = read_initscript(self.logger, initscript)
            try:
                cmd = [executable, parset_filename, "0"]
                self.logger.debug("Executing BBS kernel")
                with CatchLog4CPlus(
                        working_dir,
                        self.logger.name + "." + os.path.basename(infile),
                        os.path.basename(executable),
                ):
                    bbs_kernel_process = Popen(cmd,
                                               stdout=PIPE,
                                               stderr=PIPE,
                                               cwd=working_dir)
                    sout, serr = bbs_kernel_process.communicate()
                log_process_output("BBS kernel", sout, serr, self.logger)
                if bbs_kernel_process.returncode != 0:
                    raise CalledProcessError(bbs_kernel_process.returncode,
                                             executable)
            except CalledProcessError, e:
                self.logger.error(str(e))
                return 1
            finally:
예제 #4
0
    def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=True, args_format='', environment=''):
        """
        Run ``executable`` with its ``msout`` redirected to a scratch directory.

        ``msout`` and ``local_scratch_dir`` are removed from ``kwargs``.  A
        fresh temporary directory is created below ``local_scratch_dir`` and
        ``msout=<scratch path>`` is appended to ``args``.  The remaining
        ``kwargs`` are either written to a parset file in ``work_dir``
        (``parsetasfile`` true) or turned into command line options.

        :param infile: input name; its basename names the parset and the logger
        :param executable: path of the program to run
        :param args: list of command line arguments (modified in place)
        :param kwargs: dict of options (modified in place); must contain
            ``msout`` and ``local_scratch_dir``
        :param work_dir: working directory, created when missing
        :param parsetasfile: write ``kwargs`` to a parset instead of options
        :param args_format: mapping whose ``'args_format'`` entry selects the
            option style (``'gnu'``, ``'lofar'``, ``'argparse'``, ``'wsclean'``);
            the ``''`` default cannot be indexed this way and raises TypeError
        :param environment: passed to ``self.environment.update()``
        :return: 1 on failure; no explicit return value otherwise
        """

        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        self.work_dir = work_dir
        self.infile = infile
        self.executable = executable

        self.msout_original = kwargs.pop('msout').rstrip('/')
        self.msout_destination_dir = os.path.dirname(self.msout_original)

        # Set up scratch paths
        scratch_dir = kwargs.pop('local_scratch_dir')
        try:
            os.mkdir(scratch_dir)
        except OSError:
            # Best effort: normally the directory already exists.  Any other
            # problem resurfaces in mkdtemp() right below.
            pass
        self.scratch_dir = tempfile.mkdtemp(dir=scratch_dir)
        self.logger.info('Using {} as scratch directory'.format(self.scratch_dir))
        self.msout_scratch = os.path.join(self.scratch_dir, os.path.basename(self.msout_original))
        args.append('msout=' + self.msout_scratch)

        # Time execution of this job
        with log_time(self.logger):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Executable %s not found" % executable)
                # The scratch directory already exists at this point; release
                # it like the other failure paths do.
                self.cleanup()
                return 1

            # Several processes may try to create work_dir at the same time:
            # tolerate "already exists", re-raise everything else.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:
                    if not (exc.errno == errno.EEXIST and os.path.isdir(work_dir)):
                        raise

            argsformat = args_format['args_format']
            if parsetasfile:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                args.insert(0, parsetname)
            elif argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            elif argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            elif argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            elif argsformat == 'wsclean':
                for k, v in kwargs.items():
                    # Prepend "-key item1 item2 ..." with the items in order.
                    args[0:0] = ['-' + k] + v.split(' ')

            try:
                # ************************************************************
                # Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                    work_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(
                        cmd, work_dir, self.environment, logger
                    )
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                self.cleanup()
                return 1
            except Exception as err:
                self.logger.error(str(err))
                self.cleanup()
                return 1
예제 #5
0
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=True,
            args_format='',
            environment=''):
        """
        Run ``executable`` with its ``msout`` redirected to a scratch directory.

        ``msout`` and ``local_scratch_dir`` are removed from ``kwargs``.  A
        fresh temporary directory is created below ``local_scratch_dir``
        (which must already exist) and ``msout=<scratch path>`` is appended
        to ``args``.  The remaining ``kwargs`` are either written to a parset
        file in ``work_dir`` (``parsetasfile`` true) or turned into command
        line options.

        :param infile: input name; its basename names the parset and the logger
        :param executable: path of the program to run
        :param args: list of command line arguments (modified in place)
        :param kwargs: dict of options (modified in place); must contain
            ``msout`` and ``local_scratch_dir``
        :param work_dir: working directory, created when missing
        :param parsetasfile: write ``kwargs`` to a parset instead of options
        :param args_format: mapping whose ``'args_format'`` entry selects the
            option style (``'gnu'``, ``'lofar'``, ``'argparse'``, ``'wsclean'``);
            the ``''`` default cannot be indexed this way and raises TypeError
        :param environment: passed to ``self.environment.update()``
        :return: 1 on failure; no explicit return value otherwise
        """

        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        self.work_dir = work_dir
        self.infile = infile
        self.executable = executable

        self.msout_original = kwargs.pop('msout').rstrip('/')
        self.msout_destination_dir = os.path.dirname(self.msout_original)
        self.scratch_dir = tempfile.mkdtemp(
            dir=kwargs.pop('local_scratch_dir'))
        self.logger.info('Using {} as scratch directory'.format(
            self.scratch_dir))

        # Set up scratch paths
        self.msout_scratch = os.path.join(
            self.scratch_dir, os.path.basename(self.msout_original))
        args.append('msout=' + self.msout_scratch)

        # Time execution of this job
        with log_time(self.logger):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Executable %s not found" % executable)
                # The scratch directory already exists at this point; release
                # it like the other failure paths do.
                self.cleanup()
                return 1

            # Several processes may try to create work_dir at the same time:
            # tolerate "already exists", re-raise everything else.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:
                    if not (exc.errno == errno.EEXIST
                            and os.path.isdir(work_dir)):
                        raise

            argsformat = args_format['args_format']
            if parsetasfile:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir,
                                          os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                args.insert(0, parsetname)
            elif argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            elif argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            elif argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            elif argsformat == 'wsclean':
                for k, v in kwargs.items():
                    # Prepend "-key item1 item2 ..." with the items in order.
                    args[0:0] = ['-' + k] + v.split(' ')

            try:
                # ************************************************************
                # Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                        work_dir,
                        self.logger.name + "." + os.path.basename(infile),
                        os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(cmd, work_dir, self.environment, logger)
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                self.cleanup()
                return 1
            except Exception as err:
                self.logger.error(str(err))
                self.cleanup()
                return 1
예제 #6
0
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=False,
            args_format='',
            environment=''):
        """
        Run ``executable`` with ``args`` plus options derived from ``kwargs``.

        With ``parsetasfile`` true, ``kwargs`` is written to
        ``<work_dir>/<basename(infile)>.parset`` and the parset path is added
        to ``args`` (appended for the ``'losoto'`` format, put first
        otherwise).  Otherwise ``kwargs`` is converted to command line
        options in the style named by ``args_format['args_format']``.  For
        the ``'wsclean'`` style, bracketed comma separated lists in ``args``
        and in option values are expanded into separate arguments.

        :param infile: input name; its basename names the parset and the logger
        :param executable: path of the program to run; must be executable
        :param args: list of command line arguments (modified in place)
        :param kwargs: dict of options
        :param work_dir: working directory, created when missing
        :param parsetasfile: write ``kwargs`` to a parset instead of options
        :param args_format: mapping whose ``'args_format'`` entry selects the
            option style; the ``''`` default cannot be indexed this way and
            raises TypeError
        :param environment: passed to ``self.environment.update()``
        :return: 1 on failure; no explicit return value otherwise
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            self.logger.info("Processing %s" % infile)

            # Check if executable is present
            if not os.access(executable, os.X_OK):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Several processes may try to create work_dir at the same time:
            # tolerate "already exists", re-raise everything else.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:
                    if not (exc.errno == errno.EEXIST
                            and os.path.isdir(work_dir)):
                        raise

            argsformat = args_format['args_format']
            # deal with multiple input files for wsclean
            if argsformat == 'wsclean':
                # Walk backwards so that expanding an entry does not shift the
                # indices that still have to be visited.
                for i in reversed(range(len(args))):
                    text = str(args[i])
                    if text.startswith('[') and text.endswith(']'):
                        names = text.lstrip('[').rstrip(']').split(',')
                        args[i:i + 1] = [
                            name.strip(' \'\"') for name in names
                        ]

            if parsetasfile:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir,
                                          os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                if argsformat == 'losoto':
                    args.append(parsetname)
                else:
                    args.insert(0, parsetname)
            elif argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            elif argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            elif argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            elif argsformat == 'wsclean':
                for k, v in kwargs.items():
                    text = str(v)
                    if text.startswith('[') and text.endswith(']'):
                        text = text.lstrip('[').rstrip(']').replace(' ', '')
                        multargs = text.split(',')
                    else:
                        multargs = text.split(' ')
                    # Prepend "-key item1 item2 ..." with the items in order.
                    args[0:0] = ['-' + k] + multargs

            try:
                # ************************************************************
                # Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                        work_dir,
                        self.logger.name + "." + os.path.basename(infile),
                        os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(cmd, work_dir, self.environment, logger)
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1
예제 #7
0
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=False,
            args_format='',
            environment=''):
        """
        Load ``executable`` as a Python module and call its ``main``.

        The script is loaded with ``imp.load_source`` and
        ``main(*args, **kwargs)`` is called with ``work_dir`` as the current
        directory.  The previous working directory is restored afterwards,
        also when the script raises.  With ``parsetasfile`` true, ``kwargs``
        is additionally written to ``<work_dir>/<basename(infile)>.parset``
        and that path becomes the first element of ``args``.

        :param infile: input name; its basename names the parset file
        :param executable: path of the Python script to load
        :param args: list of positional arguments for ``main`` (modified in
            place when a parset is written)
        :param kwargs: dict of keyword arguments for ``main``
        :param work_dir: working directory, created when missing
        :param parsetasfile: also write ``kwargs`` to a parset file
        :param args_format: not used by this method
        :param environment: passed to ``self.environment.update()``
        :return: 1 on failure; no explicit return value otherwise
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Script %s not found" % executable)
                return 1

            # Several processes may try to create work_dir at the same time:
            # tolerate "already exists", re-raise everything else.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:
                    if not (exc.errno == errno.EEXIST
                            and os.path.isdir(work_dir)):
                        raise

            if parsetasfile:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir,
                                          os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                args.insert(0, parsetname)

            try:
                # ************************************************************
                # Run
                # Change to working directory for the script
                pipedir = os.getcwd()
                os.chdir(work_dir)
                try:
                    plugin = imp.load_source('main', executable)
                    outdict = plugin.main(*args, **kwargs)
                finally:
                    # Always go back, otherwise a failing script leaves the
                    # process in work_dir.
                    os.chdir(pipedir)

            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1
예제 #8
0
파일: bbs.py 프로젝트: jjdmol/LOFAR
    def run(
        self, executable, initscript, infile, key, db_name, db_user, db_host
    ):
        """
        Run the BBS kernel executable on one MeasurementSet.

        Writes a kernel parset with the dataset location and the database
        connection parameters to a temporary file, then starts
        ``executable <parset> 0`` in a fresh temporary working directory and
        logs its stdout/stderr.

        Returns 1 when ``infile`` does not exist or the kernel exits with a
        non-zero status.  The body of the trailing ``finally`` clause is not
        part of this excerpt.
        """
        #                           executable: path to KernelControl executable
        #                           initscript:             path to lofarinit.sh
        #                               infile:    MeasurementSet for processing
        #       key, db_name, db_user, db_host:   database connection parameters
        # ----------------------------------------------------------------------
        with log_time(self.logger):
            if os.path.exists(infile):
                self.logger.info("Processing %s" % (infile))
            else:
                self.logger.error("Dataset %s does not exist" % (infile))
                return 1

            #        Build a configuration parset specifying database parameters
            #                                                     for the kernel
            # ------------------------------------------------------------------
            self.logger.debug("Setting up kernel parset")
            filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
            fd, parset_filename = mkstemp()
            kernel_parset = Parset()
            # NOTE: the loop variable `key` rebinds the `key` parameter.  The
            # dict literal is built before the first iteration, so "BBDB.Key"
            # still receives the caller's value; after the loop `key` holds
            # the last parset key instead.
            for key, value in {
                "ObservationPart.Filesystem": filesystem,
                "ObservationPart.Path": infile,
                "BBDB.Key": key,
                "BBDB.Name": db_name,
                "BBDB.User": db_user,
                "BBDB.Host": db_host,
                "ParmLog": "",
                "ParmLoglevel": "",
                "ParmDB.Sky": infile + ".sky",
                "ParmDB.Instrument": infile + ".instrument"
            }.iteritems():
                kernel_parset.add(key, value)
            kernel_parset.writeFile(parset_filename)
            # Only the descriptor returned by mkstemp() is closed here; the
            # parset itself was written through its file name.
            os.close(fd)
            self.logger.debug("Parset written to %s" % (parset_filename,))


            #                                                     Run the kernel
            #               Catch & log output from the kernel logger and stdout
            # ------------------------------------------------------------------
            working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
            # NOTE(review): `env` is not passed to Popen() in the visible
            # code - confirm whether the kernel should run with it.
            env = read_initscript(self.logger, initscript)
            try:
                cmd = [executable, parset_filename, "0"]
                self.logger.debug("Executing BBS kernel")
                with CatchLog4CPlus(
                    working_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ):
                    bbs_kernel_process = Popen(
                        cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir
                    )
                    sout, serr = bbs_kernel_process.communicate()
                log_process_output("BBS kernel", sout, serr, self.logger)
                # Turn a non-zero exit status into the exception handled below
                if bbs_kernel_process.returncode != 0:
                    raise CalledProcessError(
                        bbs_kernel_process.returncode, executable
                    )
            except CalledProcessError, e:
                self.logger.error(str(e))
                return 1
            finally:
예제 #9
0
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=False,
            args_format='',
            environment=''):
        """
        Run CASA tasks through a generated command file and a shell wrapper.

        ``kwargs`` keys of the form ``<task>.<parameter>`` are grouped per
        task and rendered as one ``task(parameter=value,...)`` line each in
        ``<work_dir>/<basename(reffile)>.casacommand.py``; that file is
        appended to ``args``.  A bash script
        ``<work_dir>/<basename(reffile)>.casashell.sh`` calling ``executable``
        with ``args`` (output redirected to ``casa.log``) is written, made
        executable and run from a fresh temporary directory below
        ``work_dir``.

        Values are rendered as follows: text starting with ``/`` and plain
        text is quoted; a ``casastr/`` prefix is removed and the rest quoted;
        ``true``/``false`` (any case), numbers and lists of numbers are
        passed unquoted; other bracketed lists become lists of quoted
        strings.

        :param infile: dataset path, or a string holding a bracketed, comma
            separated list of paths, of which the first is the reference file
        :param executable: path of the CASA executable; must be executable
        :param args: list of command line arguments (modified in place)
        :param kwargs: dict of ``<task>.<parameter>`` options
        :param work_dir: working directory, created when missing
        :param parsetasfile: must be true, otherwise 1 is returned
        :param args_format: not used by this method
        :param environment: passed to ``self.environment.update()``
        :return: 1 on failure; no explicit return value otherwise
        """

        def is_number(text):
            # True when the text parses as a float.
            try:
                float(text)
            except ValueError:
                return False
            return True

        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            if infile.startswith('['):
                infiles = [ms.strip(" []\'\"") for ms in infile.split(',')]
                reffile = infiles[0]
            else:
                reffile = infile

            if os.path.exists(reffile):
                self.logger.info("Processing %s" % reffile)
            else:
                self.logger.error("Dataset %s does not exist" % reffile)
                return 1

            # Check if executable is present
            if not os.access(executable, os.X_OK):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Several processes may try to create work_dir at the same time:
            # tolerate "already exists", re-raise everything else.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:
                    if not (exc.errno == errno.EEXIST
                            and os.path.isdir(work_dir)):
                        raise

            if not parsetasfile:
                self.logger.error(
                    "Nodescript \"executable_casa.py\" requires \"parsetasfile\" to be True!"
                )
                return 1

            nodeparset = Parset()
            sublist = []
            for k, v in kwargs.items():
                nodeparset.add(k, v)
                # Only "<task>.<parameter>" keys name a task.  str.find()
                # returns -1 (which is truthy) when there is no dot, so the
                # position has to be compared explicitly.
                if str(k).find('.') > 0:
                    task = str(k).split('.')[0]
                    if task not in sublist:
                        sublist.append(task)

            # Build one "task(key=value,...)" line per task.
            commands = []
            for sub in sublist:
                subpar = nodeparset.makeSubset(
                    nodeparset.fullModuleName(sub) + '.')
                assignments = []
                for k in subpar.keys():
                    value = str(subpar[k])
                    if value.startswith('/'):
                        rendered = "'" + value + "'"
                    elif value.startswith('casastr/'):
                        # Drop the prefix only; str.strip('casastr/') would
                        # also eat matching characters at the end.
                        rendered = "'" + value[len('casastr/'):] + "'"
                    elif value.lower() in ('false', 'true'):
                        rendered = value
                    else:
                        self.logger.info('value: {}'.format(subpar[k]))
                        if is_number(value):
                            rendered = value
                        elif '[' in value or '(' in value:
                            # Check if list of int/float or strings
                            list_vals = [
                                f.strip()
                                for f in value.strip('[]()').split(',')
                            ]
                            if all(is_number(val) for val in list_vals):
                                rendered = value
                            else:
                                rendered = '[{}]'.format(','.join(
                                    "'" + val + "'" for val in list_vals))
                        else:
                            # Simple string
                            rendered = "'" + value + "'"
                    assignments.append(str(k) + '=' + rendered)
                commands.append(sub + '(' + ','.join(assignments) + ')\n')
            casastring = ''.join(commands)

            # 1) return code of a casapy is not properly recognized by the pipeline
            # wrapping in shellscript works for succesful runs.
            # failed runs seem to hang the pipeline...
            # 2) casapy can not have two instances running from the same directory.
            # create tmp dirs
            casapydir = tempfile.mkdtemp(dir=work_dir)
            if casastring != '':
                casafilename = os.path.join(
                    work_dir,
                    os.path.basename(reffile) + '.casacommand.py')
                with open(casafilename, 'w') as casacommandfile:
                    casacommandfile.write(casastring)
                args.append(casafilename)

            shellscript = os.path.join(
                work_dir,
                os.path.basename(reffile) + '.casashell.sh')
            commandstring = executable
            for item in args:
                item = str(item)
                if ' ' in item or '[' in item:
                    commandstring += ' "' + item + '"'
                else:
                    commandstring += ' ' + item

            with open(shellscript, 'w') as script:
                script.write('#!/bin/bash \n')
                script.write('echo "Trying CASAPY command" \n')
                script.write(commandstring + ' >& casa.log\n')

            # file permissions
            st = os.stat(shellscript)
            os.chmod(
                shellscript,
                st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

            try:
                # ************************************************************
                # Run
                cmd = [shellscript]
                with CatchLog4CPlus(
                        casapydir,
                        self.logger.name + "." + os.path.basename(reffile),
                        os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(cmd, casapydir, self.environment, logger)
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1
예제 #10
0
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=True,
            args_format='',
            environment=''):
        """
        Prepare the source and parameter databases, then run ``executable``.

        Options that steer the database set-up are taken out of ``kwargs``
        and stored on ``self``.  After ``createsourcedb()``,
        ``createparmdb()`` and, unless ``no-columns`` is given,
        ``addcolumns()``, the command line is completed with
        ``--numthreads``, ``--sourcedb``, ``--parmdb`` and the observation.
        What is left in ``kwargs`` is written to a parset in ``work_dir``
        when ``parsetasfile`` is true.

        Returns 0 and sets ``self.outputs['ok']`` on success, 1 on failure.
        """

        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        self.work_dir = work_dir
        self.infile = infile
        self.executable = executable

        # Options that are copied verbatim onto an attribute.
        for option, attribute in (('replace-sourcedb', 'replace_sourcedb'),
                                  ('replace-parmdb', 'replace_parmdb'),
                                  ('dry-run', 'dry_run'),
                                  ('sourcedb', 'sourcedb'),
                                  ('parmdb', 'parmdb')):
            if option in kwargs:
                setattr(self, attribute, kwargs.pop(option))

        # Naming a database explicitly, or forcing, implies replacing it.
        if 'sourcedb-name' in kwargs:
            self.sourcedb_basename = kwargs.pop('sourcedb-name')
            self.replace_sourcedb = True
        if 'parmdb-name' in kwargs:
            self.parmdb_basename = kwargs.pop('parmdb-name')
            self.replace_parmdb = True
        if 'force' in kwargs:
            kwargs.pop('force')
            self.replace_parmdb = True
            self.replace_sourcedb = True

        args.append('--numthreads=' + str(kwargs.pop('numthreads', 1)))

        for option in ('observation', 'catalog'):
            if option in kwargs:
                setattr(self, option, kwargs.pop(option))

        self.createsourcedb()
        self.createparmdb()
        if 'no-columns' in kwargs:
            kwargs.pop('no-columns')
        else:
            self.addcolumns()

        args.append('--sourcedb=' + self.sourcedb_path)
        args.append('--parmdb=' + self.parmdb_path)

        args.append(self.observation)

        # Time execution of this job
        with log_time(self.logger):
            self.logger.info("Processing %s" % infile)

            # The program to run has to be a regular file
            if not os.path.isfile(executable):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Another process on the same filesystem may create work_dir
            # first: accept "already exists", pass on any other error.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:
                    already_there = (exc.errno == errno.EEXIST
                                     and os.path.isdir(work_dir))
                    if not already_there:
                        raise

            if parsetasfile:
                parsetname = os.path.join(
                    work_dir, os.path.basename(infile) + '.parset')
                nodeparset = Parset()
                for name, value in list(kwargs.items()):
                    nodeparset.add(name, value)
                nodeparset.writeFile(parsetname)
                # The parset goes last on the command line
                args.append(parsetname)

            try:
                # Run, catching segfaults and retrying
                log_name = self.logger.name + "." + os.path.basename(infile)
                with CatchLog4CPlus(
                        work_dir,
                        log_name,
                        os.path.basename(executable),
                ) as logger:
                    catch_segfaults([executable] + args, work_dir,
                                    self.environment, logger)
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1

        # Signal the master script that the run went fine
        self.outputs['ok'] = True
        return 0
예제 #11
0
    def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=True, args_format='', environment=''):
        """
        Prepare the source and parameter databases, then run ``executable``.

        Options that steer the database set-up (``replace-sourcedb``,
        ``replace-parmdb``, ``dry-run``, ``sourcedb``, ``parmdb``,
        ``sourcedb-name``, ``parmdb-name``, ``force``, ``numthreads``,
        ``observation``, ``catalog``, ``no-columns``) are removed from
        ``kwargs``; most are stored on ``self``.  After ``createsourcedb()``,
        ``createparmdb()`` and, unless ``no-columns`` is given,
        ``addcolumns()``, ``--numthreads``, ``--sourcedb``, ``--parmdb`` and
        the observation are appended to ``args``.  The remaining ``kwargs``
        are written to ``<work_dir>/<basename(infile)>.parset`` when
        ``parsetasfile`` is true, and that path is appended to ``args``.

        :param infile: input name; its basename names the parset and the logger
        :param executable: path of the program to run
        :param args: list of command line arguments (modified in place)
        :param kwargs: dict of options (modified in place)
        :param work_dir: working directory, created when missing
        :param parsetasfile: write the remaining ``kwargs`` to a parset
        :param args_format: not used by this method
        :param environment: passed to ``self.environment.update()``
        :return: 1 on failure; no explicit return value otherwise
        """

        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        self.work_dir = work_dir
        self.infile = infile
        self.executable = executable

        if 'replace-sourcedb' in kwargs:
            self.replace_sourcedb = kwargs.pop('replace-sourcedb')
        if 'replace-parmdb' in kwargs:
            self.replace_parmdb = kwargs.pop('replace-parmdb')
        if 'dry-run' in kwargs:
            self.dry_run = kwargs.pop('dry-run')
        if 'sourcedb' in kwargs:
            self.sourcedb = kwargs.pop('sourcedb')
        if 'parmdb' in kwargs:
            self.parmdb = kwargs.pop('parmdb')
        # Naming a database explicitly, or forcing, implies replacing it.
        if 'sourcedb-name' in kwargs:
            self.sourcedb_basename = kwargs.pop('sourcedb-name')
            self.replace_sourcedb = True
        if 'parmdb-name' in kwargs:
            self.parmdb_basename = kwargs.pop('parmdb-name')
            self.replace_parmdb = True
        if 'force' in kwargs:
            self.replace_parmdb = True
            self.replace_sourcedb = True
            kwargs.pop('force')
        numthreads = kwargs.pop('numthreads', 1)
        args.append('--numthreads=' + str(numthreads))
        if 'observation' in kwargs:
            self.observation = kwargs.pop('observation')
        if 'catalog' in kwargs:
            self.catalog = kwargs.pop('catalog')

        self.createsourcedb()
        self.createparmdb()
        # The mere presence of 'no-columns' disables addcolumns(); its value
        # is not looked at.
        if 'no-columns' in kwargs:
            kwargs.pop('no-columns')
        else:
            self.addcolumns()

        args.append('--sourcedb=' + self.sourcedb_path)
        args.append('--parmdb=' + self.parmdb_path)

        args.append(self.observation)

        # Time execution of this job
        with log_time(self.logger):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Several processes may try to create work_dir at the same time:
            # tolerate "already exists", re-raise everything else.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:
                    if not (exc.errno == errno.EEXIST and os.path.isdir(work_dir)):
                        raise

            if parsetasfile:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                # The parset goes last on the command line
                args.append(parsetname)

            try:
                # ************************************************************
                # Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                    work_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(
                        cmd, work_dir, self.environment, logger
                    )
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1
예제 #12
0
class GenericPipeline(control):

    # Recipe inputs declared by this class: a single optional string option,
    # ``--loglevel``, defaulting to 'INFO'. go() reads it back through
    # ``self.inputs['loglevel']`` to set the level of a freshly created logger.
    inputs = {
        'loglevel':
        ingredient.StringField('--loglevel',
                               help="loglevel",
                               default='INFO',
                               optional=True)
    }

    def __init__(self):
        """
        Initialise the base recipe and create the empty per-pipeline state.

        The parset starts out empty; pipeline_logic() fills it from the
        parset file if nothing has been loaded into it by then. The name
        stays '' until go() derives a job name from the parset file.
        """
        control.__init__(self)
        # Name used for the logger and as key for this pipeline's initial
        # entry in the result dictionaries.
        self.name = ''
        # Set to "<parset file>_feedback" once the parset file is adopted.
        self.parset_feedback_file = None
        self.input_data, self.output_data = {}, {}
        self.parset = Parset()

    def usage(self):
        """
        Write a short usage message to stderr.

        The text is written with ``sys.stderr.write`` instead of the
        ``print >>`` statement: the output is the same, but the chevron form
        only exists in Python 2 and fails at runtime on Python 3.

        Returns None.
        """
        sys.stderr.write("Usage: %s [options] <parset-file>\n" % sys.argv[0])
        # The description of the parset structure is not yet implemented.
        sys.stderr.write("Parset structure should look like:\n"
                         "NYI\n")

    def go(self):
        """
        Derive the job name from the parset file and start the pipeline.

        The first positional argument is taken as the parset file. Unless a
        job name is already present in ``self.inputs``, the basename of that
        file (without extension) becomes both the job name and ``self.name``.
        A logger is created if the instance does not have one yet, then
        control is handed to the base class's ``go()``.

        Returns the result of the base class's ``go()``, or 1 when there is
        neither a parset file nor a job name to work with.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            # No parset file given. Show the usage text but keep going: a job
            # name may already be set, in which case the base class can still
            # be started.
            parset_file = None
            self.usage()

        # Set job-name to basename of parset-file w/o extension, if it's not
        # set on the command-line with '-j' or '--job-name'
        if 'job_name' not in self.inputs:
            if parset_file is None:
                # Nothing to derive a job name from. This used to crash with
                # an UnboundLocalError on the unset parset_file.
                return 1
            self.inputs['job_name'] = os.path.splitext(
                os.path.basename(parset_file))[0]
            self.name = self.inputs['job_name']
        try:
            self.logger
        except AttributeError:
            # No logger attribute yet: create one named after the pipeline.
            self.logger = getSearchingLogger(self.name)
            self.logger.setLevel(self.inputs['loglevel'])
        # Call the base-class's `go()` method.
        return super(GenericPipeline, self).go()


#    def pipeline_logic(self):
#        print 'Dummy because of stupid wrapping inside the framework'
#        if overwrite:
#            self.execute_pipeline()

#def execute_pipeline(self):

    def pipeline_logic(self):
        """
        Run every step named in ``pipeline.steps`` of the parset, in order.

        The parset file given as first positional argument is adopted (only
        if ``self.parset`` holds no keys yet), replacement values are
        substituted and the parset is validated. Each step is then
        dispatched on its ``kind`` (``'recipe'`` when not given):

        * ``recipe``   -- executed through ``self.run_task``
        * ``plugin``   -- executed through ``loader.call_plugin``
        * ``pipeline`` -- the keys of another parset are merged into this
          one, prefixed with the step name, and its steps are queued
        * ``loop``     -- the loop's steps and the loop itself are queued
          again until ``loopcount`` is reached or a step sets ``break``

        The result of each step is stored under the step's name, so later
        steps can refer to it as ``<step>.output.<name>``.

        Returns None. Calls ``sys.exit(1)`` when the parset fails validation.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            sys.stderr.write("Error: Parset file not found!\n")
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            sys.exit(1)
        if not validator.validate_steps():
            self.usage()
            sys.exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we dont need a subset but just a steplist
        # at the moment only a list with stepnames is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')

        # *********************************************************************
        # forward declaration of things. just for better overview and understanding whats in here.
        # some of this might be removed in upcoming iterations, or stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        # Stack of running loops; the innermost loop is at index 0.
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict,
                              step_parset_files, step_parset_obj, parset_dir)
        # initial parameters to be saved in resultsdict so that recipes have access to this step0
        # double init values. 'input' should be considered deprecated
        # self.name would be consistent to use in subpipelines
        resultdicts = {
            'input': {
                'parset': parset_file,
                'parsetobj': self.parset,
                'job_dir': job_dir,
                'parset_dir': parset_dir,
                'mapfile_dir': mapfile_dir
            }
        }

        resultdicts.update({
            self.name: {
                'parset': parset_file,
                'parsetobj': self.parset,
                'job_dir': job_dir,
                'parset_dir': parset_dir,
                'mapfile_dir': mapfile_dir
            }
        })

        if 'pipeline.mapfile' in self.parset.keys:
            resultdicts['input']['mapfile'] = str(
                self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(
                self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            step = step_control_dict[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except Exception:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except Exception:
                typeval = ''

            additional_input = {}

            if stepname in step_parset_obj:
                additional_input = self._construct_step_parset(
                    step_parset_obj[stepname], resultdicts,
                    step_parset_files[stepname], stepname)

            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval,
                                             'recipe') == 'executable_args':
                    inputdict = {'stepname': stepname}
                    inputdict.update(additional_input)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]

            self._construct_input(inputdict, step, resultdicts)

            # hack, popping 'type' is necessary, why? because you deleted kind already in parsets
            inputdict.pop('type', None)
            inputdict.pop('kind', None)
            # \hack
            # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input.
            # python buildin functions cant handle the string returned from parset class.
            if 'environment' in inputdict:
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(
                    ' ', '')
                valdict = {}
                for item in str(val).split(','):
                    # Split on the first ':' only, so that values which
                    # themselves contain ':' (e.g. PATH-like lists) are kept
                    # whole instead of being cut at their first colon.
                    pair = item.split(':', 1)
                    valdict[pair[0]] = pair[1]
                inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector(
                    'pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keys:
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict:
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({
                    os.path.splitext(os.path.basename(typeval))[0]: {
                        'parset': typeval,
                        'mapfile': submapfile,
                    }
                })
                #todo: take care of pluginpathes and everything other then individual steps
                # make a pipeline parse methods that returns everything needed.
                # maybe as dicts to combine them to one

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keys:
                    subpipeline_parset.remove('pipeline.pluginpath')
                # Every step name of the subpipeline, including the steps
                # that only occur inside its loops.
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in subpipeline_parset.keys:
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *********************************************************************
                # master parset did not handle formatting and comments in the parset.
                # proper format only after use of parset.makesubset. then it is a different object
                # from a different super class :(. this also explains use of parset.keys and parset.keys()
                # take the parset from subpipeline and add it to the master parset.
                # *********************************************************************
                # replace names of steps with the subpipeline stepname to create a unique identifier.
                # replacement values starting with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline. only works if the ! value is already in the subpipeline
                for k in subpipeline_parset.keys:
                    if not str(k).startswith('#'):
                        val = subpipeline_parset[k]
                        if not str(k).startswith('!'):
                            for item in checklist:
                                if item in str(val):
                                    val = str(val).replace(
                                        item, stepname + '-' + item)

                            self.parset.add(stepname + '-' + k, str(val))
                        else:
                            self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                # A subpipeline step without an argument section has no entry
                # in step_parset_obj; there is nothing to override then.
                if stepname in step_parset_obj:
                    for item in step_parset_obj[stepname].keys():
                        # Iterate over a snapshot: keys are removed from and
                        # added to the parset inside the loop.
                        for k in list(self.parset.keys):
                            if str(k).startswith('!') and item in k:
                                self.parset.remove(k)
                                self.parset.add(
                                    '! ' + item,
                                    str(step_parset_obj[stepname][item]))

                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict,
                                      step_parset_files, step_parset_obj,
                                      parset_dir)
                # Queue the subpipeline's steps in front of the remaining
                # ones, preserving their order.
                for j in reversed(subpipeline_steplist):
                    step_name_list.insert(0, j)

                # remove replacements strings to prevent loading the same key twice
                for k in copy.deepcopy(self.parset.keys):
                    if str(k).startswith('!'):
                        self.parset.remove(k)

            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                # (compare by value: 'is' only tests object identity)
                if activeloop[0] != stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                # ('==' instead of 'is': identity of equal ints is only an
                # implementation detail and fails for larger loop counts)
                if counter == step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict,
                                          step_parset_files, step_parset_obj,
                                          parset_dir)
                    for j in reversed(loopsteps):
                        step_name_list.insert(0, j)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(typeval, inputargs, **inputdict)

            # plugins
            if kind_of_step == 'plugin':
                with duration(self, stepname):
                    resultdict = loader.call_plugin(
                        typeval, pipeline_args.getString('pluginpath'),
                        inputargs, **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loopstep
            # if the step has the keyword for loopbreaks assign the value
            if resultdict is not None and 'break' in resultdict:
                if resultdict['break']:
                    resultdicts[activeloop[0]]['break'] = resultdict['break']

    # *********************************************************************
    # build the inputs for the master recipes.
    def _construct_input(self, inoutdict, controlparset, resdicts):
        # intermediate backward compatibility for opts subparset
        if controlparset.fullModuleName('opts'):
            argsparset = controlparset.makeSubset(
                controlparset.fullModuleName('opts') + '.')
        # hack
        elif 'loopcount' not in controlparset.keys():
            argsparset = controlparset
        else:
            argsparset = controlparset.makeSubset(
                controlparset.fullModuleName('imaginary') + '.')
        # \hack

        self._replace_output_keyword(inoutdict, argsparset, resdicts)

    def _construct_cmdline(self, inoutargs, controlparset, resdicts):
        argsparset = controlparset.makeSubset(
            controlparset.fullModuleName('cmdline') + '.')
        for k in argsparset.keys():
            if argsparset.getString(k).__contains__('.output.'):
                step, outvar = argsparset.getString(k).split('.output.')
                inoutargs.append(resdicts[step][outvar])
            else:
                inoutargs.append(argsparset.getString(k))
        try:
            controlparset.remove('cmdline.inmap')
        except:
            pass

    def _construct_steps(self, step_name_list, step_control_dict,
                         step_parset_files, step_parset_obj, parset_dir):
        """
        Prepare the control and argument parsets for a list of steps.

        The step names are processed from last to first. A name that also
        occurs earlier in the list gets the number of earlier occurrences
        appended, and `step_name_list` is updated in place with that name.
        For every step the ``control`` subset is stored in
        `step_control_dict`. If the step has a ``parsetarg`` or ``argument``
        module (``argument`` wins when both exist), that subset is completed
        with keys from optional parset files, written to
        ``<parset_dir>/<stepname>.parset`` and registered in
        `step_parset_files` (file name) and `step_parset_obj` (parset).

        All four containers are modified in place; nothing is returned.
        """

        def merge_missing_keys(target, source):
            # Copy each key of `source` that `target` does not have yet.
            for k in source.keys:
                if k not in target.keys():
                    target.add(k, str(source[k]))

        step_list_copy = copy.deepcopy(step_name_list)
        counter = 0
        while step_list_copy:
            # Negative index of the current step in step_name_list.
            counter -= 1
            stepname = step_list_copy.pop(-1)
            fullparset = self.parset.makeSubset(
                self.parset.fullModuleName(str(stepname)) + '.')
            subparset = fullparset.makeSubset(
                fullparset.fullModuleName('control') + '.')
            # Number of earlier steps carrying the same name.
            number = step_list_copy.count(stepname)
            if number != 0:
                stepname += str(number)
            step_name_list[counter] = stepname
            step_control_dict[stepname] = subparset
            # double implementation for intermediate backward compatibility
            parsetarg_module = fullparset.fullModuleName('parsetarg')
            argument_module = fullparset.fullModuleName('argument')
            if parsetarg_module or argument_module:
                if parsetarg_module:
                    stepparset = fullparset.makeSubset(parsetarg_module + '.')
                if argument_module:
                    stepparset = fullparset.makeSubset(argument_module + '.')
                # *********************************************************************
                # save parsets
                # either a filename is given in the main parset
                # or files will be created from subsets with stepnames.parset as filenames
                #
                # The three sources below are all optional, so any ordinary
                # failure (missing key, unreadable file) just skips that
                # source.
                # parset file named in the step's arguments
                try:
                    merge_missing_keys(
                        stepparset, Parset(stepparset.getString('parset')))
                    stepparset.remove('parset')
                except Exception:
                    pass
                # parset from task.cfg
                try:
                    merge_missing_keys(
                        stepparset,
                        Parset(
                            self.task_definitions.get(str(subparset['type']),
                                                      'parset')))
                except Exception:
                    pass
                # for parset in control section
                try:
                    merge_missing_keys(
                        stepparset, Parset(subparset.getString('parset')))
                    subparset.remove('parset')
                except Exception:
                    pass
                step_parset = os.path.join(parset_dir, stepname + '.parset')
                stepparset.writeFile(step_parset)
                step_parset_files[stepname] = step_parset
                step_parset_obj[stepname] = stepparset

    def _replace_output_keyword(self, inoutdict, argsparset, resdicts):
        for k in argsparset.keys():
            keystring = argsparset.getString(k)
            if keystring.__contains__('.output.'):
                if keystring.__contains__(','):
                    keystring = keystring.rstrip(']')
                    keystring = keystring.lstrip('[')
                    vec = []
                    for item in keystring.split(','):
                        if item.__contains__('.output.'):
                            step, outvar = item.split('.output.')
                            vec.append(resdicts[step][outvar])
                        else:
                            vec.append(item)
                    inoutdict[k] = vec
                else:
                    step, outvar = argsparset.getString(k).split('.output.')
                    if '+' in outvar:
                        tmplist = str(outvar).split('+')
                        inoutdict[k] = resdicts[step][tmplist[0]] + tmplist[1]
                    else:
                        inoutdict[k] = resdicts[step][outvar]
            else:
                inoutdict[k] = argsparset.getString(k)

    def _construct_step_parset(self, argsparset, resdicts, filename, stepname):
        """
        Resolve result references in a step's argument parset and save it.

        Values of the form ``<step>.output.<name>`` (single, or inside a
        comma separated list) are replaced in `argsparset` by the matching
        entry of `resdicts`. The parset is then written to `filename`.

        Returns a dict with extra inputs for the step:

        * ``inputkeys`` / ``mapfiles_in`` -- the resolved values of every
          reference whose output name contains 'mapfile'
        * ``arguments`` -- the value of the ``flags`` key, which is removed
          from the parset (a list when ``flags`` was a list of references,
          a string otherwise)
        * ``outputkey`` -- set to 'outputkey' if any value contains that
          word
        """
        addvals = {'inputkeys': [], 'mapfiles_in': [], 'arguments': []}
        # hack for original order of args
        # Walk the keys of the main parset and collect the argument names of
        # this step in the order in which they appear there.
        tmp_keys = argsparset.keys()
        ordered_keys = []
        for orig in self.parset.keys:
            for item in tmp_keys:
                if (stepname + '.') in orig and (
                        'argument.' + item in orig
                        and not 'argument.' + item + '.' in orig):
                    ordered_keys.append(item)
                    # NOTE(review): as the last statement of the loop body
                    # this 'continue' has no effect; a 'break' may have been
                    # intended. The substring test above can also match more
                    # than one item per key -- confirm before changing.
                    continue
        # \hack
        for k in ordered_keys:
            valuestring = argsparset.getString(k)
            if valuestring.__contains__('.output.'):
                if valuestring.__contains__(','):
                    # list value: resolve each element separately
                    valuestring = valuestring.rstrip(']')
                    valuestring = valuestring.lstrip('[')
                    vec = []
                    for item in valuestring.split(','):
                        if item.__contains__('.output.'):
                            step, outvar = item.split('.output.')
                            vec.append(resdicts[step][outvar])
                            if 'mapfile' in str(outvar):
                                addvals['inputkeys'].append(
                                    resdicts[step][outvar])
                                addvals['mapfiles_in'].append(
                                    resdicts[step][outvar])
                        else:
                            vec.append(item)
                    argsparset.replace(k, str(vec))
                    if k == 'flags':
                        addvals['arguments'] = vec
                        argsparset.remove(k)
                else:
                    # single reference
                    step, outvar = argsparset.getString(k).split('.output.')
                    #more ugly hacks... really needs clearly structured replacement method...
                    if '+' in outvar:
                        # '<name>+<suffix>': append the suffix to the result
                        tmplist = str(outvar).split('+')
                        argsparset.replace(
                            k,
                            str(resdicts[step][tmplist[0]]) + tmplist[1])
                    else:
                        argsparset.replace(k, str(resdicts[step][outvar]))
                    #if isinstance(resdicts[step][outvar], str):
                    # NOTE(review): for a '<name>+<suffix>' reference outvar
                    # still includes the suffix here, so this lookup uses the
                    # combined string as key -- confirm that is intended.
                    if 'mapfile' in str(outvar):
                        addvals['inputkeys'].append(resdicts[step][outvar])
                        addvals['mapfiles_in'].append(resdicts[step][outvar])
                    if k == 'flags':
                        addvals['arguments'] = str(argsparset[k])
                        argsparset.remove(k)
            else:
                # plain value: only 'flags' needs special treatment
                if k == 'flags':
                    addvals['arguments'] = str(argsparset[k])
                    argsparset.remove(k)

            #direct usage of outputkey
            if valuestring.__contains__('outputkey'):
                addvals['outputkey'] = 'outputkey'

        argsparset.writeFile(filename)
        return addvals

    def _get_parset_dicts(self):
        return {}

    def show_tasks(self):
        """
        Print the name of every section of the task definitions.

        One name is printed per line on stdout. Returns None.
        """
        for item in self.task_definitions.sections():
            # Parenthesised single argument: prints the same text whether
            # print is a statement (Python 2) or a function (Python 3).
            print(item)

    def show_task(self, task):
        """
        Print the keys and values of the parset configured for `task`.

        Nothing is printed when the task definitions have no ``parset``
        option for `task`. Returns None.
        """
        if not self.task_definitions.has_option(task, 'parset'):
            return
        task_parset = Parset()
        task_parset.adoptFile(self.task_definitions.get(task, 'parset'))
        # Each print gets one pre-built string, so the output is the same
        # whether print is a statement (Python 2) or a function (Python 3).
        print('possible arguments: key    =    value')
        for k in task_parset.keys:
            print(' '.join([
                '                   ', str(k), '    ', '=', '    ',
                str(task_parset[k])
            ]))

    def _add_step(self):
        steplist = []

    def _replace_values(self):
        replacedict = {}
        try:
            import imp
            plugin = imp.load_source('main', str(self.parset['prepare']))
            replacedict = plugin.main()
        except:
            pass
        for check in self.parset.keys:
            if str(check).startswith('!'):
                replacedict[str(check).lstrip('!').lstrip(' ')] = str(
                    self.parset[check])
        #print 'REPLACEDICT: ',replacedict
        for check in self.parset.keys:
            if not str(check).startswith('#'):
                for k, v in replacedict.iteritems():
                    if '{{ ' + k + ' }}' in str(self.parset[check]):
                        replacestring = str(self.parset[check]).replace(
                            '{{ ' + k + ' }}', v)
                        self.parset.replace(check, replacestring)
예제 #13
0
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=False,
            args_format='',
            environment=''):
        """
        Run `executable` (a CASA shell) for one dataset.

        With `parsetasfile` set, the entries of `kwargs` are expected to be
        named ``<task>.<parameter>``. They are turned into a CASA call
        ``<task>(<parameter>=<value>,...)`` which is written to
        ``<infile basename>.casacommand.py`` in `work_dir`; the executable
        is then started on that file through a generated shell script,
        from a fresh temporary directory below `work_dir`. Values starting
        with '/' are quoted; values starting with '/casastr/' are quoted
        with that marker removed.

        Without `parsetasfile`, each entry of `kwargs` is passed as
        ``--<key>=<value>`` and the executable is started directly from
        `work_dir`.

        `args` is extended in place. `environment` is merged into
        ``self.environment``. `args_format` is not used.

        Returns 1 if the dataset or executable is missing or the command
        fails, None otherwise. Errors while creating `work_dir` or writing
        the script files are raised.
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            if os.path.exists(infile):
                self.logger.info("Processing %s" % infile)
            else:
                self.logger.error("Dataset %s does not exist" % infile)
                return 1

            # Check if executable is present
            if not os.access(executable, os.X_OK):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Several processes on one filesystem may try to create the
            # directory at the same time; losing that race is fine as long
            # as the directory exists afterwards.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir, )
                except OSError as exc:  # Python >2.5
                    if not (exc.errno == errno.EEXIST
                            and os.path.isdir(work_dir)):
                        raise

            if not parsetasfile:
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
                # Run the executable directly. Previously this mode always
                # failed because the command and run directory were only
                # defined in the parset branch below.
                cmd = [executable] + list(args)
                run_dir = work_dir
            else:
                nodeparset = Parset()
                sublist = []
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                    key = str(k)
                    # Only '<task>.<parameter>' keys name a CASA task.
                    # str.find returns -1 (which is true) when there is no
                    # dot, so its result cannot be used as a boolean.
                    if key.find('.') > 0:
                        task = key.split('.')[0]
                        if task not in sublist:
                            sublist.append(task)

                #quick hacks below. for proof of concept.
                # NOTE(review): the command string is rebuilt for every
                # task, so with more than one task only the last one ends
                # up in the command file -- confirm whether that is wanted.
                casastring = ''
                for sub in sublist:
                    subpar = nodeparset.makeSubset(
                        nodeparset.fullModuleName(sub) + '.')
                    casaargs = []
                    for k in subpar.keys():
                        value = str(subpar[k])
                        # Test the longer prefix first: every '/casastr/'
                        # value also starts with '/'. Remove the prefix by
                        # slicing; str.strip would remove a set of
                        # characters from both ends instead.
                        if value.startswith('/casastr/'):
                            casaargs.append(
                                str(k) + "='" + value[len('/casastr/'):] + "'")
                        elif value.startswith('/'):
                            casaargs.append(str(k) + "='" + value + "'")
                        else:
                            casaargs.append(str(k) + '=' + value)
                    casastring = sub + '(' + ','.join(casaargs) + ')\n'
                # 1) return code of a casapy is not properly recognized by the pipeline
                # wrapping in shellscript works for succesful runs.
                # failed runs seem to hang the pipeline...
                # 2) casapy can not have two instances running from the same directory.
                # create tmp dirs
                casapydir = tempfile.mkdtemp(dir=work_dir)
                if casastring != '':
                    casafilename = os.path.join(
                        work_dir,
                        os.path.basename(infile) + '.casacommand.py')
                    # Any failure other than SystemExit inside CASA ends the
                    # process with exit status 1.
                    with open(casafilename, 'w') as casacommandfile:
                        casacommandfile.write('try:\n')
                        casacommandfile.write('    ' + casastring)
                        casacommandfile.write('except SystemExit:\n')
                        casacommandfile.write('    pass\n')
                        casacommandfile.write('except:\n')
                        casacommandfile.write('    import os\n')
                        casacommandfile.write('    os._exit(1)\n')
                    args.append(casafilename)
                somename = os.path.join(
                    work_dir,
                    os.path.basename(infile) + '.casashell.sh')
                # NOTE(review): arguments are not shell-quoted; values with
                # blanks or shell metacharacters change the command.
                commandstring = ' '.join([executable] + list(args))

                with open(somename, 'w') as shellscript:
                    shellscript.write('#!/bin/bash \n')
                    shellscript.write('echo "Trying CASAPY command" \n')
                    shellscript.write(commandstring)

                # Make the script executable for user, group and others.
                import stat
                st = os.stat(somename)
                os.chmod(
                    somename,
                    st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
                cmd = [somename]
                run_dir = casapydir

            try:
                # ****************************************************************
                # Run
                with CatchLog4CPlus(
                        run_dir,
                        self.logger.name + "." + os.path.basename(infile),
                        os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(cmd, run_dir, self.environment, logger)
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1
예제 #14
0
    def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
        """
        Build a CASA command file from ``kwargs`` and run it through a shell wrapper.

        Keys of the form ``<task>.<param>`` are grouped by ``<task>`` and
        rendered as a call ``task(param=value,...)``. That text is written to
        ``<work_dir>/<basename>.casacommand.py`` and the file name is appended
        to ``args``. A bash wrapper ``<work_dir>/<basename>.casashell.sh`` that
        invokes ``executable`` with ``args`` (output redirected to
        ``casa.log``) is written, made executable and started via
        ``catch_segfaults`` from a fresh temporary directory below
        ``work_dir``.

        :param infile: dataset path, or a stringified list (``"[a, b]"``)
            whose first entry is used as the reference file
        :param executable: program to launch; must be executable
        :param args: list of command line arguments; modified in place
        :param kwargs: mapping of parset keys to values
        :param work_dir: directory for generated files; created if missing
        :param parsetasfile: must be true for this script
        :param args_format: accepted for interface compatibility; not used here
        :param environment: passed to ``self.environment.update``
        :return: 1 if the dataset or executable is missing, if
            ``parsetasfile`` is false, or if running the command raised;
            no explicit value is returned on the success path of this block
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            # A slice (instead of infile[0]) keeps an empty infile from
            # raising IndexError; it then fails the existence check below.
            if infile[:1] == '[':
                reffile = infile.split(',')[0].strip(" []'\"")
            else:
                reffile = infile

            if not os.path.exists(reffile):
                self.logger.error("Dataset %s does not exist" % reffile)
                return 1
            self.logger.info("Processing %s" % reffile)

            # Check if executable is present
            if not os.access(executable, os.X_OK):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Another process on the same filesystem may create the directory
            # between the isdir() test and mkdir(); tolerate exactly that case.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:
                    if not (exc.errno == errno.EEXIST and os.path.isdir(work_dir)):
                        raise

            if not parsetasfile:
                self.logger.error("Nodescript \"executable_casa.py\" requires \"parsetasfile\" to be True!")
                return 1

            nodeparset = Parset()
            sublist = []
            for k, v in kwargs.items():
                nodeparset.add(k, v)
                key = str(k)
                # Only "<task>.<param>" keys name a task. str.find() returns
                # -1 when there is no dot, so it must be compared explicitly
                # rather than used as a truth value.
                if key.find('.') > 0:
                    task = key.split('.')[0]
                    if task not in sublist:
                        sublist.append(task)

            # NOTE(review): casastring is reassigned for every task, so only
            # the call for the last entry of sublist ends up in the command
            # file. Kept as is; confirm whether the calls should accumulate.
            casastring = ''
            for sub in sublist:
                subpar = nodeparset.makeSubset(nodeparset.fullModuleName(sub) + '.')
                call_args = [
                    str(k) + '=' + self._format_casa_value(subpar[k])
                    for k in subpar.keys()
                ]
                casastring = sub + '(' + ','.join(call_args) + ')\n'

            # 1) return code of a casapy is not properly recognized by the pipeline
            # wrapping in shellscript works for successful runs.
            # failed runs seem to hang the pipeline...
            # 2) casapy can not have two instances running from the same directory.
            # create tmp dirs
            casapydir = tempfile.mkdtemp(dir=work_dir)
            if casastring != '':
                casafilename = os.path.join(work_dir, os.path.basename(reffile) + '.casacommand.py')
                with open(casafilename, 'w') as casacommandfile:
                    casacommandfile.write(casastring)
                args.append(casafilename)

            somename = os.path.join(work_dir, os.path.basename(reffile) + '.casashell.sh')
            commandstring = executable
            for item in args:
                text = str(item)
                # Arguments containing blanks or list brackets are wrapped in
                # double quotes so the shell passes them as one word.
                if ' ' in text or '[' in text:
                    commandstring += ' "' + text + '"'
                else:
                    commandstring += ' ' + text

            with open(somename, 'w') as shellscript:
                shellscript.write('#!/bin/bash \n')
                shellscript.write('echo "Trying CASAPY command" \n')
                shellscript.write(commandstring + ' >& casa.log\n')

            # file permissions: add execute bits to whatever mode the file has
            st = os.stat(somename)
            os.chmod(somename, st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

            try:
                # ****************************************************************
                # Run
                cmd = [somename]
                with CatchLog4CPlus(
                    casapydir,
                    self.logger.name + "." + os.path.basename(reffile),
                    os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(
                        cmd, casapydir, self.environment, logger
                    )
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1

    def _format_casa_value(self, raw):
        """
        Render one parset value as the right-hand side of ``param=value``.

        Values starting with ``/`` are quoted as strings. A leading
        ``casastr/`` marker is removed and the remainder quoted.
        ``true``/``false`` (any case) and anything ``float()`` accepts are
        emitted verbatim. Bracketed lists are emitted verbatim when every
        element is numeric, otherwise as a list of quoted strings.
        Everything else is quoted as a plain string.
        """
        def is_number(text):
            try:
                float(text)
            except ValueError:
                return False
            return True

        text = str(raw)
        if text.startswith('/'):
            return "'" + text + "'"
        if text.startswith('casastr/'):
            # Slice the marker off: str.strip('casastr/') would remove any of
            # those characters from BOTH ends and mangle the value.
            return "'" + text[len('casastr/'):] + "'"
        if text.lower() in ('false', 'true'):
            return text

        self.logger.info('value: {}'.format(text))
        if is_number(text):
            return text
        if '[' in text or '(' in text:
            # Check if list of int/float or strings
            list_vals = [f.strip() for f in text.strip('[]()').split(',')]
            if all(is_number(list_val) for list_val in list_vals):
                return text
            return '[{}]'.format(','.join("'" + list_val + "'" for list_val in list_vals))
        # Simple string
        return "'" + text + "'"
예제 #15
0
    def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
        """
        Assemble the command line for ``executable`` and run it in ``work_dir``.

        How ``kwargs`` reach the program depends on ``parsetasfile`` and the
        argument format:

        * ``parsetasfile`` true: ``kwargs`` are written to
          ``<work_dir>/<basename of infile>.parset``; the file name is
          appended to ``args`` for format ``losoto`` and inserted as first
          argument otherwise.
        * ``gnu``: ``--key=value`` appended per entry.
        * ``lofar``: ``key=value`` appended per entry.
        * ``argparse``: ``--key value`` appended per entry (one list element).
        * ``wsclean``: ``-key`` followed by its value(s) is inserted at the
          front of ``args``; in addition every ``[a,b,...]`` element of
          ``args`` is expanded in place into its members.

        :param infile: input name; used for logging and for naming the parset
        :param executable: program to launch; must be executable
        :param args: list of command line arguments; modified in place
        :param kwargs: mapping of option names to values
        :param work_dir: directory to run in; created if missing
        :param parsetasfile: write ``kwargs`` to a parset file instead of
            putting them on the command line
        :param args_format: mapping with an ``'args_format'`` entry, or the
            format name itself as a plain string
        :param environment: passed to ``self.environment.update``
        :return: 1 if the executable is missing or running it raised; no
            explicit value is returned on the success path of this block
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            # The existence of infile is deliberately not checked here.
            self.logger.info("Processing %s" % infile)

            # Check if executable is present
            if not os.access(executable, os.X_OK):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Another process on the same filesystem may create the directory
            # between the isdir() test and mkdir(); tolerate exactly that case.
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:
                    if not (exc.errno == errno.EEXIST and os.path.isdir(work_dir)):
                        raise

            # The declared default for args_format is a plain string, and
            # indexing a string with a key raises TypeError; in that case the
            # string itself is taken as the format name.
            try:
                argsformat = args_format['args_format']
            except TypeError:
                argsformat = args_format

            # deal with multiple input files for wsclean
            if argsformat == 'wsclean':
                expanded = []
                for arg in args:
                    if str(arg).startswith('[') and str(arg).endswith(']'):
                        members = arg.lstrip('[').rstrip(']').split(',')
                        expanded.extend(val.strip(' \'\"') for val in members)
                    else:
                        expanded.append(arg)
                # Slice assignment keeps the caller's list object up to date.
                args[:] = expanded

            if not parsetasfile:
                if argsformat == 'gnu':
                    for k, v in kwargs.items():
                        args.append('--' + k + '=' + v)
                elif argsformat == 'lofar':
                    for k, v in kwargs.items():
                        args.append(k + '=' + v)
                elif argsformat == 'argparse':
                    # NOTE(review): option and value go in as ONE list element
                    # ("--key value"); confirm the launched program accepts that.
                    for k, v in kwargs.items():
                        args.append('--' + k + ' ' + v)
                elif argsformat == 'wsclean':
                    for k, v in kwargs.items():
                        if str(v).startswith('[') and str(v).endswith(']'):
                            multargs = v.lstrip('[').rstrip(']').replace(' ', '').split(',')
                        else:
                            multargs = v.split(' ')
                        # str.split() always yields at least one element, so
                        # the option is always followed by its value(s).
                        args[0:0] = ['-' + k] + multargs
            else:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                if argsformat == 'losoto':
                    args.append(parsetname)
                else:
                    args.insert(0, parsetname)

            try:
                # ****************************************************************
                # Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                    work_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(
                        cmd, work_dir, self.environment, logger
                    )
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1