Example #1
File: cimager.py  Project: jjdmol/LOFAR
 def populate_cimager_parset(parset):
     input_parset = Parset(parset)
     patch_dictionary = {
         'Cimager.dataset': dataset,
         'Cimager.restore': restore
     }
     image_names = []
     for image_name in input_parset.getStringVector('Cimager.Images.Names'):
         image_names.append("%s_%s" % (image_name, name))
         subset = input_parset.makeSubset(
             "Cimager.Images.%s" % image_name,
             "Cimager.Images.%s" % image_names[-1]
         )
         patch_dictionary[
             "Cimager.Images.%s.frequency" % image_names[-1]
         ] = frequency
         patch_dictionary[
             "Cimager.Images.%s.direction" % image_names[-1]
         ] = "[ %s,%s,%s ]" % (ms_dir_ra, ms_dir_dec, ms_dir_type)
         for key in subset:
             patch_dictionary[key] = subset[key].get()
     input_parset.subtractSubset('Cimager.Images.image')
     for key in input_parset:
         patch_dictionary[key] = input_parset[key].get()
     patch_dictionary['Cimager.Images.Names'] = "[ %s ]" % ", ".join(image_names)
     return patch_parset(
         None, patch_dictionary,
         self.config.get("layout", "job_directory")
     )
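
In the original recipe this function is a closure: dataset, restore, name, frequency, the ms_dir_* values and self all come from the enclosing scope, which the example extractor stripped. A minimal, hedged sketch of the same patch-dictionary pattern with those values supplied explicitly (the import path and every literal are assumptions, not taken from the source):

# Hedged sketch only; closure variables are replaced by hypothetical
# literals, and the import path is assumed from the LOFAR framework.
from lofarpipe.support.parset import Parset

input_parset = Parset('cimager.parset')            # hypothetical input file
patch_dictionary = {
    'Cimager.dataset': '/data/L12345_SB000.MS',    # stands in for: dataset
    'Cimager.restore': 'True',                     # stands in for: restore
}
for image_name in input_parset.getStringVector('Cimager.Images.Names'):
    new_name = "%s_%s" % (image_name, 'SB000')     # stands in for: name
    patch_dictionary["Cimager.Images.%s.frequency" % new_name] = '60e6'
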
Example #2
 def show_task(self, task):
     task_parset = Parset()
     if self.task_definitions.has_option(task,'parset'):
         task_parset.adoptFile(self.task_definitions.get(task,'parset'))
         print 'possible arguments: key    =    value'
         for k in task_parset.keywords():
             print '                   ',k,'    ','=','    ',task_parset[k]
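
show_task assumes self.task_definitions is a ConfigParser-style object whose per-task section may carry a parset option pointing at a parset file. A hedged sketch of the same lookup outside the class (the tasks.cfg layout, section name and paths are hypothetical):

# Hedged sketch; Python 2, matching the examples on this page.
import ConfigParser
from lofarpipe.support.parset import Parset        # import path assumed

task_definitions = ConfigParser.ConfigParser()
task_definitions.read('tasks.cfg')                 # hypothetical file, e.g.:
                                                   #   [ndppp]
                                                   #   parset = /opt/parsets/ndppp.parset
if task_definitions.has_option('ndppp', 'parset'):
    task_parset = Parset()
    task_parset.adoptFile(task_definitions.get('ndppp', 'parset'))
    for k in task_parset.keywords():
        print k, '=', task_parset[k]
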
Example #3
    def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
        """
        This method contains all the needed functionality
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            #if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Script %s not found" % executable)
                return 1

            # Guard against the race condition that occurs when more than
            # one process creates work_dir on the same filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            if parsetasfile:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                args.insert(0, parsetname)

            try:
            # ****************************************************************
            # Run
                # Change to working directory for the script
                pipedir = os.getcwd()
                os.chdir(work_dir)
                outdict = {}
                plugin = imp.load_source('main', executable)
                outdict = plugin.main(*args, **kwargs)
                os.chdir(pipedir)

            except CalledProcessError, err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception, err:
                self.logger.error(str(err))
                return 1
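
With parsetasfile set, run() serializes the kwargs to <infile>.parset and prepends the path to args, so the plugin loaded via imp.load_source receives it as the first positional argument of main(). A hedged sketch of such a plugin (illustrative only, not a LOFAR-provided script):

# Hedged sketch of the receiving side of the parsetasfile handshake.
from lofarpipe.support.parset import Parset        # import path assumed

def main(parset_path, *args, **kwargs):
    parset = Parset()
    parset.adoptFile(parset_path)                  # the kwargs written by run()
    for key in parset.keywords():
        print key, '=', parset[key]
    return {'parset': parset_path}                 # becomes outdict in run()
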
Example #4
 def __init__(self):
     control.__init__(self)
     self.parset = Parset()
     self.input_data = {}
     self.output_data = {}
     self.parset_feedback_file = None
     #self.logger = None#logging.RootLogger('DEBUG')
     self.name = ''
Example #5
 def show_task(self, task):
     task_parset = Parset()
     if self.task_definitions.has_option(task, 'parset'):
         task_parset.adoptFile(self.task_definitions.get(task, 'parset'))
         print 'possible arguments: key    =    value'
         for k in task_parset.keys:
             print '                   ', k, '    ', '=', '    ', task_parset[
                 k]
Example #6
 def _construct_steps(self, step_name_list, step_control_dict,
                      step_parset_files, step_parset_obj, parset_dir):
     step_list_copy = (copy.deepcopy(step_name_list))
     counter = 0
     while step_list_copy:
         counter -= 1
         stepname = step_list_copy.pop(-1)
         fullparset = self.parset.makeSubset(
             self.parset.fullModuleName(str(stepname)) + '.')
         subparset = fullparset.makeSubset(
             fullparset.fullModuleName('control') + '.')
         number = 0
         for item in step_list_copy:
             if item == stepname:
                 number += 1
         if number != 0:
             stepname += str(number)
         step_name_list[counter] = stepname
         step_control_dict[stepname] = subparset
         if fullparset.fullModuleName('argument'):
             stepparset = fullparset.makeSubset(
                 fullparset.fullModuleName('argument') + '.')
             # *********************************************************************
             # save parsets
             # either a filename is given in the main parset
             # or files will be created from subsets with stepnames.parset as filenames
             # for name, parset in step_parset_dict.iteritems():
             try:
                 file_parset = Parset(stepparset.getString('parset'))
                 for k in file_parset.keywords():
                     if k not in stepparset.keys():
                         stepparset.add(k, str(file_parset[k]))
                 stepparset.remove('parset')
             except:
                 pass
             # parset from task.cfg
             try:
                 file_parset = Parset(
                     self.task_definitions.get(str(subparset['type']),
                                               'parset'))
                 for k in file_parset.keywords():
                     if k not in stepparset.keys():
                         stepparset.add(k, str(file_parset[k]))
             except:
                 pass
             # for parset in control section
             try:
                 file_parset = Parset(subparset.getString('parset'))
                 for k in file_parset.keywords():
                     if k not in stepparset.keys():
                         stepparset.add(k, str(file_parset[k]))
                 subparset.remove('parset')
             except:
                 pass
             step_parset = os.path.join(parset_dir, stepname + '.parset')
             stepparset.writeFile(step_parset)
             step_parset_files[stepname] = step_parset
             step_parset_obj[stepname] = stepparset
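
The while loop above renames duplicate step names by consuming the list from the back and suffixing each name with the number of identical names still ahead of it. A standalone reproduction of just that renaming, with hypothetical step names:

# Minimal reproduction of the duplicate-step renaming in _construct_steps.
steps = ['dppp', 'flag', 'dppp']          # hypothetical step list
renamed = list(steps)
pending = list(steps)
counter = 0
while pending:
    counter -= 1
    name = pending.pop(-1)
    dups = pending.count(name)            # identical names still ahead
    if dups:
        name += str(dups)
    renamed[counter] = name
print renamed                             # ['dppp', 'flag', 'dppp1']
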
Example #7
def _create_mapfile_from_parset(parset, identifier):
    pars = Parset()
    pars.adoptFile(parset)
    dps = pars.makeSubset(
        pars.fullModuleName('DataProducts') + '.'
    )
    datamap = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
        for location, filename, skip in zip(
            dps.getStringVector(identifier + '.locations'),
            dps.getStringVector(identifier + '.filenames'),
            dps.getBoolVector(identifier + '.skip'))
    ])
    return datamap
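
A hedged sketch of the DataProducts parset fragment this function consumes, built with the exact key names the code reads; the Input_Correlated identifier and all host/path values are hypothetical:

# Hedged sketch: write a tiny DataProducts parset, then map it with the
# function above (assumes its imports, os and DataMap, are available).
from lofarpipe.support.parset import Parset        # import path assumed

example = Parset()
example.add('DataProducts.Input_Correlated.locations',
            '[node001:/data, node002:/data]')
example.add('DataProducts.Input_Correlated.filenames',
            '[L1_SB000.MS, L1_SB001.MS]')
example.add('DataProducts.Input_Correlated.skip', '[false, false]')
example.writeFile('dataproducts.parset')

datamap = _create_mapfile_from_parset('dataproducts.parset',
                                      'Input_Correlated')
# each entry is built as (host, path, skip), e.g.
# ('node001', '/data/L1_SB000.MS', False)
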
Example #8
    def _create_mask(self, npix, cell_size, output_image,
                     concatenated_measurement_set, executable,
                     working_directory, log4_cplus_name, sourcedb_path,
                     mask_patch_size, image_path_directory):
        """
        (3) create a casa image containing an mask blocking out the
        sources in the provided sourcedb.
        
        It expects:
        
        a. the ms for which the mask will be created, it is used to de
           termine some image details: (eg. pointing)
        b. parameters for running within the catchsegfault framework
        c. and the size of the mask_pach.
           To create a mask, first a empty measurement set is created using
           awimager: ready to be filled with mask data 
           
        This function is a wrapper around some functionality written by:
        [email protected]
        
        steps: 
        1. Create a parset with image paramters used by:
        2. awimager run. Creating an empty casa image.
        3. Fill the casa image with mask data
           
        """
        # ********************************************************************
        # 1. Create the parset used to make a mask
        mask_file_path = output_image + ".mask"

        mask_patch_dictionary = {
            "npix": str(npix),
            "cellsize": str(cell_size),
            "image": str(mask_file_path),
            "ms": str(concatenated_measurement_set),
            "operation": "empty",
            "stokes": "'I'"
        }
        mask_parset = Parset.fromDict(mask_patch_dictionary)
        mask_parset_path = os.path.join(image_path_directory, "mask.par")
        mask_parset.writeFile(mask_parset_path)
        self.logger.debug(
            "Write parset for awimager mask creation: {0}".format(
                mask_parset_path))

        # *********************************************************************
        # 2. Create an empty mask using awimager
        cmd = [executable, mask_parset_path]
        self.logger.info(" ".join(cmd))
        try:
            with CatchLog4CPlus(
                    working_directory,
                    self.logger.name + "." + os.path.basename(log4_cplus_name),
                    os.path.basename(executable)) as logger:
                catch_segfaults(cmd, working_directory, self.environment,
                                logger)
        # Thrown by catch_segfault
        except CalledProcessError, exception:
            self.logger.error(str(exception))
            return 1
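
Parset.fromDict plus writeFile, as used above, round-trips a plain dictionary through the key=value parset format. A hedged sketch with hypothetical values:

# Hedged round-trip sketch of the fromDict/writeFile pattern used above.
from lofarpipe.support.parset import Parset        # import path assumed

mask_parset = Parset.fromDict({'npix': '256', 'operation': 'empty'})
mask_parset.writeFile('/tmp/mask.par')             # plain key=value lines
check = Parset('/tmp/mask.par')
assert check.getString('operation') == 'empty'
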
Example #9
    def _create_mask(self, npix, cell_size, output_image,
                     concatenated_measurement_set, executable,
                     working_directory, log4_cplus_name, sourcedb_path,
                     mask_patch_size, image_path_directory):
        """
        (3) create a casa image containing an mask blocking out the
        sources in the provided sourcedb.
        
        It expects:
        
        a. the ms for which the mask will be created, it is used to de
           termine some image details: (eg. pointing)
        b. parameters for running within the catchsegfault framework
        c. and the size of the mask_pach.
           To create a mask, first a empty measurement set is created using
           awimager: ready to be filled with mask data 
           
        This function is a wrapper around some functionality written by:
        [email protected]
        
        steps: 
        1. Create a parset with image paramters used by:
        2. awimager run. Creating an empty casa image.
        3. Fill the casa image with mask data
           
        """
        # ********************************************************************
        # 1. Create the parset used to make a mask
        mask_file_path = output_image + ".mask"

        mask_patch_dictionary = {"npix": str(npix),
                                 "cellsize": str(cell_size),
                                 "image": str(mask_file_path),
                                 "ms": str(concatenated_measurement_set),
                                 "operation": "empty",
                                 "stokes": "'I'"
                                 }
        mask_parset = Parset.fromDict(mask_patch_dictionary)
        mask_parset_path = os.path.join(image_path_directory, "mask.par")
        mask_parset.writeFile(mask_parset_path)
        self.logger.debug(
            "Write parset for awimager mask creation: {0}".format(
                mask_parset_path))

        # *********************************************************************
        # 2. Create an empty mask using awimager
        cmd = [executable, mask_parset_path]
        self.logger.info(" ".join(cmd))
        try:
            with CatchLog4CPlus(working_directory,
                    self.logger.name + "." + os.path.basename(log4_cplus_name),
                    os.path.basename(executable)
            ) as logger:
                catch_segfaults(cmd, working_directory, self.environment,
                                        logger)
        # Thrown by catch_segfault
        except CalledProcessError, exception:
            self.logger.error(str(exception))
            return 1
Example #10
File: cimager.py  Project: mfkiwl/lofar-1
 def populate_cimager_parset(parset):
     input_parset = Parset(parset)
     patch_dictionary = {
         'Cimager.dataset': dataset,
         'Cimager.restore': restore
     }
     image_names = []
     for image_name in input_parset.getStringVector(
             'Cimager.Images.Names'):
         image_names.append("%s_%s" % (image_name, name))
         subset = input_parset.makeSubset(
             "Cimager.Images.%s" % image_name,
             "Cimager.Images.%s" % image_names[-1])
         patch_dictionary["Cimager.Images.%s.frequency" %
                          image_names[-1]] = frequency
         patch_dictionary["Cimager.Images.%s.direction" %
                          image_names[-1]] = "[ %s,%s,%s ]" % (
                              ms_dir_ra, ms_dir_dec, ms_dir_type)
         for key in subset:
             patch_dictionary[key] = subset[key].get()
     input_parset.subtractSubset('Cimager.Images.image')
     for key in input_parset:
         patch_dictionary[key] = input_parset[key].get()
     patch_dictionary['Cimager.Images.Names'] = "[ %s ]" % ", ".join(
         image_names)
     return patch_parset(None, patch_dictionary,
                         self.config.get("layout", "job_directory"))
Example #11
def gvds_iterator(gvds_file, nproc=4):
    """
    Reads a GVDS file.

    Provides a generator, which successively returns the contents of the GVDS
    file in the form (host, filename), in chunks suitable for processing
    across the cluster, i.e. no more than nproc files per host at a time.
    """
    parset = Parset(gvds_file)

    data = defaultdict(list)
    for part in range(parset.getInt('NParts')):
        host = parset.getString("Part%d.FileSys" % part).split(":")[0]
        file = parset.getString("Part%d.FileName" % part)
        vds  = parset.getString("Part%d.Name" % part)
        data[host].append((file, vds))

    for host, values in data.iteritems():
        data[host] = utilities.group_iterable(values, nproc)

    while True:
        yieldable = []
        for host, values in data.iteritems():
            try:
                for filename, vds in values.next():
                    yieldable.append((host, filename, vds))
            except StopIteration:
                pass
        if len(yieldable) == 0:
            return  # end the generator; raising StopIteration here breaks under PEP 479
        else:
            yield yieldable
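
A hedged sketch of the minimal GVDS parset gvds_iterator accepts, using exactly the keys the code reads (NParts, PartN.FileSys, PartN.FileName, PartN.Name); hosts, files and VDS names are hypothetical:

# Hedged sketch: write a two-part GVDS parset and iterate over it.
from lofarpipe.support.parset import Parset        # import path assumed

gvds = Parset()
gvds.add('NParts', '2')
gvds.add('Part0.FileSys', 'node001:/data')
gvds.add('Part0.FileName', '/data/L1_SB000.MS')
gvds.add('Part0.Name', 'L1_SB000.vds')
gvds.add('Part1.FileSys', 'node002:/data')
gvds.add('Part1.FileName', '/data/L1_SB001.MS')
gvds.add('Part1.Name', 'L1_SB001.vds')
gvds.writeFile('/tmp/example.gvds')

for chunk in gvds_iterator('/tmp/example.gvds', nproc=4):
    print chunk                                    # [(host, filename, vds), ...]
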
Example #12
    def go(self):
        self.logger.info("Starting storagemapper run")
        super(storagemapper, self).go()

        #                          We read the storage node name out of the path
        #     and append the local filename (ie, on the storage node) to the map
        # ----------------------------------------------------------------------
        data = defaultdict(list)
        for filename in self.inputs['args']:
            host = filename.split(os.path.sep)[3]
            data[host].append(filename.split(host)[-1])

        #                                 Dump the generated mapping to a parset
        # ----------------------------------------------------------------------
        parset = Parset()
        for host, filenames in data.iteritems():
            parset.addStringVector(host, filenames)

        create_directory(os.path.dirname(self.inputs['mapfile']))
        parset.writeFile(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']

        return 0
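
The mapfile written above is itself a parset mapping each storage host to a string vector of paths. A hedged sketch of reading one back (host names and paths hypothetical):

# Hedged sketch: read a storagemapper mapfile back in; it contains lines
# like  lse001 = [/L1/SB000.MS, /L1/SB001.MS]  (values hypothetical).
from lofarpipe.support.parset import Parset        # import path assumed

mapfile = Parset('/path/to/mapfile')               # hypothetical path
for host in mapfile.keywords():
    print host, mapfile.getStringVector(host)
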
Example #13
def _create_mapfile_from_parset(parset, identifier):
    pars = Parset()
    pars.adoptFile(parset)
    dps = pars.makeSubset(pars.fullModuleName('DataProducts') + '.')
    datamap = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip, )
        for location, filename, skip in zip(
            dps.getStringVector(identifier + '.locations'),
            dps.getStringVector(identifier + '.filenames'),
            dps.getBoolVector(identifier + '.skip'))
    ])
    return datamap
Example #14
 def _construct_steps(self, step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir):
     step_list_copy = (copy.deepcopy(step_name_list))
     counter = 0
     while step_list_copy:
         counter -= 1
         stepname = step_list_copy.pop(-1)
         fullparset = self.parset.makeSubset(self.parset.fullModuleName(str(stepname)) + '.')
         subparset = fullparset.makeSubset(fullparset.fullModuleName('control') + '.')
         number = 0
         for item in step_list_copy:
             if item == stepname:
                 number += 1
         if number != 0:
             stepname += str(number)
         step_name_list[counter] = stepname
         step_control_dict[stepname] = subparset
         if fullparset.fullModuleName('argument'):
             stepparset = fullparset.makeSubset(fullparset.fullModuleName('argument') + '.')
             # *********************************************************************
             # save parsets
             # either a filename is given in the main parset
             # or files will be created from subsets with stepnames.parset as filenames
             # for name, parset in step_parset_dict.iteritems():
             try:
                 file_parset = Parset(stepparset.getString('parset'))
                 for k in file_parset.keywords():
                     if k not in stepparset.keys():
                         stepparset.add(k, str(file_parset[k]))
                 stepparset.remove('parset')
             except:
                 pass
             # parset from task.cfg
             try:
                 file_parset = Parset(self.task_definitions.get(str(subparset['type']), 'parset'))
                 for k in file_parset.keywords():
                     if k not in stepparset.keys():
                         stepparset.add(k, str(file_parset[k]))
             except:
                 pass
             # for parset in control section
             try:
                 file_parset = Parset(subparset.getString('parset'))
                 for k in file_parset.keywords():
                     if k not in stepparset.keys():
                         stepparset.add(k, str(file_parset[k]))
                 subparset.remove('parset')
             except:
                 pass
             step_parset = os.path.join(parset_dir, stepname + '.parset')
             stepparset.writeFile(step_parset)
             step_parset_files[stepname] = step_parset
             step_parset_obj[stepname] = stepparset
Example #15
def get_current_op_step_names(direction):
    """
    Returns the list of step names for the current operation
    """
    current_op = get_current_op(direction)
    parset_file = os.path.join(direction.working_dir, 'results', current_op,
                               direction.name, 'pipeline.parset')
    parset = Parset()
    parset.adoptFile(parset_file)
    pipeline_args = parset.makeSubset(parset.fullModuleName('pipeline') + '.')
    step_name_list = pipeline_args.getStringVector('steps')

    # Filter out plugin steps
    filter_step_name_list = []
    for stepname in step_name_list:
        fullparset = parset.makeSubset(
            parset.fullModuleName(str(stepname)) + '.')
        subparset = fullparset.makeSubset(
            fullparset.fullModuleName('control') + '.')
        try:
            kind_of_step = subparset.getString('kind')
        except:
            kind_of_step = 'recipe'
        if kind_of_step != 'plugin':
            if kind_of_step == 'loop':
                loopsteps = subparset.getStringVector('loopsteps')
                for loopstep in loopsteps:
                    fullparset_loop = parset.makeSubset(
                        parset.fullModuleName(str(loopstep)) + '.')
                    subparset_loop = fullparset_loop.makeSubset(
                        fullparset_loop.fullModuleName('control') + '.')
                    try:
                        kind_of_loop_step = subparset_loop.getString('kind')
                    except:
                        kind_of_loop_step = 'recipe'
                    if kind_of_loop_step != 'plugin':
                        filter_step_name_list.append(loopstep)
            else:
                filter_step_name_list.append(stepname)

    return filter_step_name_list
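
A hedged sketch of a pipeline.parset layout that exercises every branch of the filter above; step names and kinds are hypothetical:

# Hedged sketch; if this were the pipeline.parset read above, the
# expected filter_step_name_list would be ['dppp1', 'solve'].
from lofarpipe.support.parset import Parset        # import path assumed

p = Parset()
p.add('pipeline.steps', '[createmap, dppp1, selfcal_loop]')
p.add('createmap.control.kind', 'plugin')          # filtered out
p.add('dppp1.control.kind', 'recipe')              # kept
p.add('selfcal_loop.control.kind', 'loop')
p.add('selfcal_loop.control.loopsteps', '[solve]')
p.add('solve.control.kind', 'recipe')              # kept, via the loop
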
Example #16
    def go(self):
        self.logger.info("Starting datamapper run")
        super(datamapper, self).go()

        #      We build lists of compute-nodes per cluster and data-per-cluster,
        #          then match them up to schedule jobs in a round-robin fashion.
        # ----------------------------------------------------------------------
        clusterdesc = ClusterDesc(self.config.get('cluster', "clusterdesc"))
        if clusterdesc.subclusters:
            available_nodes = dict((cl.name, cycle(get_compute_nodes(cl)))
                                   for cl in clusterdesc.subclusters)
        else:
            available_nodes = {
                clusterdesc.name: cycle(get_compute_nodes(clusterdesc))
            }

        data = defaultdict(list)
        for filename in self.inputs['args']:
            subcluster = filename.split(os.path.sep)[2]
            try:
                host = next(available_nodes[subcluster])
            except KeyError as key:
                self.logger.error("%s is not a known cluster" % str(key))
                raise

            data[host].append(filename)

        #                                 Dump the generated mapping to a parset
        # ----------------------------------------------------------------------
        parset = Parset()
        for host, filenames in data.items():
            parset.addStringVector(host, filenames)

        parset.writeFile(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']

        return 0
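
The round-robin assignment above pairs each file's subcluster (the third path component) with the next compute node from an itertools.cycle. A standalone sketch with hypothetical hosts and paths:

# Standalone sketch of the round-robin host assignment used above.
import os
from itertools import cycle
from collections import defaultdict

available_nodes = {'sub1': cycle(['node001', 'node002'])}
data = defaultdict(list)
for filename in ['/data/sub1/L1_SB000.MS', '/data/sub1/L1_SB001.MS',
                 '/data/sub1/L1_SB002.MS']:
    subcluster = filename.split(os.path.sep)[2]    # 'sub1'
    data[next(available_nodes[subcluster])].append(filename)
# node001 -> [SB000, SB002], node002 -> [SB001]
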
Example #17
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=True,
            args_format='',
            environment=''):
        """
        This method contains all the needed functionality
        """

        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        self.work_dir = work_dir
        self.infile = infile
        self.executable = executable

        self.msout_original = kwargs['msout'].rstrip('/')
        kwargs.pop('msout')
        self.msout_destination_dir = os.path.dirname(self.msout_original)
        self.scratch_dir = tempfile.mkdtemp(dir=kwargs['local_scratch_dir'])
        kwargs.pop('local_scratch_dir')
        self.logger.info('Using {} as scratch directory'.format(
            self.scratch_dir))

        # Set up scratch paths
        self.msout_scratch = os.path.join(
            self.scratch_dir, os.path.basename(self.msout_original))
        args.append('msout=' + self.msout_scratch)

        # Time execution of this job
        with log_time(self.logger):
            #if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Guard against the race condition that occurs when more than
            # one process creates work_dir on the same filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            argsformat = args_format['args_format']
            if not parsetasfile:
                if argsformat == 'gnu':
                    for k, v in kwargs.items():
                        args.append('--' + k + '=' + v)
                if argsformat == 'lofar':
                    for k, v in kwargs.items():
                        args.append(k + '=' + v)
                if argsformat == 'argparse':
                    for k, v in kwargs.items():
                        args.append('--' + k + ' ' + v)
                if argsformat == 'wsclean':
                    for k, v in kwargs.items():
                        multargs = v.split(' ')
                        if multargs:
                            multargs.reverse()
                            for item in multargs:
                                args.insert(0, item)
                        else:
                            args.insert(0, v)
                        args.insert(0, '-' + k)

            else:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir,
                                          os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                args.insert(0, parsetname)

            try:
                # ****************************************************************
                #Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                        work_dir,
                        self.logger.name + "." + os.path.basename(infile),
                        os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(cmd, work_dir, self.environment, logger)
            except CalledProcessError, err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                self.cleanup()
                return 1
            except Exception, err:
                self.logger.error(str(err))
                self.cleanup()
                return 1
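
When parsetasfile is false, the kwargs become command-line arguments whose shape depends on args_format. A hedged sketch of the three simple conversions (keys and values hypothetical; note the 'argparse' form keeps flag and value in a single list element, as above):

# Hedged sketch of the kwargs-to-argv conversions used above.
kwargs = {'msin': 'L1.MS', 'steps': '[flag]'}      # hypothetical
gnu_args      = ['--%s=%s' % (k, v) for k, v in kwargs.items()]  # --msin=L1.MS
lofar_args    = ['%s=%s' % (k, v) for k, v in kwargs.items()]    # msin=L1.MS
argparse_args = ['--%s %s' % (k, v) for k, v in kwargs.items()]  # --msin L1.MS
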
Example #18
    def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
        """
        This function contains all the needed functionality
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            if infile[0] == '[':
                infiles = [ms.strip(" []\'\"") for ms in infile.split(',')]
                reffile = infiles[0]
            else:
                reffile = infile

            if os.path.exists(reffile):
                self.logger.info("Processing %s" % reffile)
            else:
                self.logger.error("Dataset %s does not exist" % reffile)
                return 1

            # Check if executable is present
            if not os.access(executable, os.X_OK):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Guard against the race condition that occurs when more than
            # one process creates work_dir on the same filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            if not parsetasfile:
                self.logger.error("Nodescript \"executable_casa.py\" requires \"parsetasfile\" to be True!")
                return 1
            else:
                nodeparset = Parset()
                sublist = []
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                    if '.' in str(k):   # find() was truthy (-1) even for dotless keys
                        if str(k).split('.')[0] not in sublist:
                            sublist.append(str(k).split('.')[0])

                #quick hacks below. for proof of concept.
                casastring = ''
                for sub in sublist:
                    subpar = nodeparset.makeSubset(nodeparset.fullModuleName(sub) + '.')
                    casastring = sub + '('
                    for k in subpar.keys():
                        if str(subpar[k]).find('/') == 0:
                            casastring += str(k) + '=' + "'" + str(subpar[k]) + "'" + ','
                        elif str(subpar[k]).find('casastr/') == 0:
                            # slice off the 'casastr/' prefix; strip() would also
                            # eat matching characters from the end of the value
                            casastring += str(k) + '=' + "'" + str(subpar[k])[len('casastr/'):] + "'" + ','
                        elif str(subpar[k]).lower() == 'false' or str(subpar[k]).lower() == 'true':
                            casastring += str(k) + '=' + str(subpar[k]) + ','
                        else:
                            # Test if int/float or list of int/float
                            try:
                                self.logger.info('value: {}'.format(subpar[k]))
                                test = float(str(subpar[k]))
                                is_int_float = True
                            except:
                                is_int_float = False
                            if is_int_float:
                                casastring += str(k) + '=' + str(subpar[k]) + ','
                            else:
                                if '[' in str(subpar[k]) or '(' in str(subpar[k]):
                                    # Check if list of int/float or strings
                                    list_vals = [f.strip() for f in str(subpar[k]).strip('[]()').split(',')]
                                    is_int_float = True
                                    for list_val in list_vals:
                                        try:
                                            test = float(list_val)
                                        except:
                                            is_int_float = False
                                            break
                                    if is_int_float:
                                        casastring += str(k) + '=' + str(subpar[k]) + ','
                                    else:
                                        casastring += str(k) + '=' + '[{}]'.format(','.join(["'"+list_val+"'" for list_val in list_vals])) + ','
                                else:
                                    # Simple string
                                    casastring += str(k) + '=' + "'" + str(subpar[k]) + "'" + ','

                    casastring = casastring.rstrip(',')
                    casastring += ')\n'

                # 1) the return code of casapy is not properly recognized by the pipeline;
                # wrapping it in a shell script works for successful runs,
                # but failed runs seem to hang the pipeline...
                # 2) casapy cannot have two instances running from the same directory,
                # so create tmp dirs
                casapydir = tempfile.mkdtemp(dir=work_dir)
                if casastring != '':
                    casafilename = os.path.join(work_dir, os.path.basename(reffile) + '.casacommand.py')
                    casacommandfile = open(casafilename, 'w')
                    casacommandfile.write(casastring)
                    casacommandfile.close()
                    args.append(casafilename)

                somename = os.path.join(work_dir, os.path.basename(reffile) + '.casashell.sh')
                commandstring = ''
                commandstring += executable
                for item in args:
                    if str(item).find(' ') > -1 or str(item).find('[') > -1:
                        commandstring += ' "' + item + '"'
                    else:
                        commandstring += ' ' + item

                wrapper_file = open(somename, 'w')
                wrapper_file.write('#!/bin/bash \n')
                wrapper_file.write('echo "Trying CASAPY command" \n')
                wrapper_file.write(commandstring + ' >& casa.log\n')
                wrapper_file.close()

                # file permissions
                st = os.stat(somename)
                os.chmod(somename, st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

            try:
                # ****************************************************************
                # Run
                cmd = [somename]
                with CatchLog4CPlus(
                    casapydir,
                    self.logger.name + "." + os.path.basename(reffile),
                    os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(
                        cmd, casapydir, self.environment, logger
                    )
            except CalledProcessError, err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception, err:
                self.logger.error(str(err))
                return 1
Example #19
    def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=True, args_format='', environment=''):
        """
        This method contains all the needed functionality
        """

        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        self.work_dir = work_dir
        self.infile = infile
        self.executable = executable

        if 'replace-sourcedb' in kwargs:
            self.replace_sourcedb = kwargs['replace-sourcedb']
            kwargs.pop('replace-sourcedb')
        if 'replace-parmdb' in kwargs:
            self.replace_parmdb = kwargs['replace-parmdb']
            kwargs.pop('replace-parmdb')
        if 'dry-run' in kwargs:
            self.dry_run = kwargs['dry-run']
            kwargs.pop('dry-run')
        if 'sourcedb' in kwargs:
            self.sourcedb = kwargs['sourcedb']
            kwargs.pop('sourcedb')
        if 'parmdb' in kwargs:
            self.parmdb = kwargs['parmdb']
            kwargs.pop('parmdb')
        if 'sourcedb-name' in kwargs:
            self.sourcedb_basename = kwargs['sourcedb-name']
            self.replace_sourcedb = True
            kwargs.pop('sourcedb-name')
        if 'parmdb-name' in kwargs:
            self.parmdb_basename = kwargs['parmdb-name']
            self.replace_parmdb = True
            kwargs.pop('parmdb-name')
        if 'force' in kwargs:
            self.replace_parmdb = True
            self.replace_sourcedb = True
            kwargs.pop('force')
        numthreads = 1
        if 'numthreads' in kwargs:
            numthreads = kwargs['numthreads']
            kwargs.pop('numthreads')
        args.append('--numthreads='+str(numthreads))
        if 'observation' in kwargs:
            self.observation = kwargs.pop('observation')
        if 'catalog' in kwargs:
            self.catalog = kwargs.pop('catalog')

        self.createsourcedb()
        self.createparmdb()
        if 'no-columns' not in kwargs:
            #if not kwargs['no-columns']:
            self.addcolumns()
        else:
            kwargs.pop('no-columns')

        args.append('--sourcedb=' + self.sourcedb_path)
        args.append('--parmdb=' + self.parmdb_path)

        args.append(self.observation)
        #catalog = None


        # Time execution of this job
        with log_time(self.logger):
            #if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Guard against the race condition that occurs when more than
            # one process creates work_dir on the same filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            if parsetasfile:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                #args.insert(0, parsetname)
                args.append(parsetname)

            #if catalog is not None:
            #    args.append(catalog)

            try:
            # ****************************************************************
            #Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                    work_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(
                        cmd, work_dir, self.environment, logger
                    )
            except CalledProcessError, err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception, err:
                self.logger.error(str(err))
                return 1
Example #20
def plugin_main(*args, **kwargs):
    parset = Parset(kwargs['first_parset'])
    parset.adoptFile(kwargs['second_parset'])
    parset.writeFile(kwargs['result_parset'] + '_feedback_file')
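
The plugin simply merges two parsets and writes the result. A hedged sketch of calling it directly (paths hypothetical; the _feedback_file suffix comes from the code above):

# Hedged sketch: invoke the plugin with the three kwargs it reads.
plugin_main(first_parset='/tmp/a.parset',
            second_parset='/tmp/b.parset',
            result_parset='/tmp/merged')           # writes /tmp/merged_feedback_file
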
Example #21
    def pipeline_logic(self):
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we don't need a subset but just a steplist;
        # at the moment only a list of step names is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')
        pipeline_steps = self.parset.makeSubset(
            self.parset.fullModuleName('steps') + '.')
        # *********************************************************************
        # forward declaration of things, just for a better overview and understanding of what's in here.
        # some of this might be removed in upcoming iterations, or stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        # construct the step name list if there were pipeline.steps.<subset>
        for item in pipeline_steps.keys():
            if item in step_name_list:
                loc = step_name_list.index(item)
                step_name_list[loc:loc] = pipeline_steps.getStringVector(item)
                step_name_list.remove(item)

        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
        # initial parameters to be saved in resultsdict so that recipes have access to this step0
        # double init values. 'input' should be considered deprecated
        # self.name would be consistent to use in subpipelines
        input_dictionary = {
            'parset': parset_file,
            'parsetobj': self.parset,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir}

        resultdicts = {}
        for section in self.config.sections():
            tmp_dict = {}
            for entry in self.config.items(section):
                input_dictionary[entry[0]] = entry[1]
                tmp_dict[entry[0]] = entry[1]
            resultdicts.update({section: copy.deepcopy(tmp_dict)})

        resultdicts.update({'input': input_dictionary})
        resultdicts.update({self.name: input_dictionary})

        if 'pipeline.mapfile' in self.parset.keywords():
            resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            self.logger.info("Beginning step %s" % (stepname,))
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            adds = None
            if stepname in step_parset_obj:
                adds = self._construct_step_parset(inputdict,
                                             step_parset_obj[stepname],
                                             resultdicts,
                                             step_parset_files[stepname],
                                             stepname)
            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                    inputdict['stepname'] = stepname
                    if adds:
                        inputdict.update(adds)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]


            self._construct_input(inputdict, step, resultdicts)
            # hack: popping 'type' is necessary because 'kind' has already been deleted from the parsets
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # more hacks: the framework's DictField is not properly implemented, so construct your own dict from the input.
            # Python builtin functions can't handle the string returned by the parset class.
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector('pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keywords():
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({os.path.splitext(os.path.basename(typeval))[0]: {
                    'parset': typeval,
                    'mapfile': submapfile,
                }})
                # todo: take care of plugin paths and everything other than individual steps;
                # make a pipeline parse method that returns everything needed,
                # maybe as dicts, to combine them into one

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keywords():
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in self._keys(subpipeline_parset):
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *********************************************************************
                # The master parset did not handle formatting and comments in the parset;
                # the format is proper only after use of parset.makeSubset, and then it is a different
                # object from a different super class :(. This also explains the use of parset.keys and parset.keys().
                # Take the parset from the subpipeline and add it to the master parset.
                # UPDATE: do not use .keys on the master parset; use .keywords(), then comments are filtered.
                # *********************************************************************
                # replace names of steps with the subpipeline stepname to create a unique identifier.
                # replacement values starting with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline. only works if the ! value is already in the subpipeline
                for k in self._keys(subpipeline_parset):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!') and not str(k).startswith('pipeline.replace.'):
                        for item in checklist:
                            if item+".output" in str(val):
                                val = str(val).replace(item, stepname + '-' + item)

                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        # remove replacements strings to prevent loading the same key twice
                        if k in self._keys(self.parset):
                            self.parset.remove(k)
                        self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self._keys(self.parset):
                        if ((str(k).startswith('!') and item == str(k).strip("! ")) or
                                (str(k).startswith('pipeline.replace.') and
                                 item == str(k)[len('pipeline.replace.'):].strip())):
                            self.parset.remove(k)
                            self.parset.add('! ' + item, str(step_parset_obj[stepname][item]))
                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)


            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                if activeloop[0] != stepname:   # compare by value, not identity
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                if counter == step.getInt('loopcount'):   # 'is' on ints only worked by accident
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(
                        typeval,
                        inputargs,
                        **inputdict
                    )

            # plugins
            if kind_of_step == 'plugin':
                recipe_dirs = str(self.config.get('DEFAULT', 'recipe_directories'))
                pluginpath = recipe_dirs.rstrip(']').lstrip('[').split(',')
                for i, item in enumerate(pluginpath):
                    pluginpath[i] = os.path.join(item, 'plugins')
                if 'pluginpath' in pipeline_args.keys():
                    pluginpath.append(pipeline_args.getString('pluginpath'))
                with duration(self, stepname):
                    resultdict = loader.call_plugin(typeval, pluginpath,
                                                    inputargs,
                                                    **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loopstep
            # if the step has the keyword for loopbreaks assign the value
            if activeloop[0] in resultdicts and resultdict is not None and 'break' in resultdict:
                resultdicts[activeloop[0]]['break'] = resultdict['break']
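
Per the comments in the subpipeline branch above, master-parset keys prefixed with ! or pipeline.replace. override the matching keys of an adopted subpipeline. A hedged sketch of the two spellings (key names and values hypothetical):

# Hedged sketch of the master-parset override convention handled above.
from lofarpipe.support.parset import Parset        # import path assumed

master = Parset()
master.add('! solint', '4')                        # overrides solint in a subpipeline
master.add('pipeline.replace.timestep', '8')       # equivalent spelling
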
Example #22
    def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
        """
        This method contains all the needed functionality
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            #if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)
           # else:
           #     self.logger.error("Dataset %s does not exist" % infile)
           #     return 1

            # Check if executable is present
            if not os.access(executable, os.X_OK):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # Guard against the race condition that occurs when more than
            # one process creates work_dir on the same filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            argsformat = args_format['args_format']
            # deal with multiple input files for wsclean
            if argsformat == 'wsclean':
                for i in reversed(xrange(len(args))):
                    if str(args[i]).startswith('[') and str(args[i]).endswith(']'):
                        tmplist = args.pop(i).lstrip('[').rstrip(']').split(',')
                        for val in reversed(tmplist):
                            args.insert(i, val.strip(' \'\"'))
            if not parsetasfile:
                if argsformat == 'gnu':
                    for k, v in kwargs.items():
                        args.append('--' + k + '=' + v)
                if argsformat == 'lofar':
                    for k, v in kwargs.items():
                        args.append(k + '=' + v)
                if argsformat == 'argparse':
                    for k, v in kwargs.items():
                        args.append('--' + k + ' ' + v)
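                # note: the 'argparse' branch above appends "--key value" as a
                # single argv element; a caller that passes args straight to a
                # subprocess will deliver it as one argument, not two.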
                if argsformat == 'wsclean':
                    for k, v in kwargs.items():
                        if str(v).startswith('[') and str(v).endswith(']'):
                            v = v.lstrip('[').rstrip(']').replace(' ', '')
                            multargs = v.split(',')
                        else:
                            multargs = v.split(' ')
                        if multargs:
                            multargs.reverse()
                            for item in multargs:
                                args.insert(0, item)
                        else:
                            args.insert(0, v)
                        args.insert(0, '-'+ k)
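                    # e.g. kwargs {'size': '1024 1024'} ends up prepended to
                    # args as ['-size', '1024', '1024'] (illustrative values).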

            else:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                if argsformat == 'losoto':
                    args.append(parsetname)
                else: 
                    args.insert(0,parsetname)

            try:
            # ****************************************************************
            # Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                    work_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(
                        cmd, work_dir, self.environment, logger
                    )
            except CalledProcessError, err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception, err:
                self.logger.error(str(err))
                return 1
Example #25
0
    def run(self, imager_exec, vds, parset, resultsdir, start_time, end_time):
        #       imager_exec:                          path to cimager executable
        #               vds:           VDS file describing the data to be imaged
        #            parset:                                imager configuration
        #        resultsdir:                         place resulting images here
        #        start_time:                        )    time range to be imaged
        #          end_time:                        )   in seconds (may be None)
        # ----------------------------------------------------------------------
        with log_time(self.logger):
            self.logger.info("Processing %s" % (vds, ))

            #    Bail out if destination exists (can thus resume a partial run).
            #                                            Should be configurable?
            # ------------------------------------------------------------------
            parset_data = Parset(parset)
            image_names = parset_data.getStringVector("Cimager.Images.Names")
            for image_name in image_names:
                outputfile = os.path.join(resultsdir, image_name + ".restored")
                self.logger.info(outputfile)
                if os.path.exists(outputfile):
                    self.logger.info("Image already exists: aborting.")
                    return 0
            try:
                working_dir = mkdtemp(suffix=".%s" %
                                      (os.path.basename(__file__), ))

                #   If a time range has been specified, copy that section of the
                #                                  input MS and only image that.
                # --------------------------------------------------------------
                query = []
                if start_time:
                    self.logger.debug("Start time is %s" % start_time)
                    start_time = quantity(float(start_time), 's')
                    query.append("TIME > %f" % start_time.get('s').get_value())
                if end_time:
                    self.logger.debug("End time is %s" % end_time)
                    end_time = quantity(float(end_time), 's')
                    query.append("TIME < %f" % end_time.get('s').get_value())
                query = " AND ".join(query)
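                # illustrative: with both limits set, query becomes something
                # like "TIME > 4871448000.000000 AND TIME < 4871448600.000000",
                # a TaQL expression over the MS TIME column (seconds, MJD epoch).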
                if query:
                    #                             Select relevant section of MS.
                    # ----------------------------------------------------------
                    self.logger.debug("Query is %s" % query)
                    output = os.path.join(working_dir, "timeslice.MS")
                    vds_parset = get_parset(vds)
                    t = table(vds_parset.getString("FileName"))
                    t.query(query, name=output)
                    #       Patch updated information into imager configuration.
                    # ----------------------------------------------------------
                    parset = patch_parset(parset, {'Cimager.dataset': output})
                else:
                    self.logger.debug("No time range selected")

                self.logger.debug("Running cimager")
                with CatchLog4CXX(
                        working_dir,
                        self.logger.name + "." + os.path.basename(vds)):
                    cimager_process = Popen([imager_exec, "-inputs", parset],
                                            stdout=PIPE,
                                            stderr=PIPE,
                                            cwd=working_dir)
                    sout, serr = cimager_process.communicate()
                log_process_output("cimager", sout, serr, self.logger)
                if cimager_process.returncode != 0:
                    raise CalledProcessError(cimager_process.returncode,
                                             imager_exec)

                #        Dump the resulting images in the pipeline results area.
                #    I'm not aware of a foolproof way to predict the image names
                #                that will be produced, so we read them from the
                #                      parset and add standard cimager prefixes.
                # --------------------------------------------------------------
                parset_data = Parset(parset)
                image_names = parset_data.getStringVector(
                    "Cimager.Images.Names")
                prefixes = [
                    "image", "psf", "residual", "weights", "sensitivity"
                ]
                self.logger.debug("Copying images to %s" % resultsdir)
                for image_name in image_names:
                    for prefix in prefixes:
                        filename = image_name.replace("image", prefix, 1)
                        shutil.move(os.path.join(working_dir, filename),
                                    os.path.join(resultsdir, filename))
                    if parset_data.getBool('Cimager.restore'):
                        shutil.move(
                            os.path.join(working_dir,
                                         image_name + ".restored"),
                            os.path.join(resultsdir, image_name + ".restored"))
            except CalledProcessError, e:
                self.logger.error(str(e))
                return 1
            finally:
                # the snippet is truncated here; the original presumably removes
                # the temporary working directory created with mkdtemp above:
                shutil.rmtree(working_dir, ignore_errors=True)
Example #26
0
    def go(self):
        if 'executable' in self.inputs:
            executable = self.inputs['executable']
        else:
            executable = None  # avoid a NameError below when no executable is given

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_format_longoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}

        # *********************************************************************
        # try loading input/output data file, validate output vs the input location if
        #    output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        prefix = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be placed in the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(prefix, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(prefix, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        prefix,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if not self.inputs['inputkeys'] and self.inputs['inputkey']:
            self.inputs['inputkeys'].append(self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap in zip(self.inputs['inputkeys'], inputmapfiles):
                filedict[key] = []
                for inp in filemap:
                    filedict[key].append(inp.file)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        command = "python %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            #if keylist:
                #for name, value in zip(keylist, inputlist):
            if filedict:
                for name, value in filedict.iteritems():
                    if arglist_copy and name in arglist_copy:
                        ind = arglist_copy.index(name)
                        arglist_copy[ind] = value[i]
                    elif name in parsetdict_copy.values():
                        for k, v in parsetdict_copy.iteritems():
                            if v == name:
                                parsetdict_copy[k] = value[i]
                    else:
                        parsetdict_copy[name] = value[i]
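            # by this point every input/output key name found in the argument
            # list or the parset dict has been replaced by this job's concrete
            # file path, so the ComputeJob below only sees real paths.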

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        prefix,
                        self.inputs['parsetasfile'],
                        args_format,
                        #self.inputs['working_directory'],
                        self.environment
                    ]
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
            for k, v in job.results.items():
                if not k in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # temp solution. write all output dict entries to a mapfile
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        for k, v in jobresultdict.items():
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
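
For reference, a minimal sketch of the DataMap round-trip that go() above relies on; the class and attribute names are the ones used in the recipe, while the host and paths are made up:

    # Hypothetical mapfile usage mirroring the DataMap.load()/save() calls above.
    from lofarpipe.support.data_map import DataMap, DataProduct

    dmap = DataMap([DataProduct('locus001', '/data/L12345_SB000.MS', False)])
    dmap.save('/tmp/example.mapfile')                 # written by the master recipe
    restored = DataMap.load('/tmp/example.mapfile')   # read back on a later run
    for item in restored:
        print item.host, item.file, item.skip         # Python 2, as in this module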
Example #27
0
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=False,
            args_format='',
            environment=''):
        """
        This function contains all the needed functionality
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # hack the planet
        #executable = 'casa'

        # Time execution of this job
        with log_time(self.logger):
            if os.path.exists(infile):
                self.logger.info("Processing %s" % infile)
            else:
                self.logger.error("Dataset %s does not exist" % infile)
                return 1

            # Check if executable is present
            if not os.access(executable, os.X_OK):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # hurray! race condition when running with more than one process on one filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir, )
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            #print 'KWARGS: ', kwargs
            if not parsetasfile:
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            else:
                nodeparset = Parset()
                sublist = []
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                    if '.' in str(k):
                        #print 'DOTPOS: ',str(k).find('.')
                        #print 'SPLIT: ', str(k).split('.')[0]
                        #print 'SPLIT: ', str(k).split('.')[1]
                        if not str(k).split('.')[0] in sublist:
                            sublist.append(str(k).split('.')[0])
                #print 'SUBPARSETLIST: ', sublist

                #subpar = Parset()
                #quick hacks below. for proof of concept.
                subparsetlist = []
                casastring = ''
                for sub in sublist:
                    subpar = nodeparset.makeSubset(
                        nodeparset.fullModuleName(sub) + '.')
                    #print 'SUBPAR: ',subpar.keys()
                    casastring = sub + '('
                    for k in subpar.keys():
                        #print 'SUBPARSET: ',k ,' ',subpar[k]
                        #args.append('--' + k + '=' + subpar[k])
                        if str(subpar[k]).find('/casastr/') == 0:
                            # strip the '/casastr/' marker prefix; this must be
                            # checked first, since such values also start with '/'
                            # (str.strip would remove characters, not the prefix)
                            casastring += str(k) + '=' + "'" + str(
                                subpar[k])[len('/casastr/'):] + "'" + ','
                        elif str(subpar[k]).find('/') == 0:
                            casastring += str(k) + '=' + "'" + str(
                                subpar[k]) + "'" + ','
                        else:
                            casastring += str(k) + '=' + str(subpar[k]) + ','
                    casastring = casastring.rstrip(',')
                    casastring += ')\n'
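                # illustrative: for kwargs {'clean.vis': '/casastr/in.MS',
                # 'clean.niter': '1000'} the loop above builds (key order may vary)
                # casastring == "clean(vis='in.MS',niter=1000)\n"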
                #print 'CASASTRING:'
                #print casastring
                # 1) the return code of casapy is not properly recognized by the
                #    pipeline; wrapping it in a shell script works for successful
                #    runs, but failed runs seem to hang the pipeline...
                # 2) casapy cannot have two instances running from the same
                #    directory, so create tmp dirs
                casapydir = tempfile.mkdtemp(dir=work_dir)
                if casastring != '':
                    casafilename = os.path.join(
                        work_dir,
                        os.path.basename(infile) + '.casacommand.py')
                    casacommandfile = open(casafilename, 'w')
                    casacommandfile.write('try:\n')
                    casacommandfile.write('    ' + casastring)
                    casacommandfile.write('except SystemExit:\n')
                    casacommandfile.write('    pass\n')
                    casacommandfile.write('except:\n')
                    casacommandfile.write('    import os\n')
                    casacommandfile.write('    os._exit(1)\n')
                    casacommandfile.close()
                    args.append(casafilename)
                somename = os.path.join(
                    work_dir,
                    os.path.basename(infile) + '.casashell.sh')
                commandstring = ''
                commandstring += executable
                for item in args:
                    commandstring += ' ' + item

                #print 'COMMANDSTRING: ',commandstring
                wrapper = open(somename, 'w')
                wrapper.write('#!/bin/bash \n')
                wrapper.write('echo "Trying CASAPY command" \n')
                #wrapper.write('/home/zam/sfroehli/casapy-42.1.29047-001-1-64b/bin/casa' + ' --nologger'+' -c ' + casafilename)
                wrapper.write(commandstring)
                #                 wrapper.write('\nexit 0')
                wrapper.close()
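                # illustrative: the generated wrapper script looks like
                #   #!/bin/bash
                #   echo "Trying CASAPY command"
                #   <executable> <args...> <infile>.casacommand.py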

                import stat
                st = os.stat(somename)
                #os.chmod(casafilename, stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
                os.chmod(
                    somename,
                    st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

            try:
                # ****************************************************************
                # Run
                if parsetasfile:
                    cmd = [somename]
                else:
                    # no casa wrapper or tmp dir was generated in this branch
                    cmd = [executable] + args
                    casapydir = work_dir
                with CatchLog4CPlus(
                        casapydir,
                        self.logger.name + "." + os.path.basename(infile),
                        os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(cmd, casapydir, self.environment, logger)
            except CalledProcessError, err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception, err:
                self.logger.error(str(err))
                return 1
Example #28
0
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=True,
            args_format='',
            environment=''):
        """
        Prepare the sourcedb and parmdb for the given observation, then run
        the executable with the assembled arguments.
        """

        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        self.work_dir = work_dir
        self.infile = infile
        self.executable = executable

        if 'replace-sourcedb' in kwargs:
            self.replace_sourcedb = kwargs['replace-sourcedb']
            kwargs.pop('replace-sourcedb')
        if 'replace-parmdb' in kwargs:
            self.replace_parmdb = kwargs['replace-parmdb']
            kwargs.pop('replace-parmdb')
        if 'dry-run' in kwargs:
            self.dry_run = kwargs['dry-run']
            kwargs.pop('dry-run')
        if 'sourcedb' in kwargs:
            self.sourcedb = kwargs['sourcedb']
            kwargs.pop('sourcedb')
        if 'parmdb' in kwargs:
            self.parmdb = kwargs['parmdb']
            kwargs.pop('parmdb')
        if 'sourcedb-name' in kwargs:
            self.sourcedb_basename = kwargs['sourcedb-name']
            self.replace_sourcedb = True
            kwargs.pop('sourcedb-name')
        if 'parmdb-name' in kwargs:
            self.parmdb_basename = kwargs['parmdb-name']
            self.replace_parmdb = True
            kwargs.pop('parmdb-name')
        if 'force' in kwargs:
            self.replace_parmdb = True
            self.replace_sourcedb = True
            kwargs.pop('force')
        numthreads = 1
        if 'numthreads' in kwargs:
            numthreads = kwargs['numthreads']
            kwargs.pop('numthreads')
        args.append('--numthreads=' + str(numthreads))
        if 'observation' in kwargs:
            self.observation = kwargs.pop('observation')
        if 'catalog' in kwargs:
            self.catalog = kwargs.pop('catalog')

        self.createsourcedb()
        self.createparmdb()
        if 'no-columns' not in kwargs:
            #if not kwargs['no-columns']:
            self.addcolumns()
        else:
            kwargs.pop('no-columns')

        args.append('--sourcedb=' + self.sourcedb_path)
        args.append('--parmdb=' + self.parmdb_path)

        args.append(self.observation)
        #catalog = None

        # Time execution of this job
        with log_time(self.logger):
            #if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # hurray! race condition when running with more than one process on one filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir, )
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            if parsetasfile:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir,
                                          os.path.basename(infile) + '.parset')
                for k, v in list(kwargs.items()):
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                #args.insert(0, parsetname)
                args.append(parsetname)

            #if catalog is not None:
            #    args.append(catalog)

            try:
                # ****************************************************************
                #Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                        work_dir,
                        self.logger.name + "." + os.path.basename(infile),
                        os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(cmd, work_dir, self.environment, logger)
            except CalledProcessError as err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception as err:
                self.logger.error(str(err))
                return 1
        # We need some signal to the master script that the script ran ok.
        self.outputs['ok'] = True
        return 0
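
For orientation, a hedged sketch of the keyword arguments this node script consumes; the key names come from the pops in run() above, while every value is made up:

    # Illustrative kwargs for the sourcedb/parmdb node recipe above.
    kwargs = {
        'sourcedb-name': 'sky',          # also forces replace_sourcedb = True
        'parmdb-name': 'instrument',     # also forces replace_parmdb = True
        'numthreads': 4,                 # rendered as --numthreads=4
        'observation': '/data/L12345_SB000.MS',
    }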
Example #29
0
    def pipeline_logic(self):
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we don't need a subset but just a steplist
        # at the moment only a list of step names is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')

        # *********************************************************************
        # forward declaration of things, just for a better overview and understanding of what's in here.
        # some of this might be removed in upcoming iterations, or stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict,
                              step_parset_files, step_parset_obj, parset_dir)
        # initial parameters are saved in the resultdicts so that recipes have access to them as step 0.
        # the values are initialised twice; 'input' should be considered deprecated,
        # self.name would be the consistent name to use in subpipelines
        resultdicts = {
            'input': {
                'parset': parset_file,
                'parsetobj': self.parset,
                'job_dir': job_dir,
                'parset_dir': parset_dir,
                'mapfile_dir': mapfile_dir
            }
        }

        resultdicts.update({
            self.name: {
                'parset': parset_file,
                'parsetobj': self.parset,
                'job_dir': job_dir,
                'parset_dir': parset_dir,
                'mapfile_dir': mapfile_dir
            }
        })

        if 'pipeline.mapfile' in self.parset.keys:
            resultdicts['input']['mapfile'] = str(
                self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(
                self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            #self._construct_cmdline(inputargs, step, resultdicts)

            additional_input = {}

            if stepname in step_parset_obj:
                additional_input = self._construct_step_parset(
                    step_parset_obj[stepname], resultdicts,
                    step_parset_files[stepname], stepname)

            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval,
                                             'recipe') == 'executable_args':
                    inputdict = {'stepname': stepname}
                    inputdict.update(additional_input)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]

            self._construct_input(inputdict, step, resultdicts)

            # hack: pop 'type' and 'kind', since they were already consumed from
            # the step parsets above
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # more hacks. The framework's DictField is not properly implemented, so construct your own dict from the input.
            # Python's built-in functions can't handle the string returned by the parset class.
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(
                    ' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict
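                # illustrative: "{PATH:/opt/bin,OMP_NUM_THREADS:4}" becomes
                # {'PATH': '/opt/bin', 'OMP_NUM_THREADS': '4'}; values containing
                # ':' or ',' would break this simple parsing.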

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector(
                    'pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keys:
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({
                    os.path.splitext(os.path.basename(typeval))[0]: {
                        'parset': typeval,
                        'mapfile': submapfile,
                    }
                })
                #todo: take care of plugin paths and everything other than individual steps.
                # add a pipeline parse method that returns everything needed,
                # maybe as dicts, to combine them into one

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keys:
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in subpipeline_parset.keys:
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *********************************************************************
                # the master parset does not handle formatting and comments in the parset.
                # the format is only proper after parset.makeSubset; then it is a different object
                # from a different superclass :(. this also explains the mixed use of parset.keys and parset.keys()
                # take the parset from subpipeline and add it to the master parset.
                # *********************************************************************
                # replace names of steps with the subpipeline stepname to create a unique identifier.
                # replacement values starting with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline. only works if the ! value is already in the subpipeline
                for k in subpipeline_parset.keys:
                    if not str(k).startswith('#'):
                        val = subpipeline_parset[k]
                        if not str(k).startswith('!'):
                            for item in checklist:
                                if item in str(val):
                                    val = str(val).replace(
                                        item, stepname + '-' + item)

                            self.parset.add(stepname + '-' + k, str(val))
                        else:
                            self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self.parset.keys:
                        if str(k).startswith('!') and item in k:
                            self.parset.remove(k)
                            self.parset.add(
                                '! ' + item,
                                str(step_parset_obj[stepname][item]))

                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict,
                                      step_parset_files, step_parset_obj,
                                      parset_dir)
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)

                # remove replacement strings to prevent loading the same key twice
                for k in copy.deepcopy(self.parset.keys):
                    if str(k).startswith('!'):
                        self.parset.remove(k)

            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                if activeloop[0] != stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                if counter == step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict,
                                          step_parset_files, step_parset_obj,
                                          parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
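                    # e.g. after the third pass of a loop with loopcount = 5 this
                    # yields {'counter': 2, 'break': False} (counter starts at 0).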
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(typeval, inputargs, **inputdict)

            # plugins
            if kind_of_step == 'plugin':
                with duration(self, stepname):
                    resultdict = loader.call_plugin(
                        typeval, pipeline_args.getString('pluginpath'),
                        inputargs, **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loop step:
            # if the step returned the loop-break keyword, propagate its value
            if resultdict is not None and 'break' in resultdict:
                if resultdict['break']:
                    resultdicts[activeloop[0]]['break'] = resultdict['break']
Example #30
0
class GenericPipeline(control):

    inputs = {
        'loglevel':
        ingredient.StringField('--loglevel',
                               help="loglevel",
                               default='INFO',
                               optional=True)
    }

    def __init__(self):
        control.__init__(self)
        self.parset = Parset()
        self.input_data = {}
        self.output_data = {}
        self.parset_feedback_file = None
        #self.logger = None#logging.RootLogger('DEBUG')
        self.name = ''

        #if not overwrite:
        #    self.inputs['job_name'] = 'generic-pipeline'
        # if not self.inputs.has_key("start_time"):
        #     import datetime
        #     self.inputs["start_time"] = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
        # if not hasattr(self, "config"):
        #     self.config = self._read_config()
        # #self._read_config()
        # # ...and task files, if applicable
        # if not self.inputs.has_key("task_files"):
        #     try:
        #         self.inputs["task_files"] = utilities.string_to_list(
        #             self.config.get('DEFAULT', "task_files")
        #         )
        #     except NoOptionError:
        #         self.inputs["task_files"] = []
        # self.task_definitions = ConfigParser(self.config.defaults())
        # print >> sys.stderr, "Reading task definition file(s): %s" % \
        #                      ",".join(self.inputs["task_files"])
        # self.task_definitions.read(self.inputs["task_files"])
        #    self.go()

    def usage(self):
        """
        Display usage
        """
        print >> sys.stderr, "Usage: %s [options] <parset-file>" % sys.argv[0]
        print >> sys.stderr, "Parset structure should look like:\n" \
                             "NYI"
        #return 1

    def go(self):
        """
        Read the parset-file that was given as input argument, and set the
        job name before calling the base-class's `go()` method.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            # without a return, execution would continue and hit a NameError
            # on parset_file below
            return self.usage()

        # Set job-name to basename of parset-file w/o extension, if it's not
        # set on the command-line with '-j' or '--job-name'
        if not 'job_name' in self.inputs:
            self.inputs['job_name'] = (os.path.splitext(
                os.path.basename(parset_file))[0])
            self.name = self.inputs['job_name']
        try:
            self.logger
        except AttributeError:
            self.logger = getSearchingLogger(self.name)
            self.logger.setLevel(self.inputs['loglevel'])
        # Call the base-class's `go()` method.
        return super(GenericPipeline, self).go()


#    def pipeline_logic(self):
#        print 'Dummy because of stupid wrapping inside the framework'
#        if overwrite:
#            self.execute_pipeline()

#def execute_pipeline(self):

    def pipeline_logic(self):
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we don't need a subset but just a steplist
        # at the moment only a list of step names is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')

        # *********************************************************************
        # forward declaration of things, just for a better overview and understanding of what's in here.
        # some of this might be removed in upcoming iterations, or stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict,
                              step_parset_files, step_parset_obj, parset_dir)
        # initial parameters are saved in the resultdicts so that recipes have access to them as step 0.
        # the values are initialised twice; 'input' should be considered deprecated,
        # self.name would be the consistent name to use in subpipelines
        resultdicts = {
            'input': {
                'parset': parset_file,
                'parsetobj': self.parset,
                'job_dir': job_dir,
                'parset_dir': parset_dir,
                'mapfile_dir': mapfile_dir
            }
        }

        resultdicts.update({
            self.name: {
                'parset': parset_file,
                'parsetobj': self.parset,
                'job_dir': job_dir,
                'parset_dir': parset_dir,
                'mapfile_dir': mapfile_dir
            }
        })

        if 'pipeline.mapfile' in self.parset.keys:
            resultdicts['input']['mapfile'] = str(
                self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(
                self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            #self._construct_cmdline(inputargs, step, resultdicts)

            additional_input = {}

            if stepname in step_parset_obj:
                additional_input = self._construct_step_parset(
                    step_parset_obj[stepname], resultdicts,
                    step_parset_files[stepname], stepname)

            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval,
                                             'recipe') == 'executable_args':
                    inputdict = {'stepname': stepname}
                    inputdict.update(additional_input)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]

            self._construct_input(inputdict, step, resultdicts)

            # hack: pop 'type' and 'kind', since they were already consumed from
            # the step parsets above
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # more hacks. The framework's DictField is not properly implemented, so construct your own dict from the input.
            # Python's built-in functions can't handle the string returned by the parset class.
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(
                    ' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector(
                    'pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keys:
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({
                    os.path.splitext(os.path.basename(typeval))[0]: {
                        'parset': typeval,
                        'mapfile': submapfile,
                    }
                })
                #todo: take care of plugin paths and everything other than individual steps.
                # add a pipeline parse method that returns everything needed,
                # maybe as dicts, to combine them into one

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keys:
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in subpipeline_parset.keys:
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *********************************************************************
                # the master parset does not handle formatting and comments in the parset.
                # the format is only proper after parset.makeSubset; then it is a different object
                # from a different superclass :(. this also explains the mixed use of parset.keys and parset.keys()
                # take the parset from subpipeline and add it to the master parset.
                # *********************************************************************
                # replace names of steps with the subpipeline stepname to create a unique identifier.
                # replacement values starting with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline. only works if the ! value is already in the subpipeline
                for k in subpipeline_parset.keys:
                    if not str(k).startswith('#'):
                        val = subpipeline_parset[k]
                        if not str(k).startswith('!'):
                            for item in checklist:
                                if item in str(val):
                                    val = str(val).replace(
                                        item, stepname + '-' + item)

                            self.parset.add(stepname + '-' + k, str(val))
                        else:
                            self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self.parset.keys:
                        if str(k).startswith('!') and item in k:
                            self.parset.remove(k)
                            self.parset.add(
                                '! ' + item,
                                str(step_parset_obj[stepname][item]))

                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict,
                                      step_parset_files, step_parset_obj,
                                      parset_dir)
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)

                # remove replacement strings to prevent loading the same key twice
                for k in copy.deepcopy(self.parset.keys):
                    if str(k).startswith('!'):
                        self.parset.remove(k)

            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                if activeloop[0] != stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                if counter == step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict,
                                          step_parset_files, step_parset_obj,
                                          parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(typeval, inputargs, **inputdict)

            # plugins
            if kind_of_step == 'plugin':
                with duration(self, stepname):
                    resultdict = loader.call_plugin(
                        typeval, pipeline_args.getString('pluginpath'),
                        inputargs, **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loop step
            # if the step returned the loop-break keyword, propagate its value
            # to the currently active loop step
            if (resultdict is not None and 'break' in resultdict
                    and resultdict['break'] and activeloop[0] in resultdicts):
                resultdicts[activeloop[0]]['break'] = resultdict['break']

    # *********************************************************************
    # build the inputs for the master recipes.
    def _construct_input(self, inoutdict, controlparset, resdicts):
        # intermediate backward compatibility for opts subparset
        if controlparset.fullModuleName('opts'):
            argsparset = controlparset.makeSubset(
                controlparset.fullModuleName('opts') + '.')
        # hack
        elif 'loopcount' not in controlparset.keys():
            argsparset = controlparset
        else:
            argsparset = controlparset.makeSubset(
                controlparset.fullModuleName('imaginary') + '.')
        # \hack

        self._replace_output_keyword(inoutdict, argsparset, resdicts)

    def _construct_cmdline(self, inoutargs, controlparset, resdicts):
        argsparset = controlparset.makeSubset(
            controlparset.fullModuleName('cmdline') + '.')
        for k in argsparset.keys():
            if '.output.' in argsparset.getString(k):
                step, outvar = argsparset.getString(k).split('.output.')
                inoutargs.append(resdicts[step][outvar])
            else:
                inoutargs.append(argsparset.getString(k))
        try:
            controlparset.remove('cmdline.inmap')
        except:
            pass

    def _construct_steps(self, step_name_list, step_control_dict,
                         step_parset_files, step_parset_obj, parset_dir):
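        # Build one control subset per step: duplicate step names get a numeric
        # suffix, the step's 'argument'/'parsetarg' subset is merged with any
        # referenced parset files, and the result is written as <stepname>.parset.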
        step_list_copy = (copy.deepcopy(step_name_list))
        counter = 0
        while step_list_copy:
            counter -= 1
            stepname = step_list_copy.pop(-1)
            fullparset = self.parset.makeSubset(
                self.parset.fullModuleName(str(stepname)) + '.')
            subparset = fullparset.makeSubset(
                fullparset.fullModuleName('control') + '.')
            number = 0
            for item in step_list_copy:
                if item == stepname:
                    number += 1
            if number != 0:
                stepname += str(number)
            step_name_list[counter] = stepname
            step_control_dict[stepname] = subparset
            # double implementation for intermediate backward compatibility
            if fullparset.fullModuleName(
                    'parsetarg') or fullparset.fullModuleName('argument'):
                if fullparset.fullModuleName('parsetarg'):
                    stepparset = fullparset.makeSubset(
                        fullparset.fullModuleName('parsetarg') + '.')
                if fullparset.fullModuleName('argument'):
                    stepparset = fullparset.makeSubset(
                        fullparset.fullModuleName('argument') + '.')
                # *********************************************************************
                # save parsets
                # either a filename is given in the main parset
                # or files will be created from subsets with stepnames.parset as filenames
                # for name, parset in step_parset_dict.iteritems():
                try:
                    file_parset = Parset(stepparset.getString('parset'))
                    for k in file_parset.keys:
                        if k not in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    stepparset.remove('parset')
                except:
                    pass
                # parset from task.cfg
                try:
                    file_parset = Parset(
                        self.task_definitions.get(str(subparset['type']),
                                                  'parset'))
                    for k in file_parset.keys:
                        if k not in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                except:
                    pass
                # for parset in control section
                try:
                    file_parset = Parset(subparset.getString('parset'))
                    for k in file_parset.keys:
                        if k not in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    subparset.remove('parset')
                except:
                    pass
                step_parset = os.path.join(parset_dir, stepname + '.parset')
                stepparset.writeFile(step_parset)
                step_parset_files[stepname] = step_parset
                step_parset_obj[stepname] = stepparset

    def _replace_output_keyword(self, inoutdict, argsparset, resdicts):
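        # Resolve values of the form '<step>.output.<var>' against the result
        # dictionaries of previously executed steps. Bracketed, comma-separated
        # values are resolved element-wise into a list, and '<var>+<suffix>'
        # appends the literal suffix to the resolved value.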
        for k in argsparset.keys():
            keystring = argsparset.getString(k)
            if '.output.' in keystring:
                if ',' in keystring:
                    keystring = keystring.rstrip(']')
                    keystring = keystring.lstrip('[')
                    vec = []
                    for item in keystring.split(','):
                        if '.output.' in item:
                            step, outvar = item.split('.output.')
                            vec.append(resdicts[step][outvar])
                        else:
                            vec.append(item)
                    inoutdict[k] = vec
                else:
                    step, outvar = argsparset.getString(k).split('.output.')
                    if '+' in outvar:
                        tmplist = str(outvar).split('+')
                        inoutdict[k] = resdicts[step][tmplist[0]] + tmplist[1]
                    else:
                        inoutdict[k] = resdicts[step][outvar]
            else:
                inoutdict[k] = argsparset.getString(k)

    def _construct_step_parset(self, argsparset, resdicts, filename, stepname):
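        # Resolve '.output.' references in the step arguments, record any
        # mapfile outputs of earlier steps as inputs for this step, pull
        # 'flags' out into the argument list, and write the resolved parset
        # to 'filename'.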
        addvals = {'inputkeys': [], 'mapfiles_in': [], 'arguments': []}
        # hack for original order of args
        tmp_keys = argsparset.keys()
        ordered_keys = []
        for orig in self.parset.keys:
            for item in tmp_keys:
                if (stepname + '.') in orig and (
                        'argument.' + item in orig
                        and 'argument.' + item + '.' not in orig):
                    ordered_keys.append(item)
                    continue
        # \hack
        for k in ordered_keys:
            valuestring = argsparset.getString(k)
            if '.output.' in valuestring:
                if ',' in valuestring:
                    valuestring = valuestring.rstrip(']')
                    valuestring = valuestring.lstrip('[')
                    vec = []
                    for item in valuestring.split(','):
                        if '.output.' in item:
                            step, outvar = item.split('.output.')
                            vec.append(resdicts[step][outvar])
                            if 'mapfile' in str(outvar):
                                addvals['inputkeys'].append(
                                    resdicts[step][outvar])
                                addvals['mapfiles_in'].append(
                                    resdicts[step][outvar])
                        else:
                            vec.append(item)
                    argsparset.replace(k, str(vec))
                    if k == 'flags':
                        addvals['arguments'] = vec
                        argsparset.remove(k)
                else:
                    step, outvar = argsparset.getString(k).split('.output.')
                    # more ugly hacks... this really needs a clearly structured replacement method...
                    if '+' in outvar:
                        tmplist = str(outvar).split('+')
                        argsparset.replace(
                            k,
                            str(resdicts[step][tmplist[0]]) + tmplist[1])
                    else:
                        argsparset.replace(k, str(resdicts[step][outvar]))
                    #if isinstance(resdicts[step][outvar], str):
                    if 'mapfile' in str(outvar):
                        addvals['inputkeys'].append(resdicts[step][outvar])
                        addvals['mapfiles_in'].append(resdicts[step][outvar])
                    if k == 'flags':
                        addvals['arguments'] = str(argsparset[k])
                        argsparset.remove(k)
            else:
                if k == 'flags':
                    addvals['arguments'] = str(argsparset[k])
                    argsparset.remove(k)

            # direct usage of outputkey
            if 'outputkey' in valuestring:
                addvals['outputkey'] = 'outputkey'

        argsparset.writeFile(filename)
        return addvals

    def _get_parset_dicts(self):
        return {}

    def show_tasks(self):
        tasklist = self.task_definitions.sections()
        for item in tasklist:
            print item
        #return tasklist

    def show_task(self, task):
        task_parset = Parset()
        if self.task_definitions.has_option(task, 'parset'):
            task_parset.adoptFile(self.task_definitions.get(task, 'parset'))
            print 'possible arguments: key    =    value'
            for k in task_parset.keys:
                print '                   ', k, '    ', '=', '    ', task_parset[k]

    def _add_step(self):
        steplist = []
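        # placeholder: step insertion is not implemented yet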

    def _replace_values(self):
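        # Replacement values come from an optional user 'prepare' plugin and
        # from parset keys starting with '!'; every '{{ name }}' occurrence
        # in the other parset values is then substituted.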
        replacedict = {}
        try:
            import imp
            plugin = imp.load_source('main', str(self.parset['prepare']))
            replacedict = plugin.main()
        except:
            pass
        for check in self.parset.keys:
            if str(check).startswith('!'):
                replacedict[str(check).lstrip('!').lstrip(' ')] = str(
                    self.parset[check])
        #print 'REPLACEDICT: ',replacedict
        for check in self.parset.keys:
            if not str(check).startswith('#'):
                for k, v in replacedict.iteritems():
                    if '{{ ' + k + ' }}' in str(self.parset[check]):
                        replacestring = str(self.parset[check]).replace(
                            '{{ ' + k + ' }}', v)
                        self.parset.replace(check, replacestring)
Example #31
0
    def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=True, args_format='', environment=''):
        """
        This method contains all the needed functionality
        """

        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        self.work_dir = work_dir
        self.infile = infile
        self.executable = executable

        self.msout_original = kwargs.pop('msout').rstrip('/')
        self.msout_destination_dir = os.path.dirname(self.msout_original)

        # Set up scratch paths
        scratch_dir = kwargs.pop('local_scratch_dir')
        try:
            os.mkdir(scratch_dir)
        except OSError:
            pass
        self.scratch_dir = tempfile.mkdtemp(dir=scratch_dir)
        self.logger.info('Using {} as scratch directory'.format(self.scratch_dir))
        self.msout_scratch = os.path.join(self.scratch_dir, os.path.basename(self.msout_original))
        args.append('msout=' + self.msout_scratch)
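        # The measurement set is written to node-local scratch first; moving it
        # to its final destination is presumably handled by cleanup code that is
        # not part of this snippet.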

        # Time execution of this job
        with log_time(self.logger):
            #if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # hurray! race condition when running with more than one process on one filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            argsformat = args_format['args_format']
            if not parsetasfile:
                if argsformat == 'gnu':
                    for k, v in kwargs.items():
                        args.append('--' + k + '=' + v)
                if argsformat == 'lofar':
                    for k, v in kwargs.items():
                        args.append(k + '=' + v)
                if argsformat == 'argparse':
                    for k, v in kwargs.items():
                        args.append('--' + k + ' ' + v)
                if argsformat == 'wsclean':
                    for k, v in kwargs.items():
                        multargs = v.split(' ')
                        if multargs:
                            multargs.reverse()
                            for item in multargs:
                                args.insert(0, item)
                        else:
                            args.insert(0, v)
                        args.insert(0, '-'+ k)

            else:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                args.insert(0, parsetname)

            try:
                # ****************************************************************
                # Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                    work_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(
                        cmd, work_dir, self.environment, logger
                    )
            except CalledProcessError, err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                self.cleanup()
                return 1
            except Exception, err:
                self.logger.error(str(err))
                self.cleanup()
                return 1
Example #32
0
File: cimager.py Project: jjdmol/LOFAR
    def run(self, imager_exec, vds, parset, resultsdir, start_time, end_time):
        #       imager_exec:                          path to cimager executable
        #               vds:           VDS file describing the data to be imaged
        #            parset:                                imager configuration
        #        resultsdir:                         place resulting images here
        #        start_time:                        )    time range to be imaged
        #          end_time:                        )   in seconds (may be None)
        # ----------------------------------------------------------------------
        with log_time(self.logger):
            self.logger.info("Processing %s" % (vds,))

            #    Bail out if destination exists (can thus resume a partial run).
            #                                            Should be configurable?
            # ------------------------------------------------------------------
            parset_data = Parset(parset)
            image_names = parset_data.getStringVector("Cimager.Images.Names")
            for image_name in image_names:
                outputfile = os.path.join(resultsdir, image_name + ".restored")
                self.logger.info(outputfile)
                if os.path.exists(outputfile):
                    self.logger.info("Image already exists: aborting.")
                    return 0
            try:
                working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))

                #   If a time range has been specified, copy that section of the
                #                                  input MS and only image that.
                # --------------------------------------------------------------
                query = []
                if start_time:
                    self.logger.debug("Start time is %s" % start_time)
                    start_time = quantity(float(start_time), 's')
                    query.append("TIME > %f" % start_time.get('s').get_value())
                if end_time:
                    self.logger.debug("End time is %s" % end_time)
                    end_time = quantity(float(end_time), 's')
                    query.append("TIME < %f" % end_time.get('s').get_value())
                query = " AND ".join(query)
                if query:
                    #                             Select relevant section of MS.
                    # ----------------------------------------------------------
                    self.logger.debug("Query is %s" % query)
                    output = os.path.join(working_dir, "timeslice.MS")
                    vds_parset = get_parset(vds)
                    t = table(vds_parset.getString("FileName"))
                    t.query(query, name=output)
                    #       Patch updated information into imager configuration.
                    # ----------------------------------------------------------
                    parset = patch_parset(parset,
                        {
                            'Cimager.dataset': output
                        }
                    )
                else:
                    self.logger.debug("No time range selected")

                self.logger.debug("Running cimager")
                with CatchLog4CXX(
                    working_dir,
                    self.logger.name + "." + os.path.basename(vds)
                ):
                    cimager_process = Popen(
                        [imager_exec, "-inputs", parset],
                        stdout=PIPE, stderr=PIPE, cwd=working_dir
                    )
                    sout, serr = cimager_process.communicate()
                log_process_output("cimager", sout, serr, self.logger)
                if cimager_process.returncode != 0:
                    raise CalledProcessError(
                        cimager_process.returncode, imager_exec
                    )

                #        Dump the resulting images in the pipeline results area.
                #    I'm not aware of a foolproof way to predict the image names
                #                that will be produced, so we read them from the
                #                      parset and add standard cimager prefixes.
                # --------------------------------------------------------------
                parset_data = Parset(parset)
                image_names = parset_data.getStringVector("Cimager.Images.Names")
                prefixes = [
                    "image", "psf", "residual", "weights", "sensitivity"
                ]
                self.logger.debug("Copying images to %s" % resultsdir)
                for image_name in image_names:
                    for prefix in prefixes:
                        filename = image_name.replace("image", prefix, 1)
                        shutil.move(
                            os.path.join(working_dir, filename),
                            os.path.join(resultsdir, filename)
                        )
                    if parset_data.getBool('Cimager.restore'):
                        shutil.move(
                            os.path.join(working_dir, image_name + ".restored"),
                            os.path.join(resultsdir, image_name + ".restored")
                        )
            except CalledProcessError, e:
                self.logger.error(str(e))
                return 1
            finally:
                # The original snippet is truncated here; the usual cleanup
                # would remove the temporary working directory, e.g.:
                shutil.rmtree(working_dir, ignore_errors=True)
Example #33
0
File: bbs.py Project: jjdmol/LOFAR
    def run(
        self, executable, initscript, infile, key, db_name, db_user, db_host
    ):
        #                           executable: path to KernelControl executable
        #                           initscript:             path to lofarinit.sh
        #                               infile:    MeasurementSet for processing
        #       key, db_name, db_user, db_host:   database connection parameters
        # ----------------------------------------------------------------------
        with log_time(self.logger):
            if os.path.exists(infile):
                self.logger.info("Processing %s" % (infile))
            else:
                self.logger.error("Dataset %s does not exist" % (infile))
                return 1

            #        Build a configuration parset specifying database parameters
            #                                                     for the kernel
            # ------------------------------------------------------------------
            self.logger.debug("Setting up kernel parset")
            filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
            fd, parset_filename = mkstemp()
            kernel_parset = Parset()
            for key, value in {
                "ObservationPart.Filesystem": filesystem,
                "ObservationPart.Path": infile,
                "BBDB.Key": key,
                "BBDB.Name": db_name,
                "BBDB.User": db_user,
                "BBDB.Host": db_host,
                "ParmLog": "",
                "ParmLoglevel": "",
                "ParmDB.Sky": infile + ".sky",
                "ParmDB.Instrument": infile + ".instrument"
            }.iteritems():
                kernel_parset.add(key, value)
            kernel_parset.writeFile(parset_filename)
            os.close(fd)
            self.logger.debug("Parset written to %s" % (parset_filename,))


            #                                                     Run the kernel
            #               Catch & log output from the kernel logger and stdout
            # ------------------------------------------------------------------
            working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
            env = read_initscript(self.logger, initscript)
            try:
                cmd = [executable, parset_filename, "0"]
                self.logger.debug("Executing BBS kernel")
                with CatchLog4CPlus(
                    working_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ):
                    bbs_kernel_process = Popen(
                        cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir
                    )
                    sout, serr = bbs_kernel_process.communicate()
                log_process_output("BBS kernel", sout, serr, self.logger)
                if bbs_kernel_process.returncode != 0:
                    raise CalledProcessError(
                        bbs_kernel_process.returncode, executable
                    )
            except CalledProcessError, e:
                self.logger.error(str(e))
                return 1
            finally:
                # Truncated in the original snippet; cleanup of the temporary
                # working directory would typically go here, e.g.:
                shutil.rmtree(working_dir, ignore_errors=True)
Example #34
0
File: cimager.py Project: mfkiwl/lofar-1
    def go(self):
        self.logger.info("Starting cimager run")
        super(cimager, self).go()
        self.outputs['images'] = []

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for cimager")
        gvds_file = os.path.join(self.config.get("layout", "job_directory"),
                                 "vds", "cimager.gvds")
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = gvds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(gvds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("cimager GVDS is %s" % (gvds_file, ))

        #                            Read data for processing from the GVDS file
        # ----------------------------------------------------------------------
        parset = Parset(gvds_file)

        data = []
        for part in range(parset.getInt('NParts')):
            host = parset.getString("Part%d.FileSys" % part).split(":")[0]
            vds = parset.getString("Part%d.Name" % part)
            data.append((host, vds))

        #                                 Divide data into timesteps for imaging
        #          timesteps is a list of (start, end, results directory) tuples
        # ----------------------------------------------------------------------
        timesteps = []
        results_dir = self.inputs['results_dir']
        if self.inputs['timestep'] == 0:
            self.logger.info("No timestep specified; imaging all data")
            timesteps = [(None, None, results_dir)]
        else:
            self.logger.info("Using timestep of %s s" %
                             self.inputs['timestep'])
            gvds = get_parset(gvds_file)
            start_time = quantity(gvds['StartTime'].get()).get('s').get_value()
            end_time = quantity(gvds['EndTime'].get()).get('s').get_value()
            step = float(self.inputs['timestep'])
            while start_time < end_time:
                timesteps.append((start_time, start_time + step,
                                  os.path.join(results_dir, str(start_time))))
                start_time += step

        #                          Run each cimager process in a separate thread
        # ----------------------------------------------------------------------
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        for label, timestep in enumerate(timesteps):
            self.logger.info("Processing timestep %d" % label)
            jobs = []
            parsets = []
            start_time, end_time, resultsdir = timestep
            for host, vds in data:
                vds_data = Parset(vds)
                frequency_range = [
                    vds_data.getDoubleVector("StartFreqs")[0],
                    vds_data.getDoubleVector("EndFreqs")[-1]
                ]
                parsets.append(
                    self.__get_parset(
                        os.path.basename(
                            vds_data.getString('FileName')).split('.')[0],
                        vds_data.getString("FileName"),
                        str(frequency_range),
                        vds_data.getStringVector("Extra.FieldDirectionType")
                        [0],
                        vds_data.getStringVector("Extra.FieldDirectionRa")[0],
                        vds_data.getStringVector("Extra.FieldDirectionDec")[0],
                        'True',  # cimager bug: non-restored image unusable
                    ))
                jobs.append(
                    ComputeJob(host,
                               command,
                               arguments=[
                                   self.inputs['imager_exec'], vds,
                                   parsets[-1], resultsdir, start_time,
                                   end_time
                               ]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for parset in parsets:
                parset = Parset(parset)
                image_names = parset.getStringVector("Cimager.Images.Names")
                self.outputs['images'].extend(image_names)
            for parset in parsets:
                os.unlink(parset)

        #                Check if we recorded a failing process before returning
        # ----------------------------------------------------------------------
        if self.error.isSet():
            self.logger.warn("Failed imager process detected")
            return 1
        else:
            return 0
Example #35
0
File: cimager.py Project: jjdmol/LOFAR
    def go(self):
        self.logger.info("Starting cimager run")
        super(cimager, self).go()
        self.outputs['images'] = []

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for cimager")
        gvds_file = os.path.join(
            self.config.get("layout", "job_directory"),
            "vds",
            "cimager.gvds"
        )
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = gvds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(gvds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("cimager GVDS is %s" % (gvds_file,))

        #                            Read data for processing from the GVDS file
        # ----------------------------------------------------------------------
        parset = Parset(gvds_file)

        data = []
        for part in range(parset.getInt('NParts')):
            host = parset.getString("Part%d.FileSys" % part).split(":")[0]
            vds  = parset.getString("Part%d.Name" % part)
            data.append((host, vds))

        #                                 Divide data into timesteps for imaging
        #          timesteps is a list of (start, end, results directory) tuples
        # ----------------------------------------------------------------------
        timesteps = []
        results_dir = self.inputs['results_dir']
        if self.inputs['timestep'] == 0:
            self.logger.info("No timestep specified; imaging all data")
            timesteps = [(None, None, results_dir)]
        else:
            self.logger.info("Using timestep of %s s" % self.inputs['timestep'])
            gvds = get_parset(gvds_file)
            start_time = quantity(gvds['StartTime'].get()).get('s').get_value()
            end_time = quantity(gvds['EndTime'].get()).get('s').get_value()
            step = float(self.inputs['timestep'])
            while start_time < end_time:
                timesteps.append(
                    (
                        start_time, start_time+step,
                        os.path.join(results_dir, str(start_time))
                    )
                )
                start_time += step

        #                          Run each cimager process in a separate thread
        # ----------------------------------------------------------------------
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        for label, timestep in enumerate(timesteps):
            self.logger.info("Processing timestep %d" % label)
            jobs = []
            parsets = []
            start_time, end_time, resultsdir = timestep
            for host, vds in data:
                vds_data = Parset(vds)
                frequency_range = [
                    vds_data.getDoubleVector("StartFreqs")[0],
                    vds_data.getDoubleVector("EndFreqs")[-1]
                ]
                parsets.append(
                    self.__get_parset(
                        os.path.basename(vds_data.getString('FileName')).split('.')[0],
                        vds_data.getString("FileName"),
                        str(frequency_range),
                        vds_data.getStringVector("Extra.FieldDirectionType")[0],
                        vds_data.getStringVector("Extra.FieldDirectionRa")[0],
                        vds_data.getStringVector("Extra.FieldDirectionDec")[0],
                        'True', # cimager bug: non-restored image unusable
                    )
                )
                jobs.append(
                    ComputeJob(
                        host, command,
                        arguments=[
                            self.inputs['imager_exec'],
                            vds,
                            parsets[-1],
                            resultsdir,
                            start_time,
                            end_time
                        ]
                    )
                )
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for parset in parsets:
                parset = Parset(parset)
                image_names = parset.getStringVector("Cimager.Images.Names")
                self.outputs['images'].extend(image_names)
            for parset in parsets:
                os.unlink(parset)

        #                Check if we recorded a failing process before returning
        # ----------------------------------------------------------------------
        if self.error.isSet():
            self.logger.warn("Failed imager process detected")
            return 1
        else:
            return 0
Example #36
0
    def run(self, executable, initscript, infile, key, db_name, db_user,
            db_host):
        #                           executable: path to KernelControl executable
        #                           initscript:             path to lofarinit.sh
        #                               infile:    MeasurementSet for processing
        #       key, db_name, db_user, db_host:   database connection parameters
        # ----------------------------------------------------------------------
        with log_time(self.logger):
            if os.path.exists(infile):
                self.logger.info("Processing %s" % (infile))
            else:
                self.logger.error("Dataset %s does not exist" % (infile))
                return 1

            #        Build a configuration parset specifying database parameters
            #                                                     for the kernel
            # ------------------------------------------------------------------
            self.logger.debug("Setting up kernel parset")
            filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
            fd, parset_filename = mkstemp()
            kernel_parset = Parset()
            for key, value in {
                    "ObservationPart.Filesystem": filesystem,
                    "ObservationPart.Path": infile,
                    "BBDB.Key": key,
                    "BBDB.Name": db_name,
                    "BBDB.User": db_user,
                    "BBDB.Host": db_host,
                    "ParmLog": "",
                    "ParmLoglevel": "",
                    "ParmDB.Sky": infile + ".sky",
                    "ParmDB.Instrument": infile + ".instrument"
            }.iteritems():
                kernel_parset.add(key, value)
            kernel_parset.writeFile(parset_filename)
            os.close(fd)
            self.logger.debug("Parset written to %s" % (parset_filename, ))

            #                                                     Run the kernel
            #               Catch & log output from the kernel logger and stdout
            # ------------------------------------------------------------------
            working_dir = mkdtemp()
            env = read_initscript(self.logger, initscript)
            try:
                cmd = [executable, parset_filename, "0"]
                self.logger.debug("Executing BBS kernel")
                with CatchLog4CPlus(
                        working_dir,
                        self.logger.name + "." + os.path.basename(infile),
                        os.path.basename(executable),
                ):
                    bbs_kernel_process = Popen(cmd,
                                               stdout=PIPE,
                                               stderr=PIPE,
                                               cwd=working_dir)
                    sout, serr = bbs_kernel_process.communicate()
                log_process_output("BBS kernel", sout, serr, self.logger)
                if bbs_kernel_process.returncode != 0:
                    raise CalledProcessError(bbs_kernel_process.returncode,
                                             executable)
            except CalledProcessError, e:
                self.logger.error(str(e))
                return 1
            finally:
                # Truncated in the original snippet; the temporary working
                # directory would typically be removed here, e.g.:
                shutil.rmtree(working_dir, ignore_errors=True)
Example #37
0
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=False,
            args_format='',
            environment=''):
        """
        This method contains all the needed functionality
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            #if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)

            # Check if script is present
            if not os.path.isfile(executable):
                self.logger.error("Script %s not found" % executable)
                return 1

            # hurray! race condition when running with more than one process on one filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir)
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            if parsetasfile:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir,
                                          os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                args.insert(0, parsetname)

            try:
                # ****************************************************************
                # Run
                # Change to working directory for the script
                pipedir = os.getcwd()
                os.chdir(work_dir)
                outdict = {}
                plugin = imp.load_source('main', executable)
                outdict = plugin.main(*args, **kwargs)
                os.chdir(pipedir)

            except CalledProcessError, err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception, err:
                self.logger.error(str(err))
                return 1
Example #38
0
class GenericPipeline(control):
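    # Generic pipeline driver: the steps, their arguments and their control
    # options are constructed entirely from a user-supplied parset file.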

    inputs = {
        'loglevel': ingredient.StringField(
            '--loglevel',
            help="loglevel",
            default='INFO',
            optional=True
        )
    }

    def __init__(self):
        control.__init__(self)
        self.parset = Parset()
        self.input_data = {}
        self.output_data = {}
        self.parset_feedback_file = None
        #self.logger = None#logging.RootLogger('DEBUG')
        self.name = ''

        #if not overwrite:
        #    self.inputs['job_name'] = 'generic-pipeline'
        # if not self.inputs.has_key("start_time"):
        #     import datetime
        #     self.inputs["start_time"] = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
        # if not hasattr(self, "config"):
        #     self.config = self._read_config()
        # #self._read_config()
        # # ...and task files, if applicable
        # if not self.inputs.has_key("task_files"):
        #     try:
        #         self.inputs["task_files"] = utilities.string_to_list(
        #             self.config.get('DEFAULT', "task_files")
        #         )
        #     except NoOptionError:
        #         self.inputs["task_files"] = []
        # self.task_definitions = ConfigParser(self.config.defaults())
        # print >> sys.stderr, "Reading task definition file(s): %s" % \
        #                      ",".join(self.inputs["task_files"])
        # self.task_definitions.read(self.inputs["task_files"])
        #    self.go()

    def usage(self):
        """
        Display usage
        """
        print >> sys.stderr, "Usage: %s [options] <parset-file>" % sys.argv[0]
        print >> sys.stderr, "Parset structure should look like:\n" \
                             "NYI"
        #return 1

    def go(self):
        #"""
        #Read the parset-file that was given as input argument, and set the
        #jobname before calling the base-class's `go()` method.
        #"""
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()

        # Set job-name to basename of parset-file w/o extension, if it's not
        # set on the command-line with '-j' or '--job-name'
        if 'job_name' not in self.inputs:
            self.inputs['job_name'] = (
                os.path.splitext(os.path.basename(parset_file))[0])
            self.name = self.inputs['job_name']
        if not hasattr(self, 'logger'):
            self.logger = getSearchingLogger(self.name)
            self.logger.setLevel(self.inputs['loglevel'])
        # Call the base-class's `go()` method.
        return super(GenericPipeline, self).go()

#    def pipeline_logic(self):
#        print 'Dummy because of wrapping inside the framework'
#        if overwrite:
#            self.execute_pipeline()

    #def execute_pipeline(self):
    def pipeline_logic(self):
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we don't need a subset but just a steplist
        # at the moment only a list with stepnames is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')
        pipeline_steps = self.parset.makeSubset(
            self.parset.fullModuleName('steps') + '.')
        # *********************************************************************
        # forward declaration of things, just for a better overview and understanding of what's in here.
        # some of this might be removed in upcoming iterations, or more might get added.
        step_name_list = pipeline_args.getStringVector('steps')
        # construct the step name list if there were pipeline.steps.<subset>
        for item in pipeline_steps.keys():
            if item in step_name_list:
                loc = step_name_list.index(item)
                step_name_list[loc:loc] = pipeline_steps.getStringVector(item)
                step_name_list.remove(item)

        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
        # initial parameters are saved in resultdicts so that recipes have access to them as step 0
        # the values are initialised twice; 'input' should be considered deprecated,
        # self.name would be the consistent key to use in subpipelines
        input_dictionary = {
            'parset': parset_file,
            'parsetobj': self.parset,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir}

        resultdicts = {}
        for section in self.config.sections():
            tmp_dict = {}
            for entry in self.config.items(section):
                input_dictionary[entry[0]] = entry[1]
                tmp_dict[entry[0]] = entry[1]
            resultdicts.update({section: copy.deepcopy(tmp_dict)})

        resultdicts.update({'input': input_dictionary})
        resultdicts.update({self.name: input_dictionary})

        if 'pipeline.mapfile' in self.parset.keywords():
            resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            self.logger.info("Beginning step %s" % (stepname,))
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            adds = None
            if stepname in step_parset_obj:
                adds = self._construct_step_parset(inputdict,
                                             step_parset_obj[stepname],
                                             resultdicts,
                                             step_parset_files[stepname],
                                             stepname)
            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                    inputdict['stepname'] = stepname
                    if adds:
                        inputdict.update(adds)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]


            self._construct_input(inputdict, step, resultdicts)
            # hack: 'type' has to be popped as well, since 'kind' was already deleted from the parsets
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # more hacks. The framework's DictField is not properly implemented, so
            # construct your own dict from the input; Python builtin functions can't
            # handle the string returned by the parset class.
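            # e.g. the parset value "{PATH: /opt/bin, OMP_NUM_THREADS: 4}"
            # is turned into {'PATH': '/opt/bin', 'OMP_NUM_THREADS': '4'}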
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict

            # subpipeline. the goal is to specify a pipeline within a pipeline.
            # load another existing pipeline parset and add it to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector('pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keywords():
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({os.path.splitext(os.path.basename(typeval))[0]: {
                    'parset': typeval,
                    'mapfile': submapfile,
                }})
                # todo: take care of plugin paths and everything other than individual steps.
                # make a pipeline parse method that returns everything needed,
                # maybe as dicts, to combine them into one.

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keywords():
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in self._keys(subpipeline_parset):
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
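                # checklist now holds every step name whose output references
                # must be prefixed with this step's name below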
                # *********************************************************************
                # the master parset does not handle formatting and comments in the parset.
                # a proper format is only available after parset.makeSubset; the result is a
                # different object from a different super class :(. this also explains the
                # mixed use of parset.keys and parset.keys().
                # take the parset from the subpipeline and add it to the master parset.
                # UPDATE: do not use .keys on the master parset. use .keywords(), then comments are filtered.
                # *********************************************************************
                # prefix step names with the subpipeline step name to create unique identifiers.
                # replacement values starting with ! are taken from the master parset and overwrite
                # the ones in the subpipeline. this only works if the ! value is already in the subpipeline.
                for k in self._keys(subpipeline_parset):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!') and not str(k).startswith('pipeline.replace.'):
                        for item in checklist:
                            if item+".output" in str(val):
                                val = str(val).replace(item, stepname + '-' + item)

                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        # remove replacement strings to prevent loading the same key twice
                        if k in self._keys(self.parset):
                            self.parset.remove(k)
                        self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self._keys(self.parset):
                        if ((str(k).startswith('!') and item == str(k).strip("! "))
                                or (str(k).startswith('pipeline.replace.')
                                    and item == str(k)[17:].strip())):
                            self.parset.remove(k)
                            self.parset.add('! ' + item, str(step_parset_obj[stepname][item]))
                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)


            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                if activeloop[0] != stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                if counter == step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(
                        typeval,
                        inputargs,
                        **inputdict
                    )

            # plugins
            if kind_of_step == 'plugin':
                recipe_dirs = str(self.config.get('DEFAULT', 'recipe_directories'))
                pluginpath = recipe_dirs.rstrip(']').lstrip('[').split(',')
                for i, item in enumerate(pluginpath):
                    pluginpath[i] = os.path.join(item, 'plugins')
                if 'pluginpath' in pipeline_args.keys():
                    pluginpath.append(pipeline_args.getString('pluginpath'))
                with duration(self, stepname):
                    resultdict = loader.call_plugin(typeval, pluginpath,
                                                    inputargs,
                                                    **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loop step
            # if the step returned the loop-break keyword, propagate its value
            if activeloop[0] in resultdicts and resultdict is not None and 'break' in resultdict:
                resultdicts[activeloop[0]]['break'] = resultdict['break']

    # *********************************************************************
    # build the inputs for the master recipes.
    def _construct_input(self, inoutdict, controlparset, resdicts):
        # intermediate backward compatibility for opts subparset
        if controlparset.fullModuleName('opts'):
            argsparset = controlparset.makeSubset(controlparset.fullModuleName('opts') + '.')
        # hack
        elif 'loopcount' not in controlparset.keys():
            argsparset = controlparset
        else:
            argsparset = controlparset.makeSubset(controlparset.fullModuleName('imaginary') + '.')
        # \hack
        self._replace_output_keyword(inoutdict, argsparset, argsparset.keys(), resdicts)

    def _construct_cmdline(self, inoutargs, controlparset, resdicts):
        inoutdict = {}
        argsparset = controlparset.makeSubset(controlparset.fullModuleName('cmdline') + '.')
        self._replace_output_keyword(inoutdict, argsparset, argsparset.keys(), resdicts)
        for k in inoutdict.keys():
            inoutargs.append(inoutdict[k])
        for k in controlparset.keys():
            if 'cmdline' in k:
                controlparset.remove(k)

    def _construct_steps(self, step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir):
        step_list_copy = copy.deepcopy(step_name_list)
        counter = 0
        while step_list_copy:
            counter -= 1
            stepname = step_list_copy.pop(-1)
            fullparset = self.parset.makeSubset(self.parset.fullModuleName(str(stepname)) + '.')
            subparset = fullparset.makeSubset(fullparset.fullModuleName('control') + '.')
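            # the same step may occur more than once in the list; repeats get
            # a numeric suffix so that every occurrence has a unique name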
            number = 0
            for item in step_list_copy:
                if item == stepname:
                    number += 1
            if number != 0:
                stepname += str(number)
            step_name_list[counter] = stepname
            step_control_dict[stepname] = subparset
            if fullparset.fullModuleName('argument'):
                stepparset = fullparset.makeSubset(fullparset.fullModuleName('argument') + '.')
                # *********************************************************************
                # save parsets
                # either a filename is given in the main parset
                # or files will be created from subsets with stepnames.parset as filenames
                try:
                    file_parset = Parset(stepparset.getString('parset'))
                    for k in file_parset.keywords():
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    stepparset.remove('parset')
                except Exception:
                    # no explicit parset file given for this step (or unreadable)
                    pass
                # parset from task.cfg
                try:
                    file_parset = Parset(self.task_definitions.get(str(subparset['type']), 'parset'))
                    for k in file_parset.keywords():
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                except Exception:
                    # no default parset defined in task.cfg for this step type
                    pass
                # for parset in control section
                try:
                    file_parset = Parset(subparset.getString('parset'))
                    for k in file_parset.keywords():
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    subparset.remove('parset')
                except Exception:
                    # no parset reference in the control section
                    pass
                step_parset = os.path.join(parset_dir, stepname + '.parset')
                stepparset.writeFile(step_parset)
                step_parset_files[stepname] = step_parset
                step_parset_obj[stepname] = stepparset

    def _replace_output_keyword(self, inoutdict, argsparset, keyorder, resdicts):
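        # argument values may reference results of earlier steps as
        # "<stepname>.output.<key>"; such references are substituted with the
        # actual result values, and referenced mapfiles are collected as
        # additional inputs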
        addvals = {'inputkeys': [], 'mapfiles_in': [], 'arguments': []}
        regobj = re.compile(r'([\w+_-]+)\.output\.([\w+._-]+)')
        for k in keyorder:
            keystring = argsparset.getString(k)
            hitlist = regobj.findall(keystring)
            if hitlist:
                for hit in hitlist:
                    keystring = regobj.sub(str(resdicts[hit[0]][hit[1]]), keystring, 1)
                    if 'mapfile' in hit[1] and not 'mapfile' in k:
                        addvals['inputkeys'].append(resdicts[hit[0]][hit[1]])
                        addvals['mapfiles_in'].append(resdicts[hit[0]][hit[1]])
                inoutdict[k] = keystring
            else:
                inoutdict[k] = argsparset.getString(k)
            if k == 'flags':
                addvals['arguments'] = keystring
            if 'outputkey' in keystring:
                addvals['outputkey'] = 'outputkey'
        return addvals

    def _construct_step_parset(self, inoutdict, argsparset, resdicts, filename, stepname):
        tmp_keys = argsparset.keys()
        ordered_keys = []
        parsetdict = {}
        for orig in self._keys(self.parset):
            for item in tmp_keys:
                if (stepname + '.') in orig and ('argument.'+item in orig and not 'argument.'+item+'.' in orig):
                    ordered_keys.append(item)
                    continue
        # add keys from parset files that were not in the original list
        for item in argsparset.keys():
            if not item in ordered_keys:
                ordered_keys.append(item)
        additional = self._replace_output_keyword(parsetdict, argsparset, ordered_keys, resdicts)
        for k in argsparset.keys():
            argsparset.replace(k, parsetdict[k])
            if k == 'flags':
                argsparset.remove(k)
        argsparset.writeFile(filename)
        return additional

    def _keys(self, inparset):
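        # return the parset's keys in their original order, keeping only
        # those the underlying parameterset actually knows about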
        outlist = []
        for k in inparset.keys:
            for l in inparset.keywords():
                if k == l:
                    outlist.append(l)
        return outlist

    def _get_parset_dicts(self):
        return {}

    def show_tasks(self):
        tasklist = []
        tasklist = self.task_definitions.sections()
        for item in tasklist:
            print item
        #return tasklist

    def show_task(self, task):
        task_parset = Parset()
        if self.task_definitions.has_option(task,'parset'):
            task_parset.adoptFile(self.task_definitions.get(task,'parset'))
            print 'possible arguments: key    =    value'
            for k in task_parset.keywords():
                print '                   ',k,'    ','=','    ',task_parset[k]

    def _add_step(self):
        steplist = []

    def _replace_values(self):
        replacedict = OrderedDict()
        for check in self._keys(self.parset):
            if str(check).startswith('!'):
                replacedict[str(check).lstrip('!').lstrip(' ')] = str(self.parset[check])
            if str(check).startswith('pipeline.replace.'):
                replacedict[str(check).replace('pipeline.replace.', '').lstrip(' ')] = str(self.parset[check])
        #expand environment variables
        for k, v in replacedict.items():
            replacedict[k] = os.path.expandvars(v)

        for check in self._keys(self.parset):
            for k, v in reversed(replacedict.items()):
                if '{{ '+k+' }}' in str(self.parset[check]):
                    replacestring = str(self.parset[check]).replace('{{ '+k+' }}',v)
                    self.parset.replace(check,replacestring)
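
The `_replace_values` method above is a small templating pass: keys declared with a leading `!` or under `pipeline.replace.` define variables, and every `{{ name }}` occurrence elsewhere in the parset is substituted, after environment variables in the definitions have been expanded. A minimal standalone sketch of the same idea, using a plain dict instead of the LOFAR Parset API (the function name and sample keys are illustrative):

import os
from collections import OrderedDict

def replace_values(parset):
    """Expand '!name' / 'pipeline.replace.name' definitions into '{{ name }}' uses."""
    replacedict = OrderedDict()
    for key, value in parset.items():
        if key.startswith('!'):
            replacedict[key.lstrip('! ')] = value
        elif key.startswith('pipeline.replace.'):
            replacedict[key[len('pipeline.replace.'):].lstrip(' ')] = value
    # the definitions themselves may contain environment variables
    for name in replacedict:
        replacedict[name] = os.path.expandvars(replacedict[name])
    # substitute in reverse definition order, as the original does
    for key, value in parset.items():
        for name, repl in reversed(list(replacedict.items())):
            value = value.replace('{{ ' + name + ' }}', repl)
        parset[key] = value
    return parset

# replace_values({'!obsid': 'L123456', 'input.ms': '/data/{{ obsid }}.MS'})
# -> {'!obsid': 'L123456', 'input.ms': '/data/L123456.MS'}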
Example #39
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=False,
            args_format='',
            environment=''):
        """
        This method contains all the needed functionality
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            #if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)
            # else:
            #     self.logger.error("Dataset %s does not exist" % infile)
            #     return 1

            # Check if executable is present
            if not os.access(executable, os.X_OK):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # guard against a race condition when more than one process
            # creates work_dir on the same filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir, )
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            argsformat = args_format['args_format']
            # deal with multiple input files for wsclean
            if argsformat == 'wsclean':
                for i in reversed(xrange(len(args))):
                    if str(args[i]).startswith('[') and str(
                            args[i]).endswith(']'):
                        tmplist = args.pop(i).lstrip('[').rstrip(']').split(
                            ',')
                        for val in reversed(tmplist):
                            args.insert(i, val.strip(' \'\"'))
            if not parsetasfile:
                if argsformat == 'gnu':
                    for k, v in kwargs.items():
                        args.append('--' + k + '=' + v)
                if argsformat == 'lofar':
                    for k, v in kwargs.items():
                        args.append(k + '=' + v)
                if argsformat == 'argparse':
                    for k, v in kwargs.items():
                        args.append('--' + k + ' ' + v)
                if argsformat == 'wsclean':
                    for k, v in kwargs.items():
                        if str(v).startswith('[') and str(v).endswith(']'):
                            v = v.lstrip('[').rstrip(']').replace(' ', '')
                            multargs = v.split(',')
                        else:
                            multargs = v.split(' ')
                        if multargs:
                            multargs.reverse()
                            for item in multargs:
                                args.insert(0, item)
                        else:
                            args.insert(0, v)
                        args.insert(0, '-' + k)

            else:
                nodeparset = Parset()
                parsetname = os.path.join(work_dir,
                                          os.path.basename(infile) + '.parset')
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                nodeparset.writeFile(parsetname)
                if argsformat == 'losoto':
                    args.append(parsetname)
                else:
                    args.insert(0, parsetname)

            try:
                # ****************************************************************
                # Run
                cmd = [executable] + args
                with CatchLog4CPlus(
                        work_dir,
                        self.logger.name + "." + os.path.basename(infile),
                        os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(cmd, work_dir, self.environment, logger)
            except CalledProcessError, err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception, err:
                self.logger.error(str(err))
                return 1
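
Whether the keyword arguments end up on the command line or in a parset file is controlled by `parsetasfile`; on the command-line path, the node script above supports four argument conventions. A compact sketch of that conversion (the function name and sample values are illustrative; unlike the original, wsclean-style arguments are appended rather than prepended):

def format_kwargs(kwargs, fmt):
    """Render a kwargs dict in one of the argument styles handled above."""
    args = []
    for k, v in kwargs.items():
        if fmt == 'gnu':                    # --key=value
            args.append('--' + k + '=' + v)
        elif fmt == 'lofar':                # key=value
            args.append(k + '=' + v)
        elif fmt == 'argparse':             # --key value (single token, as above)
            args.append('--' + k + ' ' + v)
        elif fmt == 'wsclean':              # -key value [value ...]
            if v.startswith('[') and v.endswith(']'):
                values = v.strip('[]').replace(' ', '').split(',')
            else:
                values = v.split(' ')
            args.extend(['-' + k] + values)
    return args

# format_kwargs({'size': '1024 1024'}, 'wsclean') -> ['-size', '1024', '1024']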
Example #40
    def run(self,
            infile,
            executable,
            args,
            kwargs,
            work_dir='/tmp',
            parsetasfile=False,
            args_format='',
            environment=''):
        """
        This function contains all the needed functionality
        """
        # Debugging info
        self.logger.debug("infile            = %s" % infile)
        self.logger.debug("executable        = %s" % executable)
        self.logger.debug("working directory = %s" % work_dir)
        self.logger.debug("arguments         = %s" % args)
        self.logger.debug("arg dictionary    = %s" % kwargs)
        self.logger.debug("environment       = %s" % environment)

        self.environment.update(environment)

        # Time execution of this job
        with log_time(self.logger):
            if infile[0] == '[':
                infiles = [ms.strip(" []\'\"") for ms in infile.split(',')]
                reffile = infiles[0]
            else:
                reffile = infile

            if os.path.exists(reffile):
                self.logger.info("Processing %s" % reffile)
            else:
                self.logger.error("Dataset %s does not exist" % reffile)
                return 1

            # Check if executable is present
            if not os.access(executable, os.X_OK):
                self.logger.error("Executable %s not found" % executable)
                return 1

            # guard against a race condition when more than one process
            # creates work_dir on the same filesystem
            if not os.path.isdir(work_dir):
                try:
                    os.mkdir(work_dir, )
                except OSError as exc:  # Python >2.5
                    if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                        pass
                    else:
                        raise

            if not parsetasfile:
                self.logger.error(
                    "Nodescript \"executable_casa.py\" requires \"parsetasfile\" to be True!"
                )
                return 1
            else:
                nodeparset = Parset()
                sublist = []
                for k, v in kwargs.items():
                    nodeparset.add(k, v)
                    # collect task names: the part of the key before the first dot
                    if '.' in str(k):
                        if str(k).split('.')[0] not in sublist:
                            sublist.append(str(k).split('.')[0])

                # quick hacks below, for proof of concept
                casastring = ''
                for sub in sublist:
                    subpar = nodeparset.makeSubset(
                        nodeparset.fullModuleName(sub) + '.')
                    casastring = sub + '('
                    for k in subpar.keys():
                        if str(subpar[k]).startswith('/'):
                            # absolute paths are passed as quoted strings
                            casastring += str(k) + '=' + "'" + str(
                                subpar[k]) + "'" + ','
                        elif str(subpar[k]).startswith('casastr/'):
                            # the 'casastr/' prefix forces string quoting; slice
                            # it off (str.strip would drop any of its characters,
                            # not just the prefix)
                            casastring += str(k) + '=' + "'" + str(
                                subpar[k])[len('casastr/'):] + "'" + ','
                        elif str(subpar[k]).lower() == 'false' or str(
                                subpar[k]).lower() == 'true':
                            casastring += str(k) + '=' + str(subpar[k]) + ','
                        else:
                            # Test if int/float or list of int/float
                            try:
                                self.logger.info('value: {}'.format(subpar[k]))
                                test = float(str(subpar[k]))
                                is_int_float = True
                            except ValueError:
                                is_int_float = False
                            if is_int_float:
                                casastring += str(k) + '=' + str(
                                    subpar[k]) + ','
                            else:
                                if '[' in str(subpar[k]) or '(' in str(
                                        subpar[k]):
                                    # Check if list of int/float or strings
                                    list_vals = [
                                        f.strip() for f in str(
                                            subpar[k]).strip('[]()').split(',')
                                    ]
                                    is_int_float = True
                                    for list_val in list_vals:
                                        try:
                                            test = float(list_val)
                                        except ValueError:
                                            is_int_float = False
                                            break
                                    if is_int_float:
                                        casastring += str(k) + '=' + str(
                                            subpar[k]) + ','
                                    else:
                                        casastring += str(
                                            k) + '=' + '[{}]'.format(','.join([
                                                "'" + list_val + "'"
                                                for list_val in list_vals
                                            ])) + ','
                                else:
                                    # Simple string
                                    casastring += str(k) + '=' + "'" + str(
                                        subpar[k]) + "'" + ','

                    casastring = casastring.rstrip(',')
                    casastring += ')\n'

                # 1) the return code of casapy is not properly recognized by
                #    the pipeline; wrapping it in a shell script works for
                #    successful runs, but failed runs seem to hang the pipeline...
                # 2) casapy cannot have two instances running from the same
                #    directory, so create tmp dirs
                casapydir = tempfile.mkdtemp(dir=work_dir)
                if casastring != '':
                    casafilename = os.path.join(
                        work_dir,
                        os.path.basename(reffile) + '.casacommand.py')
                    casacommandfile = open(casafilename, 'w')
                    casacommandfile.write(casastring)
                    casacommandfile.close()
                    args.append(casafilename)

                wrappername = os.path.join(
                    work_dir,
                    os.path.basename(reffile) + '.casashell.sh')
                commandstring = ''
                commandstring += executable
                for item in args:
                    if str(item).find(' ') > -1 or str(item).find('[') > -1:
                        commandstring += ' "' + item + '"'
                    else:
                        commandstring += ' ' + item

                wrapperfile = open(wrappername, 'w')
                wrapperfile.write('#!/bin/bash \n')
                wrapperfile.write('echo "Trying CASAPY command" \n')
                wrapperfile.write(commandstring + ' >& casa.log\n')
                wrapperfile.close()

                # make the wrapper script executable
                st = os.stat(wrappername)
                os.chmod(
                    wrappername,
                    st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

            try:
                # ****************************************************************
                # Run
                cmd = [wrappername]
                with CatchLog4CPlus(
                        casapydir,
                        self.logger.name + "." + os.path.basename(reffile),
                        os.path.basename(executable),
                ) as logger:
                    # Catch segfaults and retry
                    catch_segfaults(cmd, casapydir, self.environment, logger)
            except CalledProcessError, err:
                # CalledProcessError isn't properly propagated by IPython
                self.logger.error(str(err))
                return 1
            except Exception, err:
                self.logger.error(str(err))
                return 1
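
Example #40 groups parset keys by the part before the first dot and renders each group as one casapy task call, quoting values by type. A reduced sketch of that string building, with a plain dict standing in for the Parset subset (names and values are illustrative):

def build_casa_call(task, params):
    """Render a parameter dict as a casapy call string, quoting by type."""
    parts = []
    for key, value in params.items():
        if value.lower() in ('true', 'false'):
            parts.append('%s=%s' % (key, value))    # booleans go unquoted
            continue
        try:
            float(value)                            # ints/floats go unquoted
            parts.append('%s=%s' % (key, value))
        except ValueError:
            parts.append("%s='%s'" % (key, value))  # everything else is a string
    return '%s(%s)\n' % (task, ','.join(parts))

# build_casa_call('clean', {'vis': '/data/in.MS', 'niter': '1000'})
# -> "clean(vis='/data/in.MS',niter=1000)\n"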
Example #41
    def go(self):
        if 'executable' in self.inputs:
            executable = self.inputs['executable']

        if self.inputs['nthreads']:
            self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        # *********************************************************************
        # try loading the input/output data maps; validate the output against
        #    the input locations if output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be directed to the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        work_dir,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if self.inputs['inputkey'] and not self.inputs['inputkey'] in self.inputs['inputkeys']:
            self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
                if not mapname in self.inputs['mapfiles_as_string']:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(inp.file)
                else:
                    if key != mapname:
                        filedict[key] = []
                        for inp in filemap:
                            filedict[key].append(mapname)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        #command = "python3 %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
        recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
        pylist = os.getenv('PYTHONPATH', '').split(':')  # PYTHONPATH may be unset
        command = None
        for pl in pylist:
            if os.path.isfile(os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')
        for pl in recipe_directories:
            if os.path.isfile(os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')

        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp) in enumerate(
                zip(outputmapfiles[0], inputmapfiles[0])):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            if filedict:
                for name, value in filedict.items():
                    replaced = False
                    if arglist_copy:
                        for arg in arglist:
                            if name == arg:
                                ind = arglist_copy.index(arg)
                                arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                                replaced = True
                    if parsetdict_copy:
                        if name in list(parsetdict_copy.values()):
                            for k, v in parsetdict_copy.items():
                                if v == name:
                                    parsetdict_copy[k] = value[i]
                        else:
                            if not replaced:
                                parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        work_dir,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
                if not self.inputs['error_tolerance']:
                    self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                    return 1
            for k, v in list(job.results.items()):
                if not k in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # temp solution. write all output dict entries to a mapfile
        #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        #check directory for stand alone mode
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in list(jobresultdict.items()):
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warning(
                    "Some jobs failed, continuing with the successful runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
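
The master recipe above walks the input and output data maps in lockstep and creates one compute job per (host, file) pair, skipping flagged entries. A minimal sketch of that pairing, with a namedtuple standing in for the data map entries (the real DataMap, SkipIterator, and ComputeJob classes in lofarpipe carry more behaviour):

from collections import namedtuple

Entry = namedtuple('Entry', ['host', 'file', 'skip'])

def pair_jobs(inputs, outputs, command):
    """Return (host, argv) pairs for every non-skipped input/output pair."""
    jobs = []
    for inp, outp in zip(inputs, outputs):
        if inp.skip or outp.skip:       # simplified SkipIterator semantics
            continue
        jobs.append((inp.host, [command, inp.file, outp.file]))
    return jobs

# pair_jobs([Entry('node01', '/data/L1.MS', False)],
#           [Entry('node01', '/data/L1.dppp', False)],
#           'python3 node_script.py')
# -> [('node01', ['python3 node_script.py', '/data/L1.MS', '/data/L1.dppp'])]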
Example #42
    def go(self):
        if 'executable' in self.inputs:
            executable = self.inputs['executable']

        if self.inputs['nthreads']:
            self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        # *********************************************************************
        # try loading the input/output data maps; validate the output against
        #    the input locations if output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be directed to the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        work_dir,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if self.inputs['inputkey'] and not self.inputs['inputkey'] in self.inputs['inputkeys']:
            self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
                if not mapname in self.inputs['mapfiles_as_string']:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(inp.file)
                else:
                    if key != mapname:
                        filedict[key] = []
                        for inp in filemap:
                            filedict[key].append(mapname)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        #command = "python %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
        recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
        pylist = os.getenv('PYTHONPATH', '').split(':')  # PYTHONPATH may be unset
        command = None
        for pl in pylist:
            if os.path.isfile(os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')):
                command = "python %s" % os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')
        for pl in recipe_directories:
            if os.path.isfile(os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')):
                command = "python %s" % os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')

        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp) in enumerate(
                zip(outputmapfiles[0], inputmapfiles[0])):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            if filedict:
                for name, value in filedict.iteritems():
                    replaced = False
                    if arglist_copy:
                        for arg in arglist:
                            if name == arg:
                                ind = arglist_copy.index(arg)
                                arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                                replaced = True
                    if parsetdict_copy:
                        if name in parsetdict_copy.values():
                            for k, v in parsetdict_copy.iteritems():
                                if v == name:
                                    parsetdict_copy[k] = value[i]
                        else:
                            if not replaced:
                                parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        work_dir,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
                if not self.inputs['error_tolerance']:
                    self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                    return 1
            for k, v in job.results.items():
                if not k in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # temp solution. write all output dict entries to a mapfile
        #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        #check directory for stand alone mode
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in jobresultdict.items():
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warning(
                    "Some jobs failed, continuing with the successful runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
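
Both versions of this recipe end by regrouping the per-job result dicts into one data map per result key, saved as "<stepname>.<key>.mapfile". A small sketch of that regrouping, with plain dicts and tuples standing in for the lofarpipe DataProduct/DataMap types (names are illustrative):

import os

def collect_job_results(jobs, mapfile_dir, stepname):
    """Regroup per-job result dicts into one (host, value) list per key."""
    grouped = {}
    for job in jobs:
        for key, value in job['results'].items():
            grouped.setdefault(key, []).append((job['host'], value))
    # one mapfile path per result key, mirroring the naming used above
    paths = {}
    for key in grouped:
        paths[key + '.mapfile'] = os.path.join(
            mapfile_dir, '%s.%s.mapfile' % (stepname, key))
    return grouped, paths

# collect_job_results([{'host': 'node01', 'results': {'returncode': 0}}],
#                     '/jobs/mapfiles', 'dppp')
# -> ({'returncode': [('node01', 0)]},
#     {'returncode.mapfile': '/jobs/mapfiles/dppp.returncode.mapfile'})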
Example #43
class datamapper(BaseRecipe):
    """
    Parses a list of filenames and attempts to map them to appropriate compute
    nodes (ie, which can access the files) on the LOFAR CEP cluster. Mapping
    by filename in this way is fragile, but is the best we can do for now.

    **Arguments**

    None.
    """
    inputs = {
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Full path (including filename) of mapfile to produce "
                 "(clobbered if exists)"
        )
    }

    outputs = {
        'mapfile':
        ingredient.FileField(
            help="Full path (including filename) of generated mapfile")
    }

    def go(self):
        self.logger.info("Starting datamapper run")
        super(datamapper, self).go()

        #      We build lists of compute-nodes per cluster and data-per-cluster,
        #          then match them up to schedule jobs in a round-robin fashion.
        # ----------------------------------------------------------------------
        clusterdesc = ClusterDesc(self.config.get('cluster', "clusterdesc"))
        if clusterdesc.subclusters:
            available_nodes = dict((cl.name, cycle(get_compute_nodes(cl)))
                                   for cl in clusterdesc.subclusters)
        else:
            available_nodes = {
                clusterdesc.name: cycle(get_compute_nodes(clusterdesc))
            }

        data = defaultdict(list)
        for filename in self.inputs['args']:
            subcluster = filename.split(os.path.sep)[2]
            try:
                host = available_nodes[subcluster].next()
            except KeyError, key:
                self.logger.error("%s is not a known cluster" % str(key))
                raise

            data[host].append(filename)

        #                                 Dump the generated mapping to a parset
        # ----------------------------------------------------------------------
        parset = Parset()
        for host, filenames in data.iteritems():
            parset.addStringVector(host, filenames)

        parset.writeFile(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']

        return 0
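
The datamapper's core idea is round-robin assignment: each (sub)cluster gets an endless `cycle` over its compute nodes, and files are dealt to hosts in turn, based on the cluster named in their path. A self-contained Python 3 sketch (hostnames and paths are made up):

from collections import defaultdict
from itertools import cycle

def map_files_to_hosts(filenames, nodes_per_cluster):
    """Deal each file to the next node of the cluster its path names."""
    available = {cluster: cycle(nodes)
                 for cluster, nodes in nodes_per_cluster.items()}
    data = defaultdict(list)
    for filename in filenames:
        cluster = filename.split('/')[2]    # '/data/sub1/a.MS' -> 'sub1'
        data[next(available[cluster])].append(filename)
    return dict(data)

# map_files_to_hosts(['/data/sub1/a.MS', '/data/sub1/b.MS'],
#                    {'sub1': ['node01', 'node02']})
# -> {'node01': ['/data/sub1/a.MS'], 'node02': ['/data/sub1/b.MS']}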