Example #1
class flag_baseline(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Accept a list of baselines (in the format used by NDPPP logging).

    Flag them in all MeasurementSets.
    """
    inputs = {
        'baselines':
        ingredient.ListField('--baselines',
                             help="Baselines (in NDPPP format, eg 1&1)"),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8)
    }

    outputs = {'mapfile': ingredient.FileField()}

    def go(self):
        self.logger.info("Starting flag_baseline run")
        super(flag_baseline, self).go()

        #       Serialise list of baselines to disk for compute nodes to pick up
        # ----------------------------------------------------------------------
        fd, baseline_filename = mkstemp(
            dir=self.config.get("layout", "job_directory"))
        baseline_file = os.fdopen(fd, "w")
        dump(self.inputs["baselines"], baseline_file)
        baseline_file.close()

        #                 try block ensures baseline_filename is always unlinked
        # ----------------------------------------------------------------------
        try:
            #                       Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            self.logger.debug("Loading map from %s" % self.inputs['args'][0])
            data = load_data_map(self.inputs['args'][0])

            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            jobs = []
            for host, ms in data:
                jobs.append(
                    ComputeJob(host,
                               command,
                               arguments=[ms, baseline_filename]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        finally:
            os.unlink(baseline_filename)

        if self.error.isSet():
            return 1
        else:
            self.outputs['mapfile'] = self.inputs['args'][0]
            return 0
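
The master recipe above round-trips the baseline list through a temporary file so the node scripts can pick it up. A minimal standalone sketch of that round trip, assuming the unqualified dump() call is pickle.dump (the import is not shown in the excerpt):

import os
import pickle
from tempfile import mkstemp

baselines = ["1&1", "2&2"]  # NDPPP-style baseline selections

# Master side: serialise the list to a temporary file.
fd, baseline_filename = mkstemp()
with os.fdopen(fd, "wb") as baseline_file:
    pickle.dump(baselines, baseline_file)

try:
    # Node side: read the filename it was handed and recover the list.
    with open(baseline_filename, "rb") as f:
        assert pickle.load(f) == baselines
finally:
    # Mirrors the recipe's try/finally: the file is always unlinked.
    os.unlink(baseline_filename)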
Example #2
class thumbnail_combine(BaseRecipe, RemoteCommandRecipeMixIn):
    inputs = {
        'executable':
        ingredient.ExecField('--executable',
                             default="/usr/bin/montage",
                             help="montage executable"),
        'file_pattern':
        ingredient.StringField(
            '--file-pattern',
            default="*.th.png",
            help="File search pattern (glob)",
        ),
        'input_dir':
        ingredient.StringField('--input-dir',
                               help="Directory containing input files"),
        'output_file':
        ingredient.StringField('--output-file', help="Output filename"),
        'clobber':
        ingredient.BoolField('--clobber',
                             default=False,
                             help="Clobber pre-existing output files"),
        'target_hosts':
        ingredient.ListField('--target-hosts',
                             help="Remote hosts on which to execute")
    }

    def go(self):
        self.logger.info("Starting thumbnail_combine run")
        super(thumbnail_combine, self).go()

        hosts = self.inputs['target_hosts']
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host in hosts:
            jobs.append(
                ComputeJob(host,
                           command,
                           arguments=[
                               self.inputs['executable'],
                               self.inputs['file_pattern'],
                               self.inputs['input_dir'],
                               self.inputs['output_file'],
                               self.inputs['clobber']
                           ]))
        self._schedule_jobs(jobs)

        if self.error.isSet():
            self.logger.warn("Failed compute job process detected")
            return 1
        else:
            return 0
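
thumbnail_combine only dispatches its inputs to the target hosts; the node script itself is not part of this excerpt. A hypothetical sketch of what such a node-side worker might do with the arguments (glob the pattern in the input directory, honour clobber, then hand the matches to montage):

# Hypothetical node-side sketch; the real node script is not shown here.
import glob
import os
import subprocess

def combine_thumbnails(executable, file_pattern, input_dir, output_file,
                       clobber):
    if os.path.exists(output_file):
        if clobber:
            os.unlink(output_file)  # --clobber: remove pre-existing output
        else:
            raise RuntimeError("%s already exists" % output_file)
    files = sorted(glob.glob(os.path.join(input_dir, file_pattern)))
    if not files:
        raise RuntimeError("no files match %s" % file_pattern)
    # montage accepts a list of input images followed by the output name.
    subprocess.check_call([executable] + files + [output_file])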
Example #3
class cimager(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Provides a convenient, pipeline-based mechanism for running the cimager on
    a dataset.

    Can ingest either an MWimager-style parset, converting to cimager format
    as required, or a cimager parset directly.

    **Arguments**

    A mapfile describing the data to be processed.
    """
    inputs = {
        'imager_exec':
        ingredient.ExecField('--imager-exec', help="cimager executable"),
        'convert_exec':
        ingredient.ExecField('--convert-exec',
                             help="convertimagerparset executable"),
        'parset':
        ingredient.FileField(
            '--parset',
            help="Imager configuration parset (mwimager or cimager format)"),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8),
        'timestep':
        ingredient.FloatField(
            '--timestep',
            help=
            "If non-zero, multiple images will be made, each using timestep seconds of data",
            default=0.0),
        'results_dir':
        ingredient.DirectoryField(
            '--results-dir',
            help="Directory in which resulting images will be placed",
        ),
        'parset_type':
        ParsetTypeField('--parset-type',
                        default="mwimager",
                        help="cimager or mwimager"),
        'makevds':
        ingredient.ExecField('--makevds',
                             help="makevds executable",
                             default="/opt/LofIm/daily/lofar/bin/makevds"),
        'combinevds':
        ingredient.ExecField('--combinevds',
                             help="combinevds executable",
                             default="/opt/LofIm/daily/lofar/bin/combinevds")
    }

    outputs = {'images': ingredient.ListField()}

    def go(self):
        self.logger.info("Starting cimager run")
        super(cimager, self).go()
        self.outputs['images'] = []

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for cimager")
        gvds_file = os.path.join(self.config.get("layout", "job_directory"),
                                 "vds", "cimager.gvds")
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = gvds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(gvds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("cimager GVDS is %s" % (gvds_file, ))

        #                            Read data for processing from the GVDS file
        # ----------------------------------------------------------------------
        parset = Parset(gvds_file)

        data = []
        for part in range(parset.getInt('NParts')):
            host = parset.getString("Part%d.FileSys" % part).split(":")[0]
            vds = parset.getString("Part%d.Name" % part)
            data.append((host, vds))

        #                                 Divide data into timesteps for imaging
        #          timesteps is a list of (start, end, results directory) tuples
        # ----------------------------------------------------------------------
        timesteps = []
        results_dir = self.inputs['results_dir']
        if self.inputs['timestep'] == 0:
            self.logger.info("No timestep specified; imaging all data")
            timesteps = [(None, None, results_dir)]
        else:
            self.logger.info("Using timestep of %s s" %
                             self.inputs['timestep'])
            gvds = get_parset(gvds_file)
            start_time = quantity(gvds['StartTime'].get()).get('s').get_value()
            end_time = quantity(gvds['EndTime'].get()).get('s').get_value()
            step = float(self.inputs['timestep'])
            while start_time < end_time:
                timesteps.append((start_time, start_time + step,
                                  os.path.join(results_dir, str(start_time))))
                start_time += step

        #                          Run each cimager process in a separate thread
        # ----------------------------------------------------------------------
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        for label, timestep in enumerate(timesteps):
            self.logger.info("Processing timestep %d" % label)
            jobs = []
            parsets = []
            start_time, end_time, resultsdir = timestep
            for host, vds in data:
                vds_data = Parset(vds)
                frequency_range = [
                    vds_data.getDoubleVector("StartFreqs")[0],
                    vds_data.getDoubleVector("EndFreqs")[-1]
                ]
                parsets.append(
                    self.__get_parset(
                        os.path.basename(
                            vds_data.getString('FileName')).split('.')[0],
                        vds_data.getString("FileName"),
                        str(frequency_range),
                        vds_data.getStringVector("Extra.FieldDirectionType")
                        [0],
                        vds_data.getStringVector("Extra.FieldDirectionRa")[0],
                        vds_data.getStringVector("Extra.FieldDirectionDec")[0],
                        'True',  # cimager bug: non-restored image unusable
                    ))
                jobs.append(
                    ComputeJob(host,
                               command,
                               arguments=[
                                   self.inputs['imager_exec'], vds,
                                   parsets[-1], resultsdir, start_time,
                                   end_time
                               ]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for parset_path in parsets:
                parset = Parset(parset_path)
                image_names = parset.getStringVector("Cimager.Images.Names")
                self.outputs['images'].extend(image_names)
            for parset_path in parsets:
                os.unlink(parset_path)

        #                Check if we recorded a failing process before returning
        # ----------------------------------------------------------------------
        if self.error.isSet():
            self.logger.warn("Failed imager process detected")
            return 1
        else:
            return 0

    def __get_parset(self, name, dataset, frequency, ms_dir_type, ms_dir_ra,
                     ms_dir_dec, restore):
        def convert_mwimager_parset(parset):
            try:
                with patched_parset(
                        parset,
                    {
                        'dataset': dataset,
                        'Images.frequency': frequency,
                        'msDirType': ms_dir_type,
                        'msDirRa': ms_dir_ra,
                        'msDirDec': ms_dir_dec,
                        'restore':
                        restore  # cimager bug: non-restored image unusable
                    }) as cimager_parset:
                    fd, converted_parset = tempfile.mkstemp(
                        dir=self.config.get("layout", "job_directory"))
                    convert_process = spawn_process([
                        self.inputs['convert_exec'], cimager_parset,
                        converted_parset
                    ], self.logger)
                    os.close(fd)
                    sout, serr = convert_process.communicate()
                    log_process_output(self.inputs['convert_exec'], sout, serr,
                                       self.logger)
                    if convert_process.returncode != 0:
                        raise subprocess.CalledProcessError(
                            convert_process.returncode,
                            self.inputs['convert_exec'])
                    return converted_parset
            except OSError as e:
                self.logger.error("Failed to spawn convertimagerparset (%s)" %
                                  str(e))
                raise
            except subprocess.CalledProcessError as e:
                self.logger.error(str(e))
                raise

        def populate_cimager_parset(parset):
            input_parset = Parset(parset)
            patch_dictionary = {
                'Cimager.dataset': dataset,
                'Cimager.restore': restore
            }
            image_names = []
            for image_name in input_parset.getStringVector(
                    'Cimager.Images.Names'):
                image_names.append("%s_%s" % (image_name, name))
                subset = input_parset.makeSubset(
                    "Cimager.Images.%s" % image_name,
                    "Cimager.Images.%s" % image_names[-1])
                patch_dictionary["Cimager.Images.%s.frequency" %
                                 image_names[-1]] = frequency
                patch_dictionary["Cimager.Images.%s.direction" %
                                 image_names[-1]] = "[ %s,%s,%s ]" % (
                                     ms_dir_ra, ms_dir_dec, ms_dir_type)
                for key in subset:
                    patch_dictionary[key] = subset[key].get()
            input_parset.subtractSubset('Cimager.Images.image')
            for key in input_parset:
                patch_dictionary[key] = input_parset[key].get()
            patch_dictionary['Cimager.Images.Names'] = "[ %s ]" % ", ".join(
                image_names)
            return patch_parset(None, patch_dictionary,
                                self.config.get("layout", "job_directory"))

        try:
            if self.inputs['parset_type'] == "mwimager":
                cimager_parset = convert_mwimager_parset(self.inputs['parset'])
            elif self.inputs['parset_type'] == "cimager":
                cimager_parset = populate_cimager_parset(self.inputs['parset'])
        except Exception:
            self.logger.exception("Failed to generate imager parset")
            raise

        return cimager_parset
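
The timestep handling in go() reduces to chopping the observation's [start, end) interval into fixed-length slices, each with its own results directory. The same logic as a standalone sketch:

import os

def make_timesteps(start_time, end_time, step, results_dir):
    # step == 0 means "image all data at once", as in the recipe above.
    if step == 0:
        return [(None, None, results_dir)]
    timesteps = []
    while start_time < end_time:
        timesteps.append((start_time, start_time + step,
                          os.path.join(results_dir, str(start_time))))
        start_time += step
    return timesteps

# A 100 s observation imaged in 30 s chunks yields four slices; the last
# slice simply runs past end_time, exactly as in the recipe's while loop.
print(make_timesteps(0.0, 100.0, 30.0, "/data/results"))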
Example #4
File: dppp.py Project: mfkiwl/LOFAR
class dppp(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Runs ``NDPPP`` on a number of MeasurementSets. This is used for averaging,
    flagging, and/or demixing of data.

    1. Load input data files
    2. Load parmdb and sourcedb
    3. Call the node side of the recipe
    4. Create mapfile with successful noderecipe runs

    **Command line arguments**

    1. A mapfile describing the data to be processed.
    2. Optionally, a mapfile with target output locations.

    """
    inputs = {
        'parset':
        ingredient.FileField(
            '-p',
            '--parset',
            help="The full path to a DPPP configuration parset. The ``msin`` "
            "and ``msout`` keys will be added by this recipe"),
        'executable':
        ingredient.ExecField(
            '--executable',
            help="The full path to the relevant DPPP executable"),
        'suffix':
        ingredient.StringField(
            '--suffix',
            default=".dppp",
            help="Added to the input filename to generate the output filename"
        ),
        'working_directory':
        ingredient.StringField(
            '-w',
            '--working-directory',
            help="Working directory used on output nodes. Results will be "
            "written here"),
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Name of the output mapfile containing the names of the "
            "MS-files produced by the DPPP recipe"),
        'parmdb_mapfile':
        ingredient.StringField(
            '--parmdb-mapfile',
            optional=True,
            help="Path to mapfile containing the parmdb files "
            "(used by demixing step only)"),
        'sourcedb_mapfile':
        ingredient.StringField(
            '--sourcedb-mapfile',
            optional=True,
            help="Path to mapfile containing the sourcedb files "
            "(used by demixing step only)"),
        'demix_always':
        ingredient.ListField(
            '--demix-always',
            help="List of sources that must always be demixed "
            "(used by demixing step only)",
            default=[]),
        'demix_if_needed':
        ingredient.ListField(
            '--demix-if-needed',
            help="List of sources that will only be demixed if needed, "
            "based on some heuristics (used by demixing step only)",
            default=[]),
        # NB times are read from vds file as string
        'data_start_time':
        ingredient.StringField(
            '--data-start-time',
            default="",
            help="Start time to be passed to DPPP; used to pad data"),
        'data_end_time':
        ingredient.StringField(
            '--data-end-time',
            default="",
            help="End time to be passed to DPPP; used to pad data"),
        'nproc':
        ingredient.IntField(
            '--nproc',
            default=8,
            help="Maximum number of simultaneous processes per output node"),
        'nthreads':
        ingredient.IntField('--nthreads',
                            default=2,
                            help="Number of threads per (N)DPPP process"),
        'clobber':
        ingredient.BoolField(
            '--clobber',
            default=False,
            help="If ``True``, pre-existing output files will be removed "
            "before processing starts. If ``False``, the pipeline will "
            "abort if files already exist with the appropriate output "
            "filenames")
        # Keys that are present in the original demixing recipe.
        # Don't know yet if we still need them.
        #        'timestep': ingredient.IntField(
        #            '--timestep',
        #            help="Time step for averaging",
        #            default=10
        #        ),
        #        'freqstep': ingredient.IntField(
        #            '--freqstep',
        #            help="Frequency step for averaging",
        #            default=60
        #        ),
        #        'half_window': ingredient.IntField(
        #            '--half-window',
        #            help="Window size of median filter",
        #            default=20
        #        ),
        #        'threshold': ingredient.FloatField(
        #            '--threshold',
        #            help="Solutions above/below threshold*rms are smoothed",
        #            default=2.5
        #        ),
    }

    outputs = {
        'mapfile':
        ingredient.FileField(
            help="The full path to a mapfile describing the processed data"
            #        ),
            #        'fullyflagged': ingredient.ListField(
            #            help="A list of all baselines which were completely flagged in any "
            #                 "of the input MeasurementSets"
        )
    }

    def go(self):
        self.logger.info("Starting DPPP run")
        super(dppp, self).go()

        #        #                Keep track of "Total flagged" messages in the DPPP logs
        #        # ----------------------------------------------------------------------
        #        self.logger.searchpatterns["fullyflagged"] = "Fully flagged baselines"

        # *********************************************************************
        # 1. load input data file, validate output vs the input location if
        #    output locations are provided
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'], self.inputs['job_name'],
                    os.path.basename(item.file) + self.inputs['suffix'])

        # ********************************************************************
        # 2. Load parmdb and sourcedb
        # Load parmdb-mapfile, if one was given.
        if self.inputs.get('parmdb_mapfile'):
            self.logger.debug("Loading parmdb mapfile: %s" %
                              self.inputs['parmdb_mapfile'])
            parmdbdata = DataMap.load(self.inputs['parmdb_mapfile'])
        else:
            parmdbdata = copy.deepcopy(indata)
            for item in parmdbdata:
                item.file = ''

        # Load sourcedb-mapfile, if one was given.
        if self.inputs.get('sourcedb_mapfile'):
            self.logger.debug("Loading sourcedb mapfile: %s" %
                              self.inputs['sourcedb_mapfile'])
            sourcedbdata = DataMap.load(self.inputs['sourcedb_mapfile'])
        else:
            sourcedbdata = copy.deepcopy(indata)
            for item in sourcedbdata:
                item.file = ''

        # Validate all the data maps.
        if not validate_data_maps(indata, outdata, parmdbdata, sourcedbdata):
            self.logger.error("Validation of data mapfiles failed!")
            return 1

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for w, x, y, z in zip(indata, outdata, parmdbdata, sourcedbdata):
            w.skip = x.skip = y.skip = z.skip = (w.skip or x.skip or y.skip
                                                 or z.skip)

        # ********************************************************************
        # 3. Call the node side of the recipe
        # Create and schedule the compute jobs
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        indata.iterator = outdata.iterator = DataMap.SkipIterator
        parmdbdata.iterator = sourcedbdata.iterator = DataMap.SkipIterator
        jobs = []
        for inp, outp, pdb, sdb in zip(indata, outdata, parmdbdata,
                                       sourcedbdata):
            jobs.append(
                ComputeJob(inp.host,
                           command,
                           arguments=[
                               inp.file, outp.file, pdb.file, sdb.file,
                               self.inputs['parset'],
                               self.inputs['executable'], self.environment,
                               self.inputs['demix_always'],
                               self.inputs['demix_if_needed'],
                               self.inputs['data_start_time'],
                               self.inputs['data_end_time'],
                               self.inputs['nthreads'], self.inputs['clobber']
                           ],
                           resources={"cores": self.inputs['nthreads']}))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True


        # *********************************************************************
        # 4. parse logfile for fully flagged baselines
        #        matches = self.logger.searchpatterns["fullyflagged"].results
        #        self.logger.searchpatterns.clear() # finished searching
        #        stripchars = "".join(set("Fully flagged baselines: "))
        #        baselinecounter = defaultdict(lambda: 0)
        #        for match in matches:
        #            for pair in (
        #                pair.strip(stripchars) for pair in match.getMessage().split(";")
        #            ):
        #                baselinecounter[pair] += 1
        #        self.outputs['fullyflagged'] = baselinecounter.keys()

        # *********************************************************************
        # 4. Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")
        self.logger.debug("Writing data map file: %s" % self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
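
The skip-field update in step 2 is the crux of keeping four parallel data maps consistent: if any map flags an item, all four must skip it. A minimal sketch with a stand-in for DataMap entries:

class Item(object):
    """Stand-in for a DataMap entry; only the skip flag matters here."""
    def __init__(self, skip=False):
        self.skip = skip

indata = [Item(), Item(True), Item()]
outdata = [Item(), Item(), Item()]
parmdbdata = [Item(), Item(), Item(True)]
sourcedbdata = [Item(), Item(), Item()]

# Same OR-reduction as in dppp.go(): one skip poisons the whole row.
for w, x, y, z in zip(indata, outdata, parmdbdata, sourcedbdata):
    w.skip = x.skip = y.skip = z.skip = (w.skip or x.skip or y.skip
                                         or z.skip)

print([i.skip for i in outdata])  # [False, True, True]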
Example #5
class vdsreader(BaseRecipe):
    """
    Read a GVDS file and return a list of the MS filenames referenced therein
    together with selected metadata.
    
    This recipe performs its work on the master side:

    1. Open the GVDS file as a parameterset
    2. Convert all part FileNames to MS names
    3. Parse start and end time and pointing information

    **No command line arguments.**

    """
    inputs = {
        'gvds': ingredient.FileField(
            '-g', '--gvds',
            help="GVDS file to process"
        )
    }

    outputs = {
        'data': ingredient.ListField(help="List of MeasurementSet paths"),
        'start_time': ingredient.StringField(help="Start time of observation"),
        'end_time': ingredient.StringField(help="End time of observation"),
        'pointing': ingredient.DictField(help="Observation pointing direction")
    }

    def go(self):
        self.logger.info("Starting vdsreader run")
        super(vdsreader, self).go()

        # *********************************************************************
        # 1. Open the gvds file as a parameterset
        try:
            gvds = parameterset(self.inputs['gvds'])
        except Exception:
            self.logger.error("Unable to read G(V)DS file")
            raise

        self.logger.info("Building list of measurementsets")

        # **********************************************************************
        # 2. Convert all Part%d.FileName values to MS names
        ms_names = [
            gvds.getString("Part%d.FileName" % (part_no,))
            for part_no in range(gvds.getInt("NParts"))
        ]
        self.logger.debug(ms_names)

        self.outputs['data'] = ms_names

        # **********************************************************************
        # 3. Parse start and end time and pointing information
        try:
            self.outputs['start_time'] = gvds.getString('StartTime')
            self.outputs['end_time'] = gvds.getString('EndTime')
        except Exception:
            self.logger.warn("Failed to read start/end time from GVDS file")
        try:
            self.outputs['pointing'] = {
                'type': gvds.getStringVector('Extra.FieldDirectionType')[0],
                'dec': gvds.getStringVector('Extra.FieldDirectionDec')[0],
                'ra': gvds.getStringVector('Extra.FieldDirectionRa')[0]
            }
        except Exception:
            self.logger.warn("Failed to read pointing information from GVDS file")
        return 0
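
For reference, the GVDS keys vdsreader touches form a small, flat namespace. A sketch of that layout with a plain dict standing in for the parameterset API (all values are illustrative):

gvds = {
    "NParts": 2,
    "Part0.FileName": "/data/L12345_SB000_uv.MS",  # illustrative paths
    "Part1.FileName": "/data/L12345_SB001_uv.MS",
    "StartTime": "2012-01-01T00:00:00",
    "EndTime": "2012-01-01T01:00:00",
    "Extra.FieldDirectionType": ["J2000"],
    "Extra.FieldDirectionRa": ["0.9293"],
    "Extra.FieldDirectionDec": ["0.9525"],
}

ms_names = [gvds["Part%d.FileName" % n] for n in range(gvds["NParts"])]
pointing = {
    "type": gvds["Extra.FieldDirectionType"][0],
    "ra": gvds["Extra.FieldDirectionRa"][0],
    "dec": gvds["Extra.FieldDirectionDec"][0],
}
print(ms_names)
print(pointing)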
Example #6
class executable_args(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Basic script for running an executable with arguments.
    A mapfile is passed along so that the executable can process the MS files.
    """
    inputs = {
        'executable': ingredient.ExecField(
            '--executable',
            help="The full path to the relevant executable",
            optional=True
        ),
        'arguments': ingredient.ListField(
            '-a', '--arguments',
            help="List of arguments for the executable. Will be added as ./exe arg0 arg1...",
            default=[],
            optional=True
        ),
        'nthreads': ingredient.IntField(
            '--nthreads',
            default=8,
            help="Number of threads per process"
        ),
        'nodescript': ingredient.StringField(
            '--nodescript',
            help="Name of the node script to execute",
            default='executable_args',
            optional=True
        ),
        'parset': ingredient.FileField(
            '-p', '--parset',
            help="Path to the arguments for this executable. Will be converted to --key=value",
            optional=True
        ),
        'inputkey': ingredient.StringField(
            '-i', '--inputkey',
            help="Parset key that the executable will recognize as key for inputfile",
            default='',
            optional=True
        ),
        'outputkey': ingredient.StringField(
            '-0', '--outputkey',
            help="Parset key that the executable will recognize as key for outputfile",
            default='',
            optional=True
        ),
        'inputkeys': ingredient.ListField(
            '--inputkeys',
            help="List of parset keys that the executable will recognize as key for inputfile",
            default=[],
            optional=True
        ),
        'outputkeys': ingredient.ListField(
            '--outputkeys',
            help="List of parset keys that the executable will recognize as key for outputfile",
            default=[],
            optional=True
        ),
        'mapfiles_in': ingredient.ListField(
            '--mapfiles-in',
            help="List of the input mapfiles containing the names of the "
                 "data to run the recipe on",
            default=[],
            optional=True
        ),
        'mapfiles_as_string': ingredient.ListField(
            '--mapfiles_as_string',
            help="List of the input mapfiles to ignore and just use the name string instead.",
            default=[],
            optional=True
        ),
        'mapfiles_out': ingredient.ListField(
            '--mapfiles-out',
            help="List of the output mapfiles containing the names of the "
                 "data produced by the recipe",
            default=[],
            optional=True
        ),
        'mapfile_in': ingredient.StringField(
            '--mapfile-in',
            help="Name of the input mapfile containing the names of the "
                 "MS-files to run the recipe",
            default='',
            optional=True
        ),
        'mapfile_out': ingredient.StringField(
            '--mapfile-out',
            help="Name of the output mapfile containing the names of the "
                 "MS-files produced by the recipe",
            default='',
            optional=True
        ),
        'skip_infile': ingredient.BoolField(
            '--skip-infile',
            help="Dont give the input file to the executable.",
            default=False,
            optional=True
        ),
        'skip_outfile': ingredient.BoolField(
            '--skip-outfile',
            help="Dont produce an output file",
            default=False,
            optional=True
        ),
        'inplace': ingredient.BoolField(
            '--inplace',
            help="Manipulate input files inplace",
            default=False,
            optional=True
        ),
        'outputsuffixes': ingredient.ListField(
            '--outputsuffixes',
            help="Suffixes for the outputfiles",
            default=[]
        ),
        'parsetasfile': ingredient.BoolField(
            '--parsetasfile',
            help="Will the argument be a parsetfile or --opt=var",
            default=False
        ),
        'args_format': ingredient.StringField(
            '--args_format',
            help="Will change the format of the arguments. Standard definitions are...dont know yet",
            default='gnu'
        ),
        'args_format_argument': ingredient.StringField(
            '--args_format_argument',
            help="Will change the format of the arguments without option fields.",
            default=''
        ),
        'args_format_option': ingredient.StringField(
            '--args_format_option',
            help="Will change the format of option fields.",
            default='-'
        ),
        'args_format_longoption': ingredient.StringField(
            '--args_format_longoption',
            help="Will change the format of long option fields. Typically '--'",
            default='--'
        ),
        'args_format_option_argument': ingredient.StringField(
            '--args_format_option_argument',
            help="Will change the format of the arguments without option fields.",
            default='='
        ),
        'max_per_node': ingredient.IntField(
            '--max_per_node',
            help="Sets the number of jobs per node",
            default=0
        ),
        'stepname': ingredient.StringField(
            '--stepname',
            help="stepname for individual naming of results",
            optional=True
        ),
        'environment': ingredient.DictField(
            '--environment',
            help="Update environment variables for this step.",
            optional=True
        ),
        'error_tolerance': ingredient.BoolField(
            '--error_tolerance',
            help="Controls if the program exits on the first error or continues with succeeded MS.",
            default=True,
            optional=True
        )
    }

    outputs = {
        'mapfile': ingredient.FileField(
            help="The full path to a mapfile describing the processed data"
        )
    }

    def go(self):
        executable = None
        if 'executable' in self.inputs:
            executable = self.inputs['executable']

        if self.inputs['nthreads']:
            self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_format_longoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        # *********************************************************************
        # Try loading the input/output data mapfiles; validate the output
        # against the input locations if output locations are provided.
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be placed in the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        work_dir,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys():
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if self.inputs['inputkey'] and self.inputs['inputkey'] not in self.inputs['inputkeys']:
            self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
                if mapname not in self.inputs['mapfiles_as_string']:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(inp.file)
                else:
                    if key != mapname:
                        filedict[key] = []
                        for inp in filemap:
                            filedict[key].append(mapname)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        #command = "python3 %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
        recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
        pylist = os.getenv('PYTHONPATH', '').split(':')
        command = None
        for pl in pylist:
            if os.path.isfile(os.path.join(pl, 'lofarpipe/recipes/nodes/' + self.inputs['nodescript'] + '.py')):
                command = "python3 %s" % os.path.join(pl, 'lofarpipe/recipes/nodes/' + self.inputs['nodescript'] + '.py')
        for pl in recipe_directories:
            if os.path.isfile(os.path.join(pl, 'nodes/' + self.inputs['nodescript'] + '.py')):
                command = "python3 %s" % os.path.join(pl, 'nodes/' + self.inputs['nodescript'] + '.py')

        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            if filedict:
                for name, value in filedict.items():
                    replaced = False
                    if arglist_copy:
                        for ind, arg in enumerate(arglist_copy):
                            if name == arg:
                                arglist_copy[ind] = value[i]
                                replaced = True
                    if parsetdict_copy:
                        if name in list(parsetdict_copy.values()):
                            for k, v in parsetdict_copy.items():
                                if v == name:
                                    parsetdict_copy[k] = value[i]
                        else:
                            if not replaced:
                                parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        work_dir,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
                if not self.inputs['error_tolerance']:
                    self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                    return 1
            for k, v in list(job.results.items()):
                if k not in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # Temporary solution: write all output dict entries to a mapfile.
        # Create the mapfile directory if it does not exist (stand-alone mode).
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir)
            except OSError as exc:
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in list(jobresultdict.items()):
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
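
The args_format_* inputs describe how parset keys are rendered as command-line options on the node side, which is not part of this excerpt. A hypothetical sketch of the 'gnu' rendering implied by the defaults (long-option prefix '--', option/argument separator '='):

# Hypothetical sketch of the 'gnu' argument rendering; the real node-side
# formatter is not shown in this excerpt.
def format_options(parsetdict, fmt):
    if fmt['args_format'] != 'gnu':
        raise ValueError("unhandled args_format: %s" % fmt['args_format'])
    return [fmt['args_format_longoption'] + key +
            fmt['args_format_option_argument'] + value
            for key, value in sorted(parsetdict.items())]

fmt = {'args_format': 'gnu',
       'args_format_longoption': '--',
       'args_format_option_argument': '='}
print(format_options({'msin': 'L12345_SB000.MS', 'steps': '[flag]'}, fmt))
# ['--msin=L12345_SB000.MS', '--steps=[flag]']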
Example #7
class demixing(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Run the demixer on the MS's on the compute nodes.
    """
    inputs = {
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Name of the output mapfile containing the names of the "
            "MS-files produced by the demixing recipe"),
        'working_directory':
        ingredient.StringField('-w',
                               '--working-directory',
                               help="Working directory used on output nodes. "
                               "Results will be written here"),
        'initscript':
        ingredient.FileField(
            '--initscript',
            help="The full path to an (Bourne) shell script which will "
            "intialise the environment (ie, ``lofarinit.sh``)"),
        'demix_parset_dir':
        ingredient.DirectoryField(
            '--demix-parset-dir',
            dest='demixdir',
            help="Directory containing the demixing parset-files",
        ),
        'db_host':
        ingredient.StringField(
            '--db-host',
            dest="db_host",
            help="Database host with optional port (e.g. ldb001)"),
        'skymodel':
        ingredient.FileField(
            '--skymodel',
            help="File containing the sky model to use",
        ),
        'demix_sources':
        ingredient.ListField(
            '--demix-sources',
            dest='remove',
            help="List of sources to remove e.g. 'CygA, CasA'; "
            "will be determined automatically if not specified.",
            default=[]),
        'ms_target':
        ingredient.StringField(
            '--ms-target',
            dest='target',
            help="Substring in the output MS name that replaces the "
            "substring 'uv' (default: 'target')",
            default="target"),
        'timestep':
        ingredient.IntField('--timestep',
                            help="Time step for averaging",
                            default=10),
        'freqstep':
        ingredient.IntField('--freqstep',
                            help="Frequency step for averaging",
                            default=60),
        'half_window':
        ingredient.IntField('--half-window',
                            help="Window size of median filter",
                            default=20),
        'threshold':
        ingredient.FloatField(
            '--threshold',
            help="Solutions above/below threshold*rms are smoothed",
            default=2.5),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=1)
    }

    outputs = {'mapfile': ingredient.FileField()}

    def go(self):
        self.logger.info("Starting demixing run")
        super(demixing, self).go()

        job_dir = os.path.join(self.inputs['working_directory'],
                               self.inputs['job_name'])

        #                       Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = load_data_map(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = load_data_map(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1
        else:
            # This is a bit of a kludge. The input MS-filenames are supposed to
            # contain the string "_uv". The demixing node script will produce
            # output MS-files, whose names have the string "_uv" replaced by
            # "_" + self.inputs['ms_target'] + "_sub".
            outdata = [(host,
                        os.path.join(
                            job_dir,
                            os.path.basename(infile).replace(
                                '_uv',
                                '_' + self.inputs['ms_target'] + '_sub')))
                       for host, infile in indata]

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, infile in indata:
            jobs.append(
                ComputeJob(
                    host,
                    command,
                    arguments=[
                        infile, job_dir, self.inputs['initscript'],
                        self.inputs['demix_sources'], self.inputs['ms_target'],
                        self.config.get('cluster', 'clusterdesc'),
                        self.inputs['timestep'], self.inputs['freqstep'],
                        self.inputs['half_window'], self.inputs['threshold'],
                        self.inputs['demix_parset_dir'],
                        self.inputs['skymodel'], self.inputs['db_host']
                    ]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            return 1
        else:
            self.logger.debug("Writing mapfile %s" % self.inputs['mapfile'])
            store_data_map(self.inputs['mapfile'], outdata)
            self.outputs['mapfile'] = self.inputs['mapfile']
            return 0
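
The output-name kludge described in the comment above is plain string surgery on the input filename; for example:

import os

# '_uv' in the input name is replaced by '_<ms_target>_sub', and the result
# is placed in the job directory (paths are illustrative).
infile = "/data/scratch/L12345_SB000_uv.MS"
ms_target = "target"
job_dir = "/data/scratch/jobname"

outfile = os.path.join(
    job_dir,
    os.path.basename(infile).replace("_uv", "_" + ms_target + "_sub"))
print(outfile)  # /data/scratch/jobname/L12345_SB000_target_sub.MS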