Example #1
class flag_baseline(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Accept a list of baselines (in the format used by NDPPP logging).

    Flag them in all MeasurementSets.
    """
    inputs = {
        'baselines':
        ingredient.ListField('--baselines',
                             help="Baselines (in NDPPP format, eg 1&1)"),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8)
    }

    outputs = {'mapfile': ingredient.FileField()}

    def go(self):
        self.logger.info("Starting flag_baseline run")
        super(flag_baseline, self).go()

        #       Serialise list of baselines to disk for compute nodes to pick up
        # ----------------------------------------------------------------------
        fd, baseline_filename = mkstemp(
            dir=self.config.get("layout", "job_directory"))
        baseline_file = os.fdopen(fd, "w")
        dump(self.inputs["baselines"], baseline_file)
        baseline_file.close()

        #                 try block ensures baseline_filename is always unlinked
        # ----------------------------------------------------------------------
        try:
            #                       Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            self.logger.debug("Loading map from %s" % self.inputs['args'][0])
            data = load_data_map(self.inputs['args'][0])

            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            jobs = []
            for host, ms in data:
                jobs.append(
                    ComputeJob(host,
                               command,
                               arguments=[ms, baseline_filename]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        finally:
            os.unlink(baseline_filename)

        if self.error.isSet():
            return 1
        else:
            self.outputs['mapfile'] = self.inputs['args'][0]
            return 0
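The heart of flag_baseline is a small pattern worth isolating: write a transient input to a temporary file inside the job directory, hand its path to the node jobs, and unlink it in a finally block. A minimal standalone sketch of that pattern, assuming the recipe's dump is pickle.dump and using a plain callback in place of the ComputeJob scheduling:

import os
import pickle
from tempfile import mkstemp


def with_serialised(payload, job_dir, run_jobs):
    """Serialise payload to a temp file inside job_dir, call run_jobs(path),
    and always remove the file afterwards."""
    fd, path = mkstemp(dir=job_dir)
    with os.fdopen(fd, "wb") as temp_file:   # binary mode for pickle
        pickle.dump(payload, temp_file)
    try:
        return run_jobs(path)                # e.g. one job per (host, ms) pair
    finally:
        os.unlink(path)                      # mirrors the recipe's finally clause


# usage sketch with a throwaway callback (directory is illustrative):
# with_serialised(["1&1", "2&2"], "/tmp", lambda path: print("would pass", path))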
Example #2
class storagemapper(BaseRecipe):
    """
    Parses a list of filenames and generates a mapfile suitable for processing
    on storage nodes.

    **Arguments**

    None.
    """
    inputs = {
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help=
            "Full path (including filename) of mapfile to produce (clobbered if exists)"
        )
    }

    outputs = {
        'mapfile':
        ingredient.FileField(
            help="Full path (including filename) of generated mapfile")
    }

    def go(self):
        self.logger.info("Starting storagemapper run")
        super(storagemapper, self).go()

        #                          We read the storage node name out of the path
        #     and append the local filename (ie, on the storage node) to the map
        # ----------------------------------------------------------------------
        data = defaultdict(list)
        for filename in self.inputs['args']:
            host = filename.split(os.path.sep)[3]
            data[host].append(filename.split(host)[-1])

        #                                 Dump the generated mapping to a parset
        # ----------------------------------------------------------------------
        parset = Parset()
        for host, filenames in data.items():
            parset.addStringVector(host, filenames)

        create_directory(os.path.dirname(self.inputs['mapfile']))
        parset.writeFile(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']

        return 0
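The path handling in storagemapper assumes absolute filenames whose fourth path component is the storage node name; everything after that component is the node-local path. A self-contained sketch of the grouping, with a plain dict standing in for the Parset output (the example path is hypothetical):

import os
from collections import defaultdict


def group_by_host(filenames):
    """Group absolute paths by the host name embedded as the 4th path component."""
    data = defaultdict(list)
    for filename in filenames:
        host = filename.split(os.path.sep)[3]        # ['', 'net', 'sub1', host, ...]
        data[host].append(filename.split(host)[-1])  # keep the host-local remainder
    return dict(data)


# group_by_host(["/net/sub1/lse001/data/L1234_SB000.MS"])
# -> {'lse001': ['/data/L1234_SB000.MS']}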
class make_flaggable(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Update the storage manager on an MS to make the flag column writable.
    """
    inputs = {
        'makeflagwritable':
        ingredient.ExecField(
            '--makeFLAGwritable',
            help="Path to makeFLAGwritable script",
            default='/opt/LofIm/daily/lofar/bin/makeFLAGwritable'),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8)
    }

    outputs = {'mapfile': ingredient.FileField()}

    def go(self):
        self.logger.info("Starting make_flaggable run")
        super(make_flaggable, self).go()

        #                       Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        self.logger.debug("Loading map from %s" % self.inputs['args'][0])
        data = load_data_map(self.inputs['args'][0])

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, ms in data:
            jobs.append(
                ComputeJob(host,
                           command,
                           arguments=[ms, self.inputs['makeflagwritable']]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            return 1
        else:
            self.outputs['mapfile'] = self.inputs['args'][0]
            return 0
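Both master recipes above derive the node-side script from the master module's own path (replacing 'master' with 'nodes') and then build one job per (host, MeasurementSet) entry in the map. A sketch of that fan-out, with plain tuples standing in for load_data_map output and ComputeJob (paths here are made up):

def node_command(master_file, interpreter="python"):
    """Derive the node-side command from the master recipe's __file__."""
    return "%s %s" % (interpreter, master_file.replace("master", "nodes"))


def build_jobs(data_map, master_file, extra_args):
    """Return one (host, command, arguments) tuple per (host, ms) pair."""
    command = node_command(master_file)
    return [(host, command, [ms] + list(extra_args)) for host, ms in data_map]


# build_jobs([("node01", "/data/L1.MS")],
#            "/opt/pipeline/recipes/master/make_flaggable.py",
#            ["/opt/LofIm/daily/lofar/bin/makeFLAGwritable"])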
Example #4
class new_bbs(BaseRecipe):
    """
    **This bbs recipe still uses the old-style BBS with global control**
    **New versions will have stand-alone capability**

    The bbs recipe coordinates running BBS on a group of MeasurementSets. It
    runs both GlobalControl and KernelControl; as yet, SolverControl has not
    been integrated.

    **Arguments**

    A mapfile describing the data to be processed.
    """
    inputs = {
        'control_exec': ingredient.ExecField(
            '--control-exec',
            dest="control_exec",
            help="BBS Control executable"
        ),
        'kernel_exec': ingredient.ExecField(
            '--kernel-exec',
            dest="kernel_exec",
            help="BBS Kernel executable"
        ),
        'parset': ingredient.FileField(
            '-p', '--parset',
            dest="parset",
            help="BBS configuration parset"
        ),
        'db_key': ingredient.StringField(
            '--db-key',
            dest="db_key",
            help="Key to identify BBS session"
        ),
        'db_host': ingredient.StringField(
            '--db-host',
            dest="db_host",
            help="Database host with optional port (e.g. ldb001:5432)"
        ),
        'db_user': ingredient.StringField(
            '--db-user',
            dest="db_user",
            help="Database user"
        ),
        'db_name': ingredient.StringField(
            '--db-name',
            dest="db_name",
            help="Database name"
        ),
        'instrument_mapfile': ingredient.FileField(
            '--instrument-mapfile',
            help="Full path to the mapfile containing the names of the "
                 "instrument model files generated by the `parmdb` recipe"
        ),
        'sky_mapfile': ingredient.FileField(
            '--sky-mapfile',
            help="Full path to the mapfile containing the names of the "
                 "sky model files generated by the `sourcedb` recipe"
        ),
        'data_mapfile': ingredient.StringField(
            '--data-mapfile',
            help="Full path to the mapfile containing the names of the "
                 "data files that were processed by BBS (clobbered if exists)"
        ),
        'gvds': ingredient.StringField(
            '-g', '--gvds',
            help="Path for output GVDS file"
        )
    }
    outputs = {
        'mapfile': ingredient.FileField(
            help="Full path to a mapfile describing the processed data"
        )
    }

    def __init__(self):
        super(new_bbs, self).__init__()
        self.bbs_map = list()
        self.parset = parameterset()
        self.killswitch = threading.Event()

    def _set_input(self, in_key, ps_key):
        """
        Set the input-key `in_key` to the value of `ps_key` in the parset, if
        that is defined.
        """
        try:
            self.inputs[in_key] = self.parset.getString(ps_key)
        except RuntimeError as exceptionobject:
            self.logger.warn(str(exceptionobject))
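The _set_input helper lets parset values fill in inputs that were not given on the command line, logging a warning instead of failing when the key is absent. The same idea with a plain dict standing in for the parameterset API (which raises RuntimeError rather than KeyError):

import logging


def set_input(inputs, in_key, parset, ps_key, logger=logging.getLogger("new_bbs")):
    """Copy parset[ps_key] into inputs[in_key] if present; warn otherwise."""
    try:
        inputs[in_key] = parset[ps_key]   # parameterset.getString(ps_key) in the recipe
    except KeyError as exc:
        logger.warning("parset key missing: %s", exc)


# inputs = {}
# set_input(inputs, "db_host", {"BBDB.Host": "ldb001"}, "BBDB.Host")
# inputs -> {'db_host': 'ldb001'}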
Example #5
class bbs_reducer(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Run bbs-reducer in a non-distributed way on a number of MeasurementSets.
    
    **Arguments**
    
    A mapfile describing the data to be processed.
    """
    inputs = {
        'parset':
        ingredient.FileField('-p', '--parset',
                             help="BBS configuration parset"),
        'nthreads':
        ingredient.IntField('--nthreads',
                            default=8,
                            help="Number of threads per process"),
        'executable':
        ingredient.ExecField(
            '--executable',
            help="The full path to the BBS-reducer executable"),
        'instrument_mapfile':
        ingredient.FileField(
            '--instrument-mapfile',
            help="Full path to the mapfile containing the names of the "
            "instrument model files generated by the `parmdb` recipe"),
        'sky_mapfile':
        ingredient.FileField(
            '--sky-mapfile',
            help="Full path to the mapfile containing the names of the "
            "sky model files generated by the `sourcedb` recipe"),
        'data_mapfile':
        ingredient.StringField(
            '--data-mapfile',
            help="Full path to the mapfile that will contain the names of the "
            "data files that were processed by BBS"),
    }
    outputs = {
        'data_mapfile':
        ingredient.FileField(
            help="Full path to a mapfile describing the processed data"),
        'instrument_mapfile':
        ingredient.FileField(
            help="Full path to the (updated) mapfile containing the names of "
            "the instrument model files that were processed by BBS")
    }

    def __init__(self):
        """
        Initialize our data members.
        """
        super(bbs_reducer, self).__init__()
        self.bbs_map = list()
        self.jobs = list()
        self.data_map = DataMap()
        self.inst_map = DataMap()
        self.sky_map = DataMap()

    def _load_mapfiles(self):
        """
        Load data map file, instrument map file, and sky map file.
        Update the 'skip' fields in these map files: if 'skip' is True in any
        of the maps, then 'skip' must be set to True in all maps.
        """
        self.logger.debug(
            "Loading map files:"
            "\n\tdata map: %s\n\tinstrument map: %s\n\tsky map: %s" %
            (self.inputs['args'][0], self.inputs['instrument_mapfile'],
             self.inputs['sky_mapfile']))
        self.data_map = DataMap.load(self.inputs['args'][0])
        self.inst_map = DataMap.load(self.inputs['instrument_mapfile'])
        self.sky_map = DataMap.load(self.inputs['sky_mapfile'])

        if not validate_data_maps(self.data_map, self.inst_map, self.sky_map):
            self.logger.error("Validation of input data mapfiles failed")
            return False

        # Update the skip fields of the three maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for x, y, z in zip(self.data_map, self.inst_map, self.sky_map):
            x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip)

        return True

    def _run_jobs(self):
        """
        Create and schedule the compute jobs
        """
        command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
        self.data_map.iterator = DataMap.SkipIterator
        self.inst_map.iterator = DataMap.SkipIterator
        self.sky_map.iterator = DataMap.SkipIterator
        for data, inst, sky in zip(self.data_map, self.inst_map, self.sky_map):
            self.jobs.append(
                ComputeJob(data.host,
                           command,
                           arguments=[(data.file, inst.file, sky.file),
                                      self.inputs['executable'],
                                      self.inputs['parset'], self.environment],
                           resources={"cores": self.inputs['nthreads']}))
        self._schedule_jobs(self.jobs)

    def _update_mapfiles(self):
        """
        Update the data- and instrument- map files, taking into account any
        failed runs.
        """
        self.logger.debug(
            "Updating map files:"
            "\n\tdata map: %s\n\tinstrument map: %s" %
            (self.inputs['args'][0], self.inputs['instrument_mapfile']))
        for job, data, inst in zip(self.jobs, self.data_map, self.inst_map):
            if job.results['returncode'] != 0:
                data.skip = inst.skip = True
        self.data_map.save(self.inputs['data_mapfile'])
        self.inst_map.save(self.inputs['instrument_mapfile'])
        self.outputs['data_mapfile'] = self.inputs['args'][0]
        self.outputs['instrument_mapfile'] = self.inputs['instrument_mapfile']

    def _handle_errors(self):
        """
        Handle errors from the node scripts. If all jobs returned a (fatal)
        error, then the recipe should abort; return 1.
        Otherwise it should report that some jobs failed and continue with the
        remaining, successfully processed Measurement Set files; return 0.
        """
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in self.jobs):
                self.logger.error("All BBS-reducer jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn("Some BBS-reducer jobs failed, "
                                 "continuing with succeeded runs")
        return 0

    def go(self):
        """
        This is the actual workhorse. It is called by the framework. We pass
        three arguments to the node script: a tuple of file names (MS-file,
        parmdb-file, sourcedb-file), the path to the BBS-reducer executable,
        and the environment variables that are stored in self.environment.
        """
        self.logger.info("Starting BBS-reducer run")
        super(bbs_reducer, self).go()

        # Load the required map-files.
        if not self._load_mapfiles():
            return 1

        # Create and schedule the compute jobs
        self._run_jobs()

        # Update the instrument map file, taking failed runs into account.
        self._update_mapfiles()

        # Handle errors, if any.
        return self._handle_errors()
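Two idioms recur throughout these recipes: aligning the skip flags of parallel data maps (an entry skipped in any map is skipped in all of them), and aborting only when every job failed. A self-contained sketch with a minimal stand-in for a DataMap entry:

class Entry(object):
    """Minimal stand-in for a DataMap entry."""
    def __init__(self, host, filename, skip=False):
        self.host, self.file, self.skip = host, filename, skip


def align_skip(*maps):
    """If any map skips an entry, skip it in every map (equal lengths assumed)."""
    for entries in zip(*maps):
        skip = any(entry.skip for entry in entries)
        for entry in entries:
            entry.skip = skip


def abort_or_continue(returncodes, logger):
    """Return 1 only if every job failed; otherwise warn and carry on."""
    if returncodes and all(rc != 0 for rc in returncodes):
        logger.error("All jobs failed. Bailing out!")
        return 1
    if any(rc != 0 for rc in returncodes):
        logger.warning("Some jobs failed, continuing with succeeded runs")
    return 0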
Example #6
class dppp(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Runs ``NDPPP`` on a number of MeasurementSets. This is used for averaging,
    flagging, and/or demixing of data.

    1. Load input data files
    2. Load parmdb and sourcedb
    3. Call the node side of the recipe
    4. Create mapfile with successful noderecipe runs

    **Command line arguments**

    1. A mapfile describing the data to be processed.
    2. Optionally, a mapfile with target output locations.

    """
    inputs = {
        'parset':
        ingredient.FileField(
            '-p',
            '--parset',
            help="The full path to a DPPP configuration parset. The ``msin`` "
            "and ``msout`` keys will be added by this recipe"),
        'executable':
        ingredient.ExecField(
            '--executable',
            help="The full path to the relevant DPPP executable"),
        'suffix':
        ingredient.StringField(
            '--suffix',
            default=".dppp",
            help="Added to the input filename to generate the output filename"
        ),
        'working_directory':
        ingredient.StringField(
            '-w',
            '--working-directory',
            help="Working directory used on output nodes. Results will be "
            "written here"),
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Name of the output mapfile containing the names of the "
            "MS-files produced by the DPPP recipe"),
        'parmdb_mapfile':
        ingredient.StringField(
            '--parmdb-mapfile',
            optional=True,
            help="Path to mapfile containing the parmdb files "
            "(used by demixing step only)"),
        'sourcedb_mapfile':
        ingredient.StringField(
            '--sourcedb-mapfile',
            optional=True,
            help="Path to mapfile containing the sourcedb files "
            "(used by demixing step only)"),
        'demix_always':
        ingredient.ListField(
            '--demix-always',
            help="List of sources that must always be demixed "
            "(used by demixing step only)",
            default=[]),
        'demix_if_needed':
        ingredient.ListField(
            '--demix-if-needed',
            help="List of sources that will only be demixed if needed, "
            "based on some heuristics (used by demixing step only)",
            default=[]),
        # NB times are read from vds file as string
        'data_start_time':
        ingredient.StringField(
            '--data-start-time',
            default="",
            help="Start time to be passed to DPPP; used to pad data"),
        'data_end_time':
        ingredient.StringField(
            '--data-end-time',
            default="",
            help="End time to be passed to DPPP; used to pad data"),
        'nproc':
        ingredient.IntField(
            '--nproc',
            default=8,
            help="Maximum number of simultaneous processes per output node"),
        'nthreads':
        ingredient.IntField('--nthreads',
                            default=2,
                            help="Number of threads per (N)DPPP process"),
        'clobber':
        ingredient.BoolField(
            '--clobber',
            default=False,
            help="If ``True``, pre-existing output files will be removed "
            "before processing starts. If ``False``, the pipeline will "
            "abort if files already exist with the appropriate output "
            "filenames")
        # Keys that are present in the original demixing recipe.
        # Don't know yet if we still need them.
        #        'timestep': ingredient.IntField(
        #            '--timestep',
        #            help="Time step for averaging",
        #            default=10
        #        ),
        #        'freqstep': ingredient.IntField(
        #            '--freqstep',
        #            help="Frequency step for averaging",
        #            default=60
        #        ),
        #        'half_window': ingredient.IntField(
        #            '--half-window',
        #            help="Window size of median filter",
        #            default=20
        #        ),
        #        'threshold': ingredient.FloatField(
        #            '--threshold',
        #            help="Solutions above/below threshold*rms are smoothed",
        #            default=2.5
        #        ),
    }

    outputs = {
        'mapfile':
        ingredient.FileField(
            help="The full path to a mapfile describing the processed data"
            #        ),
            #        'fullyflagged': ingredient.ListField(
            #            help="A list of all baselines which were completely flagged in any "
            #                 "of the input MeasurementSets"
        )
    }

    def go(self):
        self.logger.info("Starting DPPP run")
        super(dppp, self).go()

        #        #                Keep track of "Total flagged" messages in the DPPP logs
        #        # ----------------------------------------------------------------------
        #        self.logger.searchpatterns["fullyflagged"] = "Fully flagged baselines"

        # *********************************************************************
        # 1. load input data file, validate output vs the input location if
        #    output locations are provided
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'], self.inputs['job_name'],
                    os.path.basename(item.file) + self.inputs['suffix'])

        # ********************************************************************
        # 2. Load parmdb and sourcedb
        # Load parmdb-mapfile, if one was given.
        if 'parmdb_mapfile' in self.inputs:
            self.logger.debug("Loading parmdb mapfile: %s" %
                              self.inputs['parmdb_mapfile'])
            parmdbdata = DataMap.load(self.inputs['parmdb_mapfile'])
        else:
            parmdbdata = copy.deepcopy(indata)
            for item in parmdbdata:
                item.file = ''

        # Load sourcedb-mapfile, if one was given.
        if 'sourcedb_mapfile' in self.inputs:
            self.logger.debug("Loading sourcedb mapfile: %s" %
                              self.inputs['sourcedb_mapfile'])
            sourcedbdata = DataMap.load(self.inputs['sourcedb_mapfile'])
        else:
            sourcedbdata = copy.deepcopy(indata)
            for item in sourcedbdata:
                item.file = ''

        # Validate all the data maps.
        if not validate_data_maps(indata, outdata, parmdbdata, sourcedbdata):
            self.logger.error("Validation of data mapfiles failed!")
            return 1

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for w, x, y, z in zip(indata, outdata, parmdbdata, sourcedbdata):
            w.skip = x.skip = y.skip = z.skip = (w.skip or x.skip or y.skip
                                                 or z.skip)

        # ********************************************************************
        # 3. Call the node side of the recipe
        # Create and schedule the compute jobs
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        indata.iterator = outdata.iterator = DataMap.SkipIterator
        parmdbdata.iterator = sourcedbdata.iterator = DataMap.SkipIterator
        jobs = []
        for inp, outp, pdb, sdb in zip(indata, outdata, parmdbdata,
                                       sourcedbdata):
            jobs.append(
                ComputeJob(inp.host,
                           command,
                           arguments=[
                               inp.file, outp.file, pdb.file, sdb.file,
                               self.inputs['parset'],
                               self.inputs['executable'], self.environment,
                               self.inputs['demix_always'],
                               self.inputs['demix_if_needed'],
                               self.inputs['data_start_time'],
                               self.inputs['data_end_time'],
                               self.inputs['nthreads'], self.inputs['clobber']
                           ],
                           resources={"cores": self.inputs['nthreads']}))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True


#        # *********************************************************************
#        # 4. parse logfile for fully flagged baselines
#        matches = self.logger.searchpatterns["fullyflagged"].results
#        self.logger.searchpatterns.clear() # finished searching
#        stripchars = "".join(set("Fully flagged baselines: "))
#        baselinecounter = defaultdict(lambda: 0)
#        for match in matches:
#            for pair in (
#                pair.strip(stripchars) for pair in match.getMessage().split(";")
#            ):
#                baselinecounter[pair] += 1
#        self.outputs['fullyflagged'] = baselinecounter.keys()

        # *********************************************************************
        # 4. Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")
        self.logger.debug("Writing data map file: %s" % self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
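When no output mapfile is given, dppp derives each output name from the input: the file goes under working_directory/job_name and the configured suffix is appended to the basename. That naming rule in isolation (the example paths are illustrative):

import os


def output_ms_path(input_ms, working_directory, job_name, suffix=".dppp"):
    """Mirror the recipe's default output naming for a single input MS."""
    return os.path.join(working_directory, job_name,
                        os.path.basename(input_ms) + suffix)


# output_ms_path("/data/L1234_SB000.MS", "/scratch/pipeline", "L1234")
# -> '/scratch/pipeline/L1234/L1234_SB000.MS.dppp'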
Example #7
class selfcal_finalize(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    The selfcal_finalize recipe performs a number of steps needed to integrate
    the msss_imager_pipeline into the LOFAR framework: it places the image at
    the output location in the correct image type (hdf5).
    It also adds some metadata collected from the individual measurement sets
    and the found data.

    This recipe does not have positional commandline arguments
    """
    inputs = {
        'awimager_output_map':
        ingredient.FileField(
            '--awimager-output-mapfile',
            help="""Mapfile containing (host, path) pairs of created sky
                   images """),
        'ms_per_image_map':
        ingredient.FileField(
            '--ms-per-image-map',
            help='''Mapfile containing (host, path) pairs of mapfiles used
            to create image on that node'''),
        'sourcelist_map':
        ingredient.FileField(
            '--sourcelist-map',
            help='''mapfile containing (host, path) pairs to a list of sources
            found in the image'''),
        'sourcedb_map':
        ingredient.FileField(
            '--sourcedb_map',
            help='''mapfile containing (host, path) pairs to a db of sources
            found in the image'''),
        'target_mapfile':
        ingredient.FileField(
            '--target-mapfile',
            help="Mapfile containing (host, path) pairs to the concatenated and"
            "combined measurement set, the source for the actual sky image"),
        'minbaseline':
        ingredient.FloatField(
            '--minbaseline',
            help='''Minimum length of the baseline used for the images'''),
        'maxbaseline':
        ingredient.FloatField(
            '--maxbaseline',
            help='''Maximum length of the baseline used for the images'''),
        'output_image_mapfile':
        ingredient.FileField(
            '--output-image-mapfile',
            help='''mapfile containing (host, path) pairs with the final
            output image (hdf5) location'''),
        'processed_ms_dir':
        ingredient.StringField(
            '--processed-ms-dir',
            help='''Path to directory for processed measurement sets'''),
        'fillrootimagegroup_exec':
        ingredient.ExecField(
            '--fillrootimagegroup_exec',
            help='''Full path to the fillRootImageGroup executable'''),
        'placed_image_mapfile':
        ingredient.FileField(
            '--placed-image-mapfile',
            help="location of mapfile with processed and correctly placed,"
            " hdf5 images"),
        'placed_correlated_mapfile':
        ingredient.FileField(
            '--placed-correlated-mapfile',
            help="location of mapfile with processedd and correctly placed,"
            " correlated ms"),
        'concat_ms_map_path':
        ingredient.FileField('--concat-ms-map-path',
                             help="Output of the concat MS file"),
        'output_correlated_mapfile':
        ingredient.FileField(
            '--output-correlated-mapfile',
            help="location of mapfile where output paths for mss are located"),
        'msselect_executable':
        ingredient.ExecField('--msselect-executable',
                             help="The full path to the msselect executable "),
    }

    outputs = {
        'placed_image_mapfile': ingredient.StringField(),
        'placed_correlated_mapfile': ingredient.StringField(),
    }

    def go(self):
        """
        Steps:

        1. Load and validate the input datamaps
        2. Run the node parts of the recipe
        3. Validate node output and format the recipe output
        """
        super(selfcal_finalize, self).go()
        # *********************************************************************
        # 1. Load the datamaps
        awimager_output_map = DataMap.load(self.inputs["awimager_output_map"])
        ms_per_image_map = DataMap.load(self.inputs["ms_per_image_map"])
        sourcelist_map = DataMap.load(self.inputs["sourcelist_map"])
        sourcedb_map = DataMap.load(self.inputs["sourcedb_map"])
        target_mapfile = DataMap.load(self.inputs["target_mapfile"])
        output_image_mapfile = DataMap.load(
            self.inputs["output_image_mapfile"])
        concat_ms_mapfile = DataMap.load(self.inputs["concat_ms_map_path"])
        output_correlated_map = DataMap.load(
            self.inputs["output_correlated_mapfile"])
        processed_ms_dir = self.inputs["processed_ms_dir"]
        fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"]

        # Align the skip fields
        align_data_maps(awimager_output_map, ms_per_image_map, sourcelist_map,
                        target_mapfile, output_image_mapfile, sourcedb_map,
                        concat_ms_mapfile, output_correlated_map)

        # Set the correct iterator
        sourcelist_map.iterator = awimager_output_map.iterator = \
            ms_per_image_map.iterator = target_mapfile.iterator = \
            output_image_mapfile.iterator = sourcedb_map.iterator = \
            concat_ms_mapfile.iterator = output_correlated_map.iterator = \
            DataMap.SkipIterator

        # *********************************************************************
        # 2. Run the node side of the recipe
        command = " python3 %s" % (self.__file__.replace("master", "nodes"))
        jobs = []
        for (awimager_output_item, ms_per_image_item, sourcelist_item,
             target_item, output_image_item, sourcedb_item, concat_ms_item,
             correlated_item) in zip(awimager_output_map, ms_per_image_map,
                                     sourcelist_map, target_mapfile,
                                     output_image_mapfile, sourcedb_map,
                                     concat_ms_mapfile, output_correlated_map):
            # collect the files as argument
            arguments = [
                awimager_output_item.file,
                ms_per_image_item.file,
                sourcelist_item.file,
                target_item.file,
                output_image_item.file,
                self.inputs["minbaseline"],
                self.inputs["maxbaseline"],
                processed_ms_dir,
                fillrootimagegroup_exec,
                self.environment,
                sourcedb_item.file,
                concat_ms_item.file,
                correlated_item.file,
                self.inputs["msselect_executable"],
            ]

            self.logger.info(
                "Starting finalize with the folowing args: {0}".format(
                    arguments))
            jobs.append(ComputeJob(target_item.host, command, arguments))

        self._schedule_jobs(jobs)

        # *********************************************************************
        # 3. Validate the performance of the node script and assign output
        successful_run = False
        for (job, output_image_item,
             output_correlated_item) in zip(jobs, output_image_mapfile,
                                            output_correlated_map):
            if "hdf5" not in job.results:
                # If the output failed, set skip to True for both entries
                output_image_item.skip = True
                output_correlated_item.skip = True
            else:
                successful_run = True
                # signal that we have at least a single run finished ok.
                # No need to set skip in this case

        if not successful_run:
            self.logger.warn("Not a single finalizer succeeded")
            return 1

        # Save the location of the output images
        output_image_mapfile.save(self.inputs['placed_image_mapfile'])
        self.logger.debug(
            "Wrote mapfile containing placed hdf5 images: {0}".format(
                self.inputs['placed_image_mapfile']))

        # save the location of measurements sets
        output_correlated_map.save(self.inputs['placed_correlated_mapfile'])
        self.logger.debug("Wrote mapfile containing placed mss: {0}".format(
            self.inputs['placed_correlated_mapfile']))

        self.outputs["placed_image_mapfile"] = self.inputs[
            'placed_image_mapfile']
        self.outputs["placed_correlated_mapfile"] = self.inputs[
            'placed_correlated_mapfile']

        return 0
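selfcal_finalize judges each node run by whether the job returned an 'hdf5' entry in its results dictionary; failed entries are skipped in both output maps, and the recipe only aborts when nothing succeeded. The decision logic on its own (the map items are any objects with a skip attribute):

def mark_failures(job_results, image_items, correlated_items):
    """Skip entries whose job produced no 'hdf5' result; report overall success."""
    successful_run = False
    for results, image_item, correlated_item in zip(job_results, image_items,
                                                    correlated_items):
        if "hdf5" not in results:
            image_item.skip = True
            correlated_item.skip = True
        else:
            successful_run = True
    return successful_run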
Example #8
class imager_bbs(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    The imager_bbs master performs a BBS run based on the supplied parset; it
    is a shallow wrapper around BBS. Compared to the default bbs recipe it adds
    the capability to supply an id, which allows multiple runs to produce
    different output files.

    1. Load and validate the input mapfiles.
    2. Start the node scripts, using indexed path names for the
       communication.
    3. Check that all nodes succeeded. If so, return a mapfile with the
       calibrated MeasurementSets.

    **Command line arguments**

    1. Path to a mapfile with measurement sets to calibrate

    """
    inputs = {
        'parset': ingredient.FileField(
            '-p', '--parset',
            help = "BBS configuration parset"
        ),
        'nthreads': ingredient.IntField(
            '--nthreads',
            default = 8,
            help = "Number of threads per process"
        ),
        'bbs_executable': ingredient.StringField(
            '--bbs-executable',
            help = "BBS standalone executable (bbs-reducer)"
        ),
        'instrument_mapfile': ingredient.FileField(
            '--instrument-mapfile',
            help = "Full path to the mapfile containing the names of the "
                 "instrument model files generated by the `parmdb` recipe"
        ),
        'sourcedb_mapfile': ingredient.FileField(
            '--sourcedb-mapfile',
            help = "Full path to the mapfile containing the names of the "
                 "sourcedbs generated by the `sourcedb` recipe"
        ),
        'id': ingredient.IntField(
            '--id',
            default = 0,
            help = "Optional integer id for distinguishing multiple runs"
        ),
        'mapfile': ingredient.StringField(
            '--mapfile',
            help = "Full path to the file containing the output data products"
        ),
    }

    outputs = {
        'mapfile': ingredient.FileField(
            help = "Full path to a mapfile describing the processed data"
        )
    }

    def go(self):
        """
        imager_bbs functionality. Called by framework performing all the work
        """
        super(imager_bbs, self).go()
        self.logger.info("Starting imager_bbs run")

        # ********************************************************************
        # 1. Load and validate the data

        ms_map = MultiDataMap.load(self.inputs['args'][0])
        parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])

        # TODO: DataMap extension
#        #Check if the input has equal length and on the same nodes
#        if not validate_data_maps(ms_map, parmdb_map):
#            self.logger.error("The combination of mapfiles failed validation:")
#            self.logger.error("ms_map: \n{0}".format(ms_map))
#            self.logger.error("parmdb_map: \n{0}".format(parmdb_map))
#            return 1

        # *********************************************************************
        # 2. Start the node scripts
        jobs = []
        node_command = " python3 %s" % (self.__file__.replace("master", "nodes"))
        map_dir = os.path.join(
                        self.config.get("layout", "job_directory"), "mapfiles")
        run_id = str(self.inputs.get("id"))

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for w, x, y in zip(ms_map, parmdb_map, sourcedb_map):
            w.skip = x.skip = y.skip = (
                w.skip or x.skip or y.skip
            )

        ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
            DataMap.SkipIterator
        for (idx, (ms, parmdb, sourcedb)) in enumerate(zip(ms_map, parmdb_map, sourcedb_map)):
            # host is same for each entry (validate_data_maps)
            host, ms_list = ms.host, ms.file

            # Write data maps to MultiDataMaps
            ms_list_path = os.path.join(
                    map_dir, "%s-%s_map_%s.map" % (host, idx, run_id))
            MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

            parmdb_list_path = os.path.join(
                    map_dir, "%s-%s_parmdb_%s.map" % (host, idx, run_id))
            MultiDataMap(
                [tuple([host, parmdb.file, False])]).save(parmdb_list_path)

            sourcedb_list_path = os.path.join(
                    map_dir, "%s-%s_sky_%s.map" % (host, idx, run_id))
            MultiDataMap(
                [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path)

            arguments = [self.inputs['bbs_executable'],
                         self.inputs['parset'],
                         ms_list_path, parmdb_list_path, sourcedb_list_path]
            jobs.append(ComputeJob(host, node_command, arguments,
                    resources = {
                        "cores": self.inputs['nthreads']
                    }))

        # start and wait till all are finished
        self._schedule_jobs(jobs)

        # **********************************************************************
        # 3. validate the node output and construct the output mapfile.
        if self.error.isSet():    # if one of the nodes failed
            self.logger.error("One of the nodes failed while performing"
                              "a BBS run. Aborting: concat.ms corruption")
            return 1

        # Return the output: the measurement sets that were calibrated
        # (calibrated data is placed in the MS sets).
        MultiDataMap(ms_map).save(self.inputs['mapfile'])
        self.logger.info("Wrote file with calibrated data")

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
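imager_bbs writes one single-entry mapfile per node job and encodes host, entry index, and run id in the filename so that parallel and repeated runs cannot clash. The naming scheme factored out (the directory is illustrative):

import os


def job_mapfile_paths(map_dir, host, idx, run_id):
    """Per-job mapfile names, <host>-<idx>_<kind>_<run_id>.map, as used above."""
    return {
        kind: os.path.join(map_dir, "%s-%s_%s_%s.map" % (host, idx, kind, run_id))
        for kind in ("map", "parmdb", "sky")
    }


# job_mapfile_paths("/jobs/L1234/mapfiles", "node01", 0, "7")
# -> {'map': '/jobs/L1234/mapfiles/node01-0_map_7.map', ...}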
Example #9
class imager_source_finding(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Master side of imager_source_finding. Collects arguments from the command
    line and pipeline inputs (for the implementation details see the node
    recipe):

    1. Load mapfiles with input images and collect some parameters from
       the input ingredients.
    2. Call the node recipe.
    3. Validate performance of the node recipe and construct the output value.

    **Command line arguments**

    A mapfile containing (node, image_path) pairs: the images to search for
    sources.
    """
    inputs = {
        'bdsm_parset_file_run1': ingredient.FileField(
            '--bdsm-parset-file-run1',
            help="Path to bdsm parameter set for the first sourcefinding run"
        ),
        'bdsm_parset_file_run2x': ingredient.FileField(
            '--bdsm-parset-file-run2x',
            help="Path to bdsm parameter set for the second and later" \
                   " sourcefinding runs"
        ),
        'catalog_output_path': ingredient.StringField(
            '--catalog-output-path',
            help="Path to write the catalog created by bdsm)"
        ),
        'mapfile': ingredient.StringField(
            '--mapfile',
            help="Full path of mapfile; containing the succesfull generated"
            "source list"
        ),
        'working_directory': ingredient.StringField(
            '--working-directory',
            help="Working directory used by the nodes: local data"
        ),
        'sourcedb_target_path': ingredient.StringField(
            '--sourcedb-target-path',
            help="Target path for the sourcedb created based on the"
                 " found sources"
        ),
        'makesourcedb_path': ingredient.ExecField(
             '--makesourcedb-path',
             help="Path to makesourcedb executable."
        ),
        'sourcedb_map_path': ingredient.StringField(
            '--sourcedb-map-path',
            help="Full path of mapfile; containing the succesfull generated"
            "sourcedbs"
        ),

    }

    outputs = {
        'mapfile':
        ingredient.StringField(
            help="Full path of mapfile; containing the succesfull generated"),
        'sourcedb_map_path':
        ingredient.StringField(
            help="Full path of mapfile; containing the succesfull generated"
            "sourcedbs")
    }

    def go(self):
        """
        """
        super(imager_source_finding, self).go()
        self.logger.info("Starting imager_source_finding run")
        # ********************************************************************
        # 1. load mapfiles with input images and collect some parameters from
        # The input ingredients
        input_map = DataMap.load(self.inputs['args'][0])
        catalog_output_path = self.inputs["catalog_output_path"]

        # ********************************************************************
        # 2. Start the node script
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))
        jobs = []
        input_map.iterator = DataMap.SkipIterator
        for idx, item in enumerate(input_map):
            # use unique working directories per job, to prevent interference between jobs on a global fs
            working_dir = os.path.join(self.inputs['working_directory'],
                                       "imager_source_finding_{0}".format(idx))

            arguments = [
                item.file, self.inputs["bdsm_parset_file_run1"],
                self.inputs["bdsm_parset_file_run2x"],
                "%s-%s" % (catalog_output_path, idx),
                os.path.join(self.inputs["working_directory"],
                             "bdsm_output-%s.img" % (idx, )),
                "%s-%s" % (self.inputs['sourcedb_target_path'], idx),
                self.environment, working_dir, self.inputs['makesourcedb_path']
            ]

            jobs.append(ComputeJob(item.host, node_command, arguments))

        # Hand over the job(s) to the pipeline scheduler
        self._schedule_jobs(jobs)

        # ********************************************************************
        # 3. Test for errors and return output
        if self.error.isSet():
            self.logger.warn("Failed imager_source_finding run detected")

        # Collect the nodes that succeeded
        source_dbs_from_nodes = copy.deepcopy(input_map)
        catalog_output_path_from_nodes = copy.deepcopy(input_map)
        source_dbs_from_nodes.iterator = \
            catalog_output_path_from_nodes.iterator = DataMap.SkipIterator

        successful_job = False
        for job, sourcedb_item, catalog_item in zip(
                jobs, source_dbs_from_nodes, catalog_output_path_from_nodes):

            if "source_db" in job.results:
                successful_job = True
                sourcedb_item.file = job.results["source_db"]
                catalog_item.file = job.results["catalog_output_path"]
            else:
                sourcedb_item.file = "failed"
                sourcedb_item.skip = True
                catalog_item.file = "failed"
                catalog_item.skip = True
                # We now also have catalog path

        # Abort if none of the recipes succeeded
        if not successful_job:
            self.logger.error("None of the source finding recipes succeeded")
            self.logger.error("Exiting with a failure status")
            return 1

        self._store_data_map(self.inputs['mapfile'],
                             catalog_output_path_from_nodes,
                             "datamap with created sourcelists")
        self._store_data_map(self.inputs['sourcedb_map_path'],
                             source_dbs_from_nodes,
                             " datamap with created sourcedbs")

        self.outputs["mapfile"] = self.inputs['mapfile']
        self.outputs["sourcedb_map_path"] = self.inputs['sourcedb_map_path']

        return 0
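imager_source_finding keeps concurrent jobs on a shared filesystem apart by giving each one an index-suffixed working directory and output names. The path construction on its own (inputs here are illustrative):

import os


def per_job_paths(working_directory, catalog_output_path, sourcedb_target_path, idx):
    """Index-suffixed paths for a single source-finding job."""
    return {
        "working_dir": os.path.join(working_directory,
                                    "imager_source_finding_{0}".format(idx)),
        "catalog": "%s-%s" % (catalog_output_path, idx),
        "bdsm_image": os.path.join(working_directory, "bdsm_output-%s.img" % (idx,)),
        "sourcedb": "%s-%s" % (sourcedb_target_path, idx),
    }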
Example #10
class executable_args(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Basic script for running an executable with arguments.
    Passing a mapfile along so the executable can process MS.
    """
    inputs = {
        'executable': ingredient.ExecField(
            '--executable',
            help="The full path to the relevant executable",
            optional=True
        ),
        'arguments': ingredient.ListField(
            '-a', '--arguments',
            help="List of arguments for the executable. Will be added as ./exe arg0 arg1...",
            default='',
            optional=True
        ),
        'nthreads': ingredient.IntField(
            '--nthreads',
            default=8,
            help="Number of threads per process"
        ),
        'nodescript': ingredient.StringField(
            '--nodescript',
            help="Name of the node script to execute",
            default='executable_args',
            optional=True
        ),
        'parset': ingredient.FileField(
            '-p', '--parset',
            help="Path to the arguments for this executable. Will be converted to --key=value",
            optional=True
        ),
        'inputkey': ingredient.StringField(
            '-i', '--inputkey',
            help="Parset key that the executable will recognize as key for inputfile",
            default='',
            optional=True
        ),
        'outputkey': ingredient.StringField(
            '-0', '--outputkey',
            help="Parset key that the executable will recognize as key for outputfile",
            default='',
            optional=True
        ),
        'inputkeys': ingredient.ListField(
            '--inputkeys',
            help="List of parset keys that the executable will recognize as key for inputfile",
            default=[],
            optional=True
        ),
        'outputkeys': ingredient.ListField(
            '--outputkeys',
            help="List of parset keys that the executable will recognize as key for outputfile",
            default=[],
            optional=True
        ),
        'mapfiles_in': ingredient.ListField(
            '--mapfiles-in',
            help="List of the input mapfiles containing the names of the "
                 "data to run the recipe on",
            default=[],
            optional=True
        ),
        'mapfiles_as_string': ingredient.ListField(
            '--mapfiles_as_string',
            help="List of the input mapfiles to ignore and just use the name string instead.",
            default=[],
            optional=True
        ),
        'mapfiles_out': ingredient.ListField(
            '--mapfiles-out',
            help="List of the output mapfiles containing the names of the "
                 "data produced by the recipe",
            default=[],
            optional=True
        ),
        'mapfile_in': ingredient.StringField(
            '--mapfile-in',
            help="Name of the input mapfile containing the names of the "
                 "MS-files to run the recipe",
            default='',
            optional=True
        ),
        'mapfile_out': ingredient.StringField(
            '--mapfile-out',
            help="Name of the output mapfile containing the names of the "
                 "MS-files produced by the recipe",
            default='',
            optional=True
        ),
        'skip_infile': ingredient.BoolField(
            '--skip-infile',
            help="Dont give the input file to the executable.",
            default=False,
            optional=True
        ),
        'skip_outfile': ingredient.BoolField(
            '--skip-outfile',
            help="Dont produce an output file",
            default=False,
            optional=True
        ),
        'inplace': ingredient.BoolField(
            '--inplace',
            help="Manipulate input files inplace",
            default=False,
            optional=True
        ),
        'outputsuffixes': ingredient.ListField(
            '--outputsuffixes',
            help="Suffixes for the outputfiles",
            default=[]
        ),
        'parsetasfile': ingredient.BoolField(
            '--parsetasfile',
            help="Will the argument be a parsetfile or --opt=var",
            default=False
        ),
        'args_format': ingredient.StringField(
            '--args_format',
            help="Will change the format of the arguments. Standard definitions are...dont know yet",
            default='gnu'
        ),
        'args_format_argument': ingredient.StringField(
            '--args_format_argument',
            help="Will change the format of the arguments without option fields.",
            default=''
        ),
        'args_format_option': ingredient.StringField(
            '--args_format_option',
            help="Will change the format of option fields.",
            default='-'
        ),
        'args_format_longoption': ingredient.StringField(
            '--args_format_longoption',
            help="Will change the format of long option fields. Typically '--'",
            default='--'
        ),
        'args_format_option_argument': ingredient.StringField(
            '--args_format_option_argument',
            help="Will change the format of the arguments without option fields.",
            default='='
        ),
        'max_per_node': ingredient.IntField(
            '--max_per_node',
            help="Sets the number of jobs per node",
            default=0
        ),
        'stepname': ingredient.StringField(
            '--stepname',
            help="stepname for individual naming of results",
            optional=True
        ),
        'environment': ingredient.DictField(
            '--environment',
            help="Update environment variables for this step.",
            optional=True
        ),
        'error_tolerance': ingredient.BoolField(
            '--error_tolerance',
            help="Controls if the program exits on the first error or continues with succeeded MS.",
            default=True,
            optional=True
        )
    }

    outputs = {
        'mapfile': ingredient.FileField(
            help="The full path to a mapfile describing the processed data"
        )
    }

    def go(self):
        if 'executable' in self.inputs:
            executable = self.inputs['executable']

        if self.inputs['nthreads']:
            self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        # *********************************************************************
        # try loading input/output data file, validate output vs the input location if
        #    output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be placed in the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        work_dir,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if self.inputs['inputkey'] and self.inputs['inputkey'] not in self.inputs['inputkeys']:
            self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
                if mapname not in self.inputs['mapfiles_as_string']:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(inp.file)
                else:
                    if key != mapname:
                        filedict[key] = []
                        for inp in filemap:
                            filedict[key].append(mapname)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)
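        # filedict now maps every input/output key to one file per job, e.g.
        # (hypothetical): {'msin': ['/data/L1_SB000.MS', '/data/L1_SB001.MS'],
        #                  'msout': ['/data/L1_SB000.step', ...]}; entry i is
        # substituted into the arguments/parset of job i below.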

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        #command = "python3 %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
        recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
        pylist = os.getenv('PYTHONPATH', '').split(':')
        command = None
        for pl in pylist:
            if os.path.isfile(os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')
        for pl in recipe_directories:
            if os.path.isfile(os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')

        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            if filedict:
                for name, value in filedict.items():
                    replaced = False
                    if arglist_copy:
                        for arg in arglist:
                            if name == arg:
                                ind = arglist_copy.index(arg)
                                arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                                replaced = True
                    if parsetdict_copy:
                        if name in list(parsetdict_copy.values()):
                            for k, v in parsetdict_copy.items():
                                if v == name:
                                    parsetdict_copy[k] = value[i]
                        else:
                            if not replaced:
                                parsetdict_copy[name] = value[i]
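            # At this point every argument or parset value equal to an
            # input/output key has been replaced by the file belonging to job
            # i; keys that were not matched are added as new parset entries so
            # the node script still receives them.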

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        work_dir,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
                if not self.inputs['error_tolerance']:
                    self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                    return 1
            for k, v in list(job.results.items()):
                if k not in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # temp solution. write all output dict entries to a mapfile
        #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        # check directory for stand-alone mode
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in list(jobresultdict.items()):
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
        self.outputs.update(resultmap)
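        # For a (hypothetical) step named 'ndppp' and a job result key
        # 'returncode', this writes <mapfile_dir>/ndppp.returncode.mapfile and
        # exposes it as the output 'returncode.mapfile'.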
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
Exemple #11
0
class selfcal_bbs(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Imager_bbs master performs a BBS run based on the supplied parset; it is a
    shallow wrapper around BBS. Additional functionality compared to the
    default bbs recipe is the capability to add an id that allows multiple
    runs to have different output files.

    1. Load and validate the input mapfiles.
    2. Start the node scripts, using indexed path names for the
       communication.
    3. Check whether all nodes succeeded; if so, return a mapfile with the
       calibrated MSs.

    **Command line arguments**

    1. Path to a mapfile with measurement sets to calibrate

    """
    inputs = {
        'parset':
        ingredient.FileField('-p', '--parset',
                             help="BBS configuration parset"),
        'bbs_executable':
        ingredient.StringField('--bbs-executable',
                               help="BBS standalone executable (bbs-reducer)"),
        'instrument_mapfile':
        ingredient.FileField(
            '--instrument-mapfile',
            help="Full path to the mapfile containing the names of the "
            "instrument model files generated by the `parmdb` recipe"),
        'sourcedb_mapfile':
        ingredient.FileField(
            '--sourcedb-mapfile',
            help="Full path to the mapfile containing the names of the "
            "sourcedbs generated by the `sourcedb` recipe"),
        'id':
        ingredient.IntField(
            '--id',
            default=0,
            help="Optional integer id for distinguishing multiple runs"),
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Full path to the file containing the output data products"),
        'concat_ms_map_path':
        ingredient.FileField('--concat-ms-map-path',
                             help="Output of the concat MS file"),
        'major_cycle':
        ingredient.IntField('--major_cycle',
                            help="ID for the current major cycle")
    }

    outputs = {
        'mapfile':
        ingredient.FileField(
            help="Full path to a mapfile describing the processed data")
    }

    def go(self):
        """
        imager_bbs functionality. Called by the framework; performs all the work.
        """
        super(selfcal_bbs, self).go()
        self.logger.info("Starting imager_bbs run")

        # ********************************************************************
        # 1. Load and validate the data
        ms_map = MultiDataMap.load(self.inputs['args'][0])
        parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])
        concat_ms_map = DataMap.load(self.inputs['concat_ms_map_path'])

        # *********************************************************************
        # 2. Start the node scripts
        jobs = []
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))
        map_dir = os.path.join(self.config.get("layout", "job_directory"),
                               "mapfiles")
        run_id = str(self.inputs.get("id"))

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        align_data_maps(ms_map, parmdb_map, sourcedb_map, concat_ms_map)

        ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
            concat_ms_map.iterator = DataMap.SkipIterator

        # *********************************************************************
        for (ms, parmdb, sourcedb, concat_ms) in zip(ms_map, parmdb_map,
                                                     sourcedb_map,
                                                     concat_ms_map):
            # host is the same for each entry (validate_data_maps)
            host, ms_list = ms.host, ms.file

            # Write data maps to MultiDataMaps
            ms_list_path = os.path.join(map_dir,
                                        host + "_ms_" + run_id + ".map")
            MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

            parmdb_list_path = os.path.join(
                map_dir, host + "_parmdb_" + run_id + ".map")
            MultiDataMap([tuple([host, parmdb.file,
                                 False])]).save(parmdb_list_path)

            sourcedb_list_path = os.path.join(map_dir,
                                              host + "_sky_" + run_id + ".map")
            MultiDataMap([tuple([host, [sourcedb.file],
                                 False])]).save(sourcedb_list_path)
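            # Each of these single-entry MultiDataMaps pairs the host with a
            # (list of) file(s) and a skip flag; the node script picks up its
            # inputs from these per-host map files.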

            # The concat MS does not have to be written: it is already a
            # singular item (it is the output of the reduce step)
            # redmine issue #6021
            arguments = [
                self.inputs['bbs_executable'], self.inputs['parset'],
                ms_list_path, parmdb_list_path, sourcedb_list_path,
                concat_ms.file, self.inputs['major_cycle']
            ]
            jobs.append(ComputeJob(host, node_command, arguments))

        # start and wait till all are finished
        self._schedule_jobs(jobs)

        # **********************************************************************
        # 3. validate the node output and construct the output mapfile.
        if self.error.isSet():  #if one of the nodes failed
            self.logger.warn("Failed bbs node run detected, skipping work"
                             "on this work item for further computations")

        # find failed job and set the skip field
        for (ms_item, concat_item, job) in zip(ms_map, concat_ms_map, jobs):
            if job.results["returncode"] == 0:
                continue
            else:
                ms_item.skip = True
                concat_item.skip = True
                self.logger.warn("bbs failed on item: {0}".format(
                    ms_item.file))

        # Return the output: the measurement sets that are calibrated
        # (calibrated data is placed in the MS sets)
        MultiDataMap(ms_map).save(self.inputs['mapfile'])
        # also save the concat_ms map with possible skips
        DataMap(concat_ms_map).save(self.inputs['concat_ms_map_path'])
        self.logger.info("Wrote file with  calibrated data")

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Exemple #12
0
class bbs(BaseRecipe):
    """
    The bbs recipe coordinates running BBS on a group of MeasurementSets. It
    runs both GlobalControl and KernelControl; as yet, SolverControl has not
    been integrated.

    The recipe will also run the sourcedb and parmdb recipes on each of the
    input MeasurementSets.

    **Arguments**

    A mapfile describing the data to be processed.
    """
    inputs = {
        'control_exec':
        ingredient.ExecField('--control-exec',
                             dest="control_exec",
                             help="BBS Control executable"),
        'kernel_exec':
        ingredient.ExecField('--kernel-exec',
                             dest="kernel_exec",
                             help="BBS Kernel executable"),
        'initscript':
        ingredient.FileField('--initscript',
                             dest="initscript",
                             help="Initscript to source (ie, lofarinit.sh)"),
        'parset':
        ingredient.FileField('-p',
                             '--parset',
                             dest="parset",
                             help="BBS configuration parset"),
        'key':
        ingredient.StringField('--key',
                               dest="key",
                               help="Key to identify BBS session"),
        'db_host':
        ingredient.StringField('--db-host',
                               dest="db_host",
                               help="Database host with optional port"),
        'db_user':
        ingredient.StringField('--db-user',
                               dest="db_user",
                               help="Database user"),
        'db_name':
        ingredient.StringField('--db-name',
                               dest="db_name",
                               help="Database name"),
        'makevds':
        ingredient.ExecField('--makevds', help="makevds executable"),
        'combinevds':
        ingredient.ExecField('--combinevds', help="combinevds executable"),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8),
        'makesourcedb':
        ingredient.ExecField('--makesourcedb', help="makesourcedb executable"),
        'parmdbm':
        ingredient.ExecField('--parmdbm', help="parmdbm executable"),
        'skymodel':
        ingredient.FileField('-s',
                             '--skymodel',
                             dest="skymodel",
                             help="Input sky catalogue")
    }

    def go(self):
        self.logger.info("Starting BBS run")
        super(bbs, self).go()

        #             Generate source and parameter databases for all input data
        # ----------------------------------------------------------------------
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['parmdbm']
        inputs['working_directory'] = self.config.get(
            "DEFAULT", "default_working_directory")
        inputs['mapfile'] = self.task_definitions.get('parmdb', 'mapfile')
        inputs['suffix'] = ".instrument"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('parmdb', inputs, outputs):
            self.logger.warn("parmdb reports failure")
            return 1
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['makesourcedb']
        inputs['skymodel'] = self.inputs['skymodel']
        inputs['mapfile'] = self.task_definitions.get('sourcedb', 'mapfile')
        inputs['suffix'] = ".sky"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('sourcedb', inputs, outputs):
            self.logger.warn("sourcedb reports failure")
            return 1

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for BBS")
        vds_file = os.path.join(self.config.get("layout", "job_directory"),
                                "vds", "bbs.gvds")
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = vds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(vds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("BBS GVDS is %s" % (vds_file, ))

        #      Iterate over groups of subbands divided up for convenient cluster
        #          processing -- ie, no more than nproc subbands per compute node
        # ----------------------------------------------------------------------
        for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])):
            #               to_process is a list of (host, filename, vds) tuples
            # ------------------------------------------------------------------
            hosts, ms_names, vds_files = map(list, zip(*to_process))

            #             The BBS session database should be cleared for our key
            # ------------------------------------------------------------------
            self.logger.debug("Cleaning BBS database for key %s" %
                              (self.inputs["key"]))
            with closing(
                    psycopg2.connect(
                        host=self.inputs["db_host"],
                        user=self.inputs["db_user"],
                        database=self.inputs["db_name"])) as db_connection:
                db_connection.set_isolation_level(
                    psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
                with closing(db_connection.cursor()) as db_cursor:
                    db_cursor.execute(
                        "DELETE FROM blackboard.session WHERE key=%s",
                        (self.inputs["key"], ))

            #     BBS GlobalControl requires a GVDS file describing all the data
            #          to be processed. We assemble that from the separate parts
            #                                         already available on disk.
            # ------------------------------------------------------------------
            self.logger.debug("Building VDS file describing data for BBS run")
            vds_dir = tempfile.mkdtemp(suffix=".%s" %
                                       (os.path.basename(__file__), ))
            vds_file = os.path.join(vds_dir, "bbs.gvds")
            combineproc = utilities.spawn_process([
                self.inputs['combinevds'],
                vds_file,
            ] + vds_files, self.logger)
            sout, serr = combineproc.communicate()
            log_process_output(self.inputs['combinevds'], sout, serr,
                               self.logger)
            if combineproc.returncode != 0:
                raise subprocess.CalledProcessError(combineproc.returncode,
                                                    self.inputs['combinevds'])

            #      Construct a parset for BBS GlobalControl by patching the GVDS
            #           file and database information into the supplied template
            # ------------------------------------------------------------------
            self.logger.debug("Building parset for BBS control")
            bbs_parset = utilities.patch_parset(
                self.inputs['parset'],
                {
                    'Observation': vds_file,
                    'BBDB.Key': self.inputs['key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
                    #                'BBDB.Port': self.inputs['db_name'],
                })
            self.logger.debug("BBS control parset is %s" % (bbs_parset, ))

            try:
                #        When one of our processes fails, we set the killswitch.
                #      Everything else will then come crashing down, rather than
                #                                         hanging about forever.
                # --------------------------------------------------------------
                self.killswitch = threading.Event()
                self.killswitch.clear()
                signal.signal(signal.SIGTERM, self.killswitch.set)

                #                           GlobalControl runs in its own thread
                # --------------------------------------------------------------
                run_flag = threading.Event()
                run_flag.clear()
                bbs_control = threading.Thread(target=self._run_bbs_control,
                                               args=(bbs_parset, run_flag))
                bbs_control.start()
                run_flag.wait()  # Wait for control to start before proceeding

                #      We run BBS KernelControl on each compute node by directly
                #                             invoking the node script using SSH
                #      Note that we use a job_server to send out job details and
                #           collect logging information, so we define a bunch of
                #    ComputeJobs. However, we need more control than the generic
                #     ComputeJob.dispatch method supplies, so we'll control them
                #                                          with our own threads.
                # --------------------------------------------------------------
                command = "python %s" % (self.__file__.replace(
                    'master', 'nodes'))
                env = {
                    "LOFARROOT":
                    utilities.read_initscript(
                        self.logger, self.inputs['initscript'])["LOFARROOT"],
                    "PYTHONPATH":
                    self.config.get('deploy', 'engine_ppath'),
                    "LD_LIBRARY_PATH":
                    self.config.get('deploy', 'engine_lpath')
                }
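                # env holds the LOFAR environment (from the initscript and the
                # deploy settings) that is forwarded to the node processes
                # started over SSH.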
                jobpool = {}
                bbs_kernels = []
                with job_server(self.logger, jobpool,
                                self.error) as (jobhost, jobport):
                    self.logger.debug("Job server at %s:%d" %
                                      (jobhost, jobport))
                    for job_id, details in enumerate(to_process):
                        host, file, vds = details
                        jobpool[job_id] = ComputeJob(
                            host,
                            command,
                            arguments=[
                                self.inputs['kernel_exec'],
                                self.inputs['initscript'], file,
                                self.inputs['key'], self.inputs['db_name'],
                                self.inputs['db_user'], self.inputs['db_host']
                            ])
                        bbs_kernels.append(
                            threading.Thread(target=self._run_bbs_kernel,
                                             args=(host, command, env, job_id,
                                                   jobhost, str(jobport))))
                    self.logger.info("Starting %d threads" % len(bbs_kernels))
                    for thread in bbs_kernels:
                        thread.start()
                    self.logger.debug("Waiting for all kernels to complete")
                    for thread in bbs_kernels:
                        thread.join()

                #         When GlobalControl finishes, our work here is done
                # ----------------------------------------------------------
                self.logger.info("Waiting for GlobalControl thread")
                bbs_control.join()
            finally:
                os.unlink(bbs_parset)
                shutil.rmtree(vds_dir)
                if self.killswitch.isSet():
                    #  If killswitch is set, then one of our processes failed so
                    #                                   the whole run is invalid
                    # ----------------------------------------------------------
                    return 1

        return 0

    def _run_bbs_kernel(self, host, command, env, *arguments):
        """
        Run command with arguments on the specified host using ssh. Return its
        return code.

        The resultant process is monitored for failure; see
        _monitor_process() for details.
        """
        try:
            bbs_kernel_process = run_remote_command(self.config,
                                                    self.logger,
                                                    host,
                                                    command,
                                                    env,
                                                    arguments=arguments)
        except Exception:
            self.logger.exception("BBS Kernel failed to start")
            self.killswitch.set()
            return 1
        result = self._monitor_process(bbs_kernel_process,
                                       "BBS Kernel on %s" % host)
        sout, serr = bbs_kernel_process.communicate()
        serr = serr.replace("Connection to %s closed.\r\n" % host, "")
        log_process_output("SSH session (BBS kernel)", sout, serr, self.logger)
        return result
Exemple #13
0
class datamapper(BaseRecipe):
    """
    Parses a list of filenames and attempts to map them to appropriate compute
    nodes (ie, which can access the files) on the LOFAR CEP cluster. Mapping
    by filename in this way is fragile, but is the best we can do for now.

    **Arguments**

    None.
    """
    inputs = {
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help=
            "Full path (including filename) of mapfile to produce (clobbered if exists)"
        )
    }

    outputs = {
        'mapfile':
        ingredient.FileField(
            help="Full path (including filename) of generated mapfile")
    }

    def go(self):
        self.logger.info("Starting datamapper run")
        super(datamapper, self).go()

        #      We build lists of compute-nodes per cluster and data-per-cluster,
        #          then match them up to schedule jobs in a round-robin fashion.
        # ----------------------------------------------------------------------
        clusterdesc = ClusterDesc(self.config.get('cluster', "clusterdesc"))
        if clusterdesc.subclusters:
            available_nodes = dict((cl.name, cycle(get_compute_nodes(cl)))
                                   for cl in clusterdesc.subclusters)
        else:
            available_nodes = {
                clusterdesc.name: cycle(get_compute_nodes(clusterdesc))
            }

        data = defaultdict(list)
        for filename in self.inputs['args']:
            subcluster = filename.split(os.path.sep)[2]
            try:
                host = next(available_nodes[subcluster])
            except KeyError as key:
                self.logger.error("%s is not a known cluster" % str(key))
                raise

            data[host].append(filename)
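        # Illustrative (hypothetical path): '/net/sub5/lse013/data/L1234_SB000.MS'
        # yields subcluster 'sub5', so the file is handed to the next compute
        # node cycled from that subcluster.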

        #                                 Dump the generated mapping to a parset
        # ----------------------------------------------------------------------
        parset = Parset()
        for host, filenames in data.items():
            parset.addStringVector(host, filenames)

        parset.writeFile(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']

        return 0
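
# A minimal, standalone sketch (hypothetical helper, node and file names; not
# part of the recipe) of the round-robin assignment that datamapper performs:
def _round_robin_demo():
    from itertools import cycle
    from collections import defaultdict

    nodes = cycle(["node01", "node02"])        # compute nodes of one subcluster
    files = ["/net/sub1/a.MS", "/net/sub1/b.MS", "/net/sub1/c.MS"]
    mapping = defaultdict(list)
    for filename in files:
        mapping[next(nodes)].append(filename)  # assign files round-robin
    # mapping == {'node01': ['/net/sub1/a.MS', '/net/sub1/c.MS'],
    #             'node02': ['/net/sub1/b.MS']}
    return mapping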
Exemple #14
0
class vdsreader(BaseRecipe):
    """
    Read a GVDS file and return a list of the MS filenames referenced therein
    together with selected metadata.
    
    This recipe performs its functionality on the master side of the recipe:

    1. Open the gvds file as a parameterset
    2. Convert all Part*.FileName entries to MS paths
    3. Parse start and end time and pointing information

    **No command line arguments**

    """
    inputs = {
        'gvds': ingredient.FileField(
            '-g', '--gvds',
            help="GVDS file to process"
        )
    }

    outputs = {
        'data': ingredient.ListField(help="List of MeasurementSet paths"),
        'start_time': ingredient.StringField(help="Start time of observation"),
        'end_time': ingredient.StringField(help="End time of observation"),
        'pointing': ingredient.DictField(help="Observation pointing direction")
    }

    def go(self):
        self.logger.info("Starting vdsreader run")
        super(vdsreader, self).go()

        # *********************************************************************
        # 1. Open the gvds file as a parameterset
        try:
            gvds = parameterset(self.inputs['gvds'])
        except:
            self.logger.error("Unable to read G(V)DS file")
            raise

        self.logger.info("Building list of measurementsets")

        # **********************************************************************
        # 2. Convert all Part<n>.FileName values to MS names
        ms_names = [
            gvds.getString("Part%d.FileName" % (part_no,))
            for part_no in range(gvds.getInt("NParts"))
        ]
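        # i.e. the keys "Part0.FileName" ... "Part<NParts-1>.FileName" of the
        # GVDS parameterset.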
        self.logger.debug(ms_names)

        self.outputs['data'] = ms_names

        # **********************************************************************
        # 3. parse start and end time and pointing information
        try:
            self.outputs['start_time'] = gvds.getString('StartTime')
            self.outputs['end_time'] = gvds.getString('EndTime')
        except:
            self.logger.warn("Failed to read start/end time from GVDS file")
        try:
            self.outputs['pointing'] = {
                'type': gvds.getStringVector('Extra.FieldDirectionType')[0],
                'dec': gvds.getStringVector('Extra.FieldDirectionDec')[0],
                'ra': gvds.getStringVector('Extra.FieldDirectionRa')[0]
            }
        except:
            self.logger.warn("Failed to read pointing information from GVDS file")
        return 0
Exemple #15
0
class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Prepare phase master:

    1. Validate input
    2. Create mapfiles with input for the work to be performed on the
       individual nodes, based on the structured input mapfile. The input
       mapfile contains a list of measurement sets.
       Each node computes a single subband group but needs this for all
       timeslices.
    3. Call the node scripts with the correct input
    4. Validate performance:
       only output the measurement sets whose nodes finished successfully

    **Command Line arguments:**

    The only command line argument is the path to a mapfile containing "all"
    the measurement sets needed for creating the sky images, ordered first on
    timeslice, then on subband group and finally on index in the frequency
    range.

    **Arguments:**
    """

    inputs = {
        'ndppp_exec':
        ingredient.ExecField('--ndppp-exec',
                             help="The full path to the ndppp executable"),
        'parset':
        ingredient.FileField('-p',
                             '--parset',
                             help="The full path to a prepare parset"),
        'working_directory':
        ingredient.StringField(
            '-w',
            '--working-directory',
            help="Working directory used by the nodes: local data"),
        'nthreads':
        ingredient.IntField('--nthreads',
                            default=8,
                            help="Number of threads per process"),
        'target_mapfile':
        ingredient.StringField(
            '--target-mapfile',
            help="Contains the node and path to target files, defines"
            " the number of nodes the script will start on."),
        'slices_per_image':
        ingredient.IntField(
            '--slices-per-image',
            help="The number of (time) slices for each output image"),
        'subbands_per_image':
        ingredient.IntField(
            '--subbands-per-image',
            help="The number of subbands to be collected in each output image"
        ),
        'asciistat_executable':
        ingredient.ExecField('--asciistat-executable',
                             help="full path to the ascii stat executable"),
        'statplot_executable':
        ingredient.ExecField('--statplot-executable',
                             help="The full path to the statplot executable"),
        'msselect_executable':
        ingredient.ExecField('--msselect-executable',
                             help="The full path to the msselect executable "),
        'rficonsole_executable':
        ingredient.ExecField(
            '--rficonsole-executable',
            help="The full path to the rficonsole executable "),
        'do_rficonsole':
        ingredient.BoolField(
            '--do_rficonsole',
            default=True,
            help="toggle the rficonsole step in preprocessing (default True)"),
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Full path of mapfile; contains a list of the "
            "successfully generated and concatenated sub-band groups"),
        'slices_mapfile':
        ingredient.StringField(
            '--slices-mapfile',
            help="Path to mapfile containing the produced subband groups"),
        'ms_per_image_mapfile':
        ingredient.StringField(
            '--ms-per-image-mapfile',
            help="Path to mapfile containing the ms for each produced"
            "image"),
        'processed_ms_dir':
        ingredient.StringField(
            '--processed-ms-dir',
            help="Path to directory for processed measurment sets"),
        'add_beam_tables':
        ingredient.BoolField('--add_beam_tables',
                             default=False,
                             help="Developer option, adds beamtables to ms")
    }

    outputs = {
        'mapfile':
        ingredient.FileField(
            help="path to a mapfile Which contains a list of the"
            "successfully generated and concatenated measurement set"),
        'slices_mapfile':
        ingredient.FileField(
            help="Path to mapfile containing the produced subband groups"),
        'ms_per_image_mapfile':
        ingredient.FileField(
            help="Path to mapfile containing the used ms for each produced"
            "image")
    }

    def go(self):
        """
        Entry point for recipe: Called by the pipeline framework
        """
        super(imager_prepare, self).go()
        self.logger.info("Starting imager_prepare run")
        job_directory = self.config.get("layout", "job_directory")
        # *********************************************************************
        # input data
        input_map = DataMap.load(self.inputs['args'][0])
        output_map = DataMap.load(self.inputs['target_mapfile'])
        slices_per_image = self.inputs['slices_per_image']
        subbands_per_image = self.inputs['subbands_per_image']
        # Validate input
        if not self._validate_input_map(input_map, output_map,
                                        slices_per_image, subbands_per_image):
            return 1

        # outputs
        output_ms_mapfile_path = self.inputs['mapfile']

        # *********************************************************************
        # schedule the actual work
        # TODO: Refactor this function into: load data, perform work,
        # create output
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))

        jobs = []
        paths_to_image_mapfiles = []
        n_subband_groups = len(output_map)  # needed for subsets in sb list

        globalfs = self.config.has_option(
            "remote", "globalfs") and self.config.getboolean(
                "remote", "globalfs")

        for idx_sb_group, item in enumerate(output_map):
            #create the input files for this node
            self.logger.debug("Creating input data subset for processing"
                              "on: {0}".format(item.host))
            inputs_for_image_map = \
                self._create_input_map_for_sbgroup(
                                slices_per_image, n_subband_groups,
                                subbands_per_image, idx_sb_group, input_map)

            # Save the mapfile
            inputs_for_image_mapfile_path = os.path.join(
                job_directory, "mapfiles",
                "ms_per_image_{0}.map".format(idx_sb_group))

            self._store_data_map(inputs_for_image_mapfile_path,
                                 inputs_for_image_map, "inputmap for location")

            # skip the current step if skip is set; we cannot use the skip
            # iterator because of the enumerate: there is a dependency on the
            # index in the map
            if item.skip:
                # assure that the mapfile is correct
                paths_to_image_mapfiles.append(tuple([item.host, [], True]))
                continue

            # save the (input) ms as a list of mapfiles
            paths_to_image_mapfiles.append(
                tuple([item.host, inputs_for_image_mapfile_path, False]))

            # use unique working directories per job, to prevent interference between jobs on a global fs
            working_dir = os.path.join(
                self.inputs['working_directory'],
                "imager_prepare_{0}".format(idx_sb_group))

            arguments = [
                self.environment, self.inputs['parset'], working_dir,
                self.inputs['processed_ms_dir'], self.inputs['ndppp_exec'],
                item.file, slices_per_image, subbands_per_image,
                inputs_for_image_mapfile_path,
                self.inputs['asciistat_executable'],
                self.inputs['statplot_executable'],
                self.inputs['msselect_executable'],
                self.inputs['rficonsole_executable'],
                self.inputs['do_rficonsole'], self.inputs['add_beam_tables'],
                globalfs
            ]

            jobs.append(
                ComputeJob(item.host,
                           node_command,
                           arguments,
                           resources={"cores": self.inputs['nthreads']}))

        # Hand over the job(s) to the pipeline scheduler
        self._schedule_jobs(jobs)

        # *********************************************************************
        # validate the output, cleanup, return output
        if self.error.isSet():  #if one of the nodes failed
            self.logger.warn("Failed prepare_imager run detected: Generating "
                             "new output_ms_mapfile_path without failed runs:"
                             " {0}".format(output_ms_mapfile_path))

        concat_ms = copy.deepcopy(output_map)
        slices = []
        finished_runs = 0
        # scan the return dict for the completed key
        # loop over the potential jobs including the skipped
        # If we have a skipped item, add the item to the slices with skip set
        jobs_idx = 0
        for item in concat_ms:
            # If this is an item that is skipped via the skip parameter in
            # the parset, append a skipped
            if item.skip:
                slices.append(tuple([item.host, [], True]))
                continue

            # we cannot use the skip iterator so we need to manually get the
            # current job from the list
            job = jobs[jobs_idx]

            # only save the slices if the node has completed successfully
            if job.results["returncode"] == 0:
                finished_runs += 1
                slices.append(
                    tuple([item.host, job.results["time_slices"], False]))
            else:
                # Set the dataproduct to skipped!!
                item.skip = True
                slices.append(tuple([item.host, [], True]))
                msg = "Failed run on {0}. NOT Created: {1} ".format(
                    item.host, item.file)
                self.logger.warn(msg)

            # we have a non skipped workitem, increase the job idx
            jobs_idx += 1

        if finished_runs == 0:
            self.logger.error(
                "None of the started compute node finished:"
                "The current recipe produced no output, aborting")
            return 1

        # Write the output mapfiles:
        # concat.ms paths:
        self._store_data_map(output_ms_mapfile_path, concat_ms,
                             "mapfile with concat.ms")

        # timeslices
        MultiDataMap(slices).save(self.inputs['slices_mapfile'])
        self.logger.info(
            "Wrote MultiMapfile with produces timeslice: {0}".format(
                self.inputs['slices_mapfile']))

        # map with the actual input MSs
        self._store_data_map(self.inputs["ms_per_image_mapfile"],
                             DataMap(paths_to_image_mapfiles),
                             "mapfile containing (used) input ms per image:")

        # Set the return values
        self.outputs['mapfile'] = output_ms_mapfile_path
        self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
        self.outputs['ms_per_image_mapfile'] = \
            self.inputs["ms_per_image_mapfile"]
        return 0

    def _create_input_map_for_sbgroup(self, slices_per_image, n_subband_groups,
                                      subbands_per_image, idx_sb_group,
                                      input_mapfile):
        """
        Creates an input mapfile:
        This is a subset of the complete input_mapfile based on the subband
        details supplied. The input_mapfile is structured as first all subbands
        for a complete timeslice and then the next timeslice. The result
        contains all the information needed for a single subband group to be
        computed on a single compute node
        """
        inputs_for_image = []
        # collect the inputs: first step over the time slices
        for idx_slice in range(slices_per_image):
            # calculate the first line for current time slice and subband group
            line_idx_start = idx_slice * \
                (n_subband_groups * subbands_per_image) + \
                (idx_sb_group * subbands_per_image)
            line_idx_end = line_idx_start + subbands_per_image

            #extend inputs with the files for the current time slice
            inputs_for_image.extend(input_mapfile[line_idx_start:line_idx_end])

        return DataMap(inputs_for_image)
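        # Worked example (hypothetical numbers): with slices_per_image=2,
        # n_subband_groups=3, subbands_per_image=10 and idx_sb_group=1, the
        # loop selects input_mapfile[10:20] and input_mapfile[40:50], i.e. the
        # second subband group of every time slice.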

    def _validate_input_map(self, input_map, output_map, slices_per_image,
                            subbands_per_image):
        """
        Return False if the inputs supplied are incorrect:
        the number of inputs and outputs does not match.
        Return True if correct.
        The number of inputs is correct iff
        len(input_map) ==
        len(output_map) * slices_per_image * subbands_per_image
        """
        # The output_map contains a number of path/node pairs, the final
        # dataproducts of the prepare phase. The 'input' for each of these pairs
        # is a number of measurement sets: The number of time slices times
        # the number of subbands collected into each of these time slices.
        # The total length of the input map should match this.
        if len(input_map) != len(output_map) * \
                                   (slices_per_image * subbands_per_image):
            self.logger.error(
                "Incorrect number of input ms for supplied parameters:\n\t"
                "len(input_map) = {0}\n\t"
                "len(output_map) * slices_per_image * subbands_per_image = "
                "{1} * {2} * {3} = {4}".format(
                    len(input_map), len(output_map), slices_per_image,
                    subbands_per_image,
                    len(output_map) * slices_per_image * subbands_per_image))
            return False

        return True
Exemple #16
0
class new_bbs(BaseRecipe):
    """
    **This bbs recipe still uses the old-style BBS with global control**
    **New versions will have stand alone capability**

    The bbs recipe coordinates running BBS on a group of MeasurementSets. It
    runs both GlobalControl and KernelControl; as yet, SolverControl has not
    been integrated.

    **Arguments**

    A mapfile describing the data to be processed.
    """
    inputs = {
        'control_exec':
        ingredient.ExecField('--control-exec',
                             dest="control_exec",
                             help="BBS Control executable"),
        'kernel_exec':
        ingredient.ExecField('--kernel-exec',
                             dest="kernel_exec",
                             help="BBS Kernel executable"),
        'parset':
        ingredient.FileField('-p',
                             '--parset',
                             dest="parset",
                             help="BBS configuration parset"),
        'db_key':
        ingredient.StringField('--db-key',
                               dest="db_key",
                               help="Key to identify BBS session"),
        'db_host':
        ingredient.StringField(
            '--db-host',
            dest="db_host",
            help="Database host with optional port (e.g. ldb001:5432)"),
        'db_user':
        ingredient.StringField('--db-user',
                               dest="db_user",
                               help="Database user"),
        'db_name':
        ingredient.StringField('--db-name',
                               dest="db_name",
                               help="Database name"),
        'instrument_mapfile':
        ingredient.FileField(
            '--instrument-mapfile',
            help="Full path to the mapfile containing the names of the "
            "instrument model files generated by the `parmdb` recipe"),
        'sky_mapfile':
        ingredient.FileField(
            '--sky-mapfile',
            help="Full path to the mapfile containing the names of the "
            "sky model files generated by the `sourcedb` recipe"),
        'data_mapfile':
        ingredient.StringField(
            '--data-mapfile',
            help="Full path to the mapfile containing the names of the "
            "data files that were processed by BBS (clobbered if exists)"),
        'gvds':
        ingredient.StringField('-g',
                               '--gvds',
                               help="Path for output GVDS file")
    }
    outputs = {
        'mapfile':
        ingredient.FileField(
            help="Full path to a mapfile describing the processed data")
    }

    def __init__(self):
        super(new_bbs, self).__init__()
        self.bbs_map = list()
        self.parset = parameterset()
        self.killswitch = threading.Event()

    def _set_input(self, in_key, ps_key):
        """
        Set the input-key `in_key` to the value of `ps_key` in the parset, if
        that is defined.
        """
        try:
            self.inputs[in_key] = self.parset.getString(ps_key)
        except RuntimeError as exceptionobject:
            self.logger.warn(str(exceptionobject))

    def _make_bbs_map(self):
        """
        This method bundles the contents of three different map-files.
        All three map-files contain a list of tuples of hostname and filename.
        The contents of these files are related by index in the list. They
        form triplets of MS-file, its associated instrument model and its
        associated sky model.

        The data structure `self.bbs_map` is a list of tuples, where each
        tuple is a pair of hostname and the aforementioned triplet.

        For example:
        bbs_map[0] = ('locus001',
            ('/data/L29697/L29697_SAP000_SB000_uv.MS',
            '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.instrument',
            '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.sky')
        )

        Returns `False` if validation of the three map-files fails, otherwise
        returns `True`.
        """
        self.logger.debug(
            "Creating BBS map-file using: %s, %s, %s" %
            (self.inputs['args'][0], self.inputs['instrument_mapfile'],
             self.inputs['sky_mapfile']))
        data_map = load_data_map(self.inputs['args'][0])
        instrument_map = load_data_map(self.inputs['instrument_mapfile'])
        sky_map = load_data_map(self.inputs['sky_mapfile'])

        if not validate_data_maps(data_map, instrument_map, sky_map):
            self.logger.error("Validation of input data mapfiles failed")
            return False

        # Store data mapfile containing list of files to be processed by BBS.
        store_data_map(self.inputs['data_mapfile'], data_map)

        self.bbs_map = [
            (dat[0], (dat[1], ins[1], sky[1]))
            for dat, ins, sky in zip(data_map, instrument_map, sky_map)
        ]

        return True

    def go(self):
        self.logger.info("Starting BBS run")
        super(new_bbs, self).go()

        #                Check for relevant input parameters in the parset-file
        # ---------------------------------------------------------------------
        self.logger.debug("Reading parset from %s" % self.inputs['parset'])
        self.parset = parameterset(self.inputs['parset'])

        self._set_input('db_host', 'BBDB.Host')
        self._set_input('db_user', 'BBDB.User')
        self._set_input('db_name', 'BBDB.Name')
        self._set_input('db_key', 'BBDB.Key')
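        # (These parset values, when present, override the corresponding
        #  command-line inputs.)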

        #self.logger.debug("self.inputs = %s" % self.inputs)

        #                                         Clean the blackboard database
        # ---------------------------------------------------------------------
        self.logger.info("Cleaning BBS database for key '%s'" %
                         (self.inputs['db_key']))
        command = [
            "psql", "-h", self.inputs['db_host'], "-U", self.inputs['db_user'],
            "-d", self.inputs['db_name'], "-c",
            "DELETE FROM blackboard.session WHERE key='%s';" %
            self.inputs['db_key']
        ]
        self.logger.debug(command)
        if subprocess.call(command) != 0:
            self.logger.warning("Failed to clean BBS database for key '%s'" %
                                self.inputs['db_key'])

        #                  Create a bbs_map describing the file mapping on disk
        # ---------------------------------------------------------------------
        if not self._make_bbs_map():
            return 1

        # Produce a GVDS file, describing the data that must be processed.
        gvds_file = self.run_task("vdsmaker",
                                  self.inputs['data_mapfile'],
                                  gvds=self.inputs['gvds'])['gvds']

        #      Construct a parset for BBS GlobalControl by patching the GVDS
        #           file and database information into the supplied template
        # ------------------------------------------------------------------
        self.logger.debug("Building parset for BBS control")
        # Create a location for parsets
        job_directory = self.config.get("layout", "job_directory")
        parset_directory = os.path.join(job_directory, "parsets")
        create_directory(parset_directory)

        # Patch the parset, copy the result to the target location and remove the tempfile
        try:
            bbs_parset = utilities.patch_parset(
                self.parset,
                {
                    'Observation': gvds_file,
                    'BBDB.Key': self.inputs['db_key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
                    #'BBDB.Port': self.inputs['db_name'],
                })
            bbs_parset_path = os.path.join(parset_directory,
                                           "bbs_control.parset")
            shutil.copyfile(bbs_parset, bbs_parset_path)
            self.logger.debug("BBS control parset is %s" % (bbs_parset_path, ))

        finally:
            # Always remove the file in the tempdir
            os.remove(bbs_parset)

        try:
            #        When one of our processes fails, we set the killswitch.
            #      Everything else will then come crashing down, rather than
            #                                         hanging about forever.
            # --------------------------------------------------------------
            self.killswitch = threading.Event()
            self.killswitch.clear()
            signal.signal(signal.SIGTERM, self.killswitch.set)

            #                           GlobalControl runs in its own thread
            # --------------------------------------------------------------
            run_flag = threading.Event()
            run_flag.clear()
            bbs_control = threading.Thread(target=self._run_bbs_control,
                                           args=(bbs_parset_path, run_flag))
            bbs_control.start()
            run_flag.wait()  # Wait for control to start before proceeding

            #      We run BBS KernelControl on each compute node by directly
            #                             invoking the node script using SSH
            #      Note that we use a job_server to send out job details and
            #           collect logging information, so we define a bunch of
            #    ComputeJobs. However, we need more control than the generic
            #     ComputeJob.dispatch method supplies, so we'll control them
            #                                          with our own threads.
            # --------------------------------------------------------------
            command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
            jobpool = {}
            bbs_kernels = []
            with job_server(self.logger, jobpool,
                            self.error) as (jobhost, jobport):
                self.logger.debug("Job server at %s:%d" % (jobhost, jobport))
                for job_id, details in enumerate(self.bbs_map):
                    host, files = details
                    jobpool[job_id] = ComputeJob(
                        host,
                        command,
                        arguments=[
                            self.inputs['kernel_exec'], files,
                            self.inputs['db_key'], self.inputs['db_name'],
                            self.inputs['db_user'], self.inputs['db_host']
                        ])
                    bbs_kernels.append(
                        threading.Thread(target=self._run_bbs_kernel,
                                         args=(host, command, job_id, jobhost,
                                               str(jobport))))
                self.logger.info("Starting %d threads" % len(bbs_kernels))
                for thread in bbs_kernels:
                    thread.start()
                self.logger.debug("Waiting for all kernels to complete")
                for thread in bbs_kernels:
                    thread.join()

            #         When GlobalControl finishes, our work here is done
            # ----------------------------------------------------------
            self.logger.info("Waiting for GlobalControl thread")
            bbs_control.join()
        finally:
            os.unlink(bbs_parset_path)

        if self.killswitch.isSet():
            #  If killswitch is set, then one of our processes failed so
            #                                   the whole run is invalid
            # ----------------------------------------------------------
            return 1

        self.outputs['mapfile'] = self.inputs['data_mapfile']
        return 0

    def _run_bbs_kernel(self, host, command, *arguments):
        """
        Run command with arguments on the specified host using ssh. Return its
        return code.

        The resultant process is monitored for failure; see
        _monitor_process() for details.
        """
        try:
            bbs_kernel_process = run_remote_command(self.config,
                                                    self.logger,
                                                    host,
                                                    command,
                                                    self.environment,
                                                    arguments=arguments)
        except OSError:
            self.logger.exception("BBS Kernel failed to start")
            self.killswitch.set()
            return 1
        result = self._monitor_process(bbs_kernel_process,
                                       "BBS Kernel on %s" % host)
        sout, serr = communicate_returning_strings(bbs_kernel_process)
        serr = serr.replace("Connection to %s closed.\r\n" % host, "")
        log_process_output("SSH session (BBS kernel)", sout, serr, self.logger)
        return result

    def _run_bbs_control(self, bbs_parset, run_flag):
        """
        Run BBS Global Control and wait for it to finish. Return its return
        code.
        """
        self.logger.info("Running BBS GlobalControl")
        working_dir = tempfile.mkdtemp(suffix=".%s" %
                                       (os.path.basename(__file__), ))
        with CatchLog4CPlus(working_dir, self.logger.name + ".GlobalControl",
                            os.path.basename(self.inputs['control_exec'])):
            with utilities.log_time(self.logger):
                try:
                    bbs_control_process = utilities.spawn_process(
                        [self.inputs['control_exec'], bbs_parset, "0"],
                        self.logger,
                        cwd=working_dir,
                        env=self.environment)
                    # _monitor_process() needs a convenient kill() method.
                    bbs_control_process.kill = lambda: os.kill(
                        bbs_control_process.pid, signal.SIGKILL)
                except OSError as e:
                    self.logger.error("Failed to spawn BBS Control (%s)" %
                                      str(e))
                    self.killswitch.set()
                    return 1
                finally:
                    run_flag.set()

            returncode = self._monitor_process(bbs_control_process,
                                               "BBS Control")
            sout, serr = communicate_returning_strings(bbs_control_process)
        shutil.rmtree(working_dir)
        log_process_output(self.inputs['control_exec'], sout, serr,
                           self.logger)
        return returncode

    def _monitor_process(self, process, name="Monitored process"):
        """
        Monitor a process for successful exit. If it fails, set the kill
        switch, so everything else gets killed too. If the kill switch is set,
        then kill this process off.

        Name is an optional parameter used only for identification in logs.
        """
        while True:
            try:
                returncode = process.poll()
                # Process still running
                if returncode is None:
                    time.sleep(1)

                # Process broke!
                elif returncode != 0:
                    self.logger.warn("%s returned code %d; aborting run" %
                                     (name, returncode))
                    self.killswitch.set()
                    break

                # Process exited cleanly
                else:
                    self.logger.info("%s clean shutdown" % (name))
                    break

                # Other process failed; abort
                if self.killswitch.isSet():
                    self.logger.warn("Killing %s" % (name))
                    process.kill()
                    returncode = process.wait()
                    break

            # Catch all exceptions: we need to take down all processes whatever
            # is thrown
            except:
                # An exception here is likely a ctrl-c or similar. Whatever it
                # is, we bail out.
                self.killswitch.set()
        return returncode
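The kill-switch pattern above (one shared threading.Event; the first failing process aborts every other monitored process) can be reduced to the following self-contained sketch. The commands and names below are illustrative stand-ins for the node jobs, not part of the recipe, and assume a Unix-like system.

import subprocess
import threading
import time

killswitch = threading.Event()

def monitor(process):
    # Poll until the process exits; a non-zero exit sets the shared killswitch,
    # and a set killswitch takes this process down as well.
    while True:
        returncode = process.poll()
        if returncode is None:              # still running
            if killswitch.is_set():         # another process failed
                process.kill()
                process.wait()
                return
            time.sleep(1)
        elif returncode != 0:               # this process failed
            killswitch.set()
            return
        else:                               # clean shutdown
            return

if __name__ == "__main__":
    # 'sleep' and 'false' stand in for long-running remote jobs (illustrative).
    procs = [subprocess.Popen(["sleep", "10"]), subprocess.Popen(["false"])]
    threads = [threading.Thread(target=monitor, args=(p,)) for p in procs]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    print("killswitch set:", killswitch.is_set())   # True: 'false' failed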
class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Responsible for creating a number of databases needed by the imaging
    pipeline:

    1. Using the pointing extracted from the input measurement set, a database
       of sources is created based on information in the global sky model
       (GSM). One source db is created for each image/node:

       a. The pointing is supplied to the GSM database, resulting in a
          sourcelist
       b. This sourcelist is converted into a source db
       c. Possible additional sourcelists from external sources are added to
          this source list
    2. For each of the timeslices in the image a parmdb is created. Each
       timeslice is recorded at a different time and therefore needs its own
       calibration and instrument parameters.
    """

    inputs = {
        'working_directory': ingredient.StringField(
            '-w', '--working-directory',
            help="Working directory used on nodes. Results location"
        ),
        'sourcedb_suffix': ingredient.StringField(
            '--sourcedb-suffix',
            default=".sky",
            help="suffix for created sourcedbs"
        ),
        'monetdb_hostname': ingredient.StringField(
            '--monetdb-hostname',
            help="Hostname of monet database"
        ),
        'monetdb_port': ingredient.IntField(
            '--monetdb-port',
            help="port for monet database"
        ),
        'monetdb_name': ingredient.StringField(
            '--monetdb-name',
            help="db name of monet database"
        ),
        'monetdb_user': ingredient.StringField(
            '--monetdb-user',
            help="user on the monet database"
        ),
        'monetdb_password': ingredient.StringField(
            '--monetdb-password',
            help="password on monet database"
        ),
        'assoc_theta': ingredient.StringField(
            '--assoc-theta',
            default="",
            help="assoc_theta is used in creating the skymodel, default == None"
        ),
        'parmdb_executable': ingredient.ExecField(
            '--parmdbm-executable',
            help="Location of the parmdb executable"
        ),
        'slice_paths_mapfile': ingredient.FileField(
            '--slice-paths-mapfile',
            help="Location of the mapfile containing the slice paths"
        ),
        'parmdb_suffix': ingredient.StringField(
            '--parmdb-suffix',
            help="suffix of the to be created paramdbs"
        ),
        'makesourcedb_path': ingredient.ExecField(
             '--makesourcedb-path',
             help="Path to makesourcedb executable."
        ),
        'source_list_path': ingredient.StringField(
             '--source-list-path',
             help="Path to sourcelist from external source (eg. bdsm) "\
             "use an empty string for gsm generated data"
        ),
        'parmdbs_map_path': ingredient.StringField(
            '--parmdbs-map-path',
            help="path to mapfile containing produced parmdb files"
        ),
        'sourcedb_map_path': ingredient.StringField(
            '--sourcedb-map-path',
            help="path to mapfile containing produced sourcedb files"
        ),
    }

    outputs = {
        'sourcedb_map_path':
        ingredient.FileField(
            help="On succes contains path to mapfile containing produced "
            "sourcedb files"),
        'parmdbs_map_path':
        ingredient.FileField(
            help="On succes contains path to mapfile containing produced"
            "parmdb files")
    }

    def __init__(self):
        super(imager_create_dbs, self).__init__()

    def go(self):
        super(imager_create_dbs, self).go()

        # get assoc_theta, convert from empty string if needed
        assoc_theta = self.inputs["assoc_theta"]
        if assoc_theta == "":
            assoc_theta = None

        # Load mapfile data from files
        self.logger.error(self.inputs["slice_paths_mapfile"])
        slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"])
        input_map = DataMap.load(self.inputs['args'][0])

        if self._validate_input_data(input_map, slice_paths_map):
            return 1

        # Run the nodes with now collected inputs
        jobs, output_map = self._run_create_dbs_node(input_map,
                                                     slice_paths_map,
                                                     assoc_theta)

        # Collect the output of the node scripts and write it to (map) files
        return self._collect_and_assign_outputs(jobs, output_map,
                                                slice_paths_map)

    def _validate_input_data(self, slice_paths_map, input_map):
        """
        Performs a validation of the supplied slice_paths_map and inputmap.
        Displays error message if this fails
        """
        validation_failed = None
        error_received = None
        try:
            validation_failed = not validate_data_maps(slice_paths_map,
                                                       input_map)
        except AssertionError as exception:
            validation_failed = True
            error_received = str(exception)

        if validation_failed:
            self.logger.error(error_received)
            self.logger.error("Incorrect mapfiles: {0} and {1}".format(
                self.inputs["slice_paths_mapfile"], self.inputs['args'][0]))
            self.logger.error("content input_map: \n{0}".format(input_map))
            self.logger.error(
                "content slice_paths_map: \n{0}".format(slice_paths_map))
            # return with failure
            return 1

        # return with zero (all is ok state)
        return 0
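_validate_input_data() relies on validate_data_maps() to check that the two mapfiles line up. A rough stand-alone illustration of that kind of alignment check is sketched below; it assumes "aligned" means equal length with matching hosts per entry, which is an assumption about the helper's behaviour, not the LOFAR implementation.

def maps_are_aligned(map_a, map_b):
    # Same number of entries, and the same host at every position.
    if len(map_a) != len(map_b):
        return False
    return all(host_a == host_b
               for (host_a, _), (host_b, _) in zip(map_a, map_b))

input_map = [("node1", "obs_part0.MS"), ("node2", "obs_part1.MS")]
slice_paths_map = [("node1", ["t0.MS", "t1.MS"]), ("node2", ["t0.MS"])]
print(maps_are_aligned(input_map, slice_paths_map))       # True
print(maps_are_aligned(input_map, slice_paths_map[:1]))   # False: lengths differ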
Example #18
class gainoutliercorrection(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Recipe to correct outliers in the gain solutions of a parmdb, using the
    program `parmexportcal`. The main purpose of this program is to strip off
    the time axis information from an instrument model (a.k.a. ParmDB)
    -or-
    a minimal implementation of the edit_parmdb program: search all gains for
    outliers and swap these for the median.

    1. Validate input
    2. Load mapfiles, validate if a target output location is provided
    3. Call node side of the recipe
    4. Validate performance, return corrected files

    **Command line arguments**

    1. A mapfile describing the data to be processed.
    2. A mapfile with target location <mapfiles are validated if present>
    """
    inputs = {
        'executable':
        ingredient.StringField(
            '--executable',
            default="",
            help="Full path to the `parmexportcal` executable, not settings this"
            " results in edit_parmdb behaviour"),
        'suffix':
        ingredient.StringField(
            '--suffix',
            help="Suffix of the table name of the instrument model",
            default=".instrument"),
        'working_directory':
        ingredient.StringField('-w',
                               '--working-directory',
                               help="Working directory used on output nodes. "
                               "Results will be written here."),
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Full path of mapfile to produce; it will contain "
            "a list of the generated instrument-model files"),
        'sigma':
        ingredient.FloatField(
            '--sigma',
            default=1.0,
            help="Clip at sigma * median: (not used by parmexportcal"),
        'export_instrument_model':
        ingredient.BoolField(
            '--use-parmexportcal',
            default=False,
            help="Select between parmexportcal and edit parmdb")
    }

    outputs = {
        'mapfile': ingredient.FileField(help="mapfile with corrected parmdbs")
    }

    def go(self):
        super(gainoutliercorrection, self).go()
        self.logger.info("Starting gainoutliercorrection run")
        # ********************************************************************
        # 1. Validate input
        # If no executable is supplied, fall back to edit_parmdb behaviour;
        # otherwise test if it exists
        executable = self.inputs['executable']
        if executable == "":
            pass
        elif not os.access(executable, os.X_OK):
            self.logger.warn(
                "No parmexportcal excecutable is not found on the suplied"
                "path: {0}".format(self.inputs['executable']))
            self.logger.warn("Defaulting to edit_parmdb behaviour")

        # ********************************************************************
        # 2. load mapfiles, validate if a target output location is provided
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'], self.inputs['job_name'],
                    (os.path.splitext(os.path.basename(item.file))[0] +
                     self.inputs['suffix']))

        # Update the skip fields of the two maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for x, y in zip(indata, outdata):
            x.skip = y.skip = (x.skip or y.skip)

        # ********************************************************************
        # 3. Call node side of the recipe
        command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
        indata.iterator = outdata.iterator = DataMap.SkipIterator
        jobs = []
        for inp, outp in zip(indata, outdata):
            jobs.append(
                ComputeJob(outp.host,
                           command,
                           arguments=[
                               inp.file, outp.file, self.inputs['executable'],
                               self.environment, self.inputs['sigma'],
                               self.inputs['export_instrument_model']
                           ]))
        self._schedule_jobs(jobs)
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True

        # ********************************************************************
        # 4. validate performance, return corrected files
        if self.error.isSet():
            self.logger.warn("Detected failed gainoutliercorrection job")
            return 1
        else:
            self.logger.debug("Writing instrument map file: %s" %
                              self.inputs['mapfile'])
            outdata.save(self.inputs['mapfile'])
            self.outputs['mapfile'] = self.inputs['mapfile']
            return 0
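The skip-field synchronisation and skip-aware scheduling used in steps 2 and 3 above boil down to the idiom below. The Item class is a minimal stand-in for DataMap entries, not the LOFAR API.

class Item(object):
    def __init__(self, host, file, skip=False):
        self.host, self.file, self.skip = host, file, skip

indata = [Item("node1", "a.MS"), Item("node2", "b.MS", skip=True)]
outdata = [Item("node1", "a.instrument"), Item("node2", "b.instrument")]

# If 'skip' is True in either map, it must be True in both.
for x, y in zip(indata, outdata):
    x.skip = y.skip = (x.skip or y.skip)

# Only non-skipped entries are turned into jobs.
jobs = [(outp.host, inp.file, outp.file)
        for inp, outp in zip(indata, outdata) if not inp.skip]
print(jobs)   # [('node1', 'a.MS', 'a.instrument')]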
Example #19
class cep2_datamapper(BaseRecipe):
    """
    Search for a set of MS files on all the CEP-II cluster nodes and generate
    a mapfile suitable for further processing.

    **Arguments**

    observation_dir: full path to the directory to search for MS files.
    mapfile: name of the mapfile to produce.
    """
    inputs = {
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Full path (including filename) of mapfile to produce "
            "(clobbered if exists)"),
        'observation_dir':
        ingredient.StringField(
            '--observation-dir',
            help="Full path to the directory to search for MS files "
            "(deprecated)",
            default=""),
        'observation_sap':
        ingredient.IntField('--observation-sap',
                            help="Sub-Array Pointing (deprecated)",
                            default=0),
        'parset':
        ingredient.StringField(
            '--parset',
            help="Full path to the parset-file provided by MAC/SAS",
            default="")
    }

    outputs = {
        'mapfile':
        ingredient.FileField(
            help="Full path (including filename) of generated mapfile")
    }

    def _read_files(self):
        """Read data file locations from parset-file"""
        self.logger.debug("Reading data file locations from parset-file: %s" %
                          self.inputs['parset'])
        parset = parameterset(self.inputs['parset'])
        dps = parset.makeSubset(parset.fullModuleName('DataProducts') + '.')
        return [
            tuple(os.path.join(location, filename).split(':'))
            for location, filename in zip(
                dps.getStringVector('Input_Correlated.locations'),
                dps.getStringVector('Input_Correlated.filenames'))
        ]

    def _search_files(self):
        """
        Search for the data-files. The value of `self.inputs['job_name']` is
        used to compose the glob search pattern. It is split into parts
        separated by '_'. The first part should (in principle) be identical to
        the MAC/SAS observation ID (e.g., L29066). The second (optional) part
        specifies the sub-array pointing (e.g., 1); it defaults to 0.
        """
        job_name_parts = self.inputs['job_name'].split('_')
        job = job_name_parts[0]
        sap = 0
        try:
            errmsg = (
                "Job-name part indicating sub-array-pointing index is %s, "
                "defaulting to 0")
            sap = int(job_name_parts[1])
        except IndexError:
            self.logger.debug(errmsg % "missing")
        except ValueError:
            self.logger.warn(errmsg % "non-numeric")
        ms_pattern = os.path.join(
            self.inputs['observation_dir'],
            '%s_SAP%03d_SB???_uv.MS{,.dppp}' % (job, sap))
        self.logger.debug("Searching for data files: %s" % ms_pattern)
        data = findFiles(ms_pattern, '-1d')
        return list(zip(data[0], data[1]))

    def go(self):
        self.logger.info("Starting CEP-II datamapper run")
        super(cep2_datamapper, self).go()

        if self.inputs['parset']:
            datamap = self._read_files()
        elif self.inputs['observation_dir']:
            datamap = self._search_files()
        else:
            self.logger.error("Either observation_dir or parset must be given")
            return 1

        self.logger.info("Found %i datasets to process." % len(datamap))
        self.logger.debug("datamap = %s" % datamap)

        # Write datamap-file
        create_directory(os.path.dirname(self.inputs['mapfile']))
        store_data_map(self.inputs['mapfile'], datamap)
        self.logger.debug("Wrote mapfile: %s" % self.inputs['mapfile'])

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
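The job-name handling described in _search_files() reduces to the parsing below; split_job_name is a hypothetical helper written for illustration, and the /data directory is an assumed observation_dir.

import os

def split_job_name(job_name):
    parts = job_name.split('_')
    obs_id = parts[0]                  # e.g. "L29066"
    try:
        sap = int(parts[1])            # optional sub-array pointing
    except (IndexError, ValueError):
        sap = 0                        # missing or non-numeric: default to 0
    return obs_id, sap

obs_id, sap = split_job_name("L29066_1")
pattern = os.path.join("/data", '%s_SAP%03d_SB???_uv.MS' % (obs_id, sap))
print(pattern)   # /data/L29066_SAP001_SB???_uv.MS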
class setupsourcedb(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Create a distributed Sky Model database (SourceDB) for a distributed
    Measurement Set (MS).

    1. Load input and output mapfiles. Validate 
    2. Check if input skymodel file exists. If not, make filename empty.
    3. Call node side of recipe
    4. Validate performance and create output

    **Command line arguments**

    1. A mapfile describing the input data to be processed. 
    2. A mapfile with target location <if provided it will be validated against
       the input data>
    """
    inputs = {
        'executable': ingredient.ExecField(
            '--executable',
            help="Full path to makesourcedb executable",
        ),
        'skymodel': ingredient.FileField(
            '-s', '--skymodel',
            help="Input sky catalogue",
            optional=True
        ),
        'type': ingredient.StringField(
            '--type',
            help="Output type (casa or blob)",
            default="casa"
        ),
        'mapfile': ingredient.StringField(
            '--mapfile',
            help="Full path of mapfile to produce; it will contain "
                 "a list of the generated sky-model files"
        ),
        'nproc': ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8
        ),
        'suffix': ingredient.StringField(
            '--suffix',
            help="Suffix of the table name of the sky model",
            default=".sky"
        ),
        'working_directory': ingredient.StringField(
            '-w', '--working-directory',
            help="Working directory used on output nodes. "
                 "Results will be written here."
        )
    }

    outputs = {
        'mapfile': ingredient.FileField(
            help="mapfile with created sourcedb paths"
        )
    }


    def go(self):
        self.logger.info("Starting setupsourcedb run")
        super(setupsourcedb, self).go()

        # *********************************************************************
        # 1. Load input and output mapfiles. Validate

        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed"
                )
                return 1
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    os.path.basename(item.file) + self.inputs['suffix']
                )

        # *********************************************************************
        # 2. Check if input skymodel file exists. If not, make filename empty.
        try:
            skymodel = self.inputs['skymodel']
        except KeyError:
            skymodel = ""
            self.logger.info("No skymodel specified. Using an empty one")

        # ********************************************************************
        # 3. Call node side of script
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        outdata.iterator = DataMap.SkipIterator
        jobs = []
        for outp in outdata:
            jobs.append(
                ComputeJob(
                    outp.host,
                    command,
                    arguments=[
                        self.inputs['executable'],
                        skymodel,
                        outp.file,
                        self.inputs['type']
                    ]
                )
            )
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True

        # *********************************************************************
        # 4. Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        self.logger.debug("Writing sky map file: %s" % self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
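The partial-failure policy in step 4 (abort only when every job failed, otherwise continue with the successful ones) amounts to the check below; the returncodes list is illustrative.

returncodes = [0, 1, 0]   # one node failed

if all(rc != 0 for rc in returncodes):
    print("All jobs failed. Bailing out!")                     # whole run invalid
elif any(rc != 0 for rc in returncodes):
    print("Some jobs failed, continuing with succeeded runs")  # partial success
else:
    print("All jobs succeeded")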
Example #21
class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Responsible for creating a number of databases needed by the imaging
    pipeline:

    1. Using the pointing extracted from the input measurement set, a database
       of sources is created based on information in the global sky model
       (GSM). One source db is created for each image/node:

       a. The pointing is supplied to the GSM database, resulting in a
          sourcelist
       b. This sourcelist is converted into a source db
       c. Possible additional sourcelists from external sources are added to
          this source list
    2. For each of the timeslices in the image a parmdb is created. Each
       timeslice is recorded at a different time and therefore needs its own
       calibration and instrument parameters.
    """

    inputs = {
        'working_directory': ingredient.StringField(
            '-w', '--working-directory',
            help = "Working directory used on nodes. Results location"
        ),
        'sourcedb_suffix': ingredient.StringField(
            '--sourcedb-suffix',
            default = ".sky",
            help = "suffix for created sourcedbs"
        ),
        'monetdb_hostname': ingredient.StringField(
            '--monetdb-hostname',
            help = "Hostname of monet database"
        ),
        'monetdb_port': ingredient.IntField(
            '--monetdb-port',
            help = "port for monet database"
        ),
        'monetdb_name': ingredient.StringField(
            '--monetdb-name',
            help = "db name of monet database"
        ),
        'monetdb_user': ingredient.StringField(
            '--monetdb-user',
            help = "user on the monet database"
        ),
        'monetdb_password': ingredient.StringField(
            '--monetdb-password',
            help = "password on monet database"
        ),
        'assoc_theta': ingredient.StringField(
            '--assoc-theta',
            default = "",
            help = "assoc_theta is used in creating the skymodel, default == None"
        ),
        'parmdb_executable': ingredient.ExecField(
            '--parmdbm-executable',
            help = "Location of the parmdb executable"
        ),
        'slice_paths_mapfile': ingredient.FileField(
            '--slice-paths-mapfile',
            help = "Location of the mapfile containing the slice paths"
        ),
        'parmdb_suffix': ingredient.StringField(
            '--parmdb-suffix',
            help = "suffix of the to be created paramdbs"
        ),
        'makesourcedb_path': ingredient.ExecField(
             '--makesourcedb-path',
             help = "Path to makesourcedb executable."
        ),
        'source_list_map_path': ingredient.StringField(
             '--source-list-map-path',
             help = "Path to sourcelist map from external source (eg. bdsm) "\
             "use an empty string for gsm generated data"
        ),
        'parmdbs_map_path': ingredient.StringField(
            '--parmdbs-map-path',
            help = "path to mapfile containing produced parmdb files"
        ),
        'sourcedb_map_path': ingredient.StringField(
            '--sourcedb-map-path',
            help = "path to mapfile containing produced sourcedb files"
        ),
        'major_cycle': ingredient.IntField(
            '--major_cycle',
            default = 0,
            help = "The number of the current cycle"
        ),
    }

    outputs = {
        'sourcedb_map_path': ingredient.FileField(
            help = "On succes contains path to mapfile containing produced "
            "sourcedb files"),
        'parmdbs_map_path': ingredient.FileField(
            help = "On succes contains path to mapfile containing produced"
            "parmdb files")
    }

    def __init__(self):
        super(imager_create_dbs, self).__init__()

    def go(self):
        super(imager_create_dbs, self).go()

        # get assoc_theta, convert from empty string if needed
        assoc_theta = self.inputs["assoc_theta"]
        if assoc_theta == "":
            assoc_theta = None

        # Load mapfile data from files
        self.logger.info(self.inputs["slice_paths_mapfile"])
        slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"])
        input_map = DataMap.load(self.inputs['args'][0])
        source_list_map = DataMap.load(self.inputs['source_list_map_path'])

        if self._validate_input_data(input_map, slice_paths_map):
            return 1

        # Run the nodes with now collected inputs
        jobs, output_map = self._run_create_dbs_node(
                 input_map, slice_paths_map, assoc_theta,
                 source_list_map)

        # Collect the output of the node scripts and write it to (map) files
        return self._collect_and_assign_outputs(jobs, output_map,
                                    slice_paths_map)

    def _validate_input_data(self, slice_paths_map, input_map):
        """
        Performs a validation of the supplied slice_paths_map and inputmap.
        Displays error message if this fails
        """
        validation_failed = None
        error_received = None
        try:
            validation_failed = not validate_data_maps(slice_paths_map,
                                                     input_map)
        except AssertionError as exception:
            validation_failed = True
            error_received = str(exception)

        if validation_failed:
            self.logger.error(error_received)
            self.logger.error("Incorrect mapfiles: {0} and {1}".format(
                 self.inputs["slice_paths_mapfile"], self.inputs['args'][0]))
            self.logger.error("content input_map: \n{0}".format(input_map))
            self.logger.error("content slice_paths_map: \n{0}".format(
                                                            slice_paths_map))
            # return with failure
            return 1

        # return with zero (all is ok state)
        return 0

    def _run_create_dbs_node(self, input_map, slice_paths_map,
             assoc_theta, source_list_map):
        """
        Decompose the input mapfiles into task for specific nodes and
        distribute these to the node recipes. Wait for the jobs to finish and
        return the list of created jobs.
        """
        # Compile the command to be executed on the remote machine
        node_command = " python3 %s" % (self.__file__.replace("master", "nodes"))
        # create jobs
        jobs = []
        output_map = copy.deepcopy(input_map)

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        align_data_maps(input_map, output_map, slice_paths_map,
                        source_list_map)

        source_list_map.iterator = slice_paths_map.iterator = \
               input_map.iterator = DataMap.SkipIterator
        for idx, (input_item, slice_item, source_list_item) in enumerate(zip(
                                  input_map, slice_paths_map, source_list_map)):
            host_ms, concat_ms = input_item.host, input_item.file
            host_slice, slice_paths = slice_item.host, slice_item.file

            # Create the parameters depending on the input_map
            sourcedb_target_path = os.path.join(
                  concat_ms + self.inputs["sourcedb_suffix"])

            # use unique working directories per job, to prevent interference between jobs on a global fs
            working_dir = os.path.join(self.inputs['working_directory'], "imager_create_dbs_{0}".format(idx))

            # The actual call for the node script
            arguments = [concat_ms,
                         sourcedb_target_path,
                         self.inputs["monetdb_hostname"],
                         self.inputs["monetdb_port"],
                         self.inputs["monetdb_name"],
                         self.inputs["monetdb_user"],
                         self.inputs["monetdb_password"],
                         assoc_theta,
                         self.inputs["parmdb_executable"],
                         slice_paths,
                         self.inputs["parmdb_suffix"],
                         self.environment,
                         working_dir,
                         self.inputs["makesourcedb_path"],
                         source_list_item.file,
                         self.inputs["major_cycle"]]

            jobs.append(ComputeJob(host_ms, node_command, arguments))
        # Wait for the nodes to finish
        if len(jobs) > 0:
            self._schedule_jobs(jobs)

        return jobs, output_map

    def _collect_and_assign_outputs(self, jobs, output_map, slice_paths_map):
        """
        Collect and combine the outputs of the individual create_dbs node
        recipes. Combine into output mapfiles and save these at the supplied
        path locations
        """
        # Create a container for the output parmdbs: same host and skip value
        # as the corresponding output_map entry
        output_map.iterator = DataMap.TupleIterator
        parmdbs_list = []
        # loop over the raw data including skipped entries (use the data member)
        for output_entry in output_map.data:
            parms_tuple = tuple([output_entry.host, [],
                                output_entry.skip])
            parmdbs_list.append(parms_tuple)

        parmdbs_map = MultiDataMap(parmdbs_list)

        output_map.iterator = parmdbs_map.iterator = DataMap.SkipIterator    # The maps are synced
        successful_run = False
        for (output_item, parmdbs_item, job) in zip(
                                                output_map, parmdbs_map, jobs):
            node_succeeded = "parmdbs" in job.results and \
                    "sourcedb" in job.results

            host = output_item.host

            # The current job has to be skipped (due to skip field)
            # Or if the node failed:
            if not node_succeeded:
                self.logger.warn("Warning failed selfcalCreateDBs run "
                    "detected: No sourcedb file created, {0} continue".format(
                                                            host))
                output_item.file = "failed"
                output_item.skip = True
                parmdbs_item.file = []
                parmdbs_item.skip = True

            # Else it succeeded and we can write the results
            else:
                successful_run = True
                output_item.file = job.results["sourcedb"]
                parmdbs_item.file = job.results["parmdbs"]

                # we also need to manually set the skip for this new
                # file list
                parmdbs_item.file_skip = [False] * len(job.results["parmdbs"])

        # Fail if none of the nodes returned all data
        if not successful_run:
            self.logger.error("The creation of dbs on the nodes failed:")
            self.logger.error("Not a single node produces all needed data")
            self.logger.error(
                "products. sourcedb_files: {0}".format(output_map))
            self.logger.error("parameter dbs: {0}".format(parmdbs_map))
            return 1

        # write the mapfiles
        output_map.save(self.inputs["sourcedb_map_path"])
        parmdbs_map.save(self.inputs["parmdbs_map_path"])
        self.logger.debug("Wrote sourcedb dataproducts: {0} \n {1}".format(
            self.inputs["sourcedb_map_path"], self.inputs["parmdbs_map_path"]))

        # Set the outputs
        self.outputs['sourcedb_map_path'] = self.inputs["sourcedb_map_path"]
        self.outputs['parmdbs_map_path'] = self.inputs["parmdbs_map_path"]

        return 0
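The output collection in _collect_and_assign_outputs() can be pictured with plain tuples instead of the LOFAR DataMap/MultiDataMap classes: every entry starts as (host, [], skip) and is either filled from the job results or marked as skipped. Hosts and result dicts below are illustrative.

output_entries = [("node1", False), ("node2", False)]
job_results = [{"sourcedb": "a.sky", "parmdbs": ["t0.parmdb", "t1.parmdb"]},
               {}]   # the second node returned nothing

sourcedb_list = []
parmdbs_list = []
for (host, skip), results in zip(output_entries, job_results):
    if "sourcedb" in results and "parmdbs" in results:
        sourcedb_list.append((host, results["sourcedb"], skip))
        parmdbs_list.append((host, results["parmdbs"], skip))
    else:
        sourcedb_list.append((host, "failed", True))   # mark failed entry
        parmdbs_list.append((host, [], True))

print(sourcedb_list)   # [('node1', 'a.sky', False), ('node2', 'failed', True)]
print(parmdbs_list)    # [('node1', ['t0.parmdb', 't1.parmdb'], False), ('node2', [], True)]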
Example #22
class selfcal_awimager(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Master script for the awimager. Collects arguments from command line and
    pipeline inputs.

    1. Load mapfiles and validate these
    2. Run the awimager node scripts
    3. Retrieve output; construct an output map file of the successful runs

    Details regarding the implementation of the imaging step can be found in
    the node recipe.

    **CommandLine Arguments**

    A mapfile containing (node, datafile) pairs: the measurement sets used as
    input for the awimager executable.
    """
    inputs = {
        'executable': ingredient.ExecField(
            '--executable',
            help = "The full path to the  awimager executable"
        ),
        'parset': ingredient.FileField(
            '-p', '--parset',
            help = "The full path to a awimager configuration parset."
        ),
        'working_directory': ingredient.StringField(
            '-w', '--working-directory',
            help = "Working directory used on output nodes. Results location"
        ),
        'output_image': ingredient.StringField(
            '--output-image',
            help = "Path of the image to be create by the awimager"
        ),
        'mapfile': ingredient.StringField(
            '--mapfile',
            help = "Full path for output mapfile. A list of the"
                 "successfully generated images will be written here"
        ),
        'sourcedb_path': ingredient.StringField(
            '--sourcedb-path',
            help = "Full path of sourcedb used to create a mask for known sources"
        ),
        'mask_patch_size': ingredient.FloatField(
            '--mask-patch-size',
            help = "Scale factor for patches in the awimager mask"
        ),
        'autogenerate_parameters': ingredient.BoolField(
            '--autogenerate-parameters',
            default = True,
            help = "Turns on the autogeneration of: cellsize, image-size, fov."
            " MSSS 'type' functionality"
        ),
        'specify_fov': ingredient.BoolField(
            '--specify-fov',
            default = False,
            help = "calculated Image parameters are relative to fov, parameter"
            " is active when autogenerate_parameters is False"
        ),
        'fov': ingredient.FloatField(
            '--fov',
            default = 0.0,
            help = "calculated Image parameters are relative to this"
            " Field Of View in arcSec. This parameter is obligatory when"
            " specify_fov is True"
        ),
        'major_cycle': ingredient.IntField(
            '--major_cycle',
            help = "The number of the current cycle to modify the parset."
        ),
        'nr_cycles': ingredient.IntField(
            '--nr-cycles',
            help = "The number major cycles."
        ) ,
        'perform_self_cal': ingredient.BoolField(
            '--perform-self-cal',
            default=False,
            help = "Control the usage of the self-calibration functionality"
        )
    }

    outputs = {
        'mapfile': ingredient.StringField(),
    }

    def go(self):
        """
        This member contains all the functionality of the imager_awimager.
        Functionality is all located at the node side of the script.
        """
        super(selfcal_awimager, self).go()
        self.logger.info("Starting imager_awimager run")

        # *********************************************************************
        # 1. collect the inputs and validate
        input_map = DataMap.load(self.inputs['args'][0])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

        if not validate_data_maps(input_map, sourcedb_map):
            self.logger.error(
                        "the supplied input_ms mapfile and sourcedb mapfile"
                        "are incorrect. Aborting")
            self.logger.error(repr(input_map))
            self.logger.error(repr(sourcedb_map))
            return 1

        # *********************************************************************
        # 2. Start the node side of the awimager recipe
        # Compile the command to be executed on the remote machine
        node_command = "python3 %s" % (self.__file__.replace("master", "nodes"))
        jobs = []

        output_map = copy.deepcopy(input_map)        
        align_data_maps(input_map, output_map, sourcedb_map)

        sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
            DataMap.SkipIterator

        for measurement_item, source_item in zip(input_map, sourcedb_map):
            if measurement_item.skip or source_item.skip:
                jobs.append(None)
                continue
            # both the sourcedb and the measurement are in a map
            # unpack both
            host, measurement_path = measurement_item.host, measurement_item.file
            host2, sourcedb_path = source_item.host, source_item.file

            # construct and save the output name
            arguments = [self.inputs['executable'],
                         self.environment,
                         self.inputs['parset'],
                         self.inputs['working_directory'],
                         self.inputs['output_image'],
                         measurement_path,
                         sourcedb_path,
                         self.inputs['mask_patch_size'],
                         self.inputs['autogenerate_parameters'],
                         self.inputs['specify_fov'],
                         self.inputs['fov'],
                         self.inputs['major_cycle'],
                         self.inputs['nr_cycles'],
                         self.inputs['perform_self_cal']
                         ]

            jobs.append(ComputeJob(host, node_command, arguments))
        self._schedule_jobs(jobs)

        # *********************************************************************
        # 3. Check output of the node scripts

        for job, output_item in zip(jobs, output_map):
            # job == None on skipped job
            if "image" not in job.results:
                output_item.file = "failed"
                output_item.skip = True

            else:
                output_item.file = job.results["image"]
                output_item.skip = False

        # Check if there are finished runs
        successful_runs = None
        for item in output_map:
            if not item.skip:
                successful_runs = True
                break

        if not successful_runs:
            self.logger.error(
                    "None of the started awimager runs finished correctly")
            self.logger.error(
                    "No work left to be done: exiting with error status")
            return 1

        # If partial success
        if self.error.isSet():
            self.logger.error("Failed awimager node run detected; continuing "
                              "with successful tasks.")

        self._store_data_map(self.inputs['mapfile'], output_map,
                             "mapfile containing produces awimages")

        self.outputs["mapfile"] = self.inputs['mapfile']
        return 0
Example #23
class rficonsole(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    The rficonsole recipe runs the rficonsole executable (flagger) across one
    or more MeasurementSets.

    **Arguments**

    A mapfile describing the data to be processed.
    """
    inputs = {
        'executable':
        ingredient.ExecField('--executable',
                             default="/opt/LofIm/daily/lofar/bin/rficonsole",
                             help="Full path to rficonsole executable"),
        'strategy':
        ingredient.FileField('--strategy',
                             help="Full path to RFI strategy file",
                             optional=True),
        'indirect_read':
        ingredient.BoolField(
            '--indirect-read',
            default=False,
            help="Indirect baseline reader: re-write MS for efficiency"),
        'skip_flagged':
        ingredient.BoolField(
            '--skip-flagged',
            default=True,
            help="Ignore any MeasurementSet which has been flagged completely"
        ),
        'working_dir':
        ingredient.StringField(
            '--working-dir',
            default='/tmp',
            help=
            "Temporary rficonsole products are stored under this root on each of the remote machines. This directory should therefore be writable on each machine, but need not be shared across hosts"
        ),
        'nthreads':
        ingredient.IntField('--nthreads',
                            default=8,
                            help="Number of threads per rficonsole process"),
        'nproc':
        ingredient.IntField(
            '--nproc',
            default=1,
            help="Maximum number of simultaneous processes per node"),
        'nmeasurementsets':
        ingredient.IntField(
            '--nmeasurementsets',
            optional=True,
            help=
            "Maximum number of MeasurementSets processed by a single rficonsole process"
        ),
    }

    def go(self):
        self.logger.info("Starting rficonsole run")
        super(rficonsole, self).go()

        #                           Load file <-> compute node mapping from disk
        # ----------------------------------------------------------------------
        self.logger.debug("Loading map from %s" % self.inputs['args'])
        data = load_data_map(self.inputs['args'][0])

        #        Jobs being dispatched to each host are arranged in a dict. Each
        #          entry in the dict is a list of lists of filenames to process.
        # ----------------------------------------------------------------------
        hostlist = defaultdict(lambda: list([[]]))
        for host, filename in data:
            if ('nmeasurementsets' in self.inputs and len(
                    hostlist[host][-1]) >= self.inputs['nmeasurementsets']):
                hostlist[host].append([filename])
            else:
                hostlist[host][-1].append(filename)

        if 'strategy' in self.inputs:
            strategy = self.inputs['strategy']
        else:
            strategy = None

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, file_lists in hostlist.items():
            for file_list in file_lists:
                jobs.append(
                    ComputeJob(
                        host,
                        command,
                        arguments=[
                            self.inputs['executable'], self.inputs['nthreads'],
                            strategy, self.inputs['indirect_read'],
                            self.inputs['skip_flagged'],
                            self.inputs['working_dir']
                        ] + file_list,
                        resources={"cores": self.inputs['nthreads']}))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            self.logger.warn("Failed rficonsole process detected")
            return 1
        else:
            return 0
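The per-host batching above (at most nmeasurementsets files per rficonsole process) is easiest to see in isolation; the hosts and filenames below are illustrative.

from collections import defaultdict

data = [("node1", "a.MS"), ("node1", "b.MS"), ("node1", "c.MS"),
        ("node2", "d.MS")]
nmeasurementsets = 2

hostlist = defaultdict(lambda: [[]])
for host, filename in data:
    if len(hostlist[host][-1]) >= nmeasurementsets:
        hostlist[host].append([filename])      # start a new batch for this host
    else:
        hostlist[host][-1].append(filename)    # extend the current batch

print(dict(hostlist))
# {'node1': [['a.MS', 'b.MS'], ['c.MS']], 'node2': [['d.MS']]}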
Example #24
class cimager(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Provides a convenient, pipeline-based mechanism of running the cimager on
    a dataset.

    Can ingest either an MWimager-style parset, converting to cimager format
    as required, or a cimager parset directly.

    **Arguments**

    A mapfile describing the data to be processed.
    """
    inputs = {
        'imager_exec':
        ingredient.ExecField('--imager-exec', help="cimager executable"),
        'convert_exec':
        ingredient.ExecField('--convert-exec',
                             help="convertimagerparset executable"),
        'parset':
        ingredient.FileField(
            '--parset',
            help="Imager configuration parset (mwimager or cimager format)"),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8),
        'timestep':
        ingredient.FloatField(
            '--timestep',
            help=
            "If non-zero, multiple images will be made, each using timestep seconds of data",
            default=0.0),
        'results_dir':
        ingredient.DirectoryField(
            '--results-dir',
            help="Directory in which resulting images will be placed",
        ),
        'parset_type':
        ParsetTypeField('--parset-type',
                        default="mwimager",
                        help="cimager or mwimager"),
        'makevds':
        ingredient.ExecField('--makevds',
                             help="makevds executable",
                             default="/opt/LofIm/daily/lofar/bin/makevds"),
        'combinevds':
        ingredient.ExecField('--combinevds',
                             help="combinevds executable",
                             default="/opt/LofIm/daily/lofar/bin/combinevds")
    }

    outputs = {'images': ingredient.ListField()}

    def go(self):
        self.logger.info("Starting cimager run")
        super(cimager, self).go()
        self.outputs['images'] = []

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for cimager")
        gvds_file = os.path.join(self.config.get("layout", "job_directory"),
                                 "vds", "cimager.gvds")
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = gvds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(gvds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("cimager GVDS is %s" % (gvds_file, ))

        #                            Read data for processing from the GVDS file
        # ----------------------------------------------------------------------
        parset = Parset(gvds_file)

        data = []
        for part in range(parset.getInt('NParts')):
            host = parset.getString("Part%d.FileSys" % part).split(":")[0]
            vds = parset.getString("Part%d.Name" % part)
            data.append((host, vds))

        #                                 Divide data into timesteps for imaging
        #          timesteps is a list of (start, end, results directory) tuples
        # ----------------------------------------------------------------------
        timesteps = []
        results_dir = self.inputs['results_dir']
        if self.inputs['timestep'] == 0:
            self.logger.info("No timestep specified; imaging all data")
            timesteps = [(None, None, results_dir)]
        else:
            self.logger.info("Using timestep of %s s" %
                             self.inputs['timestep'])
            gvds = get_parset(gvds_file)
            start_time = quantity(gvds['StartTime'].get()).get('s').get_value()
            end_time = quantity(gvds['EndTime'].get()).get('s').get_value()
            step = float(self.inputs['timestep'])
            while start_time < end_time:
                timesteps.append((start_time, start_time + step,
                                  os.path.join(results_dir, str(start_time))))
                start_time += step

        #                          Run each cimager process in a separate thread
        # ----------------------------------------------------------------------
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        for label, timestep in enumerate(timesteps):
            self.logger.info("Processing timestep %d" % label)
            jobs = []
            parsets = []
            start_time, end_time, resultsdir = timestep
            for host, vds in data:
                vds_data = Parset(vds)
                frequency_range = [
                    vds_data.getDoubleVector("StartFreqs")[0],
                    vds_data.getDoubleVector("EndFreqs")[-1]
                ]
                parsets.append(
                    self.__get_parset(
                        os.path.basename(
                            vds_data.getString('FileName')).split('.')[0],
                        vds_data.getString("FileName"),
                        str(frequency_range),
                        vds_data.getStringVector("Extra.FieldDirectionType")
                        [0],
                        vds_data.getStringVector("Extra.FieldDirectionRa")[0],
                        vds_data.getStringVector("Extra.FieldDirectionDec")[0],
                        'True',  # cimager bug: non-restored image unusable
                    ))
                jobs.append(
                    ComputeJob(host,
                               command,
                               arguments=[
                                   self.inputs['imager_exec'], vds,
                                   parsets[-1], resultsdir, start_time,
                                   end_time
                               ]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for parset in parsets:
                parset = Parset(parset)
                image_names = parset.getStringVector("Cimager.Images.Names")
                self.outputs['images'].extend(image_names)
            [os.unlink(parset) for parset in parsets]

        #                Check if we recorded a failing process before returning
        # ----------------------------------------------------------------------
        if self.error.isSet():
            self.logger.warn("Failed imager process detected")
            return 1
        else:
            return 0

    def __get_parset(self, name, dataset, frequency, ms_dir_type, ms_dir_ra,
                     ms_dir_dec, restore):
        def convert_mwimager_parset(parset):
            try:
                with patched_parset(
                        parset,
                    {
                        'dataset': dataset,
                        'Images.frequency': frequency,
                        'msDirType': ms_dir_type,
                        'msDirRa': ms_dir_ra,
                        'msDirDec': ms_dir_dec,
                        'restore':
                        restore  # cimager bug: non-restored image unusable
                    }) as cimager_parset:
                    fd, converted_parset = tempfile.mkstemp(
                        dir=self.config.get("layout", "job_directory"))
                    convert_process = spawn_process([
                        self.inputs['convert_exec'], cimager_parset,
                        converted_parset
                    ], self.logger)
                    os.close(fd)
                    sout, serr = convert_process.communicate()
                    log_process_output(self.inputs['convert_exec'], sout, serr,
                                       self.logger)
                    if convert_process.returncode != 0:
                        raise subprocess.CalledProcessError(
                            convert_process.returncode,
                            self.inputs['convert_exec'])
                    return converted_parset
            except OSError as e:
                self.logger.error("Failed to spawn convertimagerparset (%s)" %
                                  str(e))
                raise
            except subprocess.CalledProcessError as e:
                self.logger.error(str(e))
                raise

        def populate_cimager_parset(parset):
            input_parset = Parset(parset)
            patch_dictionary = {
                'Cimager.dataset': dataset,
                'Cimager.restore': restore
            }
            image_names = []
            for image_name in input_parset.getStringVector(
                    'Cimager.Images.Names'):
                image_names.append("%s_%s" % (image_name, name))
                subset = input_parset.makeSubset(
                    "Cimager.Images.%s" % image_name,
                    "Cimager.Images.%s" % image_names[-1])
                patch_dictionary["Cimager.Images.%s.frequency" %
                                 image_names[-1]] = frequency
                patch_dictionary["Cimager.Images.%s.direction" %
                                 image_names[-1]] = "[ %s,%s,%s ]" % (
                                     ms_dir_ra, ms_dir_dec, ms_dir_type)
                for key in subset:
                    patch_dictionary[key] = subset[key].get()
            input_parset.subtractSubset('Cimager.Images.image')
            for key in input_parset:
                patch_dictionary[key] = input_parset[key].get()
            patch_dictionary['Cimager.Images.Names'] = "[ %s ]" % ", ".join(
                image_names)
            return patch_parset(None, patch_dictionary,
                                self.config.get("layout", "job_directory"))

        try:
            if self.inputs['parset_type'] == "mwimager":
                cimager_parset = convert_mwimager_parset(self.inputs['parset'])
            elif self.inputs['parset_type'] == "cimager":
                cimager_parset = populate_cimager_parset(self.inputs['parset'])
        except Exception as e:
            self.logger.exception("Failed to generate imager parset")
            raise

        return cimager_parset
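
# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the recipe above): populate_cimager_parset
# renames every per-image key so that each run gets a unique image name. The
# same renaming can be mimicked on a flat dict of parset keys; the key/value
# pairs used in the example are hypothetical.
def _rename_image_keys(flat_parset, suffix):
    """Return a copy of ``flat_parset`` in which every
    ``Cimager.Images.<image>.<param>`` key is rewritten to
    ``Cimager.Images.<image>_<suffix>.<param>``."""
    renamed = {}
    for key, value in flat_parset.items():
        parts = key.split('.')
        if len(parts) > 3 and parts[:2] == ['Cimager', 'Images']:
            parts[2] = "%s_%s" % (parts[2], suffix)
        renamed['.'.join(parts)] = value
    return renamed

# Example:
#   _rename_image_keys({'Cimager.Images.image_i.frequency': '150e6'}, 'run01')
#   -> {'Cimager.Images.image_i_run01.frequency': '150e6'}
# ----------------------------------------------------------------------------
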
Exemple #25
0
class vdsmaker(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Generate a GVDS file (and, optionally, individual VDS files per subband;
    see the ``unlink`` input parameter) describing a collection of
    MeasurementSets.

    1. Load data from disk, create the output vds paths
    2. Call the vdsmaker node script to generate the vds files
    3. Combine the vds files into a gvds file (master-side operation)

    **Command line arguments**

    A mapfile describing the MeasurementSets to be processed.
    """
    inputs = {
        'gvds':
        ingredient.StringField('-g',
                               '--gvds',
                               help="File name for output GVDS file"),
        'directory':
        ingredient.DirectoryField('--directory',
                                  help="Directory for output GVDS file"),
        'makevds':
        ingredient.ExecField('--makevds',
                             help="Full path to makevds executable"),
        'combinevds':
        ingredient.ExecField('--combinevds',
                             help="Full path to combinevds executable"),
        'unlink':
        ingredient.BoolField('--unlink',
                             help="Unlink VDS files after combining",
                             default=True),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8)
    }

    outputs = {'gvds': ingredient.FileField()}

    def go(self):
        """
        Contains functionality of the vdsmaker
        """
        super(vdsmaker, self).go()
        # **********************************************************************
        # 1. Load data from disk and create the output file paths
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        data = DataMap.load(args[0])

        # Skip items in `data` that have 'skip' set to True
        data.iterator = DataMap.SkipIterator

        # Create output vds names
        vdsnames = [
            os.path.join(self.inputs['directory'],
                         os.path.basename(item.file) + '.vds') for item in data
        ]

        # *********************************************************************
        # 2. Call vdsmaker
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for inp, vdsfile in zip(data, vdsnames):
            jobs.append(
                ComputeJob(inp.host,
                           command,
                           arguments=[
                               inp.file,
                               self.config.get('cluster', 'clusterdesc'),
                               vdsfile, self.inputs['makevds']
                           ]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        vdsnames = [
            vds for vds, job in zip(vdsnames, jobs)
            if job.results['returncode'] == 0
        ]
        if not vdsnames:
            self.logger.error("All makevds processes failed. Bailing out!")
            return 1

        # *********************************************************************
        # 3. Combine VDS files to produce GDS
        failure = False
        self.logger.info("Combining VDS files")
        executable = self.inputs['combinevds']
        gvds_out = self.inputs['gvds']
        # Create the gvds directory for output files, needed for combine
        create_directory(os.path.dirname(gvds_out))

        try:
            command = [executable, gvds_out] + vdsnames
            combineproc = subprocess.Popen(command,
                                           close_fds=True,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE)
            sout, serr = combineproc.communicate()
            log_process_output(executable, sout, serr, self.logger)
            if combineproc.returncode != 0:
                raise subprocess.CalledProcessError(combineproc.returncode,
                                                    command)
            self.outputs['gvds'] = gvds_out
            self.logger.info("Wrote combined VDS file: %s" % gvds_out)
        except subprocess.CalledProcessError as cpe:
            self.logger.exception("combinevds failed with status %d: %s" %
                                  (cpe.returncode, serr))
            failure = True
        except OSError as err:
            self.logger.error("Failed to spawn combinevds (%s)" % str(err))
            failure = True
        finally:
            if self.inputs['unlink']:
                self.logger.debug("Unlinking temporary VDS files")
                for name in vdsnames:
                    os.unlink(name)

        if failure:
            return 1
        else:
            return 0
Exemple #26
0
class setupparmdb(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Create a distributed parameter database (ParmDB) for a distributed
    MeasurementSet (MS).

    1. Create a parmdb template at the master side of the recipe
    2. Call node side of recipe with template and possible targets
    3. Validate performance, cleanup of temp files, construct output

    **Command line arguments**

    1. A mapfile describing the data to be processed.
    2. A mapfile with output locations (if provided, input and output are validated)
    
    """
    inputs = {
        'executable':
        ingredient.ExecField(
            '--executable',
            help="Full path to parmdbm executable",
        ),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8),
        'suffix':
        ingredient.StringField(
            '--suffix',
            help="Suffix of the table name of the empty parmameter database",
            default=".parmdb"),
        'working_directory':
        ingredient.StringField('-w',
                               '--working-directory',
                               help="Working directory used on output nodes. "
                               "Results will be written here."),
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Full path of mapfile to produce; it will contain "
            "a list of the generated empty parameter database files")
    }

    outputs = {'mapfile': ingredient.FileField()}

    def go(self):
        self.logger.info("Starting setupparmdb run")
        super(setupparmdb, self).go()

        # *********************************************************************
        # 1. Create a temporary template parmdb at the master side of the recipe
        self.logger.info("Generating template parmdb")

        # generate a temp dir
        pdbdir = tempfile.mkdtemp(
            dir=self.config.get("layout", "job_directory"))
        pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

        # Create a template parmdb, using the temp dir as its location
        try:
            parmdbm_process = subprocess.Popen([self.inputs['executable']],
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            sout, serr = parmdbm_process.communicate(template % pdbfile)
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as err:
            self.logger.error("Failed to spawn parmdbm: %s" % str(err))
            return 1

        # *********************************************************************
        # 2. Call node side of recipe with template and possible targets
        #    If output locations are provided as input, they are validated.
        try:
            #                       Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            args = self.inputs['args']
            self.logger.debug("Loading input-data mapfile: %s" % args[0])
            indata = DataMap.load(args[0])
            if len(args) > 1:
                # If an output location was provided, validate input and output maps
                self.logger.debug("Loading output-data mapfile: %s" % args[1])
                outdata = DataMap.load(args[1])
                if not validate_data_maps(indata, outdata):
                    self.logger.error(
                        "Validation of input/output data mapfiles failed")
                    return 1
                # else the output location is the input location plus the suffix
            else:
                outdata = copy.deepcopy(indata)
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        os.path.basename(item.file) + self.inputs['suffix'])
            #  Call the node side
            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            outdata.iterator = DataMap.SkipIterator
            jobs = []
            for outp in outdata:
                jobs.append(
                    ComputeJob(outp.host,
                               command,
                               arguments=[pdbfile, outp.file]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for job, outp in zip(jobs, outdata):
                # A return code of 123456 indicates a failed ssh connection
                if job.results['returncode'] == 123456:
                    self.logger.warning(
                        "ssh connection with {0} failed. "
                        "Skipping further work on this task".format(outp.host))
                    self.logger.warning("Error code 123456.")
                    outp.skip = True
                elif job.results['returncode'] != 0:
                    outp.skip = True

        # *********************************************************************
        # 3. validate performance, cleanup of temp files, construct output
        finally:
            self.logger.debug("Removing template parmdb")
            shutil.rmtree(pdbdir, ignore_errors=True)

        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")
        self.logger.debug("Writing parmdb map file: %s" %
                          self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
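
# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the recipe above): the partial-success rule
# applied in step 3 only aborts when every node job failed; otherwise the run
# continues with the jobs that succeeded. The return codes in the example are
# hypothetical.
def _all_jobs_failed(returncodes):
    """True only when every compute job returned a non-zero exit code."""
    return all(code != 0 for code in returncodes)

# Example:
#   _all_jobs_failed([0, 1, 0])    -> False  (some jobs succeeded, continue)
#   _all_jobs_failed([1, 123456])  -> True   (bail out)
# ----------------------------------------------------------------------------
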
Exemple #27
0
class demixing(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Run the demixer on the MS's on the compute nodes.
    """
    inputs = {
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Name of the output mapfile containing the names of the "
            "MS-files produced by the demixing recipe"),
        'working_directory':
        ingredient.StringField('-w',
                               '--working-directory',
                               help="Working directory used on output nodes. "
                               "Results will be written here"),
        'initscript':
        ingredient.FileField(
            '--initscript',
            help="The full path to an (Bourne) shell script which will "
            "intialise the environment (ie, ``lofarinit.sh``)"),
        'demix_parset_dir':
        ingredient.DirectoryField(
            '--demix-parset-dir',
            dest='demixdir',
            help="Directory containing the demixing parset-files",
        ),
        'db_host':
        ingredient.StringField(
            '--db-host',
            dest="db_host",
            help="Database host with optional port (e.g. ldb001)"),
        'skymodel':
        ingredient.FileField(
            '--skymodel',
            help="File containing the sky model to use",
        ),
        'demix_sources':
        ingredient.ListField(
            '--demix-sources',
            dest='remove',
            help="List of sources to remove e.g. 'CygA, CasA'; "
            "will be determined automatically if not specified.",
            default=[]),
        'ms_target':
        ingredient.StringField(
            '--ms-target',
            dest='target',
            help="Substring in the output MS name that replaces the "
            "substring 'uv' (default: 'target')",
            default="target"),
        'timestep':
        ingredient.IntField('--timestep',
                            help="Time step for averaging",
                            default=10),
        'freqstep':
        ingredient.IntField('--freqstep',
                            help="Frequency step for averaging",
                            default=60),
        'half_window':
        ingredient.IntField('--half-window',
                            help="Window size of median filter",
                            default=20),
        'threshold':
        ingredient.FloatField(
            '--threshold',
            help="Solutions above/below threshold*rms are smoothed",
            default=2.5),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=1)
    }

    outputs = {'mapfile': ingredient.FileField()}

    def go(self):
        self.logger.info("Starting demixing run")
        super(demixing, self).go()

        job_dir = os.path.join(self.inputs['working_directory'],
                               self.inputs['job_name'])

        #                       Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = load_data_map(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = load_data_map(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1
        else:
            # This is a bit of a kludge. The input MS-filenames are supposed to
            # contain the string "_uv". The demixing node script will produce
            # output MS-files, whose names have the string "_uv" replaced by
            # "_" + self.inputs['ms_target'] + "_sub".
            outdata = [(host,
                        os.path.join(
                            job_dir,
                            os.path.basename(infile).replace(
                                '_uv',
                                '_' + self.inputs['ms_target'] + '_sub')))
                       for host, infile in indata]

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, infile in indata:
            jobs.append(
                ComputeJob(
                    host,
                    command,
                    arguments=[
                        infile, job_dir, self.inputs['initscript'],
                        self.inputs['demix_sources'], self.inputs['ms_target'],
                        self.config.get('cluster', 'clusterdesc'),
                        self.inputs['timestep'], self.inputs['freqstep'],
                        self.inputs['half_window'], self.inputs['threshold'],
                        self.inputs['demix_parset_dir'],
                        self.inputs['skymodel'], self.inputs['db_host']
                    ]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            return 1
        else:
            self.logger.debug("Writing mapfile %s" % self.inputs['mapfile'])
            store_data_map(self.inputs['mapfile'], outdata)
            self.outputs['mapfile'] = self.inputs['mapfile']
            return 0
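
# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the recipe above): when no output mapfile is
# given, the demixing recipe derives each output MS name by replacing the
# "_uv" substring in the input basename. The paths in the example are
# hypothetical.
import os


def _demixed_ms_name(job_dir, input_ms, ms_target="target"):
    """Mirror the default output naming used by the recipe above."""
    return os.path.join(
        job_dir,
        os.path.basename(input_ms).replace('_uv',
                                           '_' + ms_target + '_sub'))

# Example:
#   _demixed_ms_name("/data/scratch/Observation123",
#                    "/data/L2011_12345/L12345_SB000_uv.MS")
#   -> "/data/scratch/Observation123/L12345_SB000_target_sub.MS"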