Example #1
class count_timesteps(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Accept a list of baselines (in the format used by NDPPP logging).

    Flag them in all MeasurementSets.
    """
    inputs = {
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8)
    }
    outputs = {
        'start_time': ingredient.FloatField(),
        'end_time': ingredient.FloatField()
    }

    def go(self):
        self.logger.info("Starting count_timesteps run")
        super(count_timesteps, self).go()

        self.logger.debug("Loading map from %s" % self.inputs['args'][0])
        data = load_data_map(self.inputs['args'][0])

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, ms in data:
            jobs.append(ComputeJob(host, command, arguments=[ms]))
        jobs = self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        self.outputs['start_time'] = min(job.results['start_time']
                                         for job in jobs.values())
        self.outputs['end_time'] = max(job.results['end_time']
                                       for job in jobs.values())

        if self.error.isSet():
            return 1
        else:
            return 0
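
The master recipe above only dispatches work: the start and end times come back through each job.results dict, which a matching node-side script must fill in. A minimal sketch of what that node script might look like, assuming the LOFARnodeTCP base class and pyrap.tables; the column access is illustrative, not the verbatim LOFAR implementation:

import sys

from lofarpipe.support.lofarnode import LOFARnodeTCP
from pyrap.tables import table


class count_timesteps(LOFARnodeTCP):
    """
    Report the first and last timestamps found in one MeasurementSet.
    """
    def run(self, infile):
        ms = table(infile, ack=False)
        times = ms.getcol('TIME')  # per-row epoch values, in seconds
        self.outputs['start_time'] = float(times.min())
        self.outputs['end_time'] = float(times.max())
        ms.close()
        return 0


if __name__ == "__main__":
    # Node scripts are launched with a job id, host and port, which are
    # used to report the outputs dict back to the master recipe.
    jobid, jobhost, jobport = sys.argv[1:4]
    sys.exit(count_timesteps(jobid, jobhost, jobport).run_with_stored_arguments())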
Example #2
class cimager(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Provides a convenient, pipeline-based mechanism of running the cimager on
    a dataset.

    Can ingest either an MWimager-style parset, converting to cimager format
    as required, or a cimager parset directly.

    **Arguments**

    A mapfile describing the data to be processed.
    """
    inputs = {
        'imager_exec':
        ingredient.ExecField('--imager-exec', help="cimager executable"),
        'convert_exec':
        ingredient.ExecField('--convert-exec',
                             help="convertimagerparset executable"),
        'parset':
        ingredient.FileField(
            '--parset',
            help="Imager configuration parset (mwimager or cimager format)"),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8),
        'timestep':
        ingredient.FloatField(
            '--timestep',
            help=
            "If non-zero, multiple images will be made, each using timestep seconds of data",
            default=0.0),
        'results_dir':
        ingredient.DirectoryField(
            '--results-dir',
            help="Directory in which resulting images will be placed",
        ),
        'parset_type':
        ParsetTypeField('--parset-type',
                        default="mwimager",
                        help="cimager or mwimager"),
        'makevds':
        ingredient.ExecField('--makevds',
                             help="makevds executable",
                             default="/opt/LofIm/daily/lofar/bin/makevds"),
        'combinevds':
        ingredient.ExecField('--combinevds',
                             help="combinevds executable",
                             default="/opt/LofIm/daily/lofar/bin/combinevds")
    }

    outputs = {'images': ingredient.ListField()}

    def go(self):
        self.logger.info("Starting cimager run")
        super(cimager, self).go()
        self.outputs['images'] = []

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for cimager")
        gvds_file = os.path.join(self.config.get("layout", "job_directory"),
                                 "vds", "cimager.gvds")
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = gvds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(gvds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("cimager GVDS is %s" % (gvds_file, ))

        #                            Read data for processing from the GVDS file
        # ----------------------------------------------------------------------
        parset = Parset(gvds_file)

        data = []
        for part in range(parset.getInt('NParts')):
            host = parset.getString("Part%d.FileSys" % part).split(":")[0]
            vds = parset.getString("Part%d.Name" % part)
            data.append((host, vds))

        #                                 Divide data into timesteps for imaging
        #          timesteps is a list of (start, end, results directory) tuples
        # ----------------------------------------------------------------------
        timesteps = []
        results_dir = self.inputs['results_dir']
        if self.inputs['timestep'] == 0:
            self.logger.info("No timestep specified; imaging all data")
            timesteps = [(None, None, results_dir)]
        else:
            self.logger.info("Using timestep of %s s" %
                             self.inputs['timestep'])
            gvds = get_parset(gvds_file)
            start_time = quantity(gvds['StartTime'].get()).get('s').get_value()
            end_time = quantity(gvds['EndTime'].get()).get('s').get_value()
            step = float(self.inputs['timestep'])
            while start_time < end_time:
                timesteps.append((start_time, start_time + step,
                                  os.path.join(results_dir, str(start_time))))
                start_time += step

        #                          Run each cimager process in a separate thread
        # ----------------------------------------------------------------------
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        for label, timestep in enumerate(timesteps):
            self.logger.info("Processing timestep %d" % label)
            jobs = []
            parsets = []
            start_time, end_time, resultsdir = timestep
            for host, vds in data:
                vds_data = Parset(vds)
                frequency_range = [
                    vds_data.getDoubleVector("StartFreqs")[0],
                    vds_data.getDoubleVector("EndFreqs")[-1]
                ]
                parsets.append(
                    self.__get_parset(
                        os.path.basename(
                            vds_data.getString('FileName')).split('.')[0],
                        vds_data.getString("FileName"),
                        str(frequency_range),
                        vds_data.getStringVector("Extra.FieldDirectionType")
                        [0],
                        vds_data.getStringVector("Extra.FieldDirectionRa")[0],
                        vds_data.getStringVector("Extra.FieldDirectionDec")[0],
                        'True',  # cimager bug: non-restored image unusable
                    ))
                jobs.append(
                    ComputeJob(host,
                               command,
                               arguments=[
                                   self.inputs['imager_exec'], vds,
                                   parsets[-1], resultsdir, start_time,
                                   end_time
                               ]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for parset_file in parsets:
                parset = Parset(parset_file)
                image_names = parset.getStringVector("Cimager.Images.Names")
                self.outputs['images'].extend(image_names)
                os.unlink(parset_file)

        #                Check if we recorded a failing process before returning
        # ----------------------------------------------------------------------
        if self.error.isSet():
            self.logger.warn("Failed imager process detected")
            return 1
        else:
            return 0

    def __get_parset(self, name, dataset, frequency, ms_dir_type, ms_dir_ra,
                     ms_dir_dec, restore):
        def convert_mwimager_parset(parset):
            try:
                with patched_parset(
                        parset,
                    {
                        'dataset': dataset,
                        'Images.frequency': frequency,
                        'msDirType': ms_dir_type,
                        'msDirRa': ms_dir_ra,
                        'msDirDec': ms_dir_dec,
                        'restore':
                        restore  # cimager bug: non-restored image unusable
                    }) as cimager_parset:
                    fd, converted_parset = tempfile.mkstemp(
                        dir=self.config.get("layout", "job_directory"))
                    convert_process = spawn_process([
                        self.inputs['convert_exec'], cimager_parset,
                        converted_parset
                    ], self.logger)
                    os.close(fd)
                    sout, serr = convert_process.communicate()
                    log_process_output(self.inputs['convert_exec'], sout, serr,
                                       self.logger)
                    if convert_process.returncode != 0:
                        raise subprocess.CalledProcessError(
                            convert_process.returncode,
                            self.inputs['convert_exec'])
                    return converted_parset
            except OSError as e:
                self.logger.error("Failed to spawn convertimagerparset (%s)" %
                                  str(e))
                raise
            except subprocess.CalledProcessError as e:
                self.logger.error(str(e))
                raise

        def populate_cimager_parset(parset):
            input_parset = Parset(parset)
            patch_dictionary = {
                'Cimager.dataset': dataset,
                'Cimager.restore': restore
            }
            image_names = []
            for image_name in input_parset.getStringVector(
                    'Cimager.Images.Names'):
                image_names.append("%s_%s" % (image_name, name))
                subset = input_parset.makeSubset(
                    "Cimager.Images.%s" % image_name,
                    "Cimager.Images.%s" % image_names[-1])
                patch_dictionary["Cimager.Images.%s.frequency" %
                                 image_names[-1]] = frequency
                patch_dictionary["Cimager.Images.%s.direction" %
                                 image_names[-1]] = "[ %s,%s,%s ]" % (
                                     ms_dir_ra, ms_dir_dec, ms_dir_type)
                for key in subset:
                    patch_dictionary[key] = subset[key].get()
            input_parset.subtractSubset('Cimager.Images.image')
            for key in input_parset:
                patch_dictionary[key] = input_parset[key].get()
            patch_dictionary['Cimager.Images.Names'] = "[ %s ]" % ", ".join(
                image_names)
            return patch_parset(None, patch_dictionary,
                                self.config.get("layout", "job_directory"))

        try:
            if self.inputs['parset_type'] == "mwimager":
                cimager_parset = convert_mwimager_parset(self.inputs['parset'])
            elif self.inputs['parset_type'] == "cimager":
                cimager_parset = populate_cimager_parset(self.inputs['parset'])
        except Exception:
            self.logger.exception("Failed to generate imager parset")
            raise

        return cimager_parset
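
convert_mwimager_parset above leans on the patched_parset context manager from the pipeline support library. A rough sketch of the idea, assuming the Parset class seen above exposes replace() and writeFile(); this illustrates the mechanism and is not the library's exact implementation:

import os
import tempfile
from contextlib import contextmanager


@contextmanager
def patched_parset(parset_path, data, unlink=True):
    # Copy the input parset, override the given keys, and yield the path
    # to the temporary patched copy; remove it again on exit.
    temp_parset = Parset(parset_path)
    for key, value in data.items():
        temp_parset.replace(key, str(value))
    fd, patched_path = tempfile.mkstemp()
    os.close(fd)
    temp_parset.writeFile(patched_path)
    try:
        yield patched_path
    finally:
        if unlink:
            os.unlink(patched_path)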
Example #3
class skymodel(BaseRecipe):
    """
    Extract basic sky model information from database
    """
    inputs = {
        'db_host':
        ingredient.StringField('--db-host',
                               help="Host with MonetDB database instance",
                               default="ldb001"),
        'db_port':
        ingredient.IntField('--db-port',
                            help="Port of the MonetDB database instance",
                            default=50000),
        'db_dbase':
        ingredient.StringField('--db-dbase',
                               help="Database name",
                               default="gsm"),
        'db_user':
        ingredient.StringField('--db-user',
                               help="Database user",
                               default="gsm"),
        'db_password':
        ingredient.StringField('--db-password',
                               help="Database password",
                               default="msss"),
        'ra':
        ingredient.FloatField('--ra', help='RA of image centre (degrees)'),
        'dec':
        ingredient.FloatField('--dec', help='dec of image centre (degrees)'),
        'search_size':
        ingredient.FloatField(
            '--search-size',
            help='Distance to search in each of RA/dec (degrees)'),
        'min_flux':
        ingredient.FloatField(
            '--min-flux',
            help="Integrated flus threshold, in Jy, for source selection"),
        'skymodel_file':
        ingredient.StringField(
            '--skymodel-file',
            help="Output file for BBS-format sky model definition")
    }

    outputs = {
        'source_name': ingredient.StringField(),
        'source_flux': ingredient.FloatField()
    }

    def go(self):
        self.logger.info("Building sky model")
        super(skymodel, self).go()

        ra_min = self.inputs['ra'] - self.inputs['search_size']
        ra_max = self.inputs['ra'] + self.inputs['search_size']
        dec_min = self.inputs['dec'] - self.inputs['search_size']
        dec_max = self.inputs['dec'] + self.inputs['search_size']

        try:
            with closing(
                    db.connect(
                        hostname=self.inputs["db_host"],
                        port=int(self.inputs["db_port"]),
                        database=self.inputs["db_dbase"],
                        username=self.inputs["db_user"],
                        password=self.inputs["db_password"])) as db_connection:
                with closing(db_connection.cursor()) as db_cursor:
                    db_cursor.execute(query_central % (float(
                        self.inputs['ra']), float(self.inputs['dec']), "VLSS"))
                    central_source = db_cursor.fetchone()
                    if central_source:
                        self.outputs["source_name"], self.outputs[
                            "source_flux"] = central_source
                    else:
                        raise PipelineException(
                            "Error reading central source from database; got %s"
                            % str(central_source))
                    self.logger.info("Central source is %s; flux %f" %
                                     (self.outputs["source_name"],
                                      self.outputs["source_flux"]))
                    db_cursor.execute(query_skymodel % (
                        4,
                        4,  # Only using VLSS for now
                        float(ra_min),
                        float(ra_max),
                        float(dec_min),
                        float(dec_max),
                        float(self.inputs['min_flux'])))
                    results = db_cursor.fetchall()

        except db.Error as my_error:
            self.logger.warn("Failed to build sky model: %s " % (my_error))
            return 1

        try:
            with open(self.inputs['skymodel_file'], 'w') as outfile:
                outfile.write(header_line)
                outfile.writelines(", ".join(line) + ",\n" for line in results)
        except Exception as e:
            self.logger.warn("Failed to write skymodel file")
            self.logger.warn(str(e))
            return 1

        return 0
Example #4
class gainoutliercorrection(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Recipe to correct outliers in the gain solutions of an parmdb,
    using the program `parmexportcal`   
    The main purpose of this program is to strip off the time axis information
    from a instrument model (a.k.a ParmDB)
    -or-
    a minimal implementation of the edit_parmdb program. Search all gains for
    outliers and swap these for the median

    1. Validate input
    2. load mapfiles, validate if a target output location is provided
    3. Call node side of the recipe
    4. validate performance, return corrected files

    **Command line arguments**

    1. A mapfile describing the data to be processed.
    2. A mapfile with target location <mapfiles are validated if present>
    
    """
    inputs = {
        'executable':
        ingredient.StringField(
            '--executable',
            default="",
            help="Full path to the `parmexportcal` executable; not setting "
            "this results in edit_parmdb behaviour"),
        'suffix':
        ingredient.StringField(
            '--suffix',
            help="Suffix of the table name of the instrument model",
            default=".instrument"),
        'working_directory':
        ingredient.StringField('-w',
                               '--working-directory',
                               help="Working directory used on output nodes. "
                               "Results will be written here."),
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Full path of mapfile to produce; it will contain "
            "a list of the generated instrument-model files"),
        'sigma':
        ingredient.FloatField(
            '--sigma',
            default=1.0,
            help="Clip at sigma * median: (not used by parmexportcal"),
        'export_instrument_model':
        ingredient.BoolField(
            '--use-parmexportcal',
            default=False,
            help="Select between parmexportcal and edit_parmdb")
    }

    outputs = {
        'mapfile': ingredient.FileField(help="mapfile with corrected parmdbs")
    }

    def go(self):
        super(gainoutliercorrection, self).go()
        self.logger.info("Starting gainoutliercorrection run")
        # ********************************************************************
        # 1. Validate input: if an executable is supplied, test whether it
        # exists; otherwise fall back to edit_parmdb behaviour
        executable = self.inputs['executable']
        if executable != "" and not os.access(executable, os.X_OK):
            self.logger.warn(
                "No parmexportcal executable found at the supplied "
                "path: {0}".format(self.inputs['executable']))
            self.logger.warn("Defaulting to edit_parmdb behaviour")

        # ********************************************************************
        # 2. load mapfiles, validate if a target output location is provided
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'], self.inputs['job_name'],
                    (os.path.splitext(os.path.basename(item.file))[0] +
                     self.inputs['suffix']))

        # Update the skip fields of the two maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for x, y in zip(indata, outdata):
            x.skip = y.skip = (x.skip or y.skip)

        # ********************************************************************
        # 3. Call node side of the recipe
        command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
        indata.iterator = outdata.iterator = DataMap.SkipIterator
        jobs = []
        for inp, outp in zip(indata, outdata):
            jobs.append(
                ComputeJob(outp.host,
                           command,
                           arguments=[
                               inp.file, outp.file, self.inputs['executable'],
                               self.environment, self.inputs['sigma'],
                               self.inputs['export_instrument_model']
                           ]))
        self._schedule_jobs(jobs)
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True

        # ********************************************************************
        # 4. validate performance, return corrected files
        if self.error.isSet():
            self.logger.warn("Detected failed gainoutliercorrection job")
            return 1
        else:
            self.logger.debug("Writing instrument map file: %s" %
                              self.inputs['mapfile'])
            outdata.save(self.inputs['mapfile'])
            self.outputs['mapfile'] = self.inputs['mapfile']
            return 0
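
The manual skip-field alignment in step 2 is the same bookkeeping that other recipes delegate to align_data_maps. A minimal sketch of such a helper, assuming each DataMap supports len() and yields items with a boolean skip attribute (illustrative only, not the library's exact code):

def align_data_maps(*maps):
    # If any map marks an entry as skipped, mark the corresponding entry
    # as skipped in every map, so all maps stay in lockstep.
    if not maps or any(len(m) != len(maps[0]) for m in maps):
        raise ValueError("align_data_maps needs maps of equal, nonzero length")
    for items in zip(*maps):
        skip = any(item.skip for item in items)
        for item in items:
            item.skip = skip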
Example #5
class selfcal_finalize(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    The Imager_finalizer performs a number of steps needed for integrating the
    msss_imager_pipeline in the LOFAR framework: it places the image on the
    output location in the correct image type (hdf5).
    It also adds some meta data collected from the individual measurement sets
    and the data found.

    This recipe does not have positional commandline arguments
    """
    inputs = {
        'awimager_output_map':
        ingredient.FileField(
            '--awimager-output-mapfile',
            help="""Mapfile containing (host, path) pairs of created sky
                   images """),
        'ms_per_image_map':
        ingredient.FileField(
            '--ms-per-image-map',
            help='''Mapfile containing (host, path) pairs of mapfiles used
            to create image on that node'''),
        'sourcelist_map':
        ingredient.FileField(
            '--sourcelist-map',
            help='''mapfile containing (host, path) pairs to a list of sources
            found in the image'''),
        'sourcedb_map':
        ingredient.FileField(
            '--sourcedb_map',
            help='''mapfile containing (host, path) pairs to a db of sources
            found in the image'''),
        'target_mapfile':
        ingredient.FileField(
            '--target-mapfile',
            help="Mapfile containing (host, path) pairs to the concatenated and"
            "combined measurement set, the source for the actual sky image"),
        'minbaseline':
        ingredient.FloatField(
            '--minbaseline',
            help='''Minimum length of the baseline used for the images'''),
        'maxbaseline':
        ingredient.FloatField(
            '--maxbaseline',
            help='''Maximum length of the baseline used for the images'''),
        'output_image_mapfile':
        ingredient.FileField(
            '--output-image-mapfile',
            help='''mapfile containing (host, path) pairs with the final
            output image (hdf5) location'''),
        'processed_ms_dir':
        ingredient.StringField(
            '--processed-ms-dir',
            help='''Path to directory for processed measurement sets'''),
        'fillrootimagegroup_exec':
        ingredient.ExecField(
            '--fillrootimagegroup_exec',
            help='''Full path to the fillRootImageGroup executable'''),
        'placed_image_mapfile':
        ingredient.FileField(
            '--placed-image-mapfile',
            help="location of mapfile with processed and correctly placed,"
            " hdf5 images"),
        'placed_correlated_mapfile':
        ingredient.FileField(
            '--placed-correlated-mapfile',
            help="location of mapfile with processedd and correctly placed,"
            " correlated ms"),
        'concat_ms_map_path':
        ingredient.FileField('--concat-ms-map-path',
                             help="Output of the concat MS file"),
        'output_correlated_mapfile':
        ingredient.FileField(
            '--output-correlated-mapfile',
            help="location of mapfile where output paths for mss are located"),
        'msselect_executable':
        ingredient.ExecField('--msselect-executable',
                             help="The full path to the msselect executable "),
    }

    outputs = {
        'placed_image_mapfile': ingredient.StringField(),
        'placed_correlated_mapfile': ingredient.StringField(),
    }

    def go(self):
        """
        Steps:

        1. Load and validate the input datamaps
        2. Run the node parts of the recipe
        3. Validate node output and format the recipe output
        """
        super(selfcal_finalize, self).go()
        # *********************************************************************
        # 1. Load the datamaps
        awimager_output_map = DataMap.load(self.inputs["awimager_output_map"])
        ms_per_image_map = DataMap.load(self.inputs["ms_per_image_map"])
        sourcelist_map = DataMap.load(self.inputs["sourcelist_map"])
        sourcedb_map = DataMap.load(self.inputs["sourcedb_map"])
        target_mapfile = DataMap.load(self.inputs["target_mapfile"])
        output_image_mapfile = DataMap.load(
            self.inputs["output_image_mapfile"])
        concat_ms_mapfile = DataMap.load(self.inputs["concat_ms_map_path"])
        output_correlated_map = DataMap.load(
            self.inputs["output_correlated_mapfile"])
        processed_ms_dir = self.inputs["processed_ms_dir"]
        fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"]

        # Align the skip fields
        align_data_maps(awimager_output_map, ms_per_image_map, sourcelist_map,
                        target_mapfile, output_image_mapfile, sourcedb_map,
                        concat_ms_mapfile, output_correlated_map)

        # Set the correct iterator
        sourcelist_map.iterator = awimager_output_map.iterator = \
            ms_per_image_map.iterator = target_mapfile.iterator = \
            output_image_mapfile.iterator = sourcedb_map.iterator = \
            concat_ms_mapfile.iterator = output_correlated_map.iterator = \
            DataMap.SkipIterator

        # *********************************************************************
        # 2. Run the node side of the recipe
        command = "python3 %s" % (self.__file__.replace("master", "nodes"))
        jobs = []
        for (awimager_output_item, ms_per_image_item, sourcelist_item,
             target_item, output_image_item, sourcedb_item, concat_ms_item,
             correlated_item) in zip(awimager_output_map, ms_per_image_map,
                                     sourcelist_map, target_mapfile,
                                     output_image_mapfile, sourcedb_map,
                                     concat_ms_mapfile, output_correlated_map):
            # collect the files as argument
            arguments = [
                awimager_output_item.file,
                ms_per_image_item.file,
                sourcelist_item.file,
                target_item.file,
                output_image_item.file,
                self.inputs["minbaseline"],
                self.inputs["maxbaseline"],
                processed_ms_dir,
                fillrootimagegroup_exec,
                self.environment,
                sourcedb_item.file,
                concat_ms_item.file,
                correlated_item.file,
                self.inputs["msselect_executable"],
            ]

            self.logger.info(
                "Starting finalize with the folowing args: {0}".format(
                    arguments))
            jobs.append(ComputeJob(target_item.host, command, arguments))

        self._schedule_jobs(jobs)

        # *********************************************************************
        # 3. Validate the performance of the node script and assign output
        successful_run = False
        for (job, output_image_item,
             output_correlated_item) in zip(jobs, output_image_mapfile,
                                            output_correlated_map):
            if "hdf5" not in job.results:
                # If the output failed, set skip to True on both outputs
                output_image_item.skip = True
                output_correlated_item.skip = True
            else:
                successful_run = True
                # signal that we have at least a single run finished ok.
                # No need to set skip in this case

        if not successful_run:
            self.logger.warn("Not a single finalizer succeeded")
            return 1

        # Save the location of the output images
        output_image_mapfile.save(self.inputs['placed_image_mapfile'])
        self.logger.debug(
            "Wrote mapfile containing placed hdf5 images: {0}".format(
                self.inputs['placed_image_mapfile']))

        # save the location of measurements sets
        output_correlated_map.save(self.inputs['placed_correlated_mapfile'])
        self.logger.debug("Wrote mapfile containing placed mss: {0}".format(
            self.inputs['placed_correlated_mapfile']))

        self.outputs["placed_image_mapfile"] = self.inputs[
            'placed_image_mapfile']
        self.outputs["placed_correlated_mapfile"] = self.inputs[
            'placed_correlated_mapfile']

        return 0
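
Several of these recipes set DataMap.SkipIterator before zipping maps together; it makes iteration silently drop entries whose skip flag is set, so failed data products fall out of the subsequent steps. A toy illustration, assuming DataMap accepts a list of {'host', 'file', 'skip'} dicts as its constructor argument (hypothetical hosts and paths):

from lofarpipe.support.data_map import DataMap

data = DataMap([
    {'host': 'locus001', 'file': '/data/scratch/obs/a.MS', 'skip': False},
    {'host': 'locus002', 'file': '/data/scratch/obs/b.MS', 'skip': True},
])

# The default iterator yields every item; SkipIterator yields only the
# entries whose skip flag is False.
data.iterator = DataMap.SkipIterator
for item in data:
    print(item.host, item.file)   # prints only the locus001 entry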
Example #6
class selfcal_awimager(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Master script for the awimager. Collects arguments from command line and
    pipeline inputs.
    
    1. Load mapfiles and validate these
    2. Run the awimage node scripts
    3. Retrieve output. Construct output map file succesfull runs
    
    Details regarding the implementation of the imaging step can be found in 
    the node recipe 
    **CommandLine Arguments**
    
    A mapfile containing (node, datafile) pairs. The measurements set use as
    input for awimager executable  
 
    """
    inputs = {
        'executable': ingredient.ExecField(
            '--executable',
            help = "The full path to the  awimager executable"
        ),
        'parset': ingredient.FileField(
            '-p', '--parset',
            help = "The full path to a awimager configuration parset."
        ),
        'working_directory': ingredient.StringField(
            '-w', '--working-directory',
            help = "Working directory used on output nodes. Results location"
        ),
        'output_image': ingredient.StringField(
            '--output-image',
            help = "Path of the image to be create by the awimager"
        ),
        'mapfile': ingredient.StringField(
            '--mapfile',
            help = "Full path for output mapfile. A list of the"
                 "successfully generated images will be written here"
        ),
        'sourcedb_path': ingredient.StringField(
            '--sourcedb-path',
            help = "Full path of sourcedb used to create a mask for known sources"
        ),
        'mask_patch_size': ingredient.FloatField(
            '--mask-patch-size',
            help = "Scale factor for patches in the awimager mask"
        ),
        'autogenerate_parameters': ingredient.BoolField(
            '--autogenerate-parameters',
            default = True,
            help = "Turns on the autogeneration of: cellsize, image-size, fov."
            " MSSS 'type' functionality"
        ),
        'specify_fov': ingredient.BoolField(
            '--specify-fov',
            default = False,
            help = "calculated Image parameters are relative to fov, parameter"
            " is active when autogenerate_parameters is False"
        ),
        'fov': ingredient.FloatField(
            '--fov',
            default = 0.0,
            help = "calculated Image parameters are relative to this"
            " Field Of View in arcSec. This parameter is obligatory when"
            " specify_fov is True"
        ),
        'major_cycle': ingredient.IntField(
            '--major_cycle',
            help = "The number of the current cycle to modify the parset."
        ),
        'nr_cycles': ingredient.IntField(
            '--nr-cycles',
            help = "The number of major cycles."
        ),
        'perform_self_cal': ingredient.BoolField(
            '--perform-self-cal',
            default = False,
            help = "Control the usage of the self-calibration functionality"
        )
    }

    outputs = {
        'mapfile': ingredient.StringField(),
    }

    def go(self):
        """
        This member contains all the functionality of the imager_awimager.
        Functionality is all located at the node side of the script.
        """
        super(selfcal_awimager, self).go()
        self.logger.info("Starting imager_awimager run")

        # *********************************************************************
        # 1. collect the inputs and validate
        input_map = DataMap.load(self.inputs['args'][0])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

        if not validate_data_maps(input_map, sourcedb_map):
            self.logger.error(
                        "the supplied input_ms mapfile and sourcedb mapfile "
                        "are incorrect. Aborting")
            self.logger.error(repr(input_map))
            self.logger.error(repr(sourcedb_map))
            return 1

        # *********************************************************************
        # 2. Start the node side of the awimager recipe
        # Compile the command to be executed on the remote machine
        node_command = "python3 %s" % (self.__file__.replace("master", "nodes"))
        jobs = []

        output_map = copy.deepcopy(input_map)        
        align_data_maps(input_map, output_map, sourcedb_map)

        sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
            DataMap.SkipIterator

        for measurement_item, source_item in zip(input_map, sourcedb_map):
            if measurement_item.skip or source_item.skip:
                jobs.append(None)
                continue
            # both the sourcedb and the measurement are in a map
            # unpack both
            host, measurement_path = measurement_item.host, measurement_item.file
            sourcedb_path = source_item.file

            # construct and save the output name
            arguments = [self.inputs['executable'],
                         self.environment,
                         self.inputs['parset'],
                         self.inputs['working_directory'],
                         self.inputs['output_image'],
                         measurement_path,
                         sourcedb_path,
                         self.inputs['mask_patch_size'],
                         self.inputs['autogenerate_parameters'],
                         self.inputs['specify_fov'],
                         self.inputs['fov'],
                         self.inputs['major_cycle'],
                         self.inputs['nr_cycles'],
                         self.inputs['perform_self_cal']
                         ]

            jobs.append(ComputeJob(host, node_command, arguments))
        self._schedule_jobs(jobs)

        # *********************************************************************
        # 3. Check output of the node scripts

        for job, output_item in zip(jobs, output_map):
            # job == None on skipped job
            if job is None or "image" not in job.results:
                output_item.file = "failed"
                output_item.skip = True
            else:
                output_item.file = job.results["image"]
                output_item.skip = False

        # Check if there are finished runs
        successful_runs = any(not item.skip for item in output_map)

        if not successful_runs:
            self.logger.error(
                    "None of the started awimager runs finished correctly")
            self.logger.error(
                    "No work left to be done: exiting with error status")
            return 1

        # If partial success
        if self.error.isSet():
            self.logger.error("Failed awimager node run detected. Continuing "
                              "with successful tasks.")

        self._store_data_map(self.inputs['mapfile'], output_map,
                             "mapfile containing produced awimages")

        self.outputs["mapfile"] = self.inputs['mapfile']
        return 0
Example #7
class demixing(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Run the demixer on the MSs on the compute nodes.
    """
    inputs = {
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Name of the output mapfile containing the names of the "
            "MS-files produced by the demixing recipe"),
        'working_directory':
        ingredient.StringField('-w',
                               '--working-directory',
                               help="Working directory used on output nodes. "
                               "Results will be written here"),
        'initscript':
        ingredient.FileField(
            '--initscript',
            help="The full path to an (Bourne) shell script which will "
            "intialise the environment (ie, ``lofarinit.sh``)"),
        'demix_parset_dir':
        ingredient.DirectoryField(
            '--demix-parset-dir',
            dest='demixdir',
            help="Directory containing the demixing parset-files",
        ),
        'db_host':
        ingredient.StringField(
            '--db-host',
            dest="db_host",
            help="Database host with optional port (e.g. ldb001)"),
        'skymodel':
        ingredient.FileField(
            '--skymodel',
            help="File containing the sky model to use",
        ),
        'demix_sources':
        ingredient.ListField(
            '--demix-sources',
            dest='remove',
            help="List of sources to remove e.g. 'CygA, CasA'; "
            "will be determined automatically if not specified.",
            default=[]),
        'ms_target':
        ingredient.StringField(
            '--ms-target',
            dest='target',
            help="Substring in the output MS name that replaces the "
            "substring 'uv' (default: 'target')",
            default="target"),
        'timestep':
        ingredient.IntField('--timestep',
                            help="Time step for averaging",
                            default=10),
        'freqstep':
        ingredient.IntField('--freqstep',
                            help="Frequency step for averaging",
                            default=60),
        'half_window':
        ingredient.IntField('--half-window',
                            help="Window size of median filter",
                            default=20),
        'threshold':
        ingredient.FloatField(
            '--threshold',
            help="Solutions above/below threshold*rms are smoothed",
            default=2.5),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=1)
    }

    outputs = {'mapfile': ingredient.FileField()}

    def go(self):
        self.logger.info("Starting demixing run")
        super(demixing, self).go()

        job_dir = os.path.join(self.inputs['working_directory'],
                               self.inputs['job_name'])

        #                       Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = load_data_map(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = load_data_map(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1
        else:
            # This is a bit of a kludge. The input MS-filenames are supposed to
            # contain the string "_uv". The demixing node script will produce
            # output MS-files, whose names have the string "_uv" replaced by
            # "_" + self.inputs['ms_target'] + "_sub".
            outdata = [(host,
                        os.path.join(
                            job_dir,
                            os.path.basename(infile).replace(
                                '_uv',
                                '_' + self.inputs['ms_target'] + '_sub')))
                       for host, infile in indata]

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, infile in indata:
            jobs.append(
                ComputeJob(
                    host,
                    command,
                    arguments=[
                        infile, job_dir, self.inputs['initscript'],
                        self.inputs['demix_sources'], self.inputs['ms_target'],
                        self.config.get('cluster', 'clusterdesc'),
                        self.inputs['timestep'], self.inputs['freqstep'],
                        self.inputs['half_window'], self.inputs['threshold'],
                        self.inputs['demix_parset_dir'],
                        self.inputs['skymodel'], self.inputs['db_host']
                    ]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            return 1
        else:
            self.logger.debug("Writing mapfile %s" % self.inputs['mapfile'])
            store_data_map(self.inputs['mapfile'], outdata)
            self.outputs['mapfile'] = self.inputs['mapfile']
            return 0
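
load_data_map and store_data_map here use the older mapfile flavour: a file containing a Python literal list of (host, path) tuples, rather than the DataMap objects of the later examples. A hedged sketch of that round trip, assuming the literal-list format (the real helpers live in the pipeline support library):

from ast import literal_eval


def store_data_map(filename, data):
    # data is a list of (host, filepath) tuples, written as a Python literal
    with open(filename, 'w') as mapfile:
        mapfile.write(repr(data))


def load_data_map(filename):
    # Parse the literal back into a list of (host, filepath) tuples
    with open(filename) as mapfile:
        return literal_eval(mapfile.read())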