Example #1
    def _validate_input_data(self, slice_paths_map, input_map):
        """
        Perform a validation of the supplied slice_paths_map and input_map.
        Display an error message if the validation fails.
        """
        validation_failed = None
        error_received = None
        try:
            validation_failed = not validate_data_maps(slice_paths_map,
                                                     input_map)
        except AssertionError as exception:
            validation_failed = True
            error_received = str(exception)

        if validation_failed:
            self.logger.error(error_received)
            self.logger.error("Incorrect mapfiles: {0} and {1}".format(
                 self.inputs["slice_paths_mapfile"], self.inputs['args'][0]))
            self.logger.error("content input_map: \n{0}".format(input_map))
            self.logger.error("content slice_paths_map: \n{0}".format(
                                                            slice_paths_map))
            # return with failure
            return 1

        # return with zero (all is ok state)
        return 0
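Every example on this page funnels its consistency checks through validate_data_maps, which these recipes import from the LOFAR pipeline support code (presumably lofarpipe.support.data_map, alongside DataMap and DataProduct). As a rough mental model only, the examples treat it as a check that all supplied maps have the same length and that corresponding entries point at the same host; Example #1 additionally catches an AssertionError, suggesting the real helper may assert on malformed input. The sketch below illustrates that assumed behaviour with a hypothetical Entry stand-in; it is not the library implementation.

from collections import namedtuple

# Hypothetical stand-in for a data-map entry; the real DataProduct also carries
# a host, a file path, and a skip flag.
Entry = namedtuple("Entry", ["host", "file", "skip"])

def validate_data_maps_sketch(*maps):
    """Sketch: True when all maps have equal length and matching hosts per index."""
    if not maps:
        return True
    # All maps must describe the same number of data products ...
    if any(len(data_map) != len(maps[0]) for data_map in maps[1:]):
        return False
    # ... and corresponding entries must live on the same host.
    for entries in zip(*maps):
        if len({entry.host for entry in entries}) != 1:
            return False
    return True

# Aligned maps validate; a host mismatch does not.
map_a = [Entry("node01", "/data/L1.MS", False)]
map_b = [Entry("node01", "/data/L1.parmdb", False)]
map_c = [Entry("node02", "/data/L1.sourcedb", False)]
assert validate_data_maps_sketch(map_a, map_b)
assert not validate_data_maps_sketch(map_a, map_c)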
Example #2
    def _load_mapfiles(self):
        """
        Load data map file, instrument map file, and sky map file.
        Update the 'skip' fields in these map files: if 'skip' is True in any
        of the maps, then 'skip' must be set to True in all maps.
        """
        self.logger.debug("Loading map files:"
            "\n\tdata map: %s\n\tinstrument map: %s\n\tsky map: %s" % (
                self.inputs['args'][0], 
                self.inputs['instrument_mapfile'],
                self.inputs['sky_mapfile']
            )
        )
        self.data_map = DataMap.load(self.inputs['args'][0])
        self.inst_map = DataMap.load(self.inputs['instrument_mapfile'])
        self.sky_map = DataMap.load(self.inputs['sky_mapfile'])

        if not validate_data_maps(self.data_map, self.inst_map, self.sky_map):
            self.logger.error("Validation of input data mapfiles failed")
            return False

        # Update the skip fields of the three maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for x, y, z in zip(self.data_map, self.inst_map, self.sky_map):
            x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip)
        
        return True
Example #3
 def _get_io_product_specs(self):
     """
     Get input- and output-data product specifications from the
     parset-file, and do some sanity checks.
     """
     dps = self.parset.makeSubset(
         self.parset.fullModuleName('DataProducts') + '.'
     )
     self.input_data = DataMap([
         tuple(os.path.join(location, filename).split(':')) + (skip,)
             for location, filename, skip in zip(
                 dps.getStringVector('Input_Correlated.locations'),
                 dps.getStringVector('Input_Correlated.filenames'),
                 dps.getBoolVector('Input_Correlated.skip'))
     ])
     self.logger.debug("%d Input_Correlated data products specified" %
                       len(self.input_data))
     self.output_data = DataMap([
         tuple(os.path.join(location, filename).split(':')) + (skip,)
             for location, filename, skip in zip(
                 dps.getStringVector('Output_Correlated.locations'),
                 dps.getStringVector('Output_Correlated.filenames'),
                 dps.getBoolVector('Output_Correlated.skip'))
     ])
     self.logger.debug("%d Output_Correlated data products specified" %
                       len(self.output_data))
     # Sanity checks on input- and output data product specifications
     if not validate_data_maps(self.input_data, self.output_data):
         raise PipelineException(
             "Validation of input/output data product specification failed!"
         )
Example #4
 def _get_io_product_specs(self):
     """
     Get input- and output-data product specifications from the
     parset-file, and do some sanity checks.
     """
     dps = self.parset.makeSubset(
         self.parset.fullModuleName('DataProducts') + '.'
     )
     self.input_data = DataMap([
         tuple(os.path.join(location, filename).split(':')) + (skip,)
             for location, filename, skip in zip(
                 dps.getStringVector('Input_Correlated.locations'),
                 dps.getStringVector('Input_Correlated.filenames'),
                 dps.getBoolVector('Input_Correlated.skip'))
     ])
     self.logger.debug("%d Input_Correlated data products specified" %
                       len(self.input_data))
     self.output_data = DataMap([
         tuple(os.path.join(location, filename).split(':')) + (skip,)
             for location, filename, skip in zip(
                 dps.getStringVector('Output_Correlated.locations'),
                 dps.getStringVector('Output_Correlated.filenames'),
                 dps.getBoolVector('Output_Correlated.skip'))
     ])
     self.logger.debug("%d Output_Correlated data products specified" %
                       len(self.output_data))
     # Sanity checks on input- and output data product specifications
     if not validate_data_maps(self.input_data, self.output_data):
         raise PipelineException(
             "Validation of input/output data product specification failed!"
         )
Example #5
    def _load_mapfiles(self):
        """
        Load data map file, instrument map file, and sky map file.
        Update the 'skip' fields in these map files: if 'skip' is True in any
        of the maps, then 'skip' must be set to True in all maps.
        """
        self.logger.debug(
            "Loading map files:"
            "\n\tdata map: %s\n\tinstrument map: %s\n\tsky map: %s" %
            (self.inputs['args'][0], self.inputs['instrument_mapfile'],
             self.inputs['sky_mapfile']))
        self.data_map = DataMap.load(self.inputs['args'][0])
        self.inst_map = DataMap.load(self.inputs['instrument_mapfile'])
        self.sky_map = DataMap.load(self.inputs['sky_mapfile'])

        if not validate_data_maps(self.data_map, self.inst_map, self.sky_map):
            self.logger.error("Validation of input data mapfiles failed")
            return False

        # Update the skip fields of the three maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for x, y, z in zip(self.data_map, self.inst_map, self.sky_map):
            x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip)

        return True
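Examples #2 and #5 synchronise the skip flags of several maps by hand, and later examples (#15, #16, #20) call align_data_maps for the same purpose. The sketch below illustrates that skip-alignment idea under the assumption that an entry is a plain (host, file, skip) record; it is not the lofarpipe implementation.

class Entry:
    """Hypothetical stand-in for a data-map entry."""
    def __init__(self, host, file, skip=False):
        self.host, self.file, self.skip = host, file, skip

def align_data_maps_sketch(*maps):
    """Sketch: if any map marks an index as skipped, mark it skipped in every map."""
    if not maps:
        return
    if any(len(data_map) != len(maps[0]) for data_map in maps):
        raise ValueError("data maps must have equal length to be aligned")
    for entries in zip(*maps):
        skip = any(entry.skip for entry in entries)
        for entry in entries:
            entry.skip = skip

# Equivalent to the manual three-map loop in Examples #2 and #5.
data_map = [Entry("node01", "L1.MS"), Entry("node02", "L2.MS", skip=True)]
inst_map = [Entry("node01", "L1.instrument"), Entry("node02", "L2.instrument")]
sky_map = [Entry("node01", "L1.sky"), Entry("node02", "L2.sky")]
align_data_maps_sketch(data_map, inst_map, sky_map)
assert inst_map[1].skip and sky_map[1].skip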
Example #6
 def _validate_io_product_specs(self):
     """
     Sanity checks on input- and output data product specifications
     """
     if not validate_data_maps(self.input_data['data'],
                               self.input_data['instrument'],
                               self.output_data['data']):
         raise PipelineException(
             "Validation of input/output data product specification failed!"
         )
Example #7
 def _validate_io_product_specs(self):
     """
     Sanity checks on input- and output data product specifications
     """
     if not validate_data_maps(
         self.input_data['data'],
         self.input_data['instrument'],
         self.output_data['data']
      ):
          raise PipelineException(
              "Validation of input/output data product specification failed!"
          )
Example #8
 def _validate_input_data(self, slice_paths_map, input_map):
     """
      Perform a validation of the supplied slice_paths_map and input_map.
      Display an error message if the validation fails.
     """
     validation_failed = None
     error_received = None
     try:
         validation_failed = not validate_data_maps(slice_paths_map,
                                                    input_map)
      except AssertionError as exception:
         validation_failed = True
         error_received = str(exception)
Example #9
 def _validate_input_data(self, slice_paths_map, input_map):
     """
      Perform a validation of the supplied slice_paths_map and input_map.
      Display an error message if the validation fails.
     """
     validation_failed = None
     error_received = None
     try:
         validation_failed = not validate_data_maps(slice_paths_map,
                                                  input_map)
      except AssertionError as exception:
         validation_failed = True
         error_received = str(exception)
Example #10
    def _bbs(self,
             timeslice_map_path,
             parmdbs_map_path,
             sourcedb_map_path,
             skip=False):
        """
        Perform a calibration step. First with a set of sources from the
        gsm and in later iterations also on the found sources
        """
        # create parset for bbs run
        parset = self.parset.makeSubset("BBS.")
        parset_path = self._write_parset_to_file(
            parset, "bbs", "Parset for calibration with a local sky model")

        # create the output file path
        output_mapfile = self._write_datamap_to_file(
            None, "bbs_output", "Mapfile with calibrated measurement sets.")

        converted_sourcedb_map_path = self._write_datamap_to_file(
            None, "source_db", "correctly shaped mapfile for input sourcedbs")

        if skip:
            return output_mapfile

        # The create db step produces a mapfile with a single sourcelist for
        # the different timeslices. Generate a mapfile with copies of the
        # sourcelist location: this allows validation of the maps in combination.
        # Get the original map data
        sourcedb_map = DataMap.load(sourcedb_map_path)
        parmdbs_map = MultiDataMap.load(parmdbs_map_path)
        converted_sourcedb_map = []

        # sanity check for correct output from the previous recipes
        if not validate_data_maps(sourcedb_map, parmdbs_map):
            self.logger.error("The input files for bbs do not contain "
                              "matching host names for each entry content:")
            self.logger.error(repr(sourcedb_map))
            self.logger.error(repr(parmdbs_map))
            raise PipelineException("Invalid input data for imager_bbs recipe")

        self.run_task("imager_bbs",
                      timeslice_map_path,
                      parset=parset_path,
                      instrument_mapfile=parmdbs_map_path,
                      sourcedb_mapfile=sourcedb_map_path,
                      mapfile=output_mapfile,
                      working_directory=self.scratch_directory)

        return output_mapfile
Example #11
    def _bbs(self, timeslice_map_path, parmdbs_map_path, sourcedb_map_path,
             skip=False):
        """
        Perform a calibration step. First with a set of sources from the
        gsm and in later iterations also on the found sources
        """
        # create parset for bbs run
        parset = self.parset.makeSubset("BBS.")
        parset_path = self._write_parset_to_file(parset, "bbs",
                        "Parset for calibration with a local sky model")

        # create the output file path
        output_mapfile = self._write_datamap_to_file(None, "bbs_output",
                        "Mapfile with calibrated measurement sets.")

        converted_sourcedb_map_path = self._write_datamap_to_file(None,
                "source_db", "correctly shaped mapfile for input sourcedbs")

        if skip:
            return output_mapfile

        # The create db step produces a mapfile with a single sourcelist for
        # the different timeslices. Generate a mapfile with copies of the
        # sourcelist location: this allows validation of the maps in combination.
        # Get the original map data
        sourcedb_map = DataMap.load(sourcedb_map_path)
        parmdbs_map = MultiDataMap.load(parmdbs_map_path)
        converted_sourcedb_map = []

        # sanity check for correct output from the previous recipes
        if not validate_data_maps(sourcedb_map, parmdbs_map):
            self.logger.error("The input files for bbs do not contain "
                                "matching host names for each entry content:")
            self.logger.error(repr(sourcedb_map))
            self.logger.error(repr(parmdbs_map))
            raise PipelineException("Invalid input data for imager_bbs recipe")

        self.run_task("imager_bbs",
                      timeslice_map_path,
                      parset=parset_path,
                      instrument_mapfile=parmdbs_map_path,
                      sourcedb_mapfile=sourcedb_map_path,
                      mapfile=output_mapfile,
                      working_directory=self.scratch_directory)

        return output_mapfile
Example #12
    def go(self):
        self.logger.info("Starting setupsourcedb run")
        super(setupsourcedb, self).go()

        # *********************************************************************
        # 1. Load input and output mapfiles. Validate

        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed"
                )
                return 1
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    os.path.basename(item.file) + self.inputs['suffix']
                )

        # *********************************************************************
        # 2. Check if input skymodel file exists. If not, make filename empty.
        try:
            skymodel = self.inputs['skymodel']
        except KeyError:
            skymodel = ""
            self.logger.info("No skymodel specified. Using an empty one")

        # ********************************************************************
        # 3. Call node side of script
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        outdata.iterator = DataMap.SkipIterator
        jobs = []
        for outp in outdata:
            jobs.append(
                ComputeJob(
                    outp.host,
                    command,
                    arguments=[
                        self.inputs['executable'],
                        skymodel,
                        outp.file,
                        self.inputs['type']
                    ]
                )
            )
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True

        # *********************************************************************
        # 4. Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        self.logger.debug("Writing sky map file: %s" % self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
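A recurring pattern in these go() methods (Example #12 above and those that follow) is setting a map's iterator to DataMap.SkipIterator before building compute jobs, so that entries flagged as skipped are never scheduled. The following is a simplified model of that behaviour, assuming the iterator simply filters on the skip flag; the real DataMap class may differ in detail.

from collections import namedtuple

Entry = namedtuple("Entry", ["host", "file", "skip"])  # stand-in for DataProduct

class SketchDataMap:
    """Assumed behaviour only: iterate all entries, or only the non-skipped ones."""
    def __init__(self, entries):
        self._entries = list(entries)
        self.iterator = iter  # default iterator visits every entry

    @staticmethod
    def SkipIterator(entries):
        # Yield only entries whose skip flag is False.
        return (item for item in entries if not item.skip)

    def __iter__(self):
        return self.iterator(self._entries)

outdata = SketchDataMap([Entry("node01", "a.MS", False),
                         Entry("node02", "b.MS", True)])
outdata.iterator = SketchDataMap.SkipIterator
assert [item.file for item in outdata] == ["a.MS"]  # the skipped entry is never scheduled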
Example #13
    def go(self):
        super(gainoutliercorrection, self).go()
        self.logger.info("Starting gainoutliercorrection run")
        # ********************************************************************
        # 1. Validate input
        # If sigma is None, use the default behaviour and use the executable:
        # test if it exists
        executable = self.inputs['executable']
        if executable == "":
            pass
        elif not os.access(executable, os.X_OK):
            self.logger.warn(
                "No parmexportcal excecutable is not found on the suplied"
                "path: {0}".format(self.inputs['executable']))
            self.logger.warn("Defaulting to edit_parmdb behaviour")

        # ********************************************************************
        # 2. load mapfiles, validate if a target output location is provided
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'], self.inputs['job_name'],
                    (os.path.splitext(os.path.basename(item.file))[0] +
                     self.inputs['suffix']))

        # Update the skip fields of the two maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for x, y in zip(indata, outdata):
            x.skip = y.skip = (x.skip or y.skip)

        # ********************************************************************
        # 3. Call node side of the recipe
        command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
        indata.iterator = outdata.iterator = DataMap.SkipIterator
        jobs = []
        for inp, outp in zip(indata, outdata):
            jobs.append(
                ComputeJob(outp.host,
                           command,
                           arguments=[
                               inp.file, outp.file, self.inputs['executable'],
                               self.environment, self.inputs['sigma'],
                               self.inputs['export_instrument_model']
                           ]))
        self._schedule_jobs(jobs)
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True

        # ********************************************************************
        # 4. validate performance, return corrected files
        if self.error.isSet():
            self.logger.warn("Detected failed gainoutliercorrection job")
            return 1
        else:
            self.logger.debug("Writing instrument map file: %s" %
                              self.inputs['mapfile'])
            outdata.save(self.inputs['mapfile'])
            self.outputs['mapfile'] = self.inputs['mapfile']
            return 0
Example #14
    def go(self):
        self.logger.info("Starting DPPP run")
        super(dppp, self).go()

        #        #                Keep track of "Total flagged" messages in the DPPP logs
        #        # ----------------------------------------------------------------------
        #        self.logger.searchpatterns["fullyflagged"] = "Fully flagged baselines"

        # *********************************************************************
        # 1. load input data file, validate output vs the input location if
        #    output locations are provided
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'], self.inputs['job_name'],
                    os.path.basename(item.file) + self.inputs['suffix'])

        # ********************************************************************
        # 2. Load parmdb and sourcedb
        # Load parmdb-mapfile, if one was given.
        if 'parmdb_mapfile' in self.inputs:
            self.logger.debug("Loading parmdb mapfile: %s" %
                              self.inputs['parmdb_mapfile'])
            parmdbdata = DataMap.load(self.inputs['parmdb_mapfile'])
        else:
            parmdbdata = copy.deepcopy(indata)
            for item in parmdbdata:
                item.file = ''

        # Load sourcedb-mapfile, if one was given.
        if 'sourcedb_mapfile' in self.inputs:
            self.logger.debug("Loading sourcedb mapfile: %s" %
                              self.inputs['sourcedb_mapfile'])
            sourcedbdata = DataMap.load(self.inputs['sourcedb_mapfile'])
        else:
            sourcedbdata = copy.deepcopy(indata)
            for item in sourcedbdata:
                item.file = ''

        # Validate all the data maps.
        if not validate_data_maps(indata, outdata, parmdbdata, sourcedbdata):
            self.logger.error("Validation of data mapfiles failed!")
            return 1

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for w, x, y, z in zip(indata, outdata, parmdbdata, sourcedbdata):
            w.skip = x.skip = y.skip = z.skip = (w.skip or x.skip or y.skip
                                                 or z.skip)

        # ********************************************************************
        # 3. Call the node side of the recipe
        # Create and schedule the compute jobs
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        indata.iterator = outdata.iterator = DataMap.SkipIterator
        parmdbdata.iterator = sourcedbdata.iterator = DataMap.SkipIterator
        jobs = []
        for inp, outp, pdb, sdb in zip(indata, outdata, parmdbdata,
                                       sourcedbdata):
            jobs.append(
                ComputeJob(inp.host,
                           command,
                           arguments=[
                               inp.file, outp.file, pdb.file, sdb.file,
                               self.inputs['parset'],
                               self.inputs['executable'], self.environment,
                               self.inputs['demix_always'],
                               self.inputs['demix_if_needed'],
                               self.inputs['data_start_time'],
                               self.inputs['data_end_time'],
                               self.inputs['nthreads'], self.inputs['clobber']
                           ],
                           resources={"cores": self.inputs['nthreads']}))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True


#        # *********************************************************************
#        # 4. parse logfile for fully flagged baselines
#        matches = self.logger.searchpatterns["fullyflagged"].results
#        self.logger.searchpatterns.clear() # finished searching
#        stripchars = "".join(set("Fully flagged baselines: "))
#        baselinecounter = defaultdict(lambda: 0)
#        for match in matches:
#            for pair in (
#                pair.strip(stripchars) for pair in match.getMessage().split(";")
#            ):
#                baselinecounter[pair] += 1
#        self.outputs['fullyflagged'] = baselinecounter.keys()

        # *********************************************************************
        # 4. Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")
        self.logger.debug("Writing data map file: %s" % self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
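The recipes from Example #12 onwards end with essentially the same bookkeeping: flag the output entry of every failed node job as skipped, bail out only when every job failed, and otherwise save the partially skipped map. Below is a condensed, self-contained sketch of that shared pattern, using hypothetical stand-ins for the job and map-entry objects and collapsing the self.error.isSet() check into a direct scan of the return codes.

import logging
from collections import namedtuple

Job = namedtuple("Job", ["host", "results"])  # stand-in for a finished ComputeJob

class Entry:
    """Hypothetical stand-in for an output data-map entry."""
    def __init__(self, host, file, skip=False):
        self.host, self.file, self.skip = host, file, skip

def check_job_results_sketch(jobs, outdata, logger):
    """Sketch of the shared post-scheduling logic in these go() methods."""
    # Mark the output of every failed node job as skipped.
    for job, outp in zip(jobs, outdata):
        if job.results['returncode'] != 0:
            outp.skip = True
    # Abort only when every job failed; otherwise continue with the successes.
    if all(job.results['returncode'] != 0 for job in jobs):
        logger.error("All jobs failed. Bailing out!")
        return 1
    if any(job.results['returncode'] != 0 for job in jobs):
        logger.warning("Some jobs failed, continuing with succeeded runs")
    return 0

jobs = [Job("node01", {"returncode": 0}), Job("node02", {"returncode": 1})]
outdata = [Entry("node01", "a.MS"), Entry("node02", "b.MS")]
assert check_job_results_sketch(jobs, outdata, logging.getLogger("sketch")) == 0
assert outdata[1].skip  # the failed job's output is excluded from the saved map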
Example #15
    def go(self):
        if 'executable' in self.inputs:
            executable = self.inputs['executable']

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}

        # *********************************************************************
        # try loading input/output data file, validate output vs the input location if
        #    output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        prefix = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be placed in the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(prefix, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(prefix, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        prefix,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if not self.inputs['inputkeys'] and self.inputs['inputkey']:
            self.inputs['inputkeys'].append(self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap in zip(self.inputs['inputkeys'], inputmapfiles):
                filedict[key] = []
                for inp in filemap:
                    filedict[key].append(inp.file)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        command = "python %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            #if keylist:
                #for name, value in zip(keylist, inputlist):
            if filedict:
                for name, value in filedict.items():
                    if arglist_copy and name in arglist_copy:
                        ind = arglist_copy.index(name)
                        arglist_copy[ind] = value[i]
                    elif name in parsetdict_copy.values():
                        for k, v in parsetdict_copy.items():
                            if v == name:
                                parsetdict_copy[k] = value[i]
                    else:
                        parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        prefix,
                        self.inputs['parsetasfile'],
                        args_format,
                        #self.inputs['working_directory'],
                        self.environment
                    ]
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
            for k, v in job.results.items():
                if not k in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # temp solution. write all output dict entries to a mapfile
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        for k, v in jobresultdict.items():
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
Example #16
    def go(self):
        """
        This member contains all the functionality of the imager_awimager.
        Functionality is all located at the node side of the script.
        """
        super(selfcal_awimager, self).go()
        self.logger.info("Starting imager_awimager run")

        # *********************************************************************
        # 1. collect the inputs and validate
        input_map = DataMap.load(self.inputs['args'][0])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

        if not validate_data_maps(input_map, sourcedb_map):
            self.logger.error(
                        "the supplied input_ms mapfile and sourcedb mapfile"
                        "are incorrect. Aborting")
            self.logger.error(repr(input_map))
            self.logger.error(repr(sourcedb_map))
            return 1

        # *********************************************************************
        # 2. Start the node side of the awimager recipe
        # Compile the command to be executed on the remote machine
        node_command = "python3 %s" % (self.__file__.replace("master", "nodes"))
        jobs = []

        output_map = copy.deepcopy(input_map)        
        align_data_maps(input_map, output_map, sourcedb_map)

        sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
            DataMap.SkipIterator

        for measurement_item, source_item in zip(input_map, sourcedb_map):
            if measurement_item.skip or source_item.skip:
                jobs.append(None)
                continue
            # both the sourcedb and the measurement are in a map
            # unpack both
            host, measurement_path = measurement_item.host, measurement_item.file
            host2, sourcedb_path = source_item.host, source_item.file

            # construct and save the output name
            arguments = [self.inputs['executable'],
                         self.environment,
                         self.inputs['parset'],
                         self.inputs['working_directory'],
                         self.inputs['output_image'],
                         measurement_path,
                         sourcedb_path,
                         self.inputs['mask_patch_size'],
                         self.inputs['autogenerate_parameters'],
                         self.inputs['specify_fov'],
                         self.inputs['fov'],
                         self.inputs['major_cycle'],
                         self.inputs['nr_cycles'],
                         self.inputs['perform_self_cal']
                         ]

            jobs.append(ComputeJob(host, node_command, arguments))
        self._schedule_jobs(jobs)

        # *********************************************************************
        # 3. Check output of the node scripts

        for job, output_item in zip(jobs, output_map):
            # job == None on skipped job
            if "image" not in job.results:
                output_item.file = "failed"
                output_item.skip = True

            else:
                output_item.file = job.results["image"]
                output_item.skip = False

        # Check if there are finished runs
        successful_runs = None
        for item in output_map:
            if not item.skip:
                successful_runs = True
                break

        if not successful_runs:
            self.logger.error(
                "None of the started awimager runs finished correctly")
            self.logger.error(
                "No work left to be done: exiting with error status")
            return 1

        # If partial success
        if self.error.isSet():
            self.logger.error("Failed awimager node run detected, continuing "
                              "with successful tasks.")

        self._store_data_map(self.inputs['mapfile'], output_map,
                             "mapfile containing the produced awimages")

        self.outputs["mapfile"] = self.inputs['mapfile']
        return 0
Example #17
    def go(self):
        super(gainoutliercorrection, self).go()
        self.logger.info("Starting gainoutliercorrection run")
        # ********************************************************************
        # 1. Validate input
        # If sigma is None, use the default behaviour and use the executable:
        # test if it exists
        executable = self.inputs['executable']
        if executable == "":
            pass
        elif not os.access(executable, os.X_OK):
            self.logger.warn(
                "No parmexportcal excecutable is not found on the suplied"
                "path: {0}".format(self.inputs['executable']))
            self.logger.warn("Defaulting to edit_parmdb behaviour")

        # ********************************************************************
        # 2. load mapfiles, validate if a target output location is provided
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed"
                )
                return 1
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    (os.path.splitext(os.path.basename(item.file))[0] +
                     self.inputs['suffix'])
                )

        # Update the skip fields of the two maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for x, y in zip(indata, outdata):
            x.skip = y.skip = (x.skip or y.skip)

        # ********************************************************************
        # 3. Call node side of the recipe
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        indata.iterator = outdata.iterator = DataMap.SkipIterator
        jobs = []
        for inp, outp in zip(indata, outdata):
            jobs.append(
                ComputeJob(
                    outp.host,
                    command,
                    arguments=[
                        inp.file,
                        outp.file,
                        self.inputs['executable'],
                        self.environment,
                        self.inputs['sigma'],
                        self.inputs['export_instrument_model']
                     ]
                )
            )
        self._schedule_jobs(jobs)
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True

        # ********************************************************************
        # 4. validate performance, return corrected files
        if self.error.isSet():
            self.logger.warn("Detected failed gainoutliercorrection job")
            return 1
        else:
            self.logger.debug("Writing instrument map file: %s" %
                              self.inputs['mapfile'])
            outdata.save(self.inputs['mapfile'])
            self.outputs['mapfile'] = self.inputs['mapfile']
            return 0
Example #18
    def go(self):
        """
        This member contains all the functionality of the imager_awimager.
        Functionality is all located at the node side of the script.
        """
        super(imager_awimager, self).go()
        self.logger.info("Starting imager_awimager run")

        # *********************************************************************
        # 1. collect the inputs and validate
        input_map = DataMap.load(self.inputs['args'][0])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

        if not validate_data_maps(input_map, sourcedb_map):
            self.logger.error(
                        "the supplied input_ms mapfile and sourcedb mapfile"
                        "are incorrect. Aborting")
            self.logger.error(repr(input_map))
            self.logger.error(repr(sourcedb_map))
            return 1

        # *********************************************************************
        # 2. Start the node side of the awimager recipe
        # Compile the command to be executed on the remote machine
        node_command = "python %s" % (self.__file__.replace("master", "nodes"))
        jobs = []

        output_map = copy.deepcopy(input_map)        
        for w, x, y in zip(input_map, output_map, sourcedb_map):
            w.skip = x.skip = y.skip = (
                w.skip or x.skip or y.skip
            )

        sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
            DataMap.SkipIterator

        for idx, (measurement_item, source_item) in enumerate(zip(input_map, sourcedb_map)):
            if measurement_item.skip or source_item.skip:
                jobs.append(None)
                continue
            # both the sourcedb and the measurement are in a map
            # unpack both
            host, measurement_path = measurement_item.host, measurement_item.file
            host2, sourcedb_path = source_item.host, source_item.file

            # use unique working directories per job, to prevent interference between jobs on a global fs
            working_dir = os.path.join(self.inputs['working_directory'], "imager_awimager_{0}".format(idx))

            # construct and save the output name
            arguments = [self.inputs['executable'],
                         self.environment,
                         self.inputs['parset'],
                         working_dir,
                         # put in unique dir, as node script wants to put private .par files next to it
                         "%s_%s/image" % (self.inputs['output_image'], idx), 
                         measurement_path,
                         sourcedb_path,
                         self.inputs['mask_patch_size'],
                         self.inputs['autogenerate_parameters'],
                         self.inputs['specify_fov'],
                         self.inputs['fov'],
                         ]

            jobs.append(ComputeJob(host, node_command, arguments,
                    resources={
                        "cores": self.inputs['nthreads']
                    }))
        self._schedule_jobs(jobs)

        # *********************************************************************
        # 3. Check output of the node scripts

        for job, output_item in zip(jobs, output_map):
            # job == None on skipped job
            if "image" not in job.results:
                output_item.file = "failed"
                output_item.skip = True

            else:
                output_item.file = job.results["image"]
                output_item.skip = False

        # Check if there are finished runs
        successful_runs = None
        for item in output_map:
            if not item.skip:
                successful_runs = True
                break

        if not successful_runs:
            self.logger.error(
                "None of the started awimager runs finished correctly")
            self.logger.error(
                "No work left to be done: exiting with error status")
            return 1

        # If partial success
        if self.error.isSet():
            self.logger.warn("Failed awimager node run detected, continuing "
                             "with successful tasks.")

        self._store_data_map(self.inputs['mapfile'], output_map,
                             "mapfile containing the produced awimages")

        self.outputs["mapfile"] = self.inputs['mapfile']
        return 0
Example #19
    def go(self):
        self.logger.info("Starting DPPP run")
        super(dppp, self).go()

#        #                Keep track of "Total flagged" messages in the DPPP logs
#        # ----------------------------------------------------------------------
#        self.logger.searchpatterns["fullyflagged"] = "Fully flagged baselines"

        # *********************************************************************
        # 1. load input data file, validate output vs the input location if
        #    output locations are provided
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
        else:
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    os.path.basename(item.file) + self.inputs['suffix']
                )

        # ********************************************************************
        # 2. Load parmdb and sourcedb
        # Load parmdb-mapfile, if one was given.         
        if 'parmdb_mapfile' in self.inputs:
            self.logger.debug(
                "Loading parmdb mapfile: %s" % self.inputs['parmdb_mapfile']
            )
            parmdbdata = DataMap.load(self.inputs['parmdb_mapfile'])
        else:
            parmdbdata = copy.deepcopy(indata)
            for item in parmdbdata:
                item.file = ''

        # Load sourcedb-mapfile, if one was given.         
        if 'sourcedb_mapfile' in self.inputs:
            self.logger.debug(
                "Loading sourcedb mapfile: %s" % self.inputs['sourcedb_mapfile']
            )
            sourcedbdata = DataMap.load(self.inputs['sourcedb_mapfile'])
        else:
            sourcedbdata = copy.deepcopy(indata)
            for item in sourcedbdata:
                item.file = ''

        # Validate all the data maps.
        if not validate_data_maps(indata, outdata, parmdbdata, sourcedbdata):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for w, x, y, z in zip(indata, outdata, parmdbdata, sourcedbdata):
            w.skip = x.skip = y.skip = z.skip = (
                w.skip or x.skip or y.skip or z.skip
            )

        # ********************************************************************
        # 3. Call the node side of the recipe
        # Create and schedule the compute jobs
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        indata.iterator = outdata.iterator = DataMap.SkipIterator
        parmdbdata.iterator = sourcedbdata.iterator = DataMap.SkipIterator
        jobs = []
        for inp, outp, pdb, sdb in zip(
            indata, outdata, parmdbdata, sourcedbdata
        ):
            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        outp.file,
                        pdb.file,
                        sdb.file,
                        self.inputs['parset'],
                        self.inputs['executable'],
                        self.environment,
                        self.inputs['demix_always'],
                        self.inputs['demix_if_needed'],
                        self.inputs['data_start_time'],
                        self.inputs['data_end_time'],
                        self.inputs['nthreads'],
                        self.inputs['clobber']
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True

#        # *********************************************************************
#        # 4. parse logfile for fully flagged baselines
#        matches = self.logger.searchpatterns["fullyflagged"].results
#        self.logger.searchpatterns.clear() # finished searching
#        stripchars = "".join(set("Fully flagged baselines: "))
#        baselinecounter = defaultdict(lambda: 0)
#        for match in matches:
#            for pair in (
#                pair.strip(stripchars) for pair in match.getMessage().split(";")
#            ):
#                baselinecounter[pair] += 1
#        self.outputs['fullyflagged'] = baselinecounter.keys()

        # *********************************************************************
        # 4. Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        self.logger.debug("Writing data map file: %s" % self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Example #20
    def go(self):
        if 'executable' in self.inputs:
            executable = self.inputs['executable']

        if self.inputs['nthreads']:
            self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        # *********************************************************************
        # try loading input/output data file, validate output vs the input location if
        #    output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be placed in the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        work_dir,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if self.inputs['inputkey'] and not self.inputs['inputkey'] in self.inputs['inputkeys']:
            self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
                if mapname not in self.inputs['mapfiles_as_string']:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(inp.file)
                else:
                    if key != mapname:
                        filedict[key] = []
                        for inp in filemap:
                            filedict[key].append(mapname)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)
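
        # (Illustrative note, not part of the original recipe: at this point
        #  filedict maps each input and output key to one file per job, e.g. a
        #  hypothetical key 'msin' to the list of input MS paths and 'msout' to
        #  the list of output paths; the per-job loop below substitutes these
        #  lists indexed by the job number i.)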

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        #command = "python3 %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
        recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
        pylist = os.getenv('PYTHONPATH', '').split(':')
        command = None
        for pl in pylist:
            if os.path.isfile(os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')
        for pl in recipe_directories:
            if os.path.isfile(os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')

        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            if filedict:
                for name, value in filedict.items():
                    replaced = False
                    if arglist_copy:
                        for arg in arglist:
                            if name == arg:
                                ind = arglist_copy.index(arg)
                                arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                                replaced = True
                    if parsetdict_copy:
                        if name in list(parsetdict_copy.values()):
                            for k, v in parsetdict_copy.items():
                                if v == name:
                                    parsetdict_copy[k] = value[i]
                        else:
                            if not replaced:
                                parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        work_dir,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
                if not self.inputs['error_tolerance']:
                    self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                    return 1
            for k, v in list(job.results.items()):
                if k not in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # Temporary solution: write all output dict entries to a mapfile
        #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        # check that the mapfile directory exists (needed for stand-alone mode)
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in list(jobresultdict.items()):
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
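
As a side note, here is a minimal standalone sketch (hypothetical paths, job name and step name; not part of the recipe above) of the output-naming rule used when no output mapfile is given: the input basename without its extension, plus the step name, placed under the job's working directory.

import os

# Hypothetical values for illustration only
working_directory = "/data/scratch/pipeline"
job_name = "pipeline_job"
stepname = "ndppp_prep"
infile = "/data/L123456_SB000_uv.MS"

outfile = os.path.join(
    working_directory,
    job_name,
    os.path.splitext(os.path.basename(infile))[0] + '.' + stepname
)
print(outfile)  # /data/scratch/pipeline/pipeline_job/L123456_SB000_uv.ndppp_prep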
class setupparmdb(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Create a distributed parameter database (ParmDB) for a distributed 
    Measurement set (MS).
    
    1. Create a parmdb template at the master side of the recipe
    2. Call node side of recipe with template and possible targets
    3. Validate performance, cleanup of temp files, construct output

    **Command line arguments**

    1. A mapfile describing the data to be processed.
    2. A mapfile with output location (If provide input and output are validated)
    
    """
    inputs = {
        'executable':
        ingredient.ExecField(
            '--executable',
            help="Full path to parmdbm executable",
        ),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8),
        'suffix':
        ingredient.StringField(
            '--suffix',
            help="Suffix of the table name of the empty parmameter database",
            default=".parmdb"),
        'working_directory':
        ingredient.StringField('-w',
                               '--working-directory',
                               help="Working directory used on output nodes. "
                               "Results will be written here."),
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Full path of mapfile to produce; it will contain "
            "a list of the generated empty parameter database files")
    }

    outputs = {'mapfile': ingredient.FileField()}

    def go(self):
        self.logger.info("Starting setupparmdb run")
        super(setupparmdb, self).go()

        # *********************************************************************
        # 1. Create a temporary template parmdb at the master side of the recipe
        self.logger.info("Generating template parmdb")

        # generate a temp dir
        pdbdir = tempfile.mkdtemp(
            dir=self.config.get("layout", "job_directory"))
        pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

        # Create a template, using the temp dir as its location
        try:
            parmdbm_process = subprocess.Popen([self.inputs['executable']],
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            sout, serr = parmdbm_process.communicate(template % pdbfile)
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as err:
            self.logger.error("Failed to spawn parmdbm: %s" % str(err))
            return 1

        # *********************************************************************
        # 2. Call node side of recipe with template and possible targets
        #    If output locations are provided as input, these are validated.
        try:
            #                       Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            args = self.inputs['args']
            self.logger.debug("Loading input-data mapfile: %s" % args[0])
            indata = DataMap.load(args[0])
            if len(args) > 1:
                # If an output location is provided, validate the input and output maps
                self.logger.debug("Loading output-data mapfile: %s" % args[1])
                outdata = DataMap.load(args[1])
                if not validate_data_maps(indata, outdata):
                    self.logger.error(
                        "Validation of input/output data mapfiles failed")
                    return 1
                # otherwise the output location is the input location plus the suffix
            else:
                outdata = copy.deepcopy(indata)
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        os.path.basename(item.file) + self.inputs['suffix'])
            #  Call the node side
            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            outdata.iterator = DataMap.SkipIterator
            jobs = []
            for outp in outdata:
                jobs.append(
                    ComputeJob(outp.host,
                               command,
                               arguments=[pdbfile, outp.file]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for job, outp in zip(jobs, outdata):
                # A return code of 123456 indicates a failed ssh connection
                if job.results['returncode'] == 123456:
                    self.logger.warning(
                        "ssh connection with {0} failed. "
                        "Skipping further work on this task".format(outp.host))
                    self.logger.warning("Error code 123456.")
                    outp.skip = True
                elif job.results['returncode'] != 0:
                    outp.skip = True

        # *********************************************************************
        # 3. validate performance, cleanup of temp files, construct output
        finally:
            self.logger.debug("Removing template parmdb")
            shutil.rmtree(pdbdir, ignore_errors=True)

        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")
        self.logger.debug("Writing parmdb map file: %s" %
                          self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
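
For context, a self-contained sketch (hypothetical command and script text; the actual parmdbm template string is not shown in this listing) of the pattern used above to drive a command-line tool through its standard input:

import subprocess

script = "help\nquit\n"              # placeholder stand-in for the parmdbm template
proc = subprocess.Popen(["cat"],     # 'cat' stands in for the parmdbm executable
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
sout, serr = proc.communicate(script.encode())
print(sout.decode(), end="")         # 'cat' simply echoes the script back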
Exemple #22
0
    def go(self):
        self.logger.info("Starting setupparmdb run")
        super(setupparmdb, self).go()

        # *********************************************************************
        # 1. Create a temporary template parmdb at the master side of the recipe
        self.logger.info("Generating template parmdb")

        # generate a temp dir
        pdbdir = tempfile.mkdtemp(
            dir=self.config.get("layout", "job_directory"),
            suffix=".%s" % (os.path.basename(__file__), ))
        pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

        # Create a template, using the temp dir as its location
        try:
            parmdbm_process = subprocess.Popen([self.inputs['executable']],
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            sout, serr = communicate_returning_strings(
                parmdbm_process, input=(template % pdbfile).encode())
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as err:
            self.logger.error("Failed to spawn parmdbm: %s" % str(err))
            return 1

        # *********************************************************************
        # 2. Call node side of recipe with template and possible targets
        #    If output locations are provided as input, these are validated.
        try:
            #                       Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            args = self.inputs['args']
            self.logger.debug("Loading input-data mapfile: %s" % args[0])
            indata = DataMap.load(args[0])
            if len(args) > 1:
                # If an output location is provided, validate the input and output maps
                self.logger.debug("Loading output-data mapfile: %s" % args[1])
                outdata = DataMap.load(args[1])
                if not validate_data_maps(indata, outdata):
                    self.logger.error(
                        "Validation of input/output data mapfiles failed")
                    return 1
                # otherwise the output location is the input location plus the suffix
            else:
                outdata = copy.deepcopy(indata)
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        os.path.basename(item.file) + self.inputs['suffix'])
            #  Call the node side
            command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
            outdata.iterator = DataMap.SkipIterator
            jobs = []
            for outp in outdata:
                jobs.append(
                    ComputeJob(outp.host,
                               command,
                               arguments=[pdbfile, outp.file]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for job, outp in zip(jobs, outdata):
                # A return code of 123456 indicates a failed ssh connection
                if job.results['returncode'] == 123456:
                    self.logger.warning(
                        "ssh connection with {0} failed. "
                        "Skipping further work on this task".format(outp.host))
                    self.logger.warning("Error code 123456.")
                    outp.skip = True
                elif job.results['returncode'] != 0:
                    outp.skip = True

        # *********************************************************************
        # 3. validate performance, cleanup of temp files, construct output
        finally:
            self.logger.debug("Removing template parmdb")
            shutil.rmtree(pdbdir, ignore_errors=True)

        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")
        self.logger.debug("Writing parmdb map file: %s" %
                          self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
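
A rough standalone sketch (with a hypothetical Entry class; real DataMap items expose host, file and skip as used above) of what iterating with DataMap.SkipIterator amounts to: only non-skipped entries are turned into compute jobs.

class Entry:
    """Hypothetical stand-in for a DataMap item."""
    def __init__(self, host, file, skip=False):
        self.host, self.file, self.skip = host, file, skip

entries = [Entry("node01", "/out/L1_SB000.parmdb"),
           Entry("node02", "/out/L1_SB001.parmdb", skip=True)]

for item in (e for e in entries if not e.skip):   # analogous to DataMap.SkipIterator
    print("schedule job on", item.host, "for", item.file)   # only node01 is scheduled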
    def go(self):
        if 'executable' in self.inputs:
            executable = self.inputs['executable']
        else:
            executable = ''

        if self.inputs['nthreads']:
            self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # Collect the argument-formatting options to pass to the node script
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        # *********************************************************************
        # try loading input/output data file, validate output vs the input location if
        #    output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be written to the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple output files
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        work_dir,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if self.inputs['inputkey'] and self.inputs['inputkey'] not in self.inputs['inputkeys']:
            self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d must match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
                if mapname not in self.inputs['mapfiles_as_string']:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(inp.file)
                else:
                    if key != mapname:
                        filedict[key] = []
                        for inp in filemap:
                            filedict[key].append(mapname)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        #command = "python %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
        recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
        pylist = os.getenv('PYTHONPATH', '').split(':')
        command = None
        for pl in pylist:
            if os.path.isfile(os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')):
                command = "python %s" % os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')
        for pl in recipe_directories:
            if os.path.isfile(os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')):
                command = "python %s" % os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')

        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            if filedict:
                for name, value in filedict.items():
                    replaced = False
                    if arglist_copy:
                        for arg in arglist:
                            if name == arg:
                                ind = arglist_copy.index(arg)
                                arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                                replaced = True
                    if parsetdict_copy:
                        if name in parsetdict_copy.values():
                            for k, v in parsetdict_copy.items():
                                if v == name:
                                    parsetdict_copy[k] = value[i]
                        else:
                            if not replaced:
                                parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        work_dir,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
                if not self.inputs['error_tolerance']:
                    self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                    return 1
            for k, v in job.results.items():
                if k not in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # Temporary solution: write all output dict entries to a mapfile
        #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        # check that the mapfile directory exists (needed for stand-alone mode)
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in jobresultdict.items():
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
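
A compact worked example (hypothetical key and file names) of the per-job placeholder substitution performed above: a key from filedict that appears in the argument list or among the parset values is replaced by the i-th file of the corresponding map.

filedict = {"INPUT": ["/data/a.MS", "/data/b.MS"],     # hypothetical input key
            "OUTPUT": ["/work/a.out", "/work/b.out"]}  # hypothetical output key
arglist = ["INPUT", "--verbose"]
parsetdict = {"msout": "OUTPUT"}

i = 1  # second job
arglist_i = [filedict[a][i] if a in filedict else a for a in arglist]
parsetdict_i = {k: (filedict[v][i] if v in filedict else v)
                for k, v in parsetdict.items()}
print(arglist_i)     # ['/data/b.MS', '--verbose']
print(parsetdict_i)  # {'msout': '/work/b.out'}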
Exemple #24
0
    def go(self):
        """
        This method contains the master-side functionality of imager_awimager;
        the actual work is performed on the node side of the recipe.
        """
        super(imager_awimager, self).go()
        self.logger.info("Starting imager_awimager run")

        # *********************************************************************
        # 1. collect the inputs and validate
        input_map = DataMap.load(self.inputs['args'][0])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

        if not validate_data_maps(input_map, sourcedb_map):
            self.logger.error(
                "the supplied input_ms mapfile and sourcedb mapfile"
                "are incorrect. Aborting")
            self.logger.error(repr(input_map))
            self.logger.error(repr(sourcedb_map))
            return 1

        # *********************************************************************
        # 2. Start the node side of the awimager recipe
        # Compile the command to be executed on the remote machine
        node_command = "python %s" % (self.__file__.replace("master", "nodes"))
        jobs = []

        output_map = copy.deepcopy(input_map)
        for w, x, y in zip(input_map, output_map, sourcedb_map):
            w.skip = x.skip = y.skip = (w.skip or x.skip or y.skip)

        sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
            DataMap.SkipIterator

        for idx, (measurement_item,
                  source_item) in enumerate(zip(input_map, sourcedb_map)):
            if measurement_item.skip or source_item.skip:
                jobs.append(None)
                continue
            # both the sourcedb and the measurement are in a map
            # unpack both
            host, measurement_path = measurement_item.host, measurement_item.file
            host2, sourcedb_path = source_item.host, source_item.file

            # use unique working directories per job, to prevent interference between jobs on a global fs
            working_dir = os.path.join(self.inputs['working_directory'],
                                       "imager_awimager_{0}".format(idx))

            # construct and save the output name
            arguments = [
                self.inputs['executable'],
                self.environment,
                self.inputs['parset'],
                working_dir,
                # put in unique dir, as node script wants to put private .par files next to it
                "%s_%s/image" % (self.inputs['output_image'], idx),
                measurement_path,
                sourcedb_path,
                self.inputs['mask_patch_size'],
                self.inputs['autogenerate_parameters'],
                self.inputs['specify_fov'],
                self.inputs['fov'],
            ]

            jobs.append(
                ComputeJob(host,
                           node_command,
                           arguments,
                           resources={"cores": self.inputs['nthreads']}))
        self._schedule_jobs(jobs)

        # *********************************************************************
        # 3. Check output of the node scripts

        for job, output_item in zip(jobs, output_map):
            # job is None for a skipped entry
            if job is None or "image" not in job.results:
                output_item.file = "failed"
                output_item.skip = True
            else:
                output_item.file = job.results["image"]
                output_item.skip = False

        # Check whether any runs finished successfully
        successful_runs = any(not item.skip for item in output_map)

        if not successful_runs:
            self.logger.error(
                "None of the starter awimager run finished correct")
            self.logger.error(
                "No work left to be done: exiting with error status")
            return 1

        # If only partially successful
        if self.error.isSet():
            self.logger.warn("Failed awimager node run detected. Continuing with "
                             "successful tasks.")

        self._store_data_map(self.inputs['mapfile'], output_map,
                             "mapfile containing produces awimages")

        self.outputs["mapfile"] = self.inputs['mapfile']
        return 0
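
Finally, a small sketch (hypothetical paths) of the per-job naming used above, which keeps concurrent awimager jobs from interfering with each other on a shared filesystem:

import os

working_directory = "/data/scratch/imaging"      # hypothetical
output_image = "/data/scratch/imaging/awimage"   # hypothetical

for idx in range(3):
    working_dir = os.path.join(working_directory,
                               "imager_awimager_{0}".format(idx))
    image_name = "%s_%s/image" % (output_image, idx)
    print(working_dir, image_name)
# e.g. /data/scratch/imaging/imager_awimager_0 /data/scratch/imaging/awimage_0/image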