Example #1
    def test_align_data_maps_equal_maps_skip_set_5_multi_maps(self):
        # test for DataMap align
        data_map = MultiDataMap(self.new_style_multi_map)
        data_map2 = MultiDataMap(self.new_style_multi_map)
        data_map3 = MultiDataMap(self.new_style_multi_map)
        data_map4 = MultiDataMap(self.new_style_multi_map)
        data_map5 = MultiDataMap(self.new_style_multi_map)

        # change single skip value
        data_map.data[1].skip = True

        # Perform an align
        align_data_maps(data_map, data_map2, data_map3, data_map4, data_map5)

        self.assertTrue(
            data_map2.data[2].skip, "The skip field was not"
            " aligned correctly in the second entry")
        self.assertTrue(
            data_map3.data[2].skip, "The skip field was not"
            " aligned correctly in the second entry")
        self.assertTrue(
            data_map4.data[2].skip, "The skip field was not"
            " aligned correctly in the second entry")
        self.assertTrue(
            data_map5.data[2].skip, "The skip field was not"
            " aligned correctly in the second entry")
Example #2
 def test_skip_iterator(self):
     data_map = MultiDataMap(self.new_style_map)
     data_map.iterator = MultiDataMap.SkipIterator
     unskipped = [item for item in data_map]
     self.assertEqual(len(unskipped), 2)
     self.assertTrue(all(isinstance(item, MultiDataProduct) for item in unskipped))
     self.assertEqual(unskipped[0].host, 'locus002')
     self.assertEqual(unskipped[0].file, ['L12345_SB102.MS'])
Example #3
 def test_tuple_iterator(self):
     data_map = MultiDataMap(self.new_style_map)
     data_map.iterator = MultiDataMap.TupleIterator
     tuples = [item for item in data_map]
     self.assertEqual(len(tuples), 4)
     self.assertTrue(all(isinstance(item, tuple) for item in tuples))
     self.assertTrue(all(len(item) == 2 for item in tuples))
     self.assertEqual(tuples[0], ('locus001', ['L12345_SB101.MS']))
Example #4
 def test_tuple_iterator(self):
     data_map = MultiDataMap(self.new_style_map)
     data_map.iterator = MultiDataMap.TupleIterator
     tuples = [item for item in data_map]
     self.assertEqual(len(tuples), 4)
     self.assertTrue(all(isinstance(item, tuple) for item in tuples))
     self.assertTrue(all(len(item) == 2 for item in tuples))
     self.assertEqual(tuples[0], ('locus001', ['L12345_SB101.MS']))
Example #5
 def test_skip_iterator(self):
     data_map = MultiDataMap(self.new_style_map)
     data_map.iterator = MultiDataMap.SkipIterator
     unskipped = [item for item in data_map]
     self.assertEqual(len(unskipped), 2)
     self.assertTrue(
         all(isinstance(item, MultiDataProduct) for item in unskipped))
     self.assertEqual(unskipped[0].host, 'locus002')
     self.assertEqual(unskipped[0].file, ['L12345_SB102.MS'])
Example #6
    def test_align_data_maps_different_length_maps(self):
        data_map = MultiDataMap(self.new_style_multi_map)
        data_map_other_length = MultiDataMap([{
            'host': 'locus001',
            'file': ['L12345_SB101.MS'],
            'file_skip': [True],
            'skip': True
        }])

        self.assertRaises(DataMapError, align_data_maps,
                          data_map, data_map_other_length)
Example #7
    def test_align_data_maps_equal_maps_no_change(self):
        data_map = MultiDataMap(self.new_style_multi_map)
        data_map2 = MultiDataMap(self.new_style_multi_map)
        data_map3 = MultiDataMap(self.new_style_multi_map)

        # Perform an align
        align_data_maps(data_map, data_map2)

        # There should be no changes and the three maps should be the same
        for entrie1, entrie2, entrie3 in zip(data_map, data_map2, data_map3):
            self.assertEqual(entrie1, entrie3)
            self.assertEqual(entrie2, entrie3)
Example #8
    def test_align_data_maps_equal_maps_skip_set(self):
        data_map = MultiDataMap(self.new_style_multi_map)
        data_map2 = MultiDataMap(self.new_style_multi_map)

        # Change the skip field of a single entry in the data map to True
        data_map.data[1].skip = True

        # Perform an align
        align_data_maps(data_map, data_map2)

        # The second entry's skip field should be set to True
        self.assertTrue(
            data_map2.data[2].skip, "The skip field was not"
            " aligned correctly in the second entry")
Example #9
 def test_new_style_map(self):
     data_map = MultiDataMap(self.new_style_map)
     self.assertEqual(len(data_map), 4)
     self.assertEqual(data_map[0].host, 'locus001')
     self.assertEqual(data_map[1].file, ['L12345_SB102.MS'])
     self.assertEqual(data_map[1].file_skip, [False])
     self.assertTrue(data_map[2].skip)
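The self.new_style_map fixture itself is not shown in these snippets. Judging from the assertions in the iterator and load/store tests, a plausible shape for it is the list of dicts below; the concrete values are inferred from the tests, not copied from the original test module.

new_style_map = [
    # skip=True entries are the ones skipped by the SkipIterator tests above
    {'host': 'locus001', 'file': ['L12345_SB101.MS'], 'file_skip': [True],  'skip': True},
    {'host': 'locus002', 'file': ['L12345_SB102.MS'], 'file_skip': [False], 'skip': False},
    {'host': 'locus003', 'file': ['L12345_SB103.MS'], 'file_skip': [True],  'skip': True},
    {'host': 'locus004', 'file': ['L12345_SB104.MS'], 'file_skip': [False], 'skip': False},
]

Passing such a list to MultiDataMap(...) yields the four entries that the TupleIterator and SkipIterator tests iterate over.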
Example #10
    def go(self):
        super(imager_create_dbs, self).go()

        # get assoc_theta, convert from empty string if needed
        assoc_theta = self.inputs["assoc_theta"]
        if assoc_theta == "":
            assoc_theta = None

        # Load mapfile data from files
        self.logger.info(self.inputs["slice_paths_mapfile"])
        slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"])
        input_map = DataMap.load(self.inputs['args'][0])
        source_list_map = DataMap.load(self.inputs['source_list_map_path'])

        if self._validate_input_data(input_map, slice_paths_map):
            return 1

        # Run the nodes with the collected inputs
        jobs, output_map = self._run_create_dbs_node(
                 input_map, slice_paths_map, assoc_theta,
                 source_list_map)

        # Collect the output of the node scripts and write it to (map) files
        return self._collect_and_assign_outputs(jobs, output_map,
                                    slice_paths_map)
Example #11
    def go(self):
        super(imager_create_dbs, self).go()

        # get assoc_theta, convert from empty string if needed 
        assoc_theta = self.inputs["assoc_theta"]
        if assoc_theta == "":
            assoc_theta = None

        # Load mapfile data from files
        self.logger.info(self.inputs["slice_paths_mapfile"])
        slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"])
        input_map = DataMap.load(self.inputs['args'][0])
        source_list_map = DataMap.load(self.inputs['source_list_map_path'])

        if self._validate_input_data(input_map, slice_paths_map):
            return 1

        # Run the nodes with the collected inputs
        jobs, output_map = self._run_create_dbs_node(
                 input_map, slice_paths_map, assoc_theta,
                 source_list_map)

        # Collect the output of the node scripts and write it to (map) files
        return self._collect_and_assign_outputs(jobs, output_map,
                                    slice_paths_map)
Example #12
    def test_align_data_maps_not_enough_arguments(self):
        # If called with zero data maps, expect DataMapError
        self.assertRaises(DataMapError, align_data_maps)

        # If called with a single data map, expect DataMapError
        data_map = MultiDataMap(self.new_style_multi_map)
        self.assertRaises(DataMapError, align_data_maps, data_map)
Example #13
    def run(self, bbs_executable, parset, ms_list_path, parmdb_list_path,
             sky_list_path):
        """
        imager_bbs functionality. Called by framework performing all the work
        """
        self.logger.debug("Starting imager_bbs Node")
        # *********************************************************************
        # 1. Load mapfiles
        # read in the mapfiles to data maps: The master recipe added the single
        # path to a mapfile, which allows usage of the default data methods
        # (load_data_map)
        # TODO: Datamap
        ms_map = MultiDataMap.load(ms_list_path)
        parmdb_map = MultiDataMap.load(parmdb_list_path)
        sky_list = MultiDataMap.load(sky_list_path)
        source_db = sky_list[0].file[0] # the sourcedb is the first file entry

        try:
            bbs_process_group = SubProcessGroup(self.logger,
                                  self.resourceMonitor)
            # *****************************************************************
            # 2. start the bbs executable with data
            for (measurement_set, parmdm) in zip(ms_map[0].file,
                                                parmdb_map[0].file):
                command = [
                    bbs_executable,
                    "--sourcedb={0}".format(source_db),
                    "--parmdb={0}".format(parmdm) ,
                    measurement_set,
                    parset]
                self.logger.info("Executing bbs command: {0}".format(" ".join(
                            command)))

                bbs_process_group.run(command)

            # *****************************************************************
            # 3. check status of the processes
            if bbs_process_group.wait_for_finish() is not None:
                self.logger.error(
                            "Failed bbs run detected. Aborting")
                return 1    # If bbs failed we need to abort: the concat
                            # is now corrupt

        except OSError as exception:
            self.logger.error("Failed to execute bbs: {0}".format(str(
                                                                    exception)))
            return 1
        return 0
Example #14
 def test_old_style_map(self):
     data_map = MultiDataMap(self.old_style_map)
     self.assertEqual(len(data_map), 4)
     self.assertEqual(data_map[0].host, 'locus001')
     self.assertEqual(data_map[1].file, ['L12345_SB102.MS'])
     self.assertEqual(data_map[2].file_skip, [True])
     self.assertEqual(data_map[2].skip, True)
     self.assertTrue(all(item.skip for item in data_map))
Example #15
    def run(self, bbs_executable, parset, ms_list_path, parmdb_list_path,
            sky_list_path):
        """
        imager_bbs functionality. Called by framework performing all the work
        """
        self.logger.debug("Starting imager_bbs Node")
        # *********************************************************************
        # 1. Load mapfiles
        # read in the mapfiles to data maps: The master recipe added the single
        # path to a mapfile, which allows usage of the default data methods
        # (load_data_map)
        # TODO: Datamap
        ms_map = MultiDataMap.load(ms_list_path)
        parmdb_map = MultiDataMap.load(parmdb_list_path)
        sky_list = MultiDataMap.load(sky_list_path)
        source_db = sky_list[0].file[0]  # the sourcedb is the first file entry

        try:
            bbs_process_group = SubProcessGroup(self.logger,
                                                self.resourceMonitor)
            # *****************************************************************
            # 2. start the bbs executable with data
            for (measurement_set, parmdm) in zip(ms_map[0].file,
                                                 parmdb_map[0].file):
                command = [
                    bbs_executable, "--sourcedb={0}".format(source_db),
                    "--parmdb={0}".format(parmdm), measurement_set, parset
                ]
                self.logger.info("Executing bbs command: {0}".format(
                    " ".join(command)))

                bbs_process_group.run(command)

            # *****************************************************************
            # 3. check status of the processes
            if bbs_process_group.wait_for_finish() is not None:
                self.logger.error("Failed bbs run detected. Aborting")
                return 1  # If bbs failed we need to abort: the concat
                # is now corrupt

        except OSError as exception:
            self.logger.error("Failed to execute bbs: {0}".format(
                str(exception)))
            return 1
        return 0
Example #16
    def test_append_item_non_skip(self):
        data_map = MultiDataMap(self.new_style_map)
        data_map.append(("host", ["file"], False, [False]))
        data_map.append(("host", ["file"], False))

        data_map.iterator = DataMap.TupleIterator
        tuples = [item for item in data_map]
        self.assertEqual(len(tuples), 6)
        self.assertTrue(all(isinstance(item, tuple) for item in tuples))
        self.assertTrue(all(len(item) == 2 for item in tuples))
        self.assertEqual(tuples[-1], ('host', ['file']))
Example #17
    def _bbs(self,
             timeslice_map_path,
             parmdbs_map_path,
             sourcedb_map_path,
             skip=False):
        """
        Perform a calibration step. First with a set of sources from the
        gsm and in later iterations also on the found sources
        """
        # create parset for bbs run
        parset = self.parset.makeSubset("BBS.")
        parset_path = self._write_parset_to_file(
            parset, "bbs", "Parset for calibration with a local sky model")

        # create the output file path
        output_mapfile = self._write_datamap_to_file(
            None, "bbs_output", "Mapfile with calibrated measurement sets.")

        converted_sourcedb_map_path = self._write_datamap_to_file(
            None, "source_db", "correctly shaped mapfile for input sourcedbs")

        if skip:
            return output_mapfile

        # The create db step produces a mapfile with a single sourcelist for
        # the different timeslices. Generate a mapfile with copies of the
        # sourcelist location: this allows validation of the maps in combination.
        # Get the original map data
        sourcedb_map = DataMap.load(sourcedb_map_path)
        parmdbs_map = MultiDataMap.load(parmdbs_map_path)
        converted_sourcedb_map = []

        # sanity check for correct output from previous recipes
        if not validate_data_maps(sourcedb_map, parmdbs_map):
            self.logger.error("The input files for bbs do not contain "
                              "matching host names for each entry content:")
            self.logger.error(repr(sourcedb_map))
            self.logger.error(repr(parmdbs_map))
            raise PipelineException("Invalid input data for imager_bbs recipe")

        self.run_task("imager_bbs",
                      timeslice_map_path,
                      parset=parset_path,
                      instrument_mapfile=parmdbs_map_path,
                      sourcedb_mapfile=sourcedb_map_path,
                      mapfile=output_mapfile,
                      working_directory=self.scratch_directory)

        return output_mapfile
Example #18
    def test_append_item_non_skip(self):
        data_map = MultiDataMap(self.new_style_map)
        data_map.append(("host", ["file"],  False, [False] ))
        data_map.append(("host", ["file"], False))

        data_map.iterator = DataMap.TupleIterator
        tuples = [item for item in data_map]
        self.assertEqual(len(tuples), 6)
        self.assertTrue(all(isinstance(item, tuple) for item in tuples))
        self.assertTrue(all(len(item) == 2 for item in tuples))
        self.assertEqual(tuples[-1], ('host', ['file']))
Example #19
    def _bbs(self, timeslice_map_path, parmdbs_map_path, sourcedb_map_path,
              skip = False):
        """
        Perform a calibration step. First with a set of sources from the
        gsm and in later iterations also on the found sources
        """
        # create parset for bbs run
        parset = self.parset.makeSubset("BBS.")
        parset_path = self._write_parset_to_file(parset, "bbs",
                        "Parset for calibration with a local sky model")

        # create the output file path
        output_mapfile = self._write_datamap_to_file(None, "bbs_output",
                        "Mapfile with calibrated measurement sets.")

        converted_sourcedb_map_path = self._write_datamap_to_file(None,
                "source_db", "correctly shaped mapfile for input sourcedbs")

        if skip:
            return output_mapfile

        # The create db step produces a mapfile with a single sourcelist for
        # the different timeslices. Generate a mapfile with copies of the
        # sourcelist location: this allows validation of the maps in combination.
        # Get the original map data
        sourcedb_map = DataMap.load(sourcedb_map_path)
        parmdbs_map = MultiDataMap.load(parmdbs_map_path)
        converted_sourcedb_map = []

        # sanity check for correct output from previous recipes
        if not validate_data_maps(sourcedb_map, parmdbs_map):
            self.logger.error("The input files for bbs do not contain "
                                "matching host names for each entry content:")
            self.logger.error(repr(sourcedb_map))
            self.logger.error(repr(parmdbs_map))
            raise PipelineException("Invalid input data for imager_bbs recipe")

        self.run_task("imager_bbs",
                      timeslice_map_path,
                      parset = parset_path,
                      instrument_mapfile = parmdbs_map_path,
                      sourcedb_mapfile = sourcedb_map_path,
                      mapfile = output_mapfile,
                      working_directory = self.scratch_directory)

        return output_mapfile
Example #20
    def test_append_item_skip(self):
        data_map = MultiDataMap(self.new_style_map)
        data_map.append(("host", ["file"], True, [True]))
        data_map.append(("host", ["file"], True))

        data_map.iterator = DataMap.SkipIterator
        dataProducts = [item for item in data_map]
        # default contains 2 nonskipped items
        self.assertEqual(len(dataProducts), 2)
        self.assertTrue(
            all(isinstance(item, MultiDataProduct) for item in dataProducts))
        # The map already contains 2 skipped items, the final item is tested
        # here
        self.assertEqual(dataProducts[-1].host, 'locus004')
        self.assertEqual(dataProducts[-1].file, ['L12345_SB104.MS'])
Example #21
    def test_append_item_skip(self):
        data_map = MultiDataMap(self.new_style_map)
        data_map.append(("host",["file"], True, [True]))
        data_map.append(("host",["file"], True))

        data_map.iterator = DataMap.SkipIterator
        dataProducts = [item for item in data_map]
        # default contains 2 nonskipped items
        self.assertEqual(len(dataProducts), 2) 
        self.assertTrue(all(isinstance(item, MultiDataProduct) 
                        for item in dataProducts))
        # The map already contains 2 skipped items, the final item is tested 
        # here
        self.assertEqual(dataProducts[-1].host, 'locus004')
        self.assertEqual(dataProducts[-1].file, ['L12345_SB104.MS'])
Example #22
def plugin_main(args, **kwargs):
    #print 'PLUGIN KWARG: ', kwargs
    result = {}
    datamap = None
    fileid = kwargs['mapfile_in']
    fileid2 = kwargs['mapfile_ref']
    datamap = MultiDataMap.load(fileid)
    datamap2 = DataMap.load(fileid2)
    newmap = []
    for item in datamap2:
        entry = {}
        entry['host'] = item.host
        entry['file'] = datamap.data[0].file
        entry['skip'] = item.skip
        newmap.append(entry)

    outfileid = os.path.join(kwargs['mapfile_dir'], kwargs['filename'])
    with open(outfileid, 'w') as outmap:
        outmap.write(repr(newmap))

    result['mapfile'] = outfileid
    return result
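The plugin above is normally invoked by the generic pipeline framework, which supplies the keyword arguments. A hypothetical direct call, with made-up paths purely to show which kwargs the function reads, could look like this:

result = plugin_main(
    [],                                        # positional args are not used here
    mapfile_in='/data/maps/input_multi.map',   # MultiDataMap whose first entry provides the files
    mapfile_ref='/data/maps/reference.map',    # DataMap providing hosts and skip flags
    mapfile_dir='/data/maps',                  # directory in which to write the new mapfile
    filename='expanded.map',                   # name of the new mapfile
)
print(result['mapfile'])                       # path of the mapfile that was written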
Example #23
    def run(self, bbs_executable, parset, ms_list_path, parmdb_list_path,
            sky_list_path, concat_ms_path, major_cycle):
        """
        selfcal_bbs functionality. Called by framework performing all the work
        """
        self.logger.debug("Starting selfcal_bbs Node")
        # *********************************************************************
        # 1. Load mapfiles
        # read in the mapfiles to data maps: The master recipe added the single
        # path to a mapfile, which allows usage of the default data methods
        # (load_data_map)
        # TODO: Datamap
        ms_map = MultiDataMap.load(ms_list_path)
        parmdb_map = MultiDataMap.load(parmdb_list_path)
        sky_list = MultiDataMap.load(sky_list_path)
        source_db = sky_list[0].file[0]  # the sourcedb is the first file entry

        try:
            bbs_process_group = SubProcessGroup(self.logger,
                                                self.resourceMonitor)
            # *****************************************************************
            # 2. start the bbs executable with data
            # The data is located in multimaps. We need the first entry
            # TODO: This is not 'nice' usage of the multimap
            for (measurement_set, parmdm) in zip(ms_map[0].file,
                                                 parmdb_map[0].file):
                command = [
                    bbs_executable, "--sourcedb={0}".format(source_db),
                    "--parmdb={0}".format(parmdm), measurement_set, parset
                ]
                self.logger.info("Executing bbs command: {0}".format(
                    " ".join(command)))
                bbs_process_group.run(command)

            # *****************************************************************
            # 3. check status of the processes
            if bbs_process_group.wait_for_finish() is not None:
                self.logger.error("Failed bbs run detected. Aborting")
                return 1

        except OSError as exception:
            self.logger.error("Failed to execute bbs: {0}".format(
                str(exception)))
            return 1

        # *********************************************************************
        # 4. Concat your MSs in time after bbs calibration using
        #    msconcat (pyrap.tables module) (added by N.Vilchez)
        # This step has to be performed at this location, because the bbs run
        # might add additional columns not present in the original ms
        # and therefore not produced in the concat done in the prepare phase
        # redmine issue #6021
        pt.msconcat(ms_map[0].file, concat_ms_path, concatTime=True)

        # *********************************************************************
        # 5. Copy the time slices directory to a new one
        # This is done for debugging purposes: the copy is not used for anything
        # and the actual selfcal steps are done in place (added by N.Vilchez)
        # The save location is created relative to the concat.ms;
        # we could also use the self.scratch_directory from the toplevel recipe,
        # but this would need an additional ingredient.
        # This is a 'debugging' step and should never cause a failure of
        # the pipeline
        try:
            working_dir = os.path.dirname(concat_ms_path)
            time_slice_dir = os.path.join(working_dir, 'time_slices')
            time_slice_copy_dir = os.path.join(
                working_dir, 'time_slices_cycle_{0}'.format(major_cycle))

            cmd = "cp -r {0} {1}".format(time_slice_dir, time_slice_copy_dir)
            os.system(cmd)
        except Exception:
            self.logger.warn(
                "Debug copy of temporary files failed: continuing operations")

        return 0
Example #24
    def go(self):
        """
        Entry point for recipe: Called by the pipeline framework
        """
        super(imager_prepare, self).go()
        self.logger.info("Starting imager_prepare run")
        # *********************************************************************
        # input data
        input_map = DataMap.load(self.inputs['args'][0])
        output_map = DataMap.load(self.inputs['target_mapfile'])
        slices_per_image = self.inputs['slices_per_image']
        subbands_per_image = self.inputs['subbands_per_image']
        # Validate input
        if not self._validate_input_map(input_map, output_map,
                                        slices_per_image, subbands_per_image):
            return 1

        # outputs
        output_ms_mapfile_path = self.inputs['mapfile']

        # *********************************************************************
        # schedule the actual work
        # TODO: Refactor this function into: load data, perform work,
        # create output
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))

        jobs = []
        paths_to_image_mapfiles = []
        n_subband_groups = len(output_map)
        for idx_sb_group, item in enumerate(output_map):
            # create the input files for this node
            self.logger.debug("Creating input data subset for processing "
                              "on: {0}".format(item.host))
            inputs_for_image_map = \
                self._create_input_map_for_sbgroup(
                                slices_per_image, n_subband_groups,
                                subbands_per_image, idx_sb_group, input_map)

            # Save the mapfile
            job_directory = self.config.get("layout", "job_directory")
            inputs_for_image_mapfile_path = os.path.join(
                job_directory, "mapfiles",
                "ms_per_image_{0}".format(idx_sb_group))
            self._store_data_map(inputs_for_image_mapfile_path,
                                 inputs_for_image_map, "inputmap for location")

            # save the (input) ms as a list of mapfiles
            paths_to_image_mapfiles.append(
                tuple([item.host, inputs_for_image_mapfile_path, False]))

            arguments = [
                self.environment, self.inputs['parset'],
                self.inputs['working_directory'],
                self.inputs['processed_ms_dir'], self.inputs['ndppp_exec'],
                item.file, slices_per_image, subbands_per_image,
                inputs_for_image_mapfile_path,
                self.inputs['asciistat_executable'],
                self.inputs['statplot_executable'],
                self.inputs['msselect_executable'],
                self.inputs['rficonsole_executable'],
                self.inputs['add_beam_tables']
            ]

            jobs.append(ComputeJob(item.host, node_command, arguments))

        # Hand over the job(s) to the pipeline scheduler
        self._schedule_jobs(jobs)

        # *********************************************************************
        # validate the output, cleanup, return output
        if self.error.isSet():  #if one of the nodes failed
            self.logger.warn("Failed prepare_imager run detected: Generating "
                             "new output_ms_mapfile_path without failed runs:"
                             " {0}".format(output_ms_mapfile_path))

        concat_ms = copy.deepcopy(output_map)
        slices = []
        finished_runs = 0
        #scan the return dict for completed key
        for (item, job) in zip(concat_ms, jobs):
            # only save the slices if the node has completed successfully
            if job.results["returncode"] == 0:
                finished_runs += 1
                slices.append(
                    tuple([item.host, job.results["time_slices"], False]))
            else:
                # Set the dataproduct to skipped!!
                item.skip = True
                slices.append(tuple([item.host, ["/Failed"], True]))
                msg = "Failed run on {0}. NOT Created: {1} ".format(
                    item.host, item.file)
                self.logger.warn(msg)

        if finished_runs == 0:
            self.logger.error(
                "None of the started compute nodes finished: "
                "The current recipe produced no output, aborting")
            return 1

        # Write the output mapfiles:
        # concat.ms paths:
        self._store_data_map(output_ms_mapfile_path, concat_ms,
                             "mapfile with concat.ms")

        # timeslices
        MultiDataMap(slices).save(self.inputs['slices_mapfile'])
        self.logger.info(
            "Wrote MultiMapfile with produces timeslice: {0}".format(
                self.inputs['slices_mapfile']))

        #map with actual input mss.
        self._store_data_map(self.inputs["raw_ms_per_image_mapfile"],
                             DataMap(paths_to_image_mapfiles),
                             "mapfile containing (raw) input ms per image:")

        # Set the return values
        self.outputs['mapfile'] = output_ms_mapfile_path
        self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
        self.outputs['raw_ms_per_image_mapfile'] = \
            self.inputs["raw_ms_per_image_mapfile"]
        return 0
Example #25
    def go(self):
        """
        imager_bbs functionality. Called by framework performing all the work
        """
        super(imager_bbs, self).go()
        self.logger.info("Starting imager_bbs run")

        # ********************************************************************
        # 1. Load and validate the data

        ms_map = MultiDataMap.load(self.inputs['args'][0])
        parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])

        # TODO: DataMap extension
#        #Check if the input has equal length and on the same nodes
#        if not validate_data_maps(ms_map, parmdb_map):
#            self.logger.error("The combination of mapfiles failed validation:")
#            self.logger.error("ms_map: \n{0}".format(ms_map))
#            self.logger.error("parmdb_map: \n{0}".format(parmdb_map))
#            return 1

        # *********************************************************************
        # 2. Start the node scripts
        jobs = []
        node_command = " python %s" % (self.__file__.replace("master", "nodes"))
        map_dir = os.path.join(
                        self.config.get("layout", "job_directory"), "mapfiles")
        run_id = str(self.inputs.get("id"))

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for w, x, y in zip(ms_map, parmdb_map, sourcedb_map):
            w.skip = x.skip = y.skip = (
                w.skip or x.skip or y.skip
            )

        ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
            DataMap.SkipIterator
        for (ms, parmdb, sourcedb) in zip(ms_map, parmdb_map, sourcedb_map):
            #host is same for each entry (validate_data_maps)
            host, ms_list = ms.host, ms.file

            # Write data maps to MultiDataMaps
            ms_list_path = os.path.join(
                    map_dir, host + "_ms_" + run_id + ".map")
            MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

            parmdb_list_path = os.path.join(
                    map_dir, host + "_parmdb_" + run_id + ".map")
            MultiDataMap(
                [tuple([host, parmdb.file, False])]).save(parmdb_list_path)

            sourcedb_list_path = os.path.join(
                    map_dir, host + "_sky_" + run_id + ".map")
            MultiDataMap(
                [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path)

            arguments = [self.inputs['bbs_executable'],
                         self.inputs['parset'],
                         ms_list_path, parmdb_list_path, sourcedb_list_path]
            jobs.append(ComputeJob(host, node_command, arguments))

        # start and wait till all are finished
        self._schedule_jobs(jobs)

        # **********************************************************************
        # 3. validate the node output and construct the output mapfile.
        if self.error.isSet():   #if one of the nodes failed
            self.logger.error("One of the nodes failed while performing"
                              "a BBS run. Aborting: concat.ms corruption")
            return 1

        # return the output: The measurement set that are calibrated:
        # calibrated data is placed in the ms sets
        MultiDataMap(ms_map).save(self.inputs['mapfile'])
        self.logger.info("Wrote file with  calibrated data")

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Example #26
 def test_new_style_load_store(self):
     tmp_file = self.new_style_map_file + '.tmp'
     data_map = MultiDataMap(self.new_style_map)
     data_map.save(tmp_file)
     reloaded_data_map = MultiDataMap.load(tmp_file)
     self.assertEqual(data_map, reloaded_data_map)
Example #27
    def _collect_and_assign_outputs(self, jobs, output_map, slice_paths_map):
        """
        Collect and combine the outputs of the individual create_dbs node
        recipes. Combine into output mapfiles and save these at the supplied
        path locations       
        """
        # Create a container for the output parmdbs: same host and skip,
        # with an empty file list
        output_map.iterator = DataMap.TupleIterator
        parmdbs_list = []
        # loop over the raw data including the skip file (use the data member)
        for output_entry in output_map.data:
            parms_tuple = tuple([output_entry.host, [], output_entry.skip])
            parmdbs_list.append(parms_tuple)

        parmdbs_map = MultiDataMap(parmdbs_list)

        output_map.iterator = parmdbs_map.iterator = DataMap.SkipIterator  # The maps are synced
        succesfull_run = False
        for (output_item, parmdbs_item, job) in zip(output_map, parmdbs_map,
                                                    jobs):
            node_succeeded = "parmdbs" in job.results and \
                    "sourcedb" in job.results

            host = output_item.host

            # The current job has to be skipped (due to skip field)
            # Or if the node failed:
            if not node_succeeded:
                self.logger.warn(
                    "Failed ImagerCreateDBs run detected: "
                    "no sourcedb file created on {0}, continuing".format(
                        host))
                output_item.file = "failed"
                output_item.skip = True
                parmdbs_item.file = ["failed"]
                parmdbs_item.skip = True

            # Else it succeeded and we can write the results
            else:
                succesfull_run = True
                output_item.file = job.results["sourcedb"]
                parmdbs_item.file = job.results["parmdbs"]

                # we also need to manually set the skip for this new
                # file list
                parmdbs_item.file_skip = [False] * len(job.results["parmdbs"])

        # Fail if none of the nodes returned all data
        if not succesfull_run:
            self.logger.error("The creation of dbs on the nodes failed:")
            self.logger.error("Not a single node produces all needed data")
            self.logger.error(
                "products. sourcedb_files: {0}".format(output_map))
            self.logger.error("parameter dbs: {0}".format(parmdbs_map))
            return 1

        # write the mapfiles
        output_map.save(self.inputs["sourcedb_map_path"])
        parmdbs_map.save(self.inputs["parmdbs_map_path"])
        self.logger.debug("Wrote sourcedb dataproducts: {0} \n {1}".format(
            self.inputs["sourcedb_map_path"], self.inputs["parmdbs_map_path"]))

        # Set the outputs
        self.outputs['sourcedb_map_path'] = self.inputs["sourcedb_map_path"]
        self.outputs['parmdbs_map_path'] = self.inputs["parmdbs_map_path"]

        return 0
Example #28
    def go(self):
        """
        Entry point for recipe: Called by the pipeline framework
        """
        super(imager_prepare, self).go()
        self.logger.info("Starting imager_prepare run")
        job_directory = self.config.get("layout", "job_directory")
        # *********************************************************************
        # input data
        input_map = DataMap.load(self.inputs['args'][0])
        output_map = DataMap.load(self.inputs['target_mapfile'])
        slices_per_image = self.inputs['slices_per_image']
        subbands_per_image = self.inputs['subbands_per_image']
        # Validate input
        if not self._validate_input_map(input_map, output_map,
                                        slices_per_image, subbands_per_image):
            return 1

        # outputs
        output_ms_mapfile_path = self.inputs['mapfile']

        # *********************************************************************
        # schedule the actual work
        # TODO: Refactor this function into: load data, perform work,
        # create output
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))

        jobs = []
        paths_to_image_mapfiles = []
        n_subband_groups = len(output_map)  # needed for subsets in sb list

        globalfs = self.config.has_option(
            "remote", "globalfs") and self.config.getboolean(
                "remote", "globalfs")

        for idx_sb_group, item in enumerate(output_map):
            # create the input files for this node
            self.logger.debug("Creating input data subset for processing "
                              "on: {0}".format(item.host))
            inputs_for_image_map = \
                self._create_input_map_for_sbgroup(
                                slices_per_image, n_subband_groups,
                                subbands_per_image, idx_sb_group, input_map)

            # Save the mapfile
            inputs_for_image_mapfile_path = os.path.join(
                job_directory, "mapfiles",
                "ms_per_image_{0}.map".format(idx_sb_group))

            self._store_data_map(inputs_for_image_mapfile_path,
                                 inputs_for_image_map, "inputmap for location")

            # Skip the current step if skip is set; we cannot use the skip
            # iterator here because enumerate depends on the index in the map
            if item.skip:
                # assure that the mapfile is correct
                paths_to_image_mapfiles.append(tuple([item.host, [], True]))
                continue

            # save the (input) ms as a list of mapfiles
            paths_to_image_mapfiles.append(
                tuple([item.host, inputs_for_image_mapfile_path, False]))

            # use unique working directories per job, to prevent interference between jobs on a global fs
            working_dir = os.path.join(
                self.inputs['working_directory'],
                "imager_prepare_{0}".format(idx_sb_group))

            arguments = [
                self.environment, self.inputs['parset'], working_dir,
                self.inputs['processed_ms_dir'], self.inputs['ndppp_exec'],
                item.file, slices_per_image, subbands_per_image,
                inputs_for_image_mapfile_path,
                self.inputs['asciistat_executable'],
                self.inputs['statplot_executable'],
                self.inputs['msselect_executable'],
                self.inputs['rficonsole_executable'],
                self.inputs['do_rficonsole'], self.inputs['add_beam_tables'],
                globalfs
            ]

            jobs.append(
                ComputeJob(item.host,
                           node_command,
                           arguments,
                           resources={"cores": self.inputs['nthreads']}))

        # Hand over the job(s) to the pipeline scheduler
        self._schedule_jobs(jobs)

        # *********************************************************************
        # validate the output, cleanup, return output
        if self.error.isSet():  #if one of the nodes failed
            self.logger.warn("Failed prepare_imager run detected: Generating "
                             "new output_ms_mapfile_path without failed runs:"
                             " {0}".format(output_ms_mapfile_path))

        concat_ms = copy.deepcopy(output_map)
        slices = []
        finished_runs = 0
        #scan the return dict for completed key
        # loop over the potential jobs including the skipped
        # If we have a skipped item, add the item to the slices with skip set
        jobs_idx = 0
        for item in concat_ms:
            # If this is an item that is skipped via the skip parameter in
            # the parset, append a skipped
            if item.skip:
                slices.append(tuple([item.host, [], True]))
                continue

            # we cannot use the skip iterator so we need to manually get the
            # current job from the list
            job = jobs[jobs_idx]

            # only save the slices if the node has completed successfully
            if job.results["returncode"] == 0:
                finished_runs += 1
                slices.append(
                    tuple([item.host, job.results["time_slices"], False]))
            else:
                # Set the dataproduct to skipped!!
                item.skip = True
                slices.append(tuple([item.host, [], True]))
                msg = "Failed run on {0}. NOT Created: {1} ".format(
                    item.host, item.file)
                self.logger.warn(msg)

            # we have a non skipped workitem, increase the job idx
            jobs_idx += 1

        if finished_runs == 0:
            self.logger.error(
                "None of the started compute nodes finished: "
                "The current recipe produced no output, aborting")
            return 1

        # Write the output mapfiles:
        # concat.ms paths:
        self._store_data_map(output_ms_mapfile_path, concat_ms,
                             "mapfile with concat.ms")

        # timeslices
        MultiDataMap(slices).save(self.inputs['slices_mapfile'])
        self.logger.info(
            "Wrote MultiMapfile with produces timeslice: {0}".format(
                self.inputs['slices_mapfile']))

        #map with actual input mss.
        self._store_data_map(self.inputs["ms_per_image_mapfile"],
                             DataMap(paths_to_image_mapfiles),
                             "mapfile containing (used) input ms per image:")

        # Set the return values
        self.outputs['mapfile'] = output_ms_mapfile_path
        self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
        self.outputs['ms_per_image_mapfile'] = \
            self.inputs["ms_per_image_mapfile"]
        return 0
Example #29
    def test_append_item_invalid(self):
        data_map = MultiDataMap(self.new_style_map)

        self.assertRaises(DataMapError, data_map.append,
                          ("host", True, "file", [False], "bwaaa"))
Example #30
    def _collect_and_assign_outputs(self, jobs, output_map, slice_paths_map):
        """
        Collect and combine the outputs of the individual create_dbs node
        recipes. Combine into output mapfiles and save these at the supplied
        path locations       
        """
        # Create a container for the output parmdbs: same host and skip,
        # with an empty file list
        output_map.iterator = DataMap.TupleIterator
        parmdbs_list = []
        # loop over the raw data including the skip file (use the data member)
        for output_entry in output_map.data:
            parms_tuple = tuple([output_entry.host, [],
                                output_entry.skip])
            parmdbs_list.append(parms_tuple)

        parmdbs_map = MultiDataMap(parmdbs_list)

        output_map.iterator = parmdbs_map.iterator = DataMap.SkipIterator # The maps are synced
        succesfull_run = False
        for (output_item, parmdbs_item, job) in zip(
                                                output_map, parmdbs_map, jobs):
            node_succeeded = "parmdbs" in job.results and \
                    "sourcedb" in job.results

            host = output_item.host

            # The current job has to be skipped (due to skip field)
            # Or if the node failed:
            if not node_succeeded:
                self.logger.warn("Warning failed ImagerCreateDBs run "
                    "detected: No sourcedb file created, {0} continue".format(
                                                            host))
                output_item.file = "failed"
                output_item.skip = True
                parmdbs_item.file = ["failed"]
                parmdbs_item.skip = True

            # Else it succeeded and we can write the results
            else:
                succesfull_run = True
                output_item.file = job.results["sourcedb"]
                parmdbs_item.file = job.results["parmdbs"]

                # we also need to manually set the skip for this new 
                # file list
                parmdbs_item.file_skip = [False] * len(job.results["parmdbs"])

        # Fail if none of the nodes returned all data
        if not succesfull_run:
            self.logger.error("The creation of dbs on the nodes failed:")
            self.logger.error("Not a single node produces all needed data")
            self.logger.error(
                "products. sourcedb_files: {0}".format(output_map))
            self.logger.error("parameter dbs: {0}".format(parmdbs_map))
            return 1

        # write the mapfiles     
        output_map.save(self.inputs["sourcedb_map_path"])
        parmdbs_map.save(self.inputs["parmdbs_map_path"])
        self.logger.debug("Wrote sourcedb dataproducts: {0} \n {1}".format(
            self.inputs["sourcedb_map_path"], self.inputs["parmdbs_map_path"]))

        # Set the outputs
        self.outputs['sourcedb_map_path'] = self.inputs["sourcedb_map_path"]
        self.outputs['parmdbs_map_path'] = self.inputs["parmdbs_map_path"]

        return 0
Example #31
 def test_compare_DataMap_and_MultiDataMap(self):
     data_map = DataMap([])
     multi_data_map = MultiDataMap([])
     # Empty maps should be unequal also
     self.assertNotEqual(data_map, multi_data_map)
Example #32
    def go(self):
        """
        selfcal_bbs functionality. Called by framework performing all the work
        """
        super(selfcal_bbs, self).go()
        self.logger.info("Starting selfcal_bbs run")

        # ********************************************************************
        # 1. Load and validate the data
        ms_map = MultiDataMap.load(self.inputs['args'][0])
        parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])
        concat_ms_map = DataMap.load(self.inputs['concat_ms_map_path'])

        # *********************************************************************
        # 2. Start the node scripts
        jobs = []
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))
        map_dir = os.path.join(self.config.get("layout", "job_directory"),
                               "mapfiles")
        run_id = str(self.inputs.get("id"))

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        align_data_maps(ms_map, parmdb_map, sourcedb_map, concat_ms_map)

        ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
            concat_ms_map.iterator = DataMap.SkipIterator

        # *********************************************************************
        for (ms, parmdb, sourcedb, concat_ms) in zip(ms_map, parmdb_map,
                                                     sourcedb_map,
                                                     concat_ms_map):
            #host is same for each entry (validate_data_maps)
            host, ms_list = ms.host, ms.file

            # Write data maps to MultiDataMaps
            ms_list_path = os.path.join(map_dir,
                                        host + "_ms_" + run_id + ".map")
            MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

            parmdb_list_path = os.path.join(
                map_dir, host + "_parmdb_" + run_id + ".map")
            MultiDataMap([tuple([host, parmdb.file,
                                 False])]).save(parmdb_list_path)

            sourcedb_list_path = os.path.join(map_dir,
                                              host + "_sky_" + run_id + ".map")
            MultiDataMap([tuple([host, [sourcedb.file],
                                 False])]).save(sourcedb_list_path)

            # The concat ms does not have to be written: it already is a
            # singular item (it is the output of the reduce step)
            # redmine issue #6021
            arguments = [
                self.inputs['bbs_executable'], self.inputs['parset'],
                ms_list_path, parmdb_list_path, sourcedb_list_path,
                concat_ms.file, self.inputs['major_cycle']
            ]
            jobs.append(ComputeJob(host, node_command, arguments))

        # start and wait till all are finished
        self._schedule_jobs(jobs)

        # **********************************************************************
        # 3. validate the node output and construct the output mapfile.
        if self.error.isSet():  #if one of the nodes failed
            self.logger.warn("Failed bbs node run detected, skipping work"
                             "on this work item for further computations")

        # find failed job and set the skip field
        for (ms_item, concat_item, job) in zip(ms_map, concat_ms_map, jobs):
            if job.results["returncode"] == 0:
                continue
            else:
                ms_item.skip = True
                concat_item.skip = True
                self.logger.warn("bbs failed on item: {0}".format(
                    ms_item.file))

        # return the output: The measurement set that are calibrated:
        # calibrated data is placed in the ms sets
        MultiDataMap(ms_map).save(self.inputs['mapfile'])
        # also save the concat_ms map with possible skips
        DataMap(concat_ms_map).save(self.inputs['concat_ms_map_path'])
        self.logger.info("Wrote file with  calibrated data")

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Example #33
    def go(self):
        """
        imager_bbs functionality. Called by framework performing all the work
        """
        super(imager_bbs, self).go()
        self.logger.info("Starting imager_bbs run")

        # ********************************************************************
        # 1. Load and validate the data

        ms_map = MultiDataMap.load(self.inputs['args'][0])
        parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])

        # TODO: DataMap extension
#        #Check if the input has equal length and on the same nodes
#        if not validate_data_maps(ms_map, parmdb_map):
#            self.logger.error("The combination of mapfiles failed validation:")
#            self.logger.error("ms_map: \n{0}".format(ms_map))
#            self.logger.error("parmdb_map: \n{0}".format(parmdb_map))
#            return 1

        # *********************************************************************
        # 2. Start the node scripts
        jobs = []
        node_command = " python3 %s" % (self.__file__.replace("master", "nodes"))
        map_dir = os.path.join(
                        self.config.get("layout", "job_directory"), "mapfiles")
        run_id = str(self.inputs.get("id"))

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for w, x, y in zip(ms_map, parmdb_map, sourcedb_map):
            w.skip = x.skip = y.skip = (
                w.skip or x.skip or y.skip
            )

        ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
            DataMap.SkipIterator
        for (idx, (ms, parmdb, sourcedb)) in enumerate(zip(ms_map, parmdb_map, sourcedb_map)):
            # host is same for each entry (validate_data_maps)
            host, ms_list = ms.host, ms.file

            # Write data maps to MultiDataMaps
            ms_list_path = os.path.join(
                    map_dir, "%s-%s_map_%s.map" % (host, idx, run_id))
            MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

            parmdb_list_path = os.path.join(
                    map_dir, "%s-%s_parmdb_%s.map" % (host, idx, run_id))
            MultiDataMap(
                [tuple([host, parmdb.file, False])]).save(parmdb_list_path)

            sourcedb_list_path = os.path.join(
                    map_dir, "%s-%s_sky_%s.map" % (host, idx, run_id))
            MultiDataMap(
                [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path)

            arguments = [self.inputs['bbs_executable'],
                         self.inputs['parset'],
                         ms_list_path, parmdb_list_path, sourcedb_list_path]
            jobs.append(ComputeJob(host, node_command, arguments,
                    resources = {
                        "cores": self.inputs['nthreads']
                    }))

        # start and wait till all are finished
        self._schedule_jobs(jobs)

        # **********************************************************************
        # 3. validate the node output and construct the output mapfile.
        if self.error.isSet():    # if one of the nodes failed
            self.logger.error("One of the nodes failed while performing"
                              "a BBS run. Aborting: concat.ms corruption")
            return 1

        # return the output: The measurement set that are calibrated:
        # calibrated data is placed in the ms sets
        MultiDataMap(ms_map).save(self.inputs['mapfile'])
        self.logger.info("Wrote file with  calibrated data")

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Example #34
 def test_new_style_load_store(self):
     tmp_file = self.new_style_map_file + '.tmp'
     data_map = MultiDataMap(self.new_style_map)
     data_map.save(tmp_file)
     reloaded_data_map = MultiDataMap.load(tmp_file)
     self.assertEqual(data_map, reloaded_data_map)