def test_align_data_maps_equal_maps_skip_set_5_multi_maps(self):
    # test aligning five MultiDataMaps
    data_map = MultiDataMap(self.new_style_multi_map)
    data_map2 = MultiDataMap(self.new_style_multi_map)
    data_map3 = MultiDataMap(self.new_style_multi_map)
    data_map4 = MultiDataMap(self.new_style_multi_map)
    data_map5 = MultiDataMap(self.new_style_multi_map)
    # change a single skip value (the second entry)
    data_map.data[1].skip = True
    # Perform an align
    align_data_maps(data_map, data_map2, data_map3, data_map4, data_map5)
    # The second entry of every other map should now also be skipped
    self.assertTrue(data_map2.data[1].skip,
                    "The skip field was not aligned correctly in data_map2")
    self.assertTrue(data_map3.data[1].skip,
                    "The skip field was not aligned correctly in data_map3")
    self.assertTrue(data_map4.data[1].skip,
                    "The skip field was not aligned correctly in data_map4")
    self.assertTrue(data_map5.data[1].skip,
                    "The skip field was not aligned correctly in data_map5")
def test_skip_iterator(self):
    data_map = MultiDataMap(self.new_style_map)
    data_map.iterator = MultiDataMap.SkipIterator
    unskipped = [item for item in data_map]
    self.assertEqual(len(unskipped), 2)
    self.assertTrue(all(isinstance(item, MultiDataProduct)
                        for item in unskipped))
    self.assertEqual(unskipped[0].host, 'locus002')
    self.assertEqual(unskipped[0].file, ['L12345_SB102.MS'])
def test_tuple_iterator(self):
    data_map = MultiDataMap(self.new_style_map)
    data_map.iterator = MultiDataMap.TupleIterator
    tuples = [item for item in data_map]
    self.assertEqual(len(tuples), 4)
    self.assertTrue(all(isinstance(item, tuple) for item in tuples))
    self.assertTrue(all(len(item) == 2 for item in tuples))
    self.assertEqual(tuples[0], ('locus001', ['L12345_SB101.MS']))
def test_align_data_maps_different_length_maps(self):
    data_map = MultiDataMap(self.new_style_multi_map)
    data_map_other_length = MultiDataMap(
        [{'host': 'locus001', 'file': ['L12345_SB101.MS'],
          'file_skip': [True], 'skip': True}])
    # Maps of unequal length cannot be aligned
    self.assertRaises(DataMapError, align_data_maps,
                      data_map, data_map_other_length)
def test_align_data_maps_equal_maps_no_change(self):
    data_map = MultiDataMap(self.new_style_multi_map)
    data_map2 = MultiDataMap(self.new_style_multi_map)
    data_map3 = MultiDataMap(self.new_style_multi_map)
    # Perform an align
    align_data_maps(data_map, data_map2)
    # There should be no changes: all three maps (including the untouched
    # reference data_map3) should still be the same
    for entry1, entry2, entry3 in zip(data_map, data_map2, data_map3):
        self.assertEqual(entry1, entry3)
        self.assertEqual(entry2, entry3)
def test_align_data_maps_equal_maps_skip_set(self):
    data_map = MultiDataMap(self.new_style_multi_map)
    data_map2 = MultiDataMap(self.new_style_multi_map)
    # Change a single entry in the data map to skipped
    data_map.data[1].skip = True
    # Perform an align
    align_data_maps(data_map, data_map2)
    # The second entry of the other map should now also be skipped
    self.assertTrue(data_map2.data[1].skip,
                    "The skip field was not aligned correctly in the "
                    "second entry")
def test_new_style_map(self):
    data_map = MultiDataMap(self.new_style_map)
    self.assertEqual(len(data_map), 4)
    self.assertEqual(data_map[0].host, 'locus001')
    self.assertEqual(data_map[1].file, ['L12345_SB102.MS'])
    self.assertEqual(data_map[1].file_skip, [False])
    self.assertTrue(data_map[2].skip)
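# The new_style_map fixture used by these tests is defined in the test
# case's setUp() and is not included in these snippets. Below is a minimal
# sketch that is consistent with the assertions above (four hosts, entries
# 0 and 2 skipped, entries 1 and 3 unskipped); the file_skip values of the
# skipped entries are assumptions, not taken from the source.
new_style_map = [
    {'host': 'locus001', 'file': ['L12345_SB101.MS'],
     'file_skip': [True], 'skip': True},
    {'host': 'locus002', 'file': ['L12345_SB102.MS'],
     'file_skip': [False], 'skip': False},
    {'host': 'locus003', 'file': ['L12345_SB103.MS'],
     'file_skip': [True], 'skip': True},
    {'host': 'locus004', 'file': ['L12345_SB104.MS'],
     'file_skip': [False], 'skip': False},
]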
def go(self):
    super(imager_create_dbs, self).go()

    # get assoc_theta, convert from empty string if needed
    assoc_theta = self.inputs["assoc_theta"]
    if assoc_theta == "":
        assoc_theta = None

    # Load mapfile data from files
    self.logger.info(self.inputs["slice_paths_mapfile"])
    slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"])
    input_map = DataMap.load(self.inputs['args'][0])
    source_list_map = DataMap.load(self.inputs['source_list_map_path'])

    if self._validate_input_data(input_map, slice_paths_map):
        return 1

    # Run the node scripts with the collected inputs
    jobs, output_map = self._run_create_dbs_node(
        input_map, slice_paths_map, assoc_theta, source_list_map)

    # Collect the output of the node scripts and write it to (map) files
    return self._collect_and_assign_outputs(jobs, output_map,
                                            slice_paths_map)
def test_align_data_maps_not_enough_arguments(self):
    # If called with zero data maps, expect a DataMapError
    self.assertRaises(DataMapError, align_data_maps)
    # If called with a single data map, expect a DataMapError
    data_map = MultiDataMap(self.new_style_multi_map)
    self.assertRaises(DataMapError, align_data_maps, data_map)
def test_old_style_map(self):
    data_map = MultiDataMap(self.old_style_map)
    self.assertEqual(len(data_map), 4)
    self.assertEqual(data_map[0].host, 'locus001')
    self.assertEqual(data_map[1].file, ['L12345_SB102.MS'])
    self.assertEqual(data_map[2].file_skip, [True])
    self.assertEqual(data_map[2].skip, True)
    self.assertTrue(all(item.skip for item in data_map))
def run(self, bbs_executable, parset, ms_list_path, parmdb_list_path,
        sky_list_path):
    """
    imager_bbs functionality. Called by the framework; performs all the work.
    """
    self.logger.debug("Starting imager_bbs Node")
    # *********************************************************************
    # 1. Load mapfiles
    # Read the mapfiles into data maps: the master recipe added the single
    # path to a mapfile, which allows usage of the default data methods
    # (load_data_map)
    # TODO: Datamap
    ms_map = MultiDataMap.load(ms_list_path)
    parmdb_map = MultiDataMap.load(parmdb_list_path)
    sky_list = MultiDataMap.load(sky_list_path)
    source_db = sky_list[0].file[0]  # the sourcedb is the first file entry

    try:
        bbs_process_group = SubProcessGroup(self.logger,
                                            self.resourceMonitor)
        # *****************************************************************
        # 2. start the bbs executable with data
        for (measurement_set, parmdb) in zip(ms_map[0].file,
                                             parmdb_map[0].file):
            command = [
                bbs_executable,
                "--sourcedb={0}".format(source_db),
                "--parmdb={0}".format(parmdb),
                measurement_set,
                parset
            ]
            self.logger.info("Executing bbs command: {0}".format(
                " ".join(command)))
            bbs_process_group.run(command)

        # *****************************************************************
        # 3. check the status of the processes
        if bbs_process_group.wait_for_finish() is not None:
            self.logger.error("Failed bbs run detected. Aborting.")
            # If bbs failed we need to abort: the concat is now corrupt
            return 1

    except OSError as exception:
        self.logger.error("Failed to execute bbs: {0}".format(
            str(exception)))
        return 1

    return 0
def test_append_item_non_skip(self):
    data_map = MultiDataMap(self.new_style_map)
    data_map.append(("host", ["file"], False, [False]))
    data_map.append(("host", ["file"], False))
    data_map.iterator = DataMap.TupleIterator
    tuples = [item for item in data_map]
    self.assertEqual(len(tuples), 6)
    self.assertTrue(all(isinstance(item, tuple) for item in tuples))
    self.assertTrue(all(len(item) == 2 for item in tuples))
    self.assertEqual(tuples[-1], ('host', ['file']))
def _bbs(self, timeslice_map_path, parmdbs_map_path, sourcedb_map_path,
         skip=False):
    """
    Perform a calibration step. First with a set of sources from the gsm
    and in later iterations also on the found sources
    """
    # create the parset for the bbs run
    parset = self.parset.makeSubset("BBS.")
    parset_path = self._write_parset_to_file(
        parset, "bbs",
        "Parset for calibration with a local sky model")

    # create the output file path
    output_mapfile = self._write_datamap_to_file(
        None, "bbs_output",
        "Mapfile with calibrated measurement sets.")

    converted_sourcedb_map_path = self._write_datamap_to_file(
        None, "source_db",
        "correctly shaped mapfile for input sourcedbs")

    if skip:
        return output_mapfile

    # The create db step produces a mapfile with a single sourcelist for
    # the different timeslices. Generate a mapfile with copies of the
    # sourcelist location: this allows validation of maps in combination.

    # get the original map data
    sourcedb_map = DataMap.load(sourcedb_map_path)
    parmdbs_map = MultiDataMap.load(parmdbs_map_path)
    converted_sourcedb_map = []

    # sanity check for correct output from previous recipes
    if not validate_data_maps(sourcedb_map, parmdbs_map):
        self.logger.error("The input files for bbs do not contain "
                          "matching host names for each entry content:")
        self.logger.error(repr(sourcedb_map))
        self.logger.error(repr(parmdbs_map))
        raise PipelineException("Invalid input data for imager_bbs recipe")

    self.run_task("imager_bbs",
                  timeslice_map_path,
                  parset=parset_path,
                  instrument_mapfile=parmdbs_map_path,
                  sourcedb_mapfile=sourcedb_map_path,
                  mapfile=output_mapfile,
                  working_directory=self.scratch_directory)

    return output_mapfile
def test_append_item_skip(self):
    data_map = MultiDataMap(self.new_style_map)
    data_map.append(("host", ["file"], True, [True]))
    data_map.append(("host", ["file"], True))
    data_map.iterator = DataMap.SkipIterator
    dataProducts = [item for item in data_map]
    # default contains 2 nonskipped items
    self.assertEqual(len(dataProducts), 2)
    self.assertTrue(all(isinstance(item, MultiDataProduct)
                        for item in dataProducts))
    # The map already contains 2 skipped items, the final item is tested
    # here
    self.assertEqual(dataProducts[-1].host, 'locus004')
    self.assertEqual(dataProducts[-1].file, ['L12345_SB104.MS'])
def plugin_main(args, **kwargs):
    #print 'PLUGIN KWARG: ', kwargs
    result = {}
    datamap = None
    fileid = kwargs['mapfile_in']
    fileid2 = kwargs['mapfile_ref']
    datamap = MultiDataMap.load(fileid)
    datamap2 = DataMap.load(fileid2)
    newmap = []
    for item in datamap2:
        entry = {}
        entry['host'] = item.host
        entry['file'] = datamap.data[0].file
        entry['skip'] = item.skip
        newmap.append(entry)
    outfileid = os.path.join(kwargs['mapfile_dir'], kwargs['filename'])
    outmap = open(outfileid, 'w')
    outmap.write(repr(newmap))
    outmap.close()
    result['mapfile'] = outfileid
    return result
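# A hypothetical invocation of the plugin above (all paths are placeholders,
# not taken from the source): every entry of the reference DataMap is given
# the file list of the *first* entry of the input MultiDataMap.
#
#   result = plugin_main(
#       [],
#       mapfile_in='/data/run/mapfiles/timeslices.map',   # MultiDataMap on disk
#       mapfile_ref='/data/run/mapfiles/targets.map',     # DataMap giving hosts and skip flags
#       mapfile_dir='/data/run/mapfiles',
#       filename='expanded.map',
#   )
#   # result['mapfile'] then points at the newly written mapfile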
def run(self, bbs_executable, parset, ms_list_path, parmdb_list_path,
        sky_list_path, concat_ms_path, major_cycle):
    """
    selfcal_bbs functionality. Called by the framework; performs all the work.
    """
    self.logger.debug("Starting selfcal_bbs Node")
    # *********************************************************************
    # 1. Load mapfiles
    # Read the mapfiles into data maps: the master recipe added the single
    # path to a mapfile, which allows usage of the default data methods
    # (load_data_map)
    # TODO: Datamap
    ms_map = MultiDataMap.load(ms_list_path)
    parmdb_map = MultiDataMap.load(parmdb_list_path)
    sky_list = MultiDataMap.load(sky_list_path)
    source_db = sky_list[0].file[0]  # the sourcedb is the first file entry

    try:
        bbs_process_group = SubProcessGroup(self.logger,
                                            self.resourceMonitor)
        # *****************************************************************
        # 2. start the bbs executable with data
        # The data is located in multimaps. We need the first entry
        # TODO: This is not 'nice' usage of the multimap
        for (measurement_set, parmdb) in zip(ms_map[0].file,
                                             parmdb_map[0].file):
            command = [
                bbs_executable,
                "--sourcedb={0}".format(source_db),
                "--parmdb={0}".format(parmdb),
                measurement_set,
                parset
            ]
            self.logger.info("Executing bbs command: {0}".format(
                " ".join(command)))
            bbs_process_group.run(command)

        # *****************************************************************
        # 3. check the status of the processes
        if bbs_process_group.wait_for_finish() is not None:
            self.logger.error("Failed bbs run detected. Aborting.")
            return 1

    except OSError as exception:
        self.logger.error("Failed to execute bbs: {0}".format(
            str(exception)))
        return 1

    # *********************************************************************
    # 4. Concat in time after bbs calibration of your MSs using
    # msconcat (pyrap.tables module) (added by N.Vilchez)
    # This step has to be performed at this location, because the bbs run
    # might add additional columns not present in the original ms
    # and therefore not produced in the concat done in the prepare phase
    # redmine issue #6021
    pt.msconcat(ms_map[0].file, concat_ms_path, concatTime=True)

    # *********************************************************************
    # 5. copy the time slices directory to a new one
    # This is done for debugging purposes: the copy is not used for anything.
    # The actual selfcal steps are done in place
    # (added by N.Vilchez)
    # The save location is created relative to the concat.ms;
    # we could also use self.scratch_directory from the toplevel recipe,
    # but this would need an additional ingredient.
    # This is a 'debugging' step and should never ever cause a failure of
    # the pipeline
    try:
        working_dir = os.path.dirname(concat_ms_path)
        time_slice_dir = os.path.join(working_dir, 'time_slices')
        time_slice_copy_dir = os.path.join(
            working_dir, 'time_slices_cycle_{0}'.format(major_cycle))

        cmd = "cp -r {0} {1}".format(time_slice_dir, time_slice_copy_dir)
        os.system(cmd)
    except Exception:
        self.logger.warn(
            "Debug copy of temporary files failed: continue operations")

    return 0
def go(self):
    """
    Entry point for recipe: Called by the pipeline framework
    """
    super(imager_prepare, self).go()
    self.logger.info("Starting imager_prepare run")
    # *********************************************************************
    # input data
    input_map = DataMap.load(self.inputs['args'][0])
    output_map = DataMap.load(self.inputs['target_mapfile'])
    slices_per_image = self.inputs['slices_per_image']
    subbands_per_image = self.inputs['subbands_per_image']
    # Validate input
    if not self._validate_input_map(input_map, output_map,
                                    slices_per_image, subbands_per_image):
        return 1

    # outputs
    output_ms_mapfile_path = self.inputs['mapfile']

    # *********************************************************************
    # schedule the actual work
    # TODO: Refactor this function into: load data, perform work,
    # create output
    node_command = " python %s" % (self.__file__.replace("master", "nodes"))

    jobs = []
    paths_to_image_mapfiles = []
    n_subband_groups = len(output_map)
    for idx_sb_group, item in enumerate(output_map):
        # create the input files for this node
        self.logger.debug("Creating input data subset for processing "
                          "on: {0}".format(item.host))
        inputs_for_image_map = \
            self._create_input_map_for_sbgroup(
                slices_per_image, n_subband_groups,
                subbands_per_image, idx_sb_group, input_map)

        # Save the mapfile
        job_directory = self.config.get("layout", "job_directory")
        inputs_for_image_mapfile_path = os.path.join(
            job_directory, "mapfiles",
            "ms_per_image_{0}".format(idx_sb_group))
        self._store_data_map(inputs_for_image_mapfile_path,
                             inputs_for_image_map, "inputmap for location")

        # save the (input) ms, as a list of mapfiles
        paths_to_image_mapfiles.append(
            tuple([item.host, inputs_for_image_mapfile_path, False]))

        arguments = [
            self.environment,
            self.inputs['parset'],
            self.inputs['working_directory'],
            self.inputs['processed_ms_dir'],
            self.inputs['ndppp_exec'],
            item.file,
            slices_per_image,
            subbands_per_image,
            inputs_for_image_mapfile_path,
            self.inputs['asciistat_executable'],
            self.inputs['statplot_executable'],
            self.inputs['msselect_executable'],
            self.inputs['rficonsole_executable'],
            self.inputs['add_beam_tables']
        ]

        jobs.append(ComputeJob(item.host, node_command, arguments))

    # Hand over the job(s) to the pipeline scheduler
    self._schedule_jobs(jobs)

    # *********************************************************************
    # validate the output, cleanup, return output
    if self.error.isSet():   # if one of the nodes failed
        self.logger.warn("Failed prepare_imager run detected: Generating "
                         "new output_ms_mapfile_path without failed runs:"
                         " {0}".format(output_ms_mapfile_path))

    concat_ms = copy.deepcopy(output_map)
    slices = []
    finished_runs = 0
    # scan the return dict for the completed key
    for (item, job) in zip(concat_ms, jobs):
        # only save the slices if the node has completed successfully
        if job.results["returncode"] == 0:
            finished_runs += 1
            slices.append(
                tuple([item.host, job.results["time_slices"], False]))
        else:
            # Set the dataproduct to skipped!!
            item.skip = True
            slices.append(tuple([item.host, ["/Failed"], True]))
            msg = "Failed run on {0}. NOT Created: {1} ".format(
                item.host, item.file)
            self.logger.warn(msg)

    if finished_runs == 0:
        self.logger.error(
            "None of the started compute nodes finished: "
            "The current recipe produced no output, aborting")
        return 1

    # Write the output mapfiles:
    # concat.ms paths:
    self._store_data_map(output_ms_mapfile_path, concat_ms,
                         "mapfile with concat.ms")

    # timeslices
    MultiDataMap(slices).save(self.inputs['slices_mapfile'])
    self.logger.info(
        "Wrote MultiMapfile with produced timeslices: {0}".format(
            self.inputs['slices_mapfile']))

    # map with the actual input mss
    self._store_data_map(self.inputs["raw_ms_per_image_mapfile"],
                         DataMap(paths_to_image_mapfiles),
                         "mapfile containing (raw) input ms per image:")

    # Set the return values
    self.outputs['mapfile'] = output_ms_mapfile_path
    self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
    self.outputs['raw_ms_per_image_mapfile'] = \
        self.inputs["raw_ms_per_image_mapfile"]

    return 0
def go(self):
    """
    imager_bbs functionality. Called by the framework; performs all the work.
    """
    super(imager_bbs, self).go()
    self.logger.info("Starting imager_bbs run")

    # ********************************************************************
    # 1. Load and validate the data
    ms_map = MultiDataMap.load(self.inputs['args'][0])
    parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])

    # TODO: DataMap extension
    # # Check if the input has equal length and is on the same nodes
    # if not validate_data_maps(ms_map, parmdb_map):
    #     self.logger.error("The combination of mapfiles failed validation:")
    #     self.logger.error("ms_map: \n{0}".format(ms_map))
    #     self.logger.error("parmdb_map: \n{0}".format(parmdb_map))
    #     return 1

    # *********************************************************************
    # 2. Start the node scripts
    jobs = []
    node_command = " python %s" % (self.__file__.replace("master", "nodes"))
    map_dir = os.path.join(
        self.config.get("layout", "job_directory"), "mapfiles")
    run_id = str(self.inputs.get("id"))

    # Update the skip fields of the three maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    for w, x, y in zip(ms_map, parmdb_map, sourcedb_map):
        w.skip = x.skip = y.skip = (
            w.skip or x.skip or y.skip
        )

    ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
        DataMap.SkipIterator
    for (ms, parmdb, sourcedb) in zip(ms_map, parmdb_map, sourcedb_map):
        # host is the same for each entry (validate_data_maps)
        host, ms_list = ms.host, ms.file

        # Write data maps to MultiDataMaps
        ms_list_path = os.path.join(
            map_dir, host + "_ms_" + run_id + ".map")
        MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

        parmdb_list_path = os.path.join(
            map_dir, host + "_parmdb_" + run_id + ".map")
        MultiDataMap(
            [tuple([host, parmdb.file, False])]).save(parmdb_list_path)

        sourcedb_list_path = os.path.join(
            map_dir, host + "_sky_" + run_id + ".map")
        MultiDataMap(
            [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path)

        arguments = [self.inputs['bbs_executable'],
                     self.inputs['parset'],
                     ms_list_path, parmdb_list_path, sourcedb_list_path]
        jobs.append(ComputeJob(host, node_command, arguments))

    # start and wait till all are finished
    self._schedule_jobs(jobs)

    # **********************************************************************
    # 3. validate the node output and construct the output mapfile.
    if self.error.isSet():   # if one of the nodes failed
        self.logger.error("One of the nodes failed while performing "
                          "a BBS run. Aborting: concat.ms corruption")
        return 1

    # return the output: the measurement sets that are calibrated.
    # The calibrated data is placed in the ms sets
    MultiDataMap(ms_map).save(self.inputs['mapfile'])
    self.logger.info("Wrote file with calibrated data")

    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0
def test_new_style_load_store(self):
    tmp_file = self.new_style_map_file + '.tmp'
    data_map = MultiDataMap(self.new_style_map)
    data_map.save(tmp_file)
    reloaded_data_map = MultiDataMap.load(tmp_file)
    self.assertEqual(data_map, reloaded_data_map)
def _collect_and_assign_outputs(self, jobs, output_map, slice_paths_map):
    """
    Collect and combine the outputs of the individual create_dbs node
    recipes. Combine into output mapfiles and save these at the supplied
    path locations
    """
    # Create a container for the output parmdbs: same host and skip
    # fields as the output map
    output_map.iterator = DataMap.TupleIterator
    parmdbs_list = []
    # loop over the raw data including the skip field (use the data member)
    for output_entry in output_map.data:
        parms_tuple = tuple([output_entry.host, [], output_entry.skip])
        parmdbs_list.append(parms_tuple)

    parmdbs_map = MultiDataMap(parmdbs_list)

    output_map.iterator = parmdbs_map.iterator = DataMap.SkipIterator
    # The maps are synced
    successful_run = False
    for (output_item, parmdbs_item, job) in zip(output_map,
                                                parmdbs_map, jobs):
        node_succeeded = ("parmdbs" in job.results and
                          "sourcedb" in job.results)
        host = output_item.host

        # The current job has to be skipped (due to the skip field)
        # or the node failed:
        if not node_succeeded:
            self.logger.warn(
                "Warning: failed ImagerCreateDBs run detected: "
                "no sourcedb file created, {0}; continuing".format(host))
            output_item.file = "failed"
            output_item.skip = True
            parmdbs_item.file = ["failed"]
            parmdbs_item.skip = True

        # Else it succeeded and we can write the results
        else:
            successful_run = True
            output_item.file = job.results["sourcedb"]
            parmdbs_item.file = job.results["parmdbs"]

            # we also need to manually set the skip for this new
            # file list
            parmdbs_item.file_skip = [False] * len(job.results["parmdbs"])

    # Fail if none of the nodes returned all data
    if not successful_run:
        self.logger.error("The creation of dbs on the nodes failed:")
        self.logger.error("Not a single node produced all needed data")
        self.logger.error(
            "products. sourcedb_files: {0}".format(output_map))
        self.logger.error("parameter dbs: {0}".format(parmdbs_map))
        return 1

    # write the mapfiles
    output_map.save(self.inputs["sourcedb_map_path"])
    parmdbs_map.save(self.inputs["parmdbs_map_path"])
    self.logger.debug("Wrote sourcedb dataproducts: {0} \n {1}".format(
        self.inputs["sourcedb_map_path"],
        self.inputs["parmdbs_map_path"]))

    # Set the outputs
    self.outputs['sourcedb_map_path'] = self.inputs["sourcedb_map_path"]
    self.outputs['parmdbs_map_path'] = self.inputs["parmdbs_map_path"]

    return 0
def go(self):
    """
    Entry point for recipe: Called by the pipeline framework
    """
    super(imager_prepare, self).go()
    self.logger.info("Starting imager_prepare run")
    job_directory = self.config.get("layout", "job_directory")
    # *********************************************************************
    # input data
    input_map = DataMap.load(self.inputs['args'][0])
    output_map = DataMap.load(self.inputs['target_mapfile'])
    slices_per_image = self.inputs['slices_per_image']
    subbands_per_image = self.inputs['subbands_per_image']
    # Validate input
    if not self._validate_input_map(input_map, output_map,
                                    slices_per_image, subbands_per_image):
        return 1

    # outputs
    output_ms_mapfile_path = self.inputs['mapfile']

    # *********************************************************************
    # schedule the actual work
    # TODO: Refactor this function into: load data, perform work,
    # create output
    node_command = " python %s" % (self.__file__.replace("master", "nodes"))

    jobs = []
    paths_to_image_mapfiles = []
    n_subband_groups = len(output_map)  # needed for subsets in the sb list

    globalfs = self.config.has_option(
        "remote", "globalfs") and self.config.getboolean(
        "remote", "globalfs")

    for idx_sb_group, item in enumerate(output_map):
        # create the input files for this node
        self.logger.debug("Creating input data subset for processing "
                          "on: {0}".format(item.host))
        inputs_for_image_map = \
            self._create_input_map_for_sbgroup(
                slices_per_image, n_subband_groups,
                subbands_per_image, idx_sb_group, input_map)

        # Save the mapfile
        inputs_for_image_mapfile_path = os.path.join(
            job_directory, "mapfiles",
            "ms_per_image_{0}.map".format(idx_sb_group))
        self._store_data_map(inputs_for_image_mapfile_path,
                             inputs_for_image_map, "inputmap for location")

        # skip the current step if skip is set; we cannot use the skip
        # iterator due to the enumerate: dependency on the index in the map
        if item.skip == True:
            # assure that the mapfile is correct
            paths_to_image_mapfiles.append(tuple([item.host, [], True]))
            continue

        # save the (input) ms, as a list of mapfiles
        paths_to_image_mapfiles.append(
            tuple([item.host, inputs_for_image_mapfile_path, False]))

        # use unique working directories per job, to prevent interference
        # between jobs on a global fs
        working_dir = os.path.join(
            self.inputs['working_directory'],
            "imager_prepare_{0}".format(idx_sb_group))

        arguments = [
            self.environment,
            self.inputs['parset'],
            working_dir,
            self.inputs['processed_ms_dir'],
            self.inputs['ndppp_exec'],
            item.file,
            slices_per_image,
            subbands_per_image,
            inputs_for_image_mapfile_path,
            self.inputs['asciistat_executable'],
            self.inputs['statplot_executable'],
            self.inputs['msselect_executable'],
            self.inputs['rficonsole_executable'],
            self.inputs['do_rficonsole'],
            self.inputs['add_beam_tables'],
            globalfs
        ]

        jobs.append(
            ComputeJob(item.host, node_command, arguments,
                       resources={"cores": self.inputs['nthreads']}))

    # Hand over the job(s) to the pipeline scheduler
    self._schedule_jobs(jobs)

    # *********************************************************************
    # validate the output, cleanup, return output
    if self.error.isSet():   # if one of the nodes failed
        self.logger.warn("Failed prepare_imager run detected: Generating "
                         "new output_ms_mapfile_path without failed runs:"
                         " {0}".format(output_ms_mapfile_path))

    concat_ms = copy.deepcopy(output_map)
    slices = []
    finished_runs = 0
    # scan the return dict for the completed key
    # loop over the potential jobs, including the skipped ones.
    # If we have a skipped item, add the item to the slices with skip set
    jobs_idx = 0
    for item in concat_ms:
        # If this is an item that is skipped via the skip parameter in
        # the parset, append a skipped entry
        if item.skip:
            slices.append(tuple([item.host, [], True]))
            continue

        # we cannot use the skip iterator, so we need to manually get the
        # current job from the list
        job = jobs[jobs_idx]

        # only save the slices if the node has completed successfully
        if job.results["returncode"] == 0:
            finished_runs += 1
            slices.append(
                tuple([item.host, job.results["time_slices"], False]))
        else:
            # Set the dataproduct to skipped!!
            item.skip = True
            slices.append(tuple([item.host, [], True]))
            msg = "Failed run on {0}. NOT Created: {1} ".format(
                item.host, item.file)
            self.logger.warn(msg)

        # we have a non-skipped work item, increase the job idx
        jobs_idx += 1

    if finished_runs == 0:
        self.logger.error(
            "None of the started compute nodes finished: "
            "The current recipe produced no output, aborting")
        return 1

    # Write the output mapfiles:
    # concat.ms paths:
    self._store_data_map(output_ms_mapfile_path, concat_ms,
                         "mapfile with concat.ms")

    # timeslices
    MultiDataMap(slices).save(self.inputs['slices_mapfile'])
    self.logger.info(
        "Wrote MultiMapfile with produced timeslices: {0}".format(
            self.inputs['slices_mapfile']))

    # map with the actual input mss
    self._store_data_map(self.inputs["ms_per_image_mapfile"],
                         DataMap(paths_to_image_mapfiles),
                         "mapfile containing (used) input ms per image:")

    # Set the return values
    self.outputs['mapfile'] = output_ms_mapfile_path
    self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
    self.outputs['ms_per_image_mapfile'] = \
        self.inputs["ms_per_image_mapfile"]

    return 0
def test_append_item_invalid(self):
    data_map = MultiDataMap(self.new_style_map)
    self.assertRaises(DataMapError, data_map.append,
                      ("host", True, "file", [False], "bwaaa"))
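# Taken together, the append tests above cover the tuple shapes that
# MultiDataMap.append() is expected to accept in these tests:
#   (host, file_list, skip)                   e.g. ("host", ["file"], True)
#   (host, file_list, skip, file_skip_list)   e.g. ("host", ["file"], True, [True])
# whereas a malformed tuple such as ("host", True, "file", [False], "bwaaa")
# raises DataMapError.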
def test_compare_DataMap_and_MultiDataMap(self):
    data_map = DataMap([])
    multi_data_map = MultiDataMap([])
    # Maps of different types should compare unequal, even when both are empty
    self.assertNotEqual(data_map, multi_data_map)
def go(self):
    """
    selfcal_bbs functionality. Called by the framework; performs all the work.
    """
    super(selfcal_bbs, self).go()
    self.logger.info("Starting selfcal_bbs run")

    # ********************************************************************
    # 1. Load and validate the data
    ms_map = MultiDataMap.load(self.inputs['args'][0])
    parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])
    concat_ms_map = DataMap.load(self.inputs['concat_ms_map_path'])

    # *********************************************************************
    # 2. Start the node scripts
    jobs = []
    node_command = " python %s" % (self.__file__.replace("master", "nodes"))
    map_dir = os.path.join(self.config.get("layout", "job_directory"),
                           "mapfiles")
    run_id = str(self.inputs.get("id"))

    # Update the skip fields of the four maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    align_data_maps(ms_map, parmdb_map, sourcedb_map, concat_ms_map)

    ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
        concat_ms_map.iterator = DataMap.SkipIterator

    # *********************************************************************
    for (ms, parmdb, sourcedb, concat_ms) in zip(ms_map, parmdb_map,
                                                 sourcedb_map,
                                                 concat_ms_map):
        # host is the same for each entry (validate_data_maps)
        host, ms_list = ms.host, ms.file

        # Write data maps to MultiDataMaps
        ms_list_path = os.path.join(
            map_dir, host + "_ms_" + run_id + ".map")
        MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

        parmdb_list_path = os.path.join(
            map_dir, host + "_parmdb_" + run_id + ".map")
        MultiDataMap(
            [tuple([host, parmdb.file, False])]).save(parmdb_list_path)

        sourcedb_list_path = os.path.join(
            map_dir, host + "_sky_" + run_id + ".map")
        MultiDataMap(
            [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path)

        # The concat ms does not have to be written: it already is a
        # singular item (it is the output of the reduce step)
        # redmine issue #6021
        arguments = [
            self.inputs['bbs_executable'],
            self.inputs['parset'],
            ms_list_path,
            parmdb_list_path,
            sourcedb_list_path,
            concat_ms.file,
            self.inputs['major_cycle']
        ]
        jobs.append(ComputeJob(host, node_command, arguments))

    # start and wait till all are finished
    self._schedule_jobs(jobs)

    # **********************************************************************
    # 3. validate the node output and construct the output mapfile.
    if self.error.isSet():   # if one of the nodes failed
        self.logger.warn("Failed bbs node run detected, skipping work "
                         "on this work item for further computations")
        # find the failed jobs and set the skip field
        for (ms_item, concat_item, job) in zip(ms_map, concat_ms_map, jobs):
            if job.results["returncode"] == 0:
                continue
            else:
                ms_item.skip = True
                concat_item.skip = True
                self.logger.warn("bbs failed on item: {0}".format(
                    ms_item.file))

    # return the output: the measurement sets that are calibrated.
    # The calibrated data is placed in the ms sets
    MultiDataMap(ms_map).save(self.inputs['mapfile'])

    # also save the concat_ms map with possible skips
    DataMap(concat_ms_map).save(self.inputs['concat_ms_map_path'])
    self.logger.info("Wrote file with calibrated data")

    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0
def go(self):
    """
    imager_bbs functionality. Called by the framework; performs all the work.
    """
    super(imager_bbs, self).go()
    self.logger.info("Starting imager_bbs run")

    # ********************************************************************
    # 1. Load and validate the data
    ms_map = MultiDataMap.load(self.inputs['args'][0])
    parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])

    # TODO: DataMap extension
    # # Check if the input has equal length and is on the same nodes
    # if not validate_data_maps(ms_map, parmdb_map):
    #     self.logger.error("The combination of mapfiles failed validation:")
    #     self.logger.error("ms_map: \n{0}".format(ms_map))
    #     self.logger.error("parmdb_map: \n{0}".format(parmdb_map))
    #     return 1

    # *********************************************************************
    # 2. Start the node scripts
    jobs = []
    node_command = " python3 %s" % (self.__file__.replace("master", "nodes"))
    map_dir = os.path.join(
        self.config.get("layout", "job_directory"), "mapfiles")
    run_id = str(self.inputs.get("id"))

    # Update the skip fields of the three maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    for w, x, y in zip(ms_map, parmdb_map, sourcedb_map):
        w.skip = x.skip = y.skip = (
            w.skip or x.skip or y.skip
        )

    ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
        DataMap.SkipIterator
    for (idx, (ms, parmdb, sourcedb)) in enumerate(
            zip(ms_map, parmdb_map, sourcedb_map)):
        # host is the same for each entry (validate_data_maps)
        host, ms_list = ms.host, ms.file

        # Write data maps to MultiDataMaps
        ms_list_path = os.path.join(
            map_dir, "%s-%s_map_%s.map" % (host, idx, run_id))
        MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

        parmdb_list_path = os.path.join(
            map_dir, "%s-%s_parmdb_%s.map" % (host, idx, run_id))
        MultiDataMap(
            [tuple([host, parmdb.file, False])]).save(parmdb_list_path)

        sourcedb_list_path = os.path.join(
            map_dir, "%s-%s_sky_%s.map" % (host, idx, run_id))
        MultiDataMap(
            [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path)

        arguments = [self.inputs['bbs_executable'],
                     self.inputs['parset'],
                     ms_list_path, parmdb_list_path, sourcedb_list_path]
        jobs.append(ComputeJob(host, node_command, arguments,
                               resources={
                                   "cores": self.inputs['nthreads']
                               }))

    # start and wait till all are finished
    self._schedule_jobs(jobs)

    # **********************************************************************
    # 3. validate the node output and construct the output mapfile.
    if self.error.isSet():   # if one of the nodes failed
        self.logger.error("One of the nodes failed while performing "
                          "a BBS run. Aborting: concat.ms corruption")
        return 1

    # return the output: the measurement sets that are calibrated.
    # The calibrated data is placed in the ms sets
    MultiDataMap(ms_map).save(self.inputs['mapfile'])
    self.logger.info("Wrote file with calibrated data")

    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0