def plugin_main(args, **kwargs):
    """
    Matches the hosts in one datamap with those in another

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to adjust
    mapfile_to_match : str
        Filename of datamap to match

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_to_match = kwargs['mapfile_to_match']

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_to_match = DataMap.load(mapfile_to_match)
    map_to_match.iterator = DataMap.SkipIterator

    hosts_to_match = []
    for item in map_to_match:
        hosts_to_match.append(item.host)

    for item, host in zip(map_in, hosts_to_match):
        item.host = host

    map_in.save(mapfile_in)
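
# A minimal usage sketch for the host-matching plugin above (hedged: it
# assumes the DataMap/DataProduct classes come from the lofarpipe framework,
# and all hosts and paths below are hypothetical).
if __name__ == '__main__':
    import os
    import tempfile
    from lofarpipe.support.data_map import DataMap, DataProduct

    tmpdir = tempfile.mkdtemp()
    in_map = os.path.join(tmpdir, 'in.map')
    match_map = os.path.join(tmpdir, 'match.map')
    DataMap([DataProduct('node01', '/data/a.MS', False),
             DataProduct('node01', '/data/b.MS', False)]).save(in_map)
    DataMap([DataProduct('node07', '/data/c.MS', False),
             DataProduct('node08', '/data/d.MS', False)]).save(match_map)

    plugin_main([], mapfile_in=in_map, mapfile_to_match=match_map)
    # in.map is rewritten in place; its entries now carry node07 and node08
    print([item.host for item in DataMap.load(in_map)])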
def go(self):
    super(imager_create_dbs, self).go()

    # get assoc_theta, convert from empty string if needed
    assoc_theta = self.inputs["assoc_theta"]
    if assoc_theta == "":
        assoc_theta = None

    # Load mapfile data from files
    self.logger.info(self.inputs["slice_paths_mapfile"])
    slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"])
    input_map = DataMap.load(self.inputs['args'][0])
    source_list_map = DataMap.load(self.inputs['source_list_map_path'])

    if self._validate_input_data(input_map, slice_paths_map):
        return 1

    # Run the nodes with the now-collected inputs
    jobs, output_map = self._run_create_dbs_node(
        input_map, slice_paths_map, assoc_theta, source_list_map)

    # Collect the output of the node scripts and write it to (map) files
    return self._collect_and_assign_outputs(jobs, output_map,
                                            slice_paths_map)
def _load_mapfiles(self):
    """
    Load data map file, instrument map file, and sky map file.
    Update the 'skip' fields in these map files: if 'skip' is True in any
    of the maps, then 'skip' must be set to True in all maps.
    """
    self.logger.debug("Loading map files:"
        "\n\tdata map: %s\n\tinstrument map: %s\n\tsky map: %s" % (
            self.inputs['args'][0],
            self.inputs['instrument_mapfile'],
            self.inputs['sky_mapfile']
        )
    )
    self.data_map = DataMap.load(self.inputs['args'][0])
    self.inst_map = DataMap.load(self.inputs['instrument_mapfile'])
    self.sky_map = DataMap.load(self.inputs['sky_mapfile'])

    if not validate_data_maps(self.data_map, self.inst_map, self.sky_map):
        self.logger.error("Validation of input data mapfiles failed")
        return False

    # Update the skip fields of the three maps. If 'skip' is True in any
    # of these maps, then 'skip' must be set to True in all maps.
    for x, y, z in zip(self.data_map, self.inst_map, self.sky_map):
        x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip)

    return True
def _get_io_product_specs(self):
    """
    Get input- and output-data product specifications from the parset-file,
    and do some sanity checks.
    """
    dps = self.parset.makeSubset(
        self.parset.fullModuleName('DataProducts') + '.'
    )
    self.input_data = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
            for location, filename, skip in zip(
                dps.getStringVector('Input_Correlated.locations'),
                dps.getStringVector('Input_Correlated.filenames'),
                dps.getBoolVector('Input_Correlated.skip'))
    ])
    self.logger.debug("%d Input_Correlated data products specified" %
                      len(self.input_data))
    self.output_data = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
            for location, filename, skip in zip(
                dps.getStringVector('Output_Correlated.locations'),
                dps.getStringVector('Output_Correlated.filenames'),
                dps.getBoolVector('Output_Correlated.skip'))
    ])
    self.logger.debug("%d Output_Correlated data products specified" %
                      len(self.output_data))
    # Sanity checks on input- and output data product specifications
    if not validate_data_maps(self.input_data, self.output_data):
        raise PipelineException(
            "Validation of input/output data product specification failed!"
        )
def plugin_main(args, **kwargs):
    """
    Prunes entries from a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    prune_str : str
        Entries starting with this string will be removed.

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    prune_str = kwargs['prune_str'].lower()
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    prunelen = len(prune_str)

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if item.file[:prunelen].lower() != prune_str:
            map_out.data.append(DataProduct(item.host, item.file, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
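
# Hedged usage sketch for the pruning plugin above: entries whose filename
# starts with the (case-insensitive) prune string are dropped. It assumes
# the lofarpipe DataMap classes; the paths are hypothetical.
if __name__ == '__main__':
    import os
    import tempfile
    from lofarpipe.support.data_map import DataMap, DataProduct

    tmpdir = tempfile.mkdtemp()
    in_map = os.path.join(tmpdir, 'in.map')
    DataMap([DataProduct('localhost', '/data/L1_SB000.MS', False),
             DataProduct('localhost', 'dummy_entry', False)]).save(in_map)

    result = plugin_main([], mapfile_in=in_map, prune_str='DUMMY',
                         mapfile_dir=tmpdir, filename='pruned.map')
    # only /data/L1_SB000.MS survives
    print([item.file for item in DataMap.load(result['mapfile'])])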
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the
    corresponding group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels for
        the different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of groups
        must be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    ignore_dummies: str (optional)
        If true, do not count dummy entries when expanding

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    try:
        # if the user has defined a dummy preference, follow it;
        # otherwise count dummies as usual
        ignore_dummies = str(kwargs['ignore_dummies'])
        ignore_dummies = ignore_dummies in ['true', 'True', '1', 'T', 't']
    except KeyError:
        ignore_dummies = False

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    if len(inmap) != len(groupmap):
        raise ValueError('PipelineStep_mapfileSingleToGroup: length of {0} and {1} differ'.format(kwargs['mapfile_in'], kwargs['mapfile_groups']))

    map_out = DataMap([])
    if ignore_dummies:
        for groupID in xrange(len(groupmap)):
            for fileID in xrange(len(groupmap[groupID].file)):
                if (groupmap[groupID].file)[fileID] != 'dummy_entry':
                    map_out.data.append(DataProduct(inmap[groupID].host,
                                                    inmap[groupID].file,
                                                    (inmap[groupID].skip or groupmap[groupID].skip)))
    else:
        for groupID in xrange(len(groupmap)):
            for fileID in xrange(len(groupmap[groupID].file)):
                map_out.data.append(DataProduct(inmap[groupID].host,
                                                inmap[groupID].file,
                                                (inmap[groupID].skip or groupmap[groupID].skip)))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Makes a mapfile for selfcal images (assuming standard naming conventions)

    Parameters
    ----------
    selfcal_dir : str
        Full path of selfcal directory
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    selfcal_dir = kwargs['selfcal_dir']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    else:
        hosts = kwargs['hosts']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    if os.path.exists(selfcal_dir):
        selfcal_images = glob.glob(os.path.join(selfcal_dir,
            '*.wsclean_image[01]2-MFS-image.fits'))
        tec_iter_images = glob.glob(os.path.join(selfcal_dir,
            '*.wsclean_image22_iter*-MFS-image.fits'))
        if len(tec_iter_images) == 0:
            tec_iter_images = glob.glob(os.path.join(selfcal_dir,
                '*.wsclean_image22-MFS-image.fits'))
        selfcal_images += tec_iter_images
        selfcal_images += glob.glob(os.path.join(selfcal_dir,
            '*.wsclean_image[3]2-MFS-image.fits'))
        selfcal_images += glob.glob(os.path.join(selfcal_dir,
            '*.wsclean_image42_iter*-MFS-image.fits'))
        if len(selfcal_images) == 0:
            selfcal_images = glob.glob(os.path.join(selfcal_dir,
                '*.wsclean_image[01]2-image.fits'))
            tec_iter_images = glob.glob(os.path.join(selfcal_dir,
                '*.wsclean_image22_iter*-image.fits'))
            if len(tec_iter_images) == 0:
                tec_iter_images = glob.glob(os.path.join(selfcal_dir,
                    '*.wsclean_image22-image.fits'))
            selfcal_images += tec_iter_images
            selfcal_images += glob.glob(os.path.join(selfcal_dir,
                '*.wsclean_image[3]2-image.fits'))
            selfcal_images += glob.glob(os.path.join(selfcal_dir,
                '*.wsclean_image42_iter*-image.fits'))
        selfcal_images.sort()
    else:
        selfcal_images = []

    # Save image list as a string to the output mapfile
    image_list = '[{0}]'.format(','.join(selfcal_images))
    map_out = DataMap([])
    map_out.data.append(DataProduct(hosts[0], image_list, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def test_skip_iterator(self):
    data_map = DataMap(self.new_style_map)
    data_map.iterator = DataMap.SkipIterator
    unskipped = [item for item in data_map]
    self.assertEqual(len(unskipped), 2)
    self.assertTrue(all(isinstance(item, DataProduct)
                        for item in unskipped))
    self.assertEqual(unskipped[0].host, 'locus002')
    self.assertEqual(unskipped[0].file, 'L12345_SB102.MS')
def test_tuple_iterator(self):
    data_map = DataMap(self.new_style_map)
    data_map.iterator = DataMap.TupleIterator
    tuples = [item for item in data_map]
    self.assertEqual(len(tuples), 4)
    self.assertTrue(all(isinstance(item, tuple) for item in tuples))
    self.assertTrue(all(len(item) == 2 for item in tuples))
    self.assertEqual(tuples[0], ('locus001', 'L12345_SB101.MS'))
def test_append_item_non_skip(self):
    data_map = DataMap(self.new_style_map)
    data_map.append(("host", "file", False))
    data_map.iterator = DataMap.TupleIterator
    tuples = [item for item in data_map]
    self.assertEqual(len(tuples), 5)
    self.assertTrue(all(isinstance(item, tuple) for item in tuples))
    self.assertTrue(all(len(item) == 2 for item in tuples))
    self.assertEqual(tuples[-1], ('host', 'file'))
def __init__(self):
    """
    Initialize our data members.
    """
    super(bbs_reducer, self).__init__()
    self.bbs_map = list()
    self.jobs = list()
    self.data_map = DataMap()
    self.inst_map = DataMap()
    self.sky_map = DataMap()
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append : str
        String to append
    append_index : bool
        If True, append a unique index to each file
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']

    if 'append_index' in kwargs:
        append_index = kwargs['append_index']
        if type(append_index) is str:
            append_index = append_index.lower() == 'true'
    else:
        append_index = False

    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if append_index:
            map_out.data.append(DataProduct(item.host,
                item.file + append_str + '_{}'.format(i), item.skip))
        else:
            map_out.data.append(DataProduct(item.host,
                item.file + append_str, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
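
# Hedged usage sketch for the append plugin above: '.corr' is appended to
# every filename, and with append_index=True a per-item index is added too.
# Assumes the lofarpipe DataMap classes; the paths are hypothetical.
if __name__ == '__main__':
    import os
    import tempfile
    from lofarpipe.support.data_map import DataMap, DataProduct

    tmpdir = tempfile.mkdtemp()
    in_map = os.path.join(tmpdir, 'in.map')
    DataMap([DataProduct('localhost', '/data/L1_SB000.MS', False),
             DataProduct('localhost', '/data/L1_SB001.MS', False)]).save(in_map)

    result = plugin_main([], mapfile_in=in_map, append='.corr',
                         append_index='True', mapfile_dir=tmpdir,
                         filename='appended.map')
    # yields /data/L1_SB000.MS.corr_0 and /data/L1_SB001.MS.corr_1
    print([item.file for item in DataMap.load(result['mapfile'])])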
def __init__(self):
    """
    Initialize member variables and call superclass init function
    """
    control.__init__(self)
    self.input_data = DataMap()
    self.target_data = DataMap()
    self.output_data = DataMap()
    self.scratch_directory = None
    self.parset_dir = None
    self.mapfile_dir = None
def plugin_main(args, **kwargs):
    """
    Makes a mapfile for a list of files

    Parameters
    ----------
    files : list or str
        List of files, or a mapfile with such a list as the only entry. May be
        given as a list of strings or as a string (e.g.,
        '[s1.skymodel, s2.skymodel]')
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    if type(kwargs['files']) is str:
        try:
            # Check if input is a mapfile containing the list as a string
            map_in = DataMap.load(kwargs['files'])
            in_files = [item.file for item in map_in]
            files = []
            for f in in_files:
                files += f.strip('[]').split(',')
        except Exception:
            files = kwargs['files'].strip('[]').split(',')
        files = [f.strip() for f in files]
    else:
        files = kwargs['files']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    else:
        hosts = kwargs['hosts']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    # Cycle the hosts if there are more files than hosts
    for i in range(len(files) - len(hosts)):
        hosts.append(hosts[i])

    map_out = DataMap([])
    for h, f in zip(hosts, files):
        map_out.data.append(DataProduct(h, f, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
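
# Hedged usage sketch: build a mapfile from explicit file and host lists given
# as strings (the plugin also accepts Python lists). When the string is not a
# loadable mapfile, the plugin falls back to splitting it; hosts are cycled
# when there are more files than hosts. Names below are hypothetical.
if __name__ == '__main__':
    import tempfile
    from lofarpipe.support.data_map import DataMap

    tmpdir = tempfile.mkdtemp()
    result = plugin_main([], files='[s1.skymodel, s2.skymodel, s3.skymodel]',
                         hosts='[node01, node02]', mapfile_dir=tmpdir,
                         filename='files.map')
    # hosts come out as node01, node02, node01 (reused in order)
    print([(item.host, item.file) for item in DataMap.load(result['mapfile'])])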
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by compressing input mapfile items into one item

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    list_format : bool, optional
        If True, the compressed item will use a Python list format (e.g.,
        '[file1, file2, ...]'). If False, it will be a space-separated list
        (e.g., 'file1 file2 ...')

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'list_format' in kwargs:
        list_format = kwargs['list_format']
    else:
        list_format = True
    if type(list_format) is str:
        list_format = list_format.lower() == 'true'

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    map_in.iterator = DataMap.SkipIterator
    file_list = [item.file for item in map_in]
    if list_format:
        newlist = '[{0}]'.format(','.join(file_list))
    else:
        newlist = '{0}'.format(' '.join(file_list))

    # Just assign the host of the first file to the compressed item
    hosts = [item.host for item in map_in]
    map_out.data.append(DataProduct(hosts[0], newlist, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
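
# Hedged usage sketch for the compressing plugin above: three mapfile entries
# are collapsed into a single '[f1,f2,f3]' item hosted on the first entry's
# host. Assumes the lofarpipe DataMap classes; the paths are hypothetical.
if __name__ == '__main__':
    import os
    import tempfile
    from lofarpipe.support.data_map import DataMap, DataProduct

    tmpdir = tempfile.mkdtemp()
    in_map = os.path.join(tmpdir, 'in.map')
    DataMap([DataProduct('node01', '/data/SB%03d.MS' % i, False)
             for i in range(3)]).save(in_map)

    result = plugin_main([], mapfile_in=in_map, mapfile_dir=tmpdir,
                         filename='compressed.map', list_format='True')
    # one entry: '[/data/SB000.MS,/data/SB001.MS,/data/SB002.MS]' on node01
    print(DataMap.load(result['mapfile'])[0].file)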
def plugin_main(args, **kwargs):
    """
    Trims a string from filenames in a mapfile

    Note that everything from the last instance of the matching string to the
    end is trimmed.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    trim : str
        String to remove
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    counter : int
        If counter is greater than 0, replace "image32" with "image42". This
        is a special argument for facetselfcal looping only

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    trim_str = kwargs['trim']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'counter' in kwargs:
        counter = int(kwargs['counter'])
    else:
        counter = 0

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        index = item.file.rfind(trim_str)
        if index >= 0:
            item_trim = item.file[:index]
            if counter > 0:
                item_trim = item_trim.replace('image32', 'image42')
            map_out.data.append(DataProduct(item.host, item_trim, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
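
# Hedged usage sketch for the trimming plugin above: everything from the last
# occurrence of the trim string onward is removed from each filename. Assumes
# the lofarpipe DataMap classes; the paths are hypothetical.
if __name__ == '__main__':
    import os
    import tempfile
    from lofarpipe.support.data_map import DataMap, DataProduct

    tmpdir = tempfile.mkdtemp()
    in_map = os.path.join(tmpdir, 'in.map')
    DataMap([DataProduct('localhost',
                         '/data/facet1.wsclean_image32-image.fits',
                         False)]).save(in_map)

    result = plugin_main([], mapfile_in=in_map, trim='-image.fits',
                         mapfile_dir=tmpdir, filename='trimmed.map')
    # yields /data/facet1.wsclean_image32
    print(DataMap.load(result['mapfile'])[0].file)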
def test_append_item_skip(self):
    data_map = DataMap(self.new_style_map)
    data_map.append(("host", "file", True))
    data_map.iterator = DataMap.SkipIterator
    dataProducts = [item for item in data_map]

    # the default map contains 2 non-skipped items
    self.assertEqual(len(dataProducts), 2)
    self.assertTrue(all(isinstance(item, DataProduct)
                        for item in dataProducts))

    # The map already contains 2 skipped items; the final non-skipped item
    # is tested here
    self.assertEqual(dataProducts[-1].host, 'locus004')
    self.assertEqual(dataProducts[-1].file, 'L12345_SB104.MS')
def plugin_main(args, **kwargs):
    fileid = kwargs['mapfile_in']
    datamap = DataMap.load(fileid)
    hdf5File = os.path.join(kwargs['hdf5_dir'], kwargs['hdf5file'])
    if kwargs.has_key('instrument'):
        instrument = kwargs['instrument']
    else:
        instrument = '/instrument'
    if kwargs.has_key('compression'):
        compression = int(kwargs['compression'])
    else:
        compression = 5
    if kwargs.has_key('solset'):
        solsetName = kwargs['solset']
    else:
        solsetName = None

    # Check if all the necessary files are available
    antennaFile = os.path.join(datamap[0].file, 'ANTENNA')
    if not os.path.isdir(antennaFile):
        logging.critical('Missing ANTENNA table.')
        sys.exit(1)
    fieldFile = os.path.join(datamap[0].file, 'FIELD')
    if not os.path.isdir(fieldFile):
        logging.critical('Missing FIELD table.')
        sys.exit(1)
    skydbFile = os.path.join(datamap[0].file, 'sky')
    if not os.path.isdir(skydbFile):
        logging.critical('Missing sky table.')
        sys.exit(1)

    # generate the list of parmDB filenames
    parmDBnames = [MS.file + instrument for MS in datamap]

    # create and fill the hdf5 file
    solset = parmDBs2h5parm(hdf5File, parmDBnames, antennaFile, fieldFile,
                            skydbFile, compression=compression,
                            solsetName=solsetName)

    # Add CREATE entry to history
    h5parmDB = h5parm(hdf5File, readonly=False)
    soltabs = h5parmDB.getSoltabs(solset=solset)
    for st in soltabs:
        sw = solWriter(soltabs[st])
        sw.addHistory('CREATE (by PipelineStep_losotoImporter from %s / %s - %s)'
                      % (os.path.abspath(''),
                         os.path.basename(parmDBnames[0]),
                         os.path.basename(parmDBnames[-1])))
    h5parmDB.close()

    # generate mapfile and wrap up
    mapfileentry = {}
    mapfileentry['host'] = 'localhost'
    mapfileentry['file'] = hdf5File
    mapfileentry['skip'] = False
    outfileid = os.path.join(kwargs['mapfile_dir'], kwargs['filename'])
    outmap = open(outfileid, 'w')
    outmap.write(repr([mapfileentry]))
    outmap.close()

    result = {}
    result['mapfile'] = outfileid
    return result
def __init__(self):
    """
    Constructor sets the python command used to call node scripts
    """
    super(copier, self).__init__(
        "python {0}".format(self.__file__.replace('master', 'nodes')))
    self.source_map = DataMap()
    self.target_map = DataMap()
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by uncompressing an input mapfile list item into
    separate items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing a list of MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    else:
        hosts = kwargs['hosts']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    files = map_in[0].file.strip('[]').split(',')
    files = [f.strip() for f in files]

    # Cycle the hosts if there are more files than hosts
    for i in range(len(files) - len(hosts)):
        hosts.append(hosts[i])

    for file, host in zip(files, hosts):
        map_out.data.append(DataProduct(host, file, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
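
# Hedged usage sketch for the uncompressing plugin above: a single '[f1, f2]'
# item is expanded back into one entry per file, distributing the entries
# over the given hosts. Assumes the lofarpipe DataMap classes; the paths and
# hosts are hypothetical.
if __name__ == '__main__':
    import os
    import tempfile
    from lofarpipe.support.data_map import DataMap, DataProduct

    tmpdir = tempfile.mkdtemp()
    in_map = os.path.join(tmpdir, 'in.map')
    DataMap([DataProduct('node01', '[/data/SB000.MS, /data/SB001.MS]',
                         False)]).save(in_map)

    result = plugin_main([], mapfile_in=in_map, mapfile_dir=tmpdir,
                         filename='expanded.map', hosts='[node01, node02]')
    print([(item.host, item.file) for item in DataMap.load(result['mapfile'])])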
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the
    corresponding group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels for
        the different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of groups
        must be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    if len(inmap) != len(groupmap):
        raise ValueError('PipelineStep_mapfileSingleToGroup: length of {0} and {1} differ'.format(kwargs['mapfile_in'], kwargs['mapfile_groups']))

    map_out = DataMap([])
    for groupID in xrange(len(groupmap)):
        for fileID in xrange(len(groupmap[groupID].file)):
            map_out.data.append(DataProduct(inmap[groupID].host,
                                            inmap[groupID].file,
                                            (inmap[groupID].skip or groupmap[groupID].skip)))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def _create_mapfile_ato(inmap):
    maps = DataMap([])
    mapsin = DataMap.load(inmap)
    mapsin.iterator = DataMap.SkipIterator
    newlist = ''
    for i, item in enumerate(mapsin):
        newlist = newlist + item.file + ','
    newlist = newlist.rstrip(',')
    newlist = '[' + newlist + ']'
    maps.data.append(DataProduct('localhost', newlist, False))
    return maps
def _calc_edge_chans(inmap, numch, edgeFactor=32):
    """
    Generates a map with strings that can be used as input for NDPPP to flag
    the edges of the input MSs during (or after) concatenation.

    inmap      - MultiDataMap (not the mapfile name!) with the files to be
                 concatenated.
    numch      - Number of channels per input file (all files are assumed to
                 have the same number of channels).
    edgeFactor - Divisor to compute how many channels are to be flagged at
                 the beginning and end. (numch=64 and edgeFactor=32 means
                 "flag two channels at the beginning and two at the end")
    """
    outmap = DataMap([])
    for group in inmap:
        flaglist = []
        for i in xrange(len(group.file)):
            flaglist.extend(range(i * numch, i * numch + numch / edgeFactor))
            flaglist.extend(range((i + 1) * numch - numch / edgeFactor,
                                  (i + 1) * numch))
        outmap.append(DataProduct(group.host,
                                  str(flaglist).replace(' ', ''),
                                  group.skip))
        print str(flaglist).replace(' ', '')
    return outmap
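
# Hedged worked example for _calc_edge_chans (Python 2, as the function uses
# xrange and integer division): with numch=64 and edgeFactor=32, two channels
# are flagged at each band edge inside the group, so for a group of two
# concatenated files the flag string becomes '[0,1,62,63,64,65,126,127]'.
# The MultiDataMap/MultiDataProduct import path is assumed from the lofarpipe
# framework, and the file names are hypothetical.
if __name__ == '__main__':
    from lofarpipe.support.data_map import MultiDataMap, MultiDataProduct

    groups = MultiDataMap([MultiDataProduct('localhost',
                                            ['SB000.MS', 'SB001.MS'],
                                            False)])
    edge_map = _calc_edge_chans(groups, 64)
    print(edge_map[0].file)  # -> [0,1,62,63,64,65,126,127]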
def go(self):
    # TODO: Remove dependency on mapfile_dir
    self.logger.info("Starting copier run")
    super(copier, self).go()

    # Load data from mapfiles
    self.source_map = DataMap.load(self.inputs['mapfile_source'])
    self.target_map = DataMap.load(self.inputs['mapfile_target'])

    # validate data in mapfiles
    if not self._validate_mapfiles(self.inputs['allow_rename']):
        return 1

    # Run the compute nodes with the node specific mapfiles
    for source, target in zip(self.source_map, self.target_map):
        args = [source.host, source.file, target.file]
        self.append_job(target.host, args)

    # start the jobs, return the exit status.
    return self.run_jobs()
def update_state(dir_input):
    """
    Updates the paths in mapfiles or state files

    Parameters
    ----------
    dir_input : str
        Directory containing files to update

    """
    file_list = glob.glob(os.path.join(dir_input, '*'))

    if dir_input.endswith('mapfiles'):
        # Assume path is a pipeline mapfiles directory. In this case, we can
        # simply substitute the new working_dir for the old one in each of
        # the mapfiles
        working_dir = dir_input.split('results/')[0]
        for f in file_list:
            map = DataMap.load(f)
            for item in map:
                if '/' in item.file:
                    old_working_dir = item.file.split('results/')[0]
                    item.file = item.file.replace(old_working_dir, working_dir)
            map.save(f)
    elif dir_input.endswith('state'):
        # Assume path is the Factor state directory. In this case, we can try
        # to load files as pickled state files and look for paths inside. If
        # found, substitute the new working_dir for the old one
        working_dir = os.path.dirname(dir_input)
        for f in file_list:
            try:
                with open(f, "rb") as fp:
                    d = pickle.load(fp)
                for k, v in d.iteritems():
                    if type(v) is str:
                        if k == 'working_dir':
                            d[k] = working_dir
                        if '/' in v:
                            for infix in ['results/', 'state/', 'chunks/']:
                                parts = v.split(infix)
                                if len(parts) > 1:
                                    d[k] = os.path.join(working_dir, infix,
                                                        parts[-1])
                    elif type(v) is list:
                        for i, l in enumerate(v):
                            if '/' in l:
                                for infix in ['results/', 'state/', 'chunks/']:
                                    parts = l.split(infix)
                                    if len(parts) > 1:
                                        v[i] = os.path.join(working_dir, infix,
                                                            parts[-1])
                        d[k] = v
                with open(f, "w") as fp:
                    pickle.dump(d, fp)
            except:
                pass
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by filtering input mapfile items down to one item
    (by default, the middle one)

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    index : int, optional
        Index of the item to select; defaults to the middle item

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    map_in.iterator = DataMap.SkipIterator
    files = [item.file for item in map_in]
    hosts = [item.host for item in map_in]
    if 'index' in kwargs:
        index = int(kwargs['index'])
    else:
        # floor division, so this also works under Python 3
        index = len(files) // 2
    map_out.data.append(DataProduct(hosts[index], files[index], False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
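
# Hedged usage sketch for the filtering plugin above: with five unskipped
# entries and no explicit index, the middle (third) entry is selected.
# Assumes the lofarpipe DataMap classes; the paths are hypothetical.
if __name__ == '__main__':
    import os
    import tempfile
    from lofarpipe.support.data_map import DataMap, DataProduct

    tmpdir = tempfile.mkdtemp()
    in_map = os.path.join(tmpdir, 'in.map')
    DataMap([DataProduct('localhost', '/data/SB%03d.MS' % i, False)
             for i in range(5)]).save(in_map)

    result = plugin_main([], mapfile_in=in_map, mapfile_dir=tmpdir,
                         filename='middle.map')
    print(DataMap.load(result['mapfile'])[0].file)  # -> /data/SB002.MS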
def _get_io_product_specs(self):
    """
    Get input- and output-data product specifications from the parset-file,
    and do some sanity checks.
    """
    dps = self.parset.makeSubset(
        self.parset.fullModuleName('DataProducts') + '.'
    )
    # convert input dataproducts from parset value to DataMap
    self.input_data = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
            for location, filename, skip in zip(
                dps.getStringVector('Input_Correlated.locations'),
                dps.getStringVector('Input_Correlated.filenames'),
                dps.getBoolVector('Input_Correlated.skip'))
    ])
    self.logger.debug("%d Input_Correlated data products specified" %
                      len(self.input_data))

    self.output_data = DataMap([
        tuple(os.path.join(location, filename).split(':')) + (skip,)
            for location, filename, skip in zip(
                dps.getStringVector('Output_SkyImage.locations'),
                dps.getStringVector('Output_SkyImage.filenames'),
                dps.getBoolVector('Output_SkyImage.skip'))
    ])
    self.logger.debug("%d Output_SkyImage data products specified" %
                      len(self.output_data))

    # # Sanity checks on input- and output data product specifications
    # if not validate_data_maps(self.input_data, self.output_data):
    #     raise PipelineException(
    #         "Validation of input/output data product specification failed!"
    #     )  # Turned off until DataMap is extended..

    # Target data is basically scratch data, consisting of one concatenated
    # MS per image. It must be stored on the same host as the final image.
    self.target_data = copy.deepcopy(self.output_data)

    for idx, item in enumerate(self.target_data):
        item.file = os.path.join(self.scratch_directory,
                                 'ms_per_image_%d' % idx, 'concat.ms')
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by expanding a single input mapfile item into many items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing a single item
    mapfile_to_match : str
        Filename of datamap containing multiple items
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_to_match = kwargs['mapfile_to_match']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_match = DataMap.load(mapfile_to_match)
    map_out = DataMap([])

    map_match.iterator = DataMap.SkipIterator
    for item in map_match:
        map_out.data.append(DataProduct(item.host, map_in[0].file, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
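
# Hedged usage sketch for the expanding plugin above: the single entry of
# mapfile_in is repeated once per unskipped entry of mapfile_to_match,
# keeping the matched map's hosts. Assumes the lofarpipe DataMap classes;
# the paths and hosts are hypothetical.
if __name__ == '__main__':
    import os
    import tempfile
    from lofarpipe.support.data_map import DataMap, DataProduct

    tmpdir = tempfile.mkdtemp()
    single_map = os.path.join(tmpdir, 'single.map')
    match_map = os.path.join(tmpdir, 'match.map')
    DataMap([DataProduct('node01', '/data/sky.model', False)]).save(single_map)
    DataMap([DataProduct('node0%d' % i, '/data/SB%03d.MS' % i, False)
             for i in range(3)]).save(match_map)

    result = plugin_main([], mapfile_in=single_map,
                         mapfile_to_match=match_map,
                         mapfile_dir=tmpdir, filename='expanded.map')
    # three entries, all pointing at /data/sky.model
    print([(item.host, item.file) for item in DataMap.load(result['mapfile'])])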
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append_str : str
        String to append
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        map_out.data.append(DataProduct(item.host, item.file + append_str,
                                        item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Takes in mapfile_in, containing a single entry, and expands it to match
    the length of a companion mapfile

    Parameters
    ----------
    mapfile_in : str
        Parmdbs containing phase solutions
    mapfile_dir : str
        mapfile directory
    filename : str
        output filename
    mapfile_comp : str
        target MSs

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    mapfile_in = kwargs['mapfile_in']
    mapfile_comp = kwargs['mapfile_comp']
    filename = kwargs['filename']

    # this is the single mapfile entry to be expanded
    value = DataMap.load(mapfile_in)[0]
    # these are the actual MS files
    n = len(DataMap.load(mapfile_comp))

    map_out = DataMap([])
    for i in range(n):
        map_out.data.append(DataProduct(value.host, value.file, value.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
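
# Hedged usage sketch: the single parmdb entry is duplicated once per MS in
# the companion mapfile, so later steps can zip the two maps one-to-one.
# Assumes the lofarpipe DataMap classes; the paths are hypothetical.
if __name__ == '__main__':
    import os
    import tempfile
    from lofarpipe.support.data_map import DataMap, DataProduct

    tmpdir = tempfile.mkdtemp()
    parmdb_map = os.path.join(tmpdir, 'parmdb.map')
    ms_map = os.path.join(tmpdir, 'ms.map')
    DataMap([DataProduct('node01', '/data/phases.h5', False)]).save(parmdb_map)
    DataMap([DataProduct('node01', '/data/SB%03d.MS' % i, False)
             for i in range(4)]).save(ms_map)

    result = plugin_main([], mapfile_in=parmdb_map, mapfile_comp=ms_map,
                         mapfile_dir=tmpdir, filename='expanded.map')
    print(len(DataMap.load(result['mapfile'])))  # -> 4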
def plugin_main(args, **kwargs):
    """
    Find the measurement set closest to a given solution table, suitable for
    reading station names.

    Parameters
    ----------
    mapfile_ms : str
        Mapfile of the measurement sets
    mapfile_grpd : str
        Mapfile of the (grouped) calibration tables
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    mapfile_in = kwargs['mapfile_ms']
    mapfile_grpd = kwargs['mapfile_grpd']
    filename = kwargs['filename']
    result = {}

    data = DataMap.load(mapfile_in)      # these are actual MS files
    groups = DataMap.load(mapfile_grpd)  # these are probably parmdbs

    datalist = [data[i].file for i in xrange(len(data))]
    grp_list = [groups[i].file for i in xrange(len(groups))]

    frequency_groups = []
    map_out = DataMap([])
    map_out_addIS = DataMap([])
    map_out_addIS_tables = DataMap([])
    tomap_addIS = 0

    # determine the frequency range covered by each calibration table
    for grp_file in grp_list:
        table = pyrap.tables.table(grp_file, readonly=True)
        frequency_range = [float(table.getcol('STARTX')[0]),
                           float(table.getcol('ENDX')[0])]
        frequency_groups.append(frequency_range)
        table.close()

    # match each MS to the group whose frequency range contains its
    # reference frequency
    for msID, ms_file in enumerate(datalist):
        table = pyrap.tables.table(ms_file + '/SPECTRAL_WINDOW',
                                   readonly=True)
        ref_frequency = float(table.getcol('REF_FREQUENCY')[0])
        table.close()
        for groupID, freq_group in enumerate(frequency_groups):
            if freq_group[0] <= ref_frequency <= freq_group[1]:
                map_out.data.append(
                    DataProduct(groups[groupID].host, groups[groupID].file,
                                (groups[groupID].skip or data[msID].skip)))
                if tomap_addIS <= groupID:
                    map_out_addIS.data.append(
                        DataProduct(data[msID].host, data[msID].file,
                                    (data[msID].skip or groups[groupID].skip)))
                    map_out_addIS_tables.data.append(
                        DataProduct(groups[groupID].host,
                                    groups[groupID].file,
                                    (groups[groupID].skip or data[msID].skip)))
                    tomap_addIS += 1
                break

    if len(data) != len(map_out):
        raise ValueError(
            'PipelineStep_FindCorrespondingMS: length of mapfiles mismatch. '
            'Probably there are some phase solution tables missing.')

    fileid = os.path.join(mapfile_dir, filename + '_parmdbs')
    map_out.save(fileid)
    result['parmdbs'] = fileid

    fileid = os.path.join(mapfile_dir, filename + '_tables')
    map_out_addIS_tables.save(fileid)
    result['tables'] = fileid

    fileid = os.path.join(mapfile_dir, filename)
    map_out_addIS.save(fileid)
    result['mapfile'] = fileid

    return result
def go(self):
    """
    Entry point for recipe: Called by the pipeline framework
    """
    super(imager_prepare, self).go()
    self.logger.info("Starting imager_prepare run")
    job_directory = self.config.get("layout", "job_directory")
    # *********************************************************************
    # input data
    input_map = DataMap.load(self.inputs['args'][0])
    output_map = DataMap.load(self.inputs['target_mapfile'])
    slices_per_image = self.inputs['slices_per_image']
    subbands_per_image = self.inputs['subbands_per_image']
    # Validate input
    if not self._validate_input_map(input_map, output_map,
                                    slices_per_image, subbands_per_image):
        return 1

    # outputs
    output_ms_mapfile_path = self.inputs['mapfile']

    # *********************************************************************
    # schedule the actual work
    # TODO: Refactor this function into: load data, perform work,
    # create output
    node_command = " python3 %s" % (self.__file__.replace(
        "master", "nodes"))
    jobs = []
    paths_to_image_mapfiles = []
    n_subband_groups = len(output_map)  # needed for subsets in sb list

    globalfs = self.config.has_option(
        "remote", "globalfs") and self.config.getboolean(
        "remote", "globalfs")

    for idx_sb_group, item in enumerate(output_map):
        # create the input files for this node
        self.logger.debug("Creating input data subset for processing "
                          "on: {0}".format(item.host))
        inputs_for_image_map = \
            self._create_input_map_for_sbgroup(
                slices_per_image, n_subband_groups,
                subbands_per_image, idx_sb_group, input_map)

        # Save the mapfile
        inputs_for_image_mapfile_path = os.path.join(
            job_directory, "mapfiles",
            "ms_per_image_{0}.map".format(idx_sb_group))

        self._store_data_map(inputs_for_image_mapfile_path,
                             inputs_for_image_map, "inputmap for location")

        # skip the current step if skip is set; we cannot use the skip
        # iterator because of the enumerate: dependency on the index
        # in the map
        if item.skip == True:
            # assure that the mapfile is correct
            paths_to_image_mapfiles.append(
                tuple([item.host, [], True]))
            continue

        # save the (input) ms, as a list of mapfiles
        paths_to_image_mapfiles.append(
            tuple([item.host, inputs_for_image_mapfile_path, False]))

        # use unique working directories per job, to prevent interference
        # between jobs on a global fs
        working_dir = os.path.join(
            self.inputs['working_directory'],
            "imager_prepare_{0}".format(idx_sb_group))

        arguments = [self.environment,
                     self.inputs['parset'],
                     working_dir,
                     self.inputs['processed_ms_dir'],
                     self.inputs['ndppp_exec'],
                     item.file,
                     slices_per_image,
                     subbands_per_image,
                     inputs_for_image_mapfile_path,
                     self.inputs['asciistat_executable'],
                     self.inputs['statplot_executable'],
                     self.inputs['msselect_executable'],
                     self.inputs['rficonsole_executable'],
                     self.inputs['do_rficonsole'],
                     self.inputs['add_beam_tables'],
                     globalfs]

        jobs.append(ComputeJob(item.host, node_command, arguments,
                               resources={"cores": self.inputs['nthreads']}))

    # Hand over the job(s) to the pipeline scheduler
    self._schedule_jobs(jobs)

    # *********************************************************************
    # validate the output, cleanup, return output
    if self.error.isSet():  # if one of the nodes failed
        self.logger.warn("Failed prepare_imager run detected: Generating "
                         "new output_ms_mapfile_path without failed runs:"
                         " {0}".format(output_ms_mapfile_path))

    concat_ms = copy.deepcopy(output_map)
    slices = []
    finished_runs = 0
    # scan the return dict for the completed key
    # loop over the potential jobs including the skipped ones
    # If we have a skipped item, add the item to the slices with skip set
    jobs_idx = 0
    for item in concat_ms:
        # If this is an item that is skipped via the skip parameter in
        # the parset, append a skipped entry
        if item.skip:
            slices.append(tuple([item.host, [], True]))
            continue

        # we cannot use the skip iterator so we need to manually get the
        # current job from the list
        job = jobs[jobs_idx]

        # only save the slices if the node has completed successfully
        if job.results["returncode"] == 0:
            finished_runs += 1
            slices.append(tuple([item.host,
                                 job.results["time_slices"], False]))
        else:
            # Set the dataproduct to skipped!!
            item.skip = True
            slices.append(tuple([item.host, [], True]))
            msg = "Failed run on {0}. NOT Created: {1} ".format(
                item.host, item.file)
            self.logger.warn(msg)

        # we have a non-skipped workitem, increase the job idx
        jobs_idx += 1

    if finished_runs == 0:
        self.logger.error(
            "None of the started compute nodes finished: "
            "The current recipe produced no output, aborting")
        return 1

    # Write the output mapfiles:
    # concat.ms paths:
    self._store_data_map(output_ms_mapfile_path, concat_ms,
                         "mapfile with concat.ms")

    # timeslices
    MultiDataMap(slices).save(self.inputs['slices_mapfile'])
    self.logger.info(
        "Wrote MultiMapfile with produced timeslices: {0}".format(
            self.inputs['slices_mapfile']))

    # map with actual input mss.
    self._store_data_map(self.inputs["ms_per_image_mapfile"],
                         DataMap(paths_to_image_mapfiles),
                         "mapfile containing (used) input ms per image:")

    # Set the return values
    self.outputs['mapfile'] = output_ms_mapfile_path
    self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
    self.outputs['ms_per_image_mapfile'] = \
        self.inputs["ms_per_image_mapfile"]

    return 0
def go(self):
    """
    This member contains all the functionality of the imager_awimager.
    Functionality is all located at the node side of the script.
    """
    super(imager_awimager, self).go()
    self.logger.info("Starting imager_awimager run")

    # *********************************************************************
    # 1. collect the inputs and validate
    input_map = DataMap.load(self.inputs['args'][0])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

    if not validate_data_maps(input_map, sourcedb_map):
        self.logger.error(
            "the supplied input_ms mapfile and sourcedb mapfile "
            "are incorrect. Aborting")
        self.logger.error(repr(input_map))
        self.logger.error(repr(sourcedb_map))
        return 1

    # *********************************************************************
    # 2. Start the node side of the awimager recipe
    # Compile the command to be executed on the remote machine
    node_command = "python3 %s" % (self.__file__.replace("master", "nodes"))
    jobs = []

    output_map = copy.deepcopy(input_map)
    for w, x, y in zip(input_map, output_map, sourcedb_map):
        w.skip = x.skip = y.skip = (w.skip or x.skip or y.skip)

    sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
        DataMap.SkipIterator
    for idx, (measurement_item, source_item) in enumerate(
            zip(input_map, sourcedb_map)):
        if measurement_item.skip or source_item.skip:
            jobs.append(None)
            continue
        # both the sourcedb and the measurement are in a map; unpack both
        host, measurement_path = measurement_item.host, measurement_item.file
        host2, sourcedb_path = source_item.host, source_item.file

        # use unique working directories per job, to prevent interference
        # between jobs on a global fs
        working_dir = os.path.join(self.inputs['working_directory'],
                                   "imager_awimager_{0}".format(idx))

        # construct and save the output name
        arguments = [self.inputs['executable'],
                     self.environment,
                     self.inputs['parset'],
                     working_dir,
                     # put in a unique dir, as the node script wants to put
                     # private .par files next to it
                     "%s_%s/image" % (self.inputs['output_image'], idx),
                     measurement_path,
                     sourcedb_path,
                     self.inputs['mask_patch_size'],
                     self.inputs['autogenerate_parameters'],
                     self.inputs['specify_fov'],
                     self.inputs['fov']]

        jobs.append(ComputeJob(host, node_command, arguments,
                               resources={"cores": self.inputs['nthreads']}))
    self._schedule_jobs(jobs)

    # *********************************************************************
    # 3. Check output of the node scripts
    for job, output_item in zip(jobs, output_map):
        # job == None on skipped job; guard against that before looking
        # at the results dict
        if job is None or "image" not in job.results:
            output_item.file = "failed"
            output_item.skip = True
        else:
            output_item.file = job.results["image"]
            output_item.skip = False

    # Check if there are finished runs
    succesfull_runs = None
    for item in output_map:
        if item.skip == False:
            succesfull_runs = True
            break

    if not succesfull_runs:
        self.logger.error(
            "None of the started awimager runs finished correctly")
        self.logger.error(
            "No work left to be done: exiting with error status")
        return 1

    # If partial success
    if self.error.isSet():
        self.logger.warn("Failed awimager node run detected. continue with "
                         "successful tasks.")

    self._store_data_map(self.inputs['mapfile'], output_map,
                         "mapfile containing produced awimages")

    self.outputs["mapfile"] = self.inputs['mapfile']
    return 0
# Check options
if len(args) != 1:
    opt.print_help()
    sys.exit()

# first argument: pattern for measurement-sets
inMSs = glob.glob(args[0])
if options.randomize:
    random.shuffle(inMSs)
if options.decimate:
    for i in range((len(inMSs) - 1), -1, -10):
        inMSs.pop(i)

ergdict = main(inMSs, options.filename, '.', numSB=options.numbands,
               hosts=None, NDPPPfill=True)

groupmap = DataMap.load(ergdict['groupmapfile'])
filemap = MultiDataMap.load(ergdict['mapfile'])
print "len(groupmap) : %d , len(filemap) : %d " % (len(groupmap), len(filemap))
if len(groupmap) != len(filemap):
    print "groupmap and filemap have different length!"
    sys.exit(1)
for i in xrange(len(groupmap)):
    print "Group \"%s\" has %d entries." % (groupmap[i].file,
                                            len(filemap[i].file))
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1,
         enforce_numSB=True, hosts=None, NDPPPfill=True, target_path=None,
         stepname=None, nband_pad=0, make_dummy_files=False,
         skip_flagged_groups=True):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean
        put all files of the same time-step into one group.
        default = -1
    enforce_numSB : bool, optional
        If True and numSB > 0, then add flagged dummy data to ensure that the
        last block has exactly numSB files. If False, then the last block can
        have fewer files (as long as there are no gaps in frequency).
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output
        files into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    nband_pad : int, optional
        Add this number of bands of dummy data to the high-frequency end
        of the list.
    make_dummy_files : bool, optional
        If True, make MS files for all dummy data.
    skip_flagged_groups : bool, optional
        If True, groups that are missing have their skip flag set to True.
        If False, these groups are filled with dummy data and their skip
        flag set to False.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """
    if not filename or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: filename and mapfile_dir are needed!')

    # convert input to needed types
    ms_list = input2strlist(ms_input)
    NDPPPfill = input2bool(NDPPPfill)
    numSB = int(numSB)
    nband_pad = int(nband_pad)
    enforce_numSB = input2bool(enforce_numSB)
    make_dummy_files = input2bool(make_dummy_files)
    skip_flagged_groups = input2bool(skip_flagged_groups)

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on", len(ms_list), "files"

    dirname = os.path.dirname(ms_list[0])

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # use the slower but more reliable way:
        obstable = pt.table(ms, ack=False)
        timestamp = int(round(np.min(obstable.getcol('TIME'))))
        #obstable = pt.table(ms+'::OBSERVATION', ack=False)
        #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
        obstable.close()
        if timestamp in time_groups:
            time_groups[timestamp]['files'].append(ms)
        else:
            time_groups[timestamp] = {'files': [ms],
                                      'basename': os.path.splitext(ms)[0]}
    print "sort_times_into_freqGroups: found", len(time_groups), "time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()  # not needed now, but later
    first = True
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                freq_width = sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = freq
                minfreq = freq
                first = False
            else:
                assert freq_width == sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = max(maxfreq, freq)
                minfreq = min(minfreq, freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs,
                                              time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    # the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    maxfreq = maxfreq + freq_width / 2.
    minfreq = minfreq - freq_width / 2.
    numFiles = round((maxfreq - minfreq) / freq_width)
    if numSB > 0:
        ngroups = int(np.ceil(numFiles / numSB))
    else:
        ngroups = 1
        numSB = int(numFiles)
    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        nbands = 0
        all_group_files = []
        for fgroup in range(ngroups):
            files = []
            skip_this = True
            for fIdx in range(numSB):
                thisfreq = (fIdx + fgroup * numSB + 1) * freq_width + minfreq
                if freq > thisfreq:
                    if enforce_numSB or thisfreq - freq_width / 2. < maxfreq:
                        files.append('dummy.ms')
                else:
                    files.append(fname)
                    skip_this = False
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        # Set freq to a high value to pad the rest of the
                        # group with dummy data
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
            if fgroup == ngroups - 1:
                # Append dummy data to the last frequency group only
                for i in range(nband_pad):
                    files.append('dummy.ms')
            if not skip_this:
                nbands += len(files)
            if make_dummy_files:
                for i, ms in enumerate(files):
                    if ms == 'dummy.ms':
                        # Replace dummy.ms in the files list with a new,
                        # unique filename
                        files[i] = os.path.join(
                            dirname, '{0}_{1}.ms'.format(
                                os.path.splitext(ms)[0],
                                uuid.uuid4().urn.split('-')[-1]))
            if not skip_flagged_groups:
                # Don't set the skip flag to True, even if the group is
                # missing all files
                if not make_dummy_files:
                    raise ValueError('skip_flagged_groups cannot be False if '
                                     'make_dummy_files is also False')
                else:
                    skip_this = False
            filemap.append(MultiDataProduct(hosts[hostID % numhosts], files,
                                            skip_this))
            groupname = time_groups[time]['basename'] + '_%Xt_%dg.ms' % (
                time, fgroup)
            if type(stepname) is str:
                groupname += stepname
            if type(target_path) is str:
                groupname = os.path.join(target_path,
                                         os.path.basename(groupname))
            groupmap.append(DataProduct(hosts[hostID % numhosts], groupname,
                                        skip_this))
            hostID += 1
            all_group_files.extend(files)
        assert freq == 1e12
        if make_dummy_files:
            # Find at least one existing ms for this timestamp
            ms_exists = None
            for ms in all_group_files:
                if os.path.exists(ms):
                    ms_exists = ms
                    sw = pt.table('{}::SPECTRAL_WINDOW'.format(ms))
                    ms_exists_ref_freq = sw.getcol('REF_FREQUENCY')[0]
                    sw.close()
                    break
            for i, ms in enumerate(all_group_files):
                if 'dummy' in ms:
                    # Alter the SPECTRAL_WINDOW subtable as appropriate to
                    # fill the gap
                    ref_freq = minfreq + freq_width * (i + 0.5)
                    pt.tableutil.tablecopy(ms_exists, ms)
                    sw = pt.table('{}::SPECTRAL_WINDOW'.format(ms),
                                  readonly=False)
                    chan_freq = sw.getcol('CHAN_FREQ') \
                        - ms_exists_ref_freq + ref_freq
                    sw.putcol('REF_FREQUENCY', ref_freq)
                    sw.putcol('CHAN_FREQ', chan_freq)
                    sw.close()

                    # Flag all data
                    t = pt.table(ms, readonly=False)
                    t.putcol('FLAG_ROW', np.ones(len(t), dtype=bool))
                    f = t.getcol('FLAG')
                    t.putcol('FLAG', np.ones(f.shape, dtype=bool))
                    t.close()

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename + '_groups')
    groupmap.save(groupmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname,
              'nbands': nbands}
    return result
def go(self):
    """
    Steps:

    1. Load and validate the input datamaps
    2. Run the node parts of the recipe
    3. Validate node output and format the recipe output
    """
    super(imager_finalize, self).go()

    # *********************************************************************
    # 1. Load the datamaps
    awimager_output_map = DataMap.load(self.inputs["awimager_output_map"])
    raw_ms_per_image_map = DataMap.load(
        self.inputs["raw_ms_per_image_map"])
    sourcelist_map = DataMap.load(self.inputs["sourcelist_map"])
    sourcedb_map = DataMap.load(self.inputs["sourcedb_map"])
    target_mapfile = DataMap.load(self.inputs["target_mapfile"])
    output_image_mapfile = DataMap.load(
        self.inputs["output_image_mapfile"])
    processed_ms_dir = self.inputs["processed_ms_dir"]
    fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"]

    # Align the skip fields
    align_data_maps(awimager_output_map, raw_ms_per_image_map,
                    sourcelist_map, target_mapfile, output_image_mapfile,
                    sourcedb_map)

    # Set the correct iterator
    sourcelist_map.iterator = awimager_output_map.iterator = \
        raw_ms_per_image_map.iterator = target_mapfile.iterator = \
        output_image_mapfile.iterator = sourcedb_map.iterator = \
        DataMap.SkipIterator

    # *********************************************************************
    # 2. Run the node side of the recipe
    command = " python %s" % (self.__file__.replace("master", "nodes"))
    jobs = []
    for (awimager_output_item, raw_ms_per_image_item, sourcelist_item,
         target_item, output_image_item, sourcedb_item) in zip(
            awimager_output_map, raw_ms_per_image_map, sourcelist_map,
            target_mapfile, output_image_mapfile, sourcedb_map):
        # collect the files as argument
        arguments = [awimager_output_item.file,
                     raw_ms_per_image_item.file,
                     sourcelist_item.file,
                     target_item.file,
                     output_image_item.file,
                     self.inputs["minbaseline"],
                     self.inputs["maxbaseline"],
                     processed_ms_dir,
                     fillrootimagegroup_exec,
                     self.environment,
                     sourcedb_item.file]

        self.logger.info(
            "Starting finalize with the following args: {0}".format(
                arguments))
        jobs.append(ComputeJob(target_item.host, command, arguments))

    self._schedule_jobs(jobs)

    # *********************************************************************
    # 3. Validate the performance of the node script and assign output
    succesful_run = False
    for (job, output_image_item) in zip(jobs, output_image_mapfile):
        if not "hdf5" in job.results:
            # If the output failed, set skip to True
            output_image_item.skip = True
        else:
            # signal that we have at least a single run that finished ok;
            # no need to set skip in this case
            succesful_run = True

    if not succesful_run:
        self.logger.warn("Failed finalizer node run detected")
        return 1

    output_image_mapfile.save(self.inputs['placed_image_mapfile'])
    self.logger.debug(
        "Wrote mapfile containing placed hdf5 images: {0}".format(
            self.inputs['placed_image_mapfile']))

    self.outputs["placed_image_mapfile"] = self.inputs[
        'placed_image_mapfile']

    return 0
def go(self):
    """
    Entry point for recipe: Called by the pipeline framework
    """
    super(imager_prepare, self).go()
    self.logger.info("Starting imager_prepare run")

    # *********************************************************************
    # input data
    input_map = DataMap.load(self.inputs['args'][0])
    output_map = DataMap.load(self.inputs['target_mapfile'])
    slices_per_image = self.inputs['slices_per_image']
    subbands_per_image = self.inputs['subbands_per_image']
    # Validate input
    if not self._validate_input_map(input_map, output_map,
                                    slices_per_image, subbands_per_image):
        return 1

    # outputs
    output_ms_mapfile_path = self.inputs['mapfile']

    # *********************************************************************
    # schedule the actual work
    # TODO: Refactor this function into: load data, perform work,
    # create output
    node_command = " python %s" % (self.__file__.replace(
        "master", "nodes"))
    jobs = []
    paths_to_image_mapfiles = []
    n_subband_groups = len(output_map)

    for idx_sb_group, item in enumerate(output_map):
        # create the input files for this node
        self.logger.debug("Creating input data subset for processing "
                          "on: {0}".format(item.host))
        inputs_for_image_map = \
            self._create_input_map_for_sbgroup(
                slices_per_image, n_subband_groups,
                subbands_per_image, idx_sb_group, input_map)

        # Save the mapfile
        job_directory = self.config.get("layout", "job_directory")
        inputs_for_image_mapfile_path = os.path.join(
            job_directory, "mapfiles",
            "ms_per_image_{0}".format(idx_sb_group))
        self._store_data_map(inputs_for_image_mapfile_path,
                             inputs_for_image_map, "inputmap for location")

        # save the (input) ms, as a list of mapfiles
        paths_to_image_mapfiles.append(
            tuple([item.host, inputs_for_image_mapfile_path, False]))

        arguments = [self.environment,
                     self.inputs['parset'],
                     self.inputs['working_directory'],
                     self.inputs['processed_ms_dir'],
                     self.inputs['ndppp_exec'],
                     item.file,
                     slices_per_image,
                     subbands_per_image,
                     inputs_for_image_mapfile_path,
                     self.inputs['asciistat_executable'],
                     self.inputs['statplot_executable'],
                     self.inputs['msselect_executable'],
                     self.inputs['rficonsole_executable'],
                     self.inputs['add_beam_tables']]

        jobs.append(ComputeJob(item.host, node_command, arguments))

    # Hand over the job(s) to the pipeline scheduler
    self._schedule_jobs(jobs)

    # *********************************************************************
    # validate the output, cleanup, return output
    if self.error.isSet():  # if one of the nodes failed
        self.logger.warn("Failed prepare_imager run detected: Generating "
                         "new output_ms_mapfile_path without failed runs:"
                         " {0}".format(output_ms_mapfile_path))

    concat_ms = copy.deepcopy(output_map)
    slices = []
    finished_runs = 0
    # scan the return dict for the completed key
    for (item, job) in zip(concat_ms, jobs):
        # only save the slices if the node has completed successfully
        if job.results["returncode"] == 0:
            finished_runs += 1
            slices.append(
                tuple([item.host, job.results["time_slices"], False]))
        else:
            # Set the dataproduct to skipped!!
            item.skip = True
            slices.append(tuple([item.host, ["/Failed"], True]))
            msg = "Failed run on {0}. NOT Created: {1} ".format(
                item.host, item.file)
            self.logger.warn(msg)

    if finished_runs == 0:
        self.logger.error(
            "None of the started compute nodes finished: "
            "The current recipe produced no output, aborting")
        return 1

    # Write the output mapfiles:
    # concat.ms paths:
    self._store_data_map(output_ms_mapfile_path, concat_ms,
                         "mapfile with concat.ms")

    # timeslices
    MultiDataMap(slices).save(self.inputs['slices_mapfile'])
    self.logger.info(
        "Wrote MultiMapfile with produced timeslices: {0}".format(
            self.inputs['slices_mapfile']))

    # map with actual input mss.
    self._store_data_map(self.inputs["raw_ms_per_image_mapfile"],
                         DataMap(paths_to_image_mapfiles),
                         "mapfile containing (raw) input ms per image:")

    # Set the return values
    self.outputs['mapfile'] = output_ms_mapfile_path
    self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
    self.outputs['raw_ms_per_image_mapfile'] = \
        self.inputs["raw_ms_per_image_mapfile"]

    return 0
def main(ms_input, outmapname=None, mapfile_dir=None,
         cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1.,
         y_axis_stretch=1.):
    """
    Group a list of MS files by frequency and compute the image sizes and
    averaging parameters, including those for the deep images.

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname : str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        Cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        Cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        Size of the high-res images, in units of the FWHM
    fieldsize_lowres : float, optional
        Size of the low-res images, in units of the FWHM
    image_padding : float, optional
        Padding factor applied to the image sizes
    y_axis_stretch : float, optional
        Stretch (or compression) factor applied to the y-axis

    Returns
    -------
    result : dict
        Dict with the names of the generated mapfiles
    """
    if not outmapname or not mapfile_dir:
        raise ValueError(
            'InitSubtract_deep_sort_and_compute: outmapname and mapfile_dir are needed!')
    if isinstance(ms_input, str):
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # mapfile entries are DataProducts; the name is in item.file
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif isinstance(ms_input, list):
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError(
            'InitSubtract_deep_sort_and_compute: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    bandfreqs = []
    print("InitSubtract_deep_sort_and_compute.py: Putting files into bands.")
    for MSkey in msdict.keys():
        band = Band(msdict[MSkey])
        bands.append(band)
        bandfreqs.append(band.freq)

    # min freq gives largest image size for deep image
    bandfreqs = np.array(bandfreqs)
    minfreq = np.min(bandfreqs)
    bandmin = np.argmin(bandfreqs)
    # need to map the output from wsclean channels to the right frequencies:
    # just put the bands in the right freq order
    wsclean_channum = np.argsort(bandfreqs)
    bands = np.array(bands)
    bands = bands[wsclean_channum]

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    nbands = int(len(bands) / 10)
    if nbands > 8:
        nchansout_clean1 = int(nbands / 4)
    elif nbands > 4:
        nchansout_clean1 = int(nbands / 2)
    else:
        nchansout_clean1 = int(nbands)
    if nchansout_clean1 < 2:
        nchansout_clean1 = 2

    (freqstep, timestep) = bands[0].get_averaging_steps()
    # timestep_sec gets added to the band object in get_averaging_steps()
    int_time_sec = bands[0].timestep_sec * timestep
    nwavelengths_high = bands[0].get_nwavelengths(cellsize_highres_deg, int_time_sec)
    nwavelengths_low = bands[0].get_nwavelengths(cellsize_lowres_deg, int_time_sec)

    print("InitSubtract_deep_sort_and_compute.py: analyzing data...")
    for band in bands:
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg,
            fieldsize_highres, fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(
            int(imsize_high_res * y_axis_stretch))
        high_size_map.append(DataProduct(
            'localhost', str(imsize_high_res) + " " + str(imsize_high_res_stretch), False))
        imsize_low_res_stretch = band.get_optimum_size(
            int(imsize_low_res * y_axis_stretch))
        low_size_map.append(DataProduct(
            'localhost', str(imsize_low_res) + " " + str(imsize_low_res_stretch), False))
        imsize_high_pad = band.get_optimum_size(
            int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(
            int(imsize_high_res * image_padding * y_axis_stretch))
        high_paddedsize_map.append(DataProduct(
            'localhost', str(imsize_high_pad) + " " + str(imsize_high_pad_stretch), False))
        imsize_low_pad = band.get_optimum_size(
            int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(
            int(imsize_low_res * image_padding * y_axis_stretch))
        low_paddedsize_map.append(DataProduct(
            'localhost', str(imsize_low_pad) + " " + str(imsize_low_pad_stretch), False))

        if band.freq == minfreq:
            deep_imsize_high_res = imsize_high_res
            deep_imsize_high_res_stretch = imsize_high_res_stretch
            deep_imsize_high_pad = imsize_high_pad
            deep_imsize_high_pad_stretch = imsize_high_pad_stretch
            deep_imsize_low_res = imsize_low_res
            deep_imsize_low_res_stretch = imsize_low_res_stretch
            deep_imsize_low_pad = imsize_low_pad
            deep_imsize_low_pad_stretch = imsize_low_pad_stretch

    deep_high_size_map = DataMap([DataProduct(
        'localhost', str(deep_imsize_high_res) + " " + str(deep_imsize_high_res_stretch), False)])
    deep_high_paddedsize_map = DataMap([DataProduct(
        'localhost', str(deep_imsize_high_pad) + " " + str(deep_imsize_high_pad_stretch), False)])
    deep_low_size_map = DataMap([DataProduct(
        'localhost', str(deep_imsize_low_res) + " " + str(deep_imsize_low_res_stretch), False)])
    deep_low_paddedsize_map = DataMap([DataProduct(
        'localhost', str(deep_imsize_low_pad) + " " + str(deep_imsize_low_pad_stretch), False)])
    nbands_map = DataMap([DataProduct('localhost', str(nbands), False)])
    nchansout_clean1_map = DataMap(
        [DataProduct('localhost', str(nchansout_clean1), False)])

    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    nwavelengths_high_map = DataMap([])
    nwavelengths_low_map = DataMap([])
    for index in range(numfiles):
        # set time and frequency averaging values (in sec and Hz)
        freqstep_map.append(DataProduct(
            'localhost', str(freqstep * bands[0].chan_width_hz), False))
        timestep_map.append(DataProduct(
            'localhost', str(timestep * bands[0].timestep_sec), False))
        nwavelengths_high_map.append(
            DataProduct('localhost', str(nwavelengths_high), False))
        nwavelengths_low_map.append(
            DataProduct('localhost', str(nwavelengths_low), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir, outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir, outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    deep_high_sizename = os.path.join(mapfile_dir, outmapname + '_deep_high_size')
    deep_high_size_map.save(deep_high_sizename)
    deep_low_sizename = os.path.join(mapfile_dir, outmapname + '_deep_low_size')
    deep_low_size_map.save(deep_low_sizename)
    deep_high_padsize_name = os.path.join(mapfile_dir, outmapname + '_deep_high_padded_size')
    deep_high_paddedsize_map.save(deep_high_padsize_name)
    deep_low_padsize_name = os.path.join(mapfile_dir, outmapname + '_deep_low_padded_size')
    deep_low_paddedsize_map.save(deep_low_padsize_name)
    nbands_mapname = os.path.join(mapfile_dir, outmapname + '_nbands')
    nbands_map.save(nbands_mapname)
    nchansout_clean1_mapname = os.path.join(mapfile_dir, outmapname + '_nchansout_clean1')
    nchansout_clean1_map.save(nchansout_clean1_mapname)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    nwavelengths_high_name = os.path.join(mapfile_dir, outmapname + '_nwavelengths_high')
    nwavelengths_high_map.save(nwavelengths_high_name)
    nwavelengths_low_name = os.path.join(mapfile_dir, outmapname + '_nwavelengths_low')
    nwavelengths_low_map.save(nwavelengths_low_name)

    result = {'groupmap': groupmapname,
              'single_mapfile': file_single_mapname,
              'high_size_mapfile': high_sizename,
              'low_size_mapfile': low_sizename,
              'high_padsize_mapfile': high_padsize_name,
              'low_padsize_mapfile': low_padsize_name,
              'deep_high_size_mapfile': deep_high_sizename,
              'deep_low_size_mapfile': deep_low_sizename,
              'deep_high_padsize_mapfile': deep_high_padsize_name,
              'deep_low_padsize_mapfile': deep_low_padsize_name,
              'nbands': nbands_mapname,
              'nchansout_clean1': nchansout_clean1_mapname,
              'freqstep': freqstepname,
              'timestep': timestepname,
              'nwavelengths_high_mapfile': nwavelengths_high_name,
              'nwavelengths_low_mapfile': nwavelengths_low_name}
    return result
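# The frequency grouping above keys each MS on the integer REF_FREQUENCY of
# its spectral window. A self-contained sketch of just that step, assuming
# python-casacore is available as pt (as in the code above); ms_list is a
# placeholder for real MS paths:
import casacore.tables as pt

def group_by_ref_frequency(ms_list):
    """Return {int(REF_FREQUENCY): [ms, ...]} for a list of MS paths."""
    msdict = {}
    for ms in ms_list:
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        msdict.setdefault(msfreq, []).append(ms)
    return msdict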
def _add_name(mapname, suffix):
    """Load a mapfile and append ``suffix`` to every file entry."""
    dmap = DataMap.load(mapname)
    for item in dmap:
        item.file = item.file + suffix
    return dmap
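# Hypothetical usage of the helper above: derive a sibling mapfile whose
# entries point at '.pcal' copies of each file (the suffix and file names are
# illustrative only):
#
#   pcal_map = _add_name('input_files.mapfile', '.pcal')
#   pcal_map.save('input_files_pcal.mapfile')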
def plugin_main(args, **kwargs):
    fileid = kwargs['mapfile_in']
    datamap = DataMap.load(fileid)
    hdf5File = os.path.join(kwargs['hdf5_dir'], kwargs['hdf5file'])
    if 'instrument' in kwargs:
        instrument = kwargs['instrument']
    else:
        instrument = '/instrument'
    if 'compression' in kwargs:
        compression = int(kwargs['compression'])
    else:
        compression = 5
    if 'solset' in kwargs:
        solsetName = kwargs['solset']
    else:
        solsetName = None

    # Check if all the necessary files are available
    antennaFile = os.path.join(datamap[0].file, 'ANTENNA')
    if not os.path.isdir(antennaFile):
        logging.critical('Missing ANTENNA table.')
        sys.exit(1)
    fieldFile = os.path.join(datamap[0].file, 'FIELD')
    if not os.path.isdir(fieldFile):
        logging.critical('Missing FIELD table.')
        sys.exit(1)
    skydbFile = os.path.join(datamap[0].file, 'sky')
    if not os.path.isdir(skydbFile):
        logging.warning('No sky table found. (Direction-dependent parameters will not work.)')
        skydbFile = 'None'

    # generate list of parmDB filenames
    parmDBnames = [MS.file + instrument for MS in datamap]

    # create and fill the hdf5 file with losoto's create_h5parm (this also
    # writes its own CREATE entry into the h5parm history)
    create_h5parm(parmDBnames, antennaFile, fieldFile, skydbFile,
                  hdf5File, compression, solsetName)

    # generate mapfile and wrap up
    mapfileentry = {}
    mapfileentry['host'] = 'localhost'
    mapfileentry['file'] = hdf5File
    mapfileentry['skip'] = False
    outfileid = os.path.join(kwargs['mapfile_dir'], kwargs['filename'])
    with open(outfileid, 'w') as outmap:
        outmap.write(repr([mapfileentry]))

    result = {}
    result['mapfile'] = outfileid
    return result
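# The plugin above writes its one-entry mapfile by hand with repr(). Since the
# on-disk format is exactly what DataMap serialises, an equivalent sketch
# using the same lofarpipe classes would be:
#
#   outmap = DataMap([DataProduct('localhost', hdf5File, False)])
#   outmap.save(outfileid)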
def plugin_main(args, **kwargs):
    """
    Re-groups a simple (flat) mapfile into a multi-mapfile that has the same
    shape as a given multi-mapfile.

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be re-grouped.
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The total number of
        files needs to be the same as in mapfile_in.
    check_basename : bool (str), optional
        Check if the basenames (see os.path.basename()) minus extension match.
        default = True
    join_groups : int (str), optional
        If set, join that many groups into one new group. (Gives fewer groups
        but more files per group than in mapfile_groups.)
        default = keep the same grouping as in mapfile_groups
    join_max_files : int (str), optional
        If set, join as many groups as possible before the number of files
        per group would exceed "join_max_files". Similar to "join_groups",
        but the number of joined groups is not fixed and depends on the
        number of files per group. Mutually exclusive with "join_groups"!
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename
    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    check_names = True
    if 'check_basename' in kwargs:
        check_names = string2bool(kwargs['check_basename'])
    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    map_out = MultiDataMap([])
    inindex = 0
    for group in groupmap:
        grouplist = []
        skip = False
        for fname in group.file:
            if check_names:
                refbase = os.path.splitext(os.path.basename(fname))[0]
                newbase = os.path.splitext(os.path.basename(inmap[inindex].file))[0]
                if refbase != newbase:
                    raise ValueError(
                        'PipelineStep_reGroupMapfile: basenames {0} and {1} differ'.format(refbase, newbase))
            grouplist.append(inmap[inindex].file)
            if inmap[inindex].skip:
                print('PipelineStep_reGroupMapfile: Skipping full group for file: ' + inmap[inindex].file)
                skip = True
            inindex += 1
        map_out.data.append(MultiDataProduct(group.host, grouplist, skip))
    assert inindex == len(inmap)

    if 'join_groups' in kwargs:
        if 'join_max_files' in kwargs:
            raise ValueError(
                'PipelineStep_reGroupMapfile: "join_groups" and "join_max_files" are mutually exclusive!')
        groups_to_join = int(kwargs['join_groups'])
        if groups_to_join > 1:
            newmap = MultiDataMap([])
            for start_idx in range(0, len(map_out), groups_to_join):
                end_idx = min((start_idx + groups_to_join), len(map_out))
                grouplist = []
                for group in map_out[start_idx:end_idx]:
                    grouplist.extend(group.file)
                    if group.skip:
                        raise ValueError(
                            'PipelineStep_reGroupMapfile: Found group that should be skipped! '
                            '(I.e. there is probably something wrong with your data!)')
                newmap.data.append(MultiDataProduct(map_out[start_idx].host, grouplist, False))
            map_out = newmap
    elif 'join_max_files' in kwargs:
        max_files = int(kwargs['join_max_files'])
        newmap = MultiDataMap([])
        grouplist = map_out[0].file
        grouphost = map_out[0].host
        for gindex in range(1, len(map_out)):
            if map_out[gindex].skip:
                raise ValueError(
                    'PipelineStep_reGroupMapfile: Found group that should be skipped! '
                    '(I.e. there is probably something wrong with your data!)')
            if (len(grouplist) + len(map_out[gindex].file)) > max_files:
                newmap.data.append(MultiDataProduct(grouphost, grouplist, False))
                grouplist = map_out[gindex].file
                grouphost = map_out[gindex].host
            else:
                grouplist.extend(map_out[gindex].file)
        # add the final (possibly partial) group to the map
        newmap.data.append(MultiDataProduct(grouphost, grouplist, False))
        map_out = newmap

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}
    return result
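# The "join_max_files" branch above is a greedy bin-packing over ordered
# groups: keep extending the current group until adding the next one would
# exceed the limit. A plain-Python sketch of that rule, with toy lists:
def pack_groups(groups, max_files):
    """groups: list of lists of filenames; returns re-joined groups."""
    packed = [list(groups[0])]
    for group in groups[1:]:
        if len(packed[-1]) + len(group) > max_files:
            packed.append(list(group))   # start a new joined group
        else:
            packed[-1].extend(group)     # still fits under the limit
    return packed

# e.g. pack_groups([['a'], ['b'], ['c', 'd']], 2) -> [['a', 'b'], ['c', 'd']]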
def go(self):
    if 'executable' in self.inputs:
        executable = self.inputs['executable']

    if self.inputs['nthreads']:
        self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

    if 'environment' in self.inputs:
        self.environment.update(self.inputs['environment'])

    self.logger.info("Starting %s run" % executable)
    super(executable_args, self).go()

    # args format stuff
    args_format = {'args_format': self.inputs['args_format'],
                   'args_format_argument': self.inputs['args_format_argument'],
                   'args_format_option': self.inputs['args_format_option'],
                   'args_format_longoption': self.inputs['args_format_longoption'],
                   'args_format_option_argument': self.inputs['args_format_option_argument']}
    mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
    work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])

    # *********************************************************************
    # Try loading the input/output data files; validate the output against
    # the input locations if output locations are provided.
    try:
        inputmapfiles = []
        inlist = []
        if self.inputs['mapfile_in']:
            inlist.append(self.inputs['mapfile_in'])

        if self.inputs['mapfiles_in']:
            for item in self.inputs['mapfiles_in']:
                inlist.append(item)
            self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

        for item in inlist:
            inputmapfiles.append(DataMap.load(item))

    except Exception:
        self.logger.error('Could not load input mapfile %s' % inlist)
        return 1

    outputmapfiles = []
    if self.inputs['mapfile_out']:
        try:
            outdata = DataMap.load(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)
        except Exception:
            self.logger.error('Could not load output mapfile %s' % self.inputs['mapfile_out'])
            return 1
        # sync skip fields in the mapfiles
        align_data_maps(inputmapfiles[0], outputmapfiles[0])

    elif self.inputs['mapfiles_out']:
        for item in self.inputs['mapfiles_out']:
            outputmapfiles.append(DataMap.load(item))
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    else:
        # output is directed to the working directory if no output mapfile
        # is specified
        outdata = copy.deepcopy(inputmapfiles[0])
        if not self.inputs['inplace']:
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                )
            self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        else:
            self.inputs['mapfile_out'] = self.inputs['mapfile_in']
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        outputmapfiles.append(outdata)

    if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
        self.logger.error("Validation of data mapfiles failed!")
        return 1

    if self.inputs['outputsuffixes']:
        # Handle multiple output files
        for name in self.inputs['outputsuffixes']:
            outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
            self.inputs['mapfiles_out'].append(
                os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
            for item in outputmapfiles[-1]:
                item.file = os.path.join(
                    work_dir,
                    os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                )
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    # prepare arguments
    arglist = self.inputs['arguments']
    parsetdict = {}
    if 'parset' in self.inputs:
        parset = Parset()
        parset.adoptFile(self.inputs['parset'])
        for k in parset.keys:
            parsetdict[k] = str(parset[k])

    # construct multiple input data
    if self.inputs['inputkey'] and self.inputs['inputkey'] not in self.inputs['inputkeys']:
        self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

    if not self.inputs['outputkeys'] and self.inputs['outputkey']:
        self.inputs['outputkeys'].append(self.inputs['outputkey'])

    if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
        self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                          (len(inputmapfiles), len(self.inputs['inputkeys'])))
        return 1

    filedict = {}
    if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
        for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
            if mapname not in self.inputs['mapfiles_as_string']:
                filedict[key] = []
                for inp in filemap:
                    filedict[key].append(inp.file)
            else:
                if key != mapname:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(mapname)

    if self.inputs['outputkey']:
        filedict[self.inputs['outputkey']] = []
        for item in outputmapfiles[0]:
            filedict[self.inputs['outputkey']].append(item.file)

    # ********************************************************************
    # Call the node side of the recipe: create and schedule the compute jobs
    recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
    recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
    pylist = os.getenv('PYTHONPATH').split(':')
    command = None
    for pl in pylist:
        if os.path.isfile(os.path.join(pl, 'lofarpipe/recipes/nodes/' + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(pl, 'lofarpipe/recipes/nodes/' + self.inputs['nodescript'] + '.py')
    for pl in recipe_directories:
        if os.path.isfile(os.path.join(pl, 'nodes/' + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(pl, 'nodes/' + self.inputs['nodescript'] + '.py')

    inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
    jobs = []
    for i, (outp, inp,) in enumerate(zip(outputmapfiles[0], inputmapfiles[0])):
        arglist_copy = copy.deepcopy(arglist)
        parsetdict_copy = copy.deepcopy(parsetdict)

        if filedict:
            for name, value in filedict.items():
                replaced = False
                if arglist_copy:
                    for arg in arglist:
                        if name == arg:
                            ind = arglist_copy.index(arg)
                            arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                            replaced = True
                if parsetdict_copy:
                    if name in list(parsetdict_copy.values()):
                        for k, v in parsetdict_copy.items():
                            if v == name:
                                parsetdict_copy[k] = value[i]
                    else:
                        if not replaced:
                            parsetdict_copy[name] = value[i]

        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    executable,
                    arglist_copy,
                    parsetdict_copy,
                    work_dir,
                    self.inputs['parsetasfile'],
                    args_format,
                    self.environment
                ],
                resources={"cores": self.inputs['nthreads']}
            )
        )
    max_per_node = self.inputs['max_per_node']
    self._schedule_jobs(jobs, max_per_node)

    jobresultdict = {}
    resultmap = {}
    for job, outp in zip(jobs, outputmapfiles[0]):
        if job.results['returncode'] != 0:
            outp.skip = True
            if not self.inputs['error_tolerance']:
                self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!"
                                  % job.results['returncode'])
                return 1
        for k, v in list(job.results.items()):
            if k not in jobresultdict:
                jobresultdict[k] = []
            jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
            if k == 'break':
                self.outputs.update({'break': v})

    # Temporary solution: write all output dict entries to a mapfile.
    # Check the directory first, for stand-alone mode.
    if not os.path.isdir(mapfile_dir):
        try:
            os.mkdir(mapfile_dir)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                pass
            else:
                raise
    for k, v in list(jobresultdict.items()):
        dmap = DataMap(v)
        dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
        resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
    self.outputs.update(resultmap)

    # *********************************************************************
    # Check job results, and create the output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn("Some jobs failed, continuing with succeeded runs")
    mapdict = {}
    for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
        self.logger.debug("Writing data map file: %s" % name)
        item.save(name)
        mapdict[os.path.basename(name)] = name
    self.outputs['mapfile'] = self.inputs['mapfile_out']
    if self.inputs['outputsuffixes']:
        self.outputs.update(mapdict)

    return 0
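# The per-job loop above substitutes each mapfile "key" placeholder with the
# i-th file from the corresponding map, both in the argument list and in the
# parset dict. A simplified sketch of that substitution rule in plain Python
# (the real loop also does substring replacement and falls back to adding new
# parset keys):
def substitute_placeholders(arglist, parsetdict, filedict, i):
    """Replace placeholder names with the i-th entry of each file list."""
    args = [filedict[a][i] if a in filedict else a for a in arglist]
    parset = {k: (filedict[v][i] if v in filedict else v)
              for k, v in parsetdict.items()}
    return args, parset

# e.g. substitute_placeholders(['inputms'], {}, {'inputms': ['a.MS', 'b.MS']}, 1)
#      -> (['b.MS'], {})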
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None,
         NDPPPfill=True, target_path=None, stepname=None,
         mergeLastGroup=False, truncateLastSBs=True):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename : str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean
        put all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output
        files into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True: not allowed
        mergeLastGroup = True, truncateLastSBs = False: put the files from
            the last group that doesn't have SBperGroup subbands into the
            second-to-last group (which will then have more than SBperGroup
            entries).
        mergeLastGroup = False, truncateLastSBs = True: ignore the last
            files that don't make for a full group (not all files are used).
        mergeLastGroup = False, truncateLastSBs = False: keep the incomplete
            last group, or - with NDPPPfill=True - fill the last group with
            dummies.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile
    """
    if not filename or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError('sort_times_into_freqGroups: Can either merge the last partial group or truncate at the last full group, not both!')
    if mergeLastGroup:
        raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!')
    if isinstance(ms_input, str):
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # mapfile entries are DataProducts; the name is in item.file
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif isinstance(ms_input, list):
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_times_into_freqGroups: type of "ms_input" unknown!')

    if isinstance(hosts, str):
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print("sort_times_into_freqGroups: Working on", len(ms_list), "files")

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # use the slower but more reliable way:
        obstable = pt.table(ms, ack=False)
        timestamp = int(round(np.min(obstable.getcol('TIME'))))
        #obstable = pt.table(ms+'::OBSERVATION', ack=False)
        #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
        obstable.close()
        if timestamp in time_groups:
            time_groups[timestamp]['files'].append(ms)
        else:
            time_groups[timestamp] = {'files': [ms],
                                      'basename': os.path.splitext(ms)[0]}
    print("sort_times_into_freqGroups: found", len(time_groups), "time-groups")

    # sort time-groups by frequency
    timestamps = sorted(time_groups.keys())
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                freq_width = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                maxfreq = freq
                minfreq = freq
                first = False
            else:
                assert freq_width == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                maxfreq = max(maxfreq, freq)
                minfreq = min(minfreq, freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = sorted(
            zip(freqs, time_groups[time]['files']), key=lambda pair: pair[0])
    print("sort_times_into_freqGroups: Collected the frequencies for the time-groups")

    # the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    maxfreq = maxfreq + freq_width / 2.
    minfreq = minfreq - freq_width / 2.
    numFiles = round((maxfreq - minfreq) / freq_width)
    if numSB > 0:
        if truncateLastSBs:
            ngroups = int(np.floor(numFiles / numSB))
        else:
            ngroups = int(np.ceil(numFiles / numSB))
    else:
        ngroups = 1
        numSB = int(numFiles)
    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        for fgroup in range(ngroups):
            files = []
            skip_this = True
            for fIdx in range(numSB):
                if freq > (fIdx + fgroup * numSB + 1) * freq_width + minfreq:
                    if NDPPPfill:
                        files.append('dummy.ms')
                else:
                    files.append(fname)
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
                    skip_this = False
            filemap.append(MultiDataProduct(hosts[hostID % numhosts], files, skip_this))
            groupname = time_groups[time]['basename'] + '_%Xt_%dg.ms' % (time, fgroup)
            if isinstance(stepname, str):
                groupname += stepname
            if isinstance(target_path, str):
                groupname = os.path.join(target_path, os.path.basename(groupname))
            groupmap.append(DataProduct(hosts[hostID % numhosts], groupname, skip_this))
            hostID += 1  # round-robin over the available hosts
        assert freq == 1e12

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename + '_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemapname, nchans)
    flagmapname = os.path.join(mapfile_dir, filename + '_flags')
    flagmap.save(flagmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname,
              'flagmapfile': flagmapname}
    return result
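# The grouping arithmetic above bins a contiguous frequency axis into groups
# of numSB subbands each. A toy check of the group-count rule (pure Python):
import math

def count_groups(num_files, num_sb, truncate_last=True):
    """Number of frequency groups for num_files subbands, num_sb per group."""
    if num_sb <= 0:
        return 1                                          # one group for everything
    if truncate_last:
        return int(math.floor(num_files / float(num_sb))) # drop the partial group
    return int(math.ceil(num_files / float(num_sb)))      # keep it (filled with dummies)

# e.g. count_groups(25, 10) -> 2 (truncated), count_groups(25, 10, False) -> 3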
def pipeline_logic(self):
    """
    Define the individual tasks that comprise the current pipeline.
    This method will be invoked by the base-class's `go()` method.
    """
    self.logger.info("Starting imager pipeline")

    # Define scratch directory to be used by the compute nodes.
    self.scratch_directory = os.path.join(self.inputs['working_directory'],
                                          self.inputs['job_name'])
    # Get input/output-data products specifications.
    self._get_io_product_specs()

    # remove prepending parset identifiers, leave only pipelinecontrol
    full_parset = self.parset
    self.parset = self.parset.makeSubset(
        self.parset.fullModuleName('PythonControl') + '.')  # remove this

    # Create directories to store communication and data files
    job_dir = self.config.get("layout", "job_directory")
    self.parset_dir = os.path.join(job_dir, "parsets")
    create_directory(self.parset_dir)
    self.mapfile_dir = os.path.join(job_dir, "mapfiles")
    create_directory(self.mapfile_dir)

    # *********************************************************************
    # (INPUT) Get the input from external sources and create pipeline types
    # Input measurement sets
    input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
    self.input_data.save(input_mapfile)
    self.logger.debug("Wrote input UV-data mapfile: {0}".format(input_mapfile))

    # Provides location for the scratch directory and concat.ms location
    target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
    self.target_data.save(target_mapfile)
    self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile))

    # images datafiles
    output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile")
    self.output_data.save(output_image_mapfile)
    self.logger.debug("Wrote output sky-image mapfile: {0}".format(output_image_mapfile))

    # ******************************************************************
    # (1) prepare phase: copy and collect the ms
    concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
        processed_ms_dir = self._prepare_phase(input_mapfile, target_mapfile)

    number_of_major_cycles = self.parset.getInt("Imaging.number_of_major_cycles")

    # We start with an empty source_list map. It should contain n_output
    # entries all set to empty strings
    source_list_map_path = os.path.join(self.mapfile_dir, "initial_sourcelist.mapfile")
    source_list_map = DataMap.load(target_mapfile)  # copy the output map
    for item in source_list_map:
        item.file = ""  # set all to empty string
    source_list_map.save(source_list_map_path)

    for idx_loop in range(number_of_major_cycles):
        # *****************************************************************
        # (2) Create dbs and sky model
        parmdbs_path, sourcedb_map_path = self._create_dbs(
            concat_ms_map_path, timeslice_map_path,
            source_list_map_path=source_list_map_path,
            skip_create_dbs=False)

        # *****************************************************************
        # (3) bbs_imager recipe.
        bbs_output = self._bbs(timeslice_map_path, parmdbs_path,
                               sourcedb_map_path, skip=False)

        # TODO: Extra recipe: concat timeslices using pyrap.concatms
        # (see prepare)

        # *****************************************************************
        # (4) Get parameters awimager from the prepare_parset and inputs
        aw_image_mapfile, maxbaseline = self._aw_imager(
            concat_ms_map_path, idx_loop, sourcedb_map_path, skip=False)

        # *****************************************************************
        # (5) Source finding
        sourcelist_map, found_sourcedb_path = self._source_finding(
            aw_image_mapfile, idx_loop, skip=False)
        # should the output be a sourcedb instead of a sourcelist?

    # TODO: minbaseline should be a parset value, as maxbaseline is
    minbaseline = 0

    # *********************************************************************
    # (6) Finalize:
    placed_data_image_map = self._finalize(
        aw_image_mapfile, processed_ms_dir, ms_per_image_map_path,
        sourcelist_map, minbaseline, maxbaseline, target_mapfile,
        output_image_mapfile, found_sourcedb_path)

    # *********************************************************************
    # (7) Get metadata
    # Create a parset containing the metadata for MAC/SAS
    metadata_file = "%s_feedback_SkyImage" % (self.parset_file,)
    self.run_task(
        "get_metadata", placed_data_image_map,
        parset_prefix=(full_parset.getString('prefix') +
                       full_parset.fullModuleName('DataProducts')),
        product_type="SkyImage",
        metadata_file=metadata_file)

    self.send_feedback_processing(parameterset({'feedback_version': feedback_version}))
    self.send_feedback_dataproducts(parameterset(metadata_file))
    return 0
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with only the MSs at the middle frequency

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    index : int, optional
        Index of the frequency band to use.

    Returns
    -------
    result : dict
        Output datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'include' in kwargs:
        include = kwargs['include']
    else:
        include = None
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap([])

    # do not re-run if we already ran, and input files are deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_selectMiddleFreq: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # sort into frequency groups
    freq_groups = {}
    hosts = []
    for item in map_in:
        if include is not None and include not in item.file:
            continue
        # Get the frequency info
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if freq in freq_groups:
            freq_groups[freq].append(item.file)
        else:
            freq_groups[freq] = [item.file]
        if item.host not in hosts:
            hosts.append(item.host)

    # find maximum number of files per frequency-group
    maxfiles = max([len(group) for group in freq_groups.values()])
    # find the center-frequency
    freqs = sorted(freq_groups.keys())
    selfreq = freqs[len(freqs) // 2]
    if 'index' in kwargs:
        selfreq = int(kwargs['index'])
    else:
        # make sure that the chosen frequency has maxfiles entries
        while len(freq_groups[selfreq]) < maxfiles:
            freqs.remove(selfreq)
            selfreq = freqs[len(freqs) // 2]
    # extend the hosts-list
    for i in range(len(freq_groups[selfreq]) - len(hosts)):
        hosts.append(hosts[i])
    # fill the output-map
    for (host, fname) in zip(hosts, freq_groups[selfreq]):
        map_out.append(DataProduct(host, fname, False))
    map_out.save(fileid)
    result = {'mapfile': fileid}
    return result
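# The selection above picks the middle entry of the sorted frequency list and
# walks inward until it finds a band with the maximal file count. A toy
# sketch of that selection rule over a plain dict:
def middle_frequency(freq_groups):
    """freq_groups: {freq: [files]}; returns the chosen frequency."""
    maxfiles = max(len(g) for g in freq_groups.values())
    freqs = sorted(freq_groups.keys())
    selfreq = freqs[len(freqs) // 2]
    while len(freq_groups[selfreq]) < maxfiles:
        freqs.remove(selfreq)            # discard incomplete bands
        selfreq = freqs[len(freqs) // 2]
    return selfreq

# e.g. middle_frequency({100: ['a'], 110: ['b', 'c'], 120: ['d', 'e']}) -> 110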
def plugin_main(args, **kwargs): infile_map = kwargs['infile'] mapfile_dir = kwargs['mapfile_dir'] jobname = kwargs['jobname'] filename = kwargs['filename'] current_loop = str(int(kwargs['counter'])+1) data = DataMap.load(infile_map) # these are actual MS files datalist = [data[i].file for i in xrange(len(data))] globaldb_map = os.path.join(mapfile_dir, filename + '_globaldb') # this file holds all the globaldbs globaldbtec_map = os.path.join(mapfile_dir, filename + '_globaldbtec') # this file holds all the globaldbs globaldbtec2_map = os.path.join(mapfile_dir, filename + '_globaldbtec2') # this file holds all the globaldbs globaldbFR_map = os.path.join(mapfile_dir, filename + '_globaldbFR') # this file holds all the globaldbs globaldbCD_map = os.path.join(mapfile_dir, filename + '_globaldbCD') # this file holds all the globaldbs globaldbamp_map = os.path.join(mapfile_dir, filename + '_globaldbamp') # this file holds all the globaldbs h5parmtec_map = os.path.join(mapfile_dir, filename + '_h5parmtec') # this file holds all the h5parms h5parmtec2_map = os.path.join(mapfile_dir, filename + '_h5parmtec2') # this file holds all the h5parms h5parmFR_map = os.path.join(mapfile_dir, filename + '_h5parmFR') # this file holds all the h5parms h5parmCD_map = os.path.join(mapfile_dir, filename + '_h5parmCD') # this file holds all the h5parms h5parmamp_map = os.path.join(mapfile_dir, filename + '_h5parmamp') # this file holds all the h5parms map_out_globaldb = DataMap([]) map_out_globaldbtec = DataMap([]) map_out_globaldbtec2 = DataMap([]) map_out_globaldbFR = DataMap([]) map_out_globaldbCD = DataMap([]) map_out_globaldbamp = DataMap([]) map_out_h5parmtec = DataMap([]) map_out_h5parmtec2 = DataMap([]) map_out_h5parmFR = DataMap([]) map_out_h5parmCD = DataMap([]) map_out_h5parmamp = DataMap([]) map_out_globaldb.data.append(DataProduct( data[0].host, jobname + '.globaldb_loop' + current_loop, False)) map_out_globaldbtec.data.append(DataProduct( data[0].host, jobname + '.globaldbtec_loop' + current_loop, False)) map_out_globaldbtec2.data.append(DataProduct( data[0].host, jobname + '.globaldbtec2_loop' + current_loop, False)) map_out_globaldbFR.data.append(DataProduct( data[0].host, jobname + '.globaldbFR_loop' + current_loop, False)) map_out_globaldbCD.data.append(DataProduct( data[0].host, jobname + '.globaldbCD_loop' + current_loop, False)) map_out_globaldbamp.data.append(DataProduct( data[0].host, jobname + '.globaldbamp_loop' + current_loop, False)) map_out_h5parmtec.data.append(DataProduct( data[0].host, jobname + '_loop' + current_loop + '.h5parmtec', False)) map_out_h5parmtec2.data.append(DataProduct( data[0].host, jobname + '_loop' + current_loop + '.h5parmtec2', False)) map_out_h5parmFR.data.append(DataProduct( data[0].host, jobname + '_loop' + current_loop + '.h5parmFR', False)) map_out_h5parmCD.data.append(DataProduct( data[0].host, jobname + '_loop' + current_loop + '.h5parmCD', False)) map_out_h5parmamp.data.append(DataProduct( data[0].host, jobname + '_loop' + current_loop + '.h5parmamp', False)) globaldbFR_folder = jobname + '.globaldbFR_loop' + current_loop globaldbCD_folder = jobname + '.globaldbCD_loop' + current_loop globaldbamp_folder = jobname + '.globaldbamp_loop' + current_loop image_high1 = jobname + '_image_high1_loop' + current_loop image_high2 = jobname + '_image_high2_loop' + current_loop image_mask = jobname + '_mask_high1_loop' + current_loop filter_model = jobname + '_filter_model_loop' + current_loop sourcedb_target = jobname + '-make_sourcedb_target_loop' + current_loop 
    image_high1_pattern = image_high1 + '-MFS-image.fits'
    image_high2_sources = image_high2 + '-sources.txt'

    map_out_globaldb.save(globaldb_map)
    map_out_globaldbtec.save(globaldbtec_map)
    map_out_globaldbtec2.save(globaldbtec2_map)
    map_out_globaldbFR.save(globaldbFR_map)
    map_out_globaldbCD.save(globaldbCD_map)
    map_out_globaldbamp.save(globaldbamp_map)
    map_out_h5parmtec.save(h5parmtec_map)
    map_out_h5parmtec2.save(h5parmtec2_map)
    map_out_h5parmFR.save(h5parmFR_map)
    map_out_h5parmCD.save(h5parmCD_map)
    map_out_h5parmamp.save(h5parmamp_map)

    result = {'globaldb': globaldb_map,
              'globaldbtec': globaldbtec_map,
              'globaldbtec2': globaldbtec2_map,
              'globaldbFR': globaldbFR_map,
              'globaldbCD': globaldbCD_map,
              'globaldbamp': globaldbamp_map,
              'h5parmtec': h5parmtec_map,
              'h5parmtec2': h5parmtec2_map,
              'h5parmFR': h5parmFR_map,
              'h5parmCD': h5parmCD_map,
              'h5parmamp': h5parmamp_map,
              'plotstec': 'plots-tec' + current_loop,
              'plotstec2': 'plots-tec2' + current_loop,
              'plotsFR': 'plots-fr' + current_loop,
              'plotsCD': 'plots-cd' + current_loop,
              'plotsamp': 'plots-amp' + current_loop,
              'globaldbFR_folder': globaldbFR_folder,
              'globaldbCD_folder': globaldbCD_folder,
              'globaldbamp_folder': globaldbamp_folder,
              'image_high1': image_high1,
              'image_high1_pattern': image_high1_pattern,
              'image_mask': image_mask,
              'image_high2': image_high2,
              'image_high2_sources': image_high2_sources,
              'filter_model': filter_model,
              'sourcedb_target': sourcedb_target}
    return result
def main(ms_input, outmapname=None, mapfile_dir=None,
         cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1.,
         y_axis_stretch=1., calc_y_axis_stretch=False,
         apply_y_axis_stretch_highres=True, apply_y_axis_stretch_lowres=True):
    """
    Group a list of MS files by frequency and compute the image sizes and
    averaging parameters.

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname : str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        Cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        Cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        Size of the high-res images, in units of the FWHM
    fieldsize_lowres : float, optional
        Size of the low-res images, in units of the FWHM
    image_padding : float, optional
        Padding factor applied to the image sizes
    y_axis_stretch : float, optional
        Stretch (or compression) factor applied to the y-axis
    calc_y_axis_stretch : bool, optional
        Adjust the image sizes returned by this script for the mean
        elevation. If True, the value of y_axis_stretch above is ignored
    apply_y_axis_stretch_highres : bool, optional
        Apply the y-axis stretch to the high-res image sizes
    apply_y_axis_stretch_lowres : bool, optional
        Apply the y-axis stretch to the low-res image sizes

    Returns
    -------
    result : dict
        Dict with the names of the generated mapfiles
    """
    if not outmapname or not mapfile_dir:
        raise ValueError('InitSubtract_sort_and_compute: outmapname and mapfile_dir are needed!')
    if isinstance(ms_input, str):
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # mapfile entries are DataProducts; the name is in item.file
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif isinstance(ms_input, list):
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('InitSubtract_sort_and_compute: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)
    calc_y_axis_stretch = input2bool(calc_y_axis_stretch)
    apply_y_axis_stretch_highres = input2bool(apply_y_axis_stretch_highres)
    apply_y_axis_stretch_lowres = input2bool(apply_y_axis_stretch_lowres)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    print("InitSubtract_sort_and_compute.py: Putting files into bands.")
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    for i, band in enumerate(bands):
        print("InitSubtract_sort_and_compute.py: Working on Band:", band.name)
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg,
            fieldsize_highres, fieldsize_lowres)

        # Calculate y_axis_stretch if desired
        if calc_y_axis_stretch and i == 0:
            y_axis_stretch = 1.0 / np.sin(band.mean_el_rad)
            print("InitSubtract_sort_and_compute.py: Using y-axis stretch of:", y_axis_stretch)

        # Adjust sizes so that we get the correct ones below
        if apply_y_axis_stretch_highres:
            imsize_high_res /= y_axis_stretch
        if apply_y_axis_stretch_lowres:
            imsize_low_res /= y_axis_stretch

        imsize_high_res = band.get_optimum_size(int(imsize_high_res))
        imsize_high_res_stretch = band.get_optimum_size(int(imsize_high_res * y_axis_stretch))
        high_size_map.append(DataProduct(
            'localhost', str(imsize_high_res) + " " + str(imsize_high_res_stretch), False))

        imsize_low_res = band.get_optimum_size(int(imsize_low_res))
        imsize_low_res_stretch = band.get_optimum_size(int(imsize_low_res * y_axis_stretch))
        low_size_map.append(DataProduct(
            'localhost', str(imsize_low_res) + " " + str(imsize_low_res_stretch), False))

        imsize_high_pad = band.get_optimum_size(int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(int(imsize_high_res * image_padding * y_axis_stretch))
        high_paddedsize_map.append(DataProduct(
            'localhost', str(imsize_high_pad) + " " + str(imsize_high_pad_stretch), False))

        imsize_low_pad = band.get_optimum_size(int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(int(imsize_low_res * image_padding * y_axis_stretch))
        low_paddedsize_map.append(DataProduct(
            'localhost', str(imsize_low_pad) + " " + str(imsize_low_pad_stretch), False))

    print("InitSubtract_sort_and_compute.py: Computing averaging steps.")
    (freqstep, timestep) = bands[0].get_averaging_steps()
    (nwavelengths_high, nwavelengths_low) = bands[0].nwavelengths(
        cellsize_highres_deg, cellsize_lowres_deg, timestep)

    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    nwavelengths_high_map = DataMap([])
    nwavelengths_low_map = DataMap([])
    for index in range(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))
        nwavelengths_high_map.append(DataProduct('localhost', str(nwavelengths_high), False))
        nwavelengths_low_map.append(DataProduct('localhost', str(nwavelengths_low), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir, outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir, outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    nwavelengths_high_name = os.path.join(mapfile_dir, outmapname + '_nwavelengths_high')
    nwavelengths_high_map.save(nwavelengths_high_name)
    nwavelengths_low_name = os.path.join(mapfile_dir, outmapname + '_nwavelengths_low')
    nwavelengths_low_map.save(nwavelengths_low_name)

    result = {'groupmap': groupmapname,
              'single_mapfile': file_single_mapname,
              'high_size_mapfile': high_sizename,
              'low_size_mapfile': low_sizename,
              'high_padsize_mapfile': high_padsize_name,
              'low_padsize_mapfile': low_padsize_name,
              'freqstep': freqstepname,
              'timestep': timestepname,
              'nwavelengths_high_mapfile': nwavelengths_high_name,
              'nwavelengths_low_mapfile': nwavelengths_low_name}
    return result
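# The y-axis stretch used above compensates the apparent N-S compression of a
# low-elevation field: a source at mean elevation el spans roughly 1/sin(el)
# more pixels along the y-axis. A one-line check (numpy assumed, as above;
# the elevation value is made up):
#
#   import numpy as np
#   y_axis_stretch = 1.0 / np.sin(np.radians(40.0))   # ~1.56 at 40 deg elevation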
def run(self, awimager_output, ms_per_image, sourcelist, target,
        output_image, minbaseline, maxbaseline, processed_ms_dir,
        fillrootimagegroup_exec, environment, sourcedb, concat_ms,
        correlated_output_location, msselect_executable):
    """
    :param awimager_output: Path to the casa image produced by awimager
    :param ms_per_image: The X (90) measurement sets scheduled to create
        the image
    :param sourcelist: List of sources found in the image
    :param target: <unused>
    :param minbaseline: Minimum baseline used for the image
    :param maxbaseline: Largest/maximum baseline used for the image
    :param processed_ms_dir: The X (90) measurement sets actually used to
        create the image
    :param fillrootimagegroup_exec: Executable used to add image data to
        the hdf5 image
    :rtype: self.outputs['hdf5'] set to "succes" to signal node success
    :rtype: self.outputs['image'] path to the produced hdf5 image
    """
    self.environment.update(environment)
    with log_time(self.logger):
        ms_per_image_map = DataMap.load(ms_per_image)

        # *****************************************************************
        # 1. add image info
        # Get all the files in the processed measurement dir
        file_list = os.listdir(processed_ms_dir)

        processed_ms_paths = []
        ms_per_image_map.iterator = DataMap.SkipIterator
        for item in ms_per_image_map:
            ms_path = item.file
            processed_ms_paths.append(ms_path)

        # add the information to the image
        try:
            self.logger.debug("Start addImage Info")
            addimg.addImagingInfo(awimager_output, processed_ms_paths,
                                  sourcedb, minbaseline, maxbaseline)
        except Exception as error:
            self.logger.warn("addImagingInfo threw an exception:")
            self.logger.warn(error)
            # Catch the "already done" error: allows for rerunning of the
            # recipe
            if "addImagingInfo already done" in str(error):
                self.logger.warn("addImagingInfo already done, continue")
            else:
                raise
        # The majority of the tables is updated correctly

        # ***************************************************************
        # 2. convert to hdf5 image format
        output_directory = None
        pim_image = pim.image(awimager_output)
        try:
            self.logger.info(
                "Saving image in HDF5 format to: {0}".format(output_image))
            # Create the output directory
            output_directory = os.path.dirname(output_image)
            create_directory(output_directory)
            # save the image
            pim_image.saveas(output_image, hdf5=True)
        except Exception as error:
            self.logger.error(
                "Exception raised inside pyrap.images: {0}".format(str(error)))
            raise
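# The addImagingInfo call above tolerates exactly one failure mode (the
# "already done" case) so that the recipe can be rerun on partial results.
# The general pattern, as a small sketch in plain Python; the marker string
# and step callable are placeholders:
def run_idempotent(step, already_done_marker, logger):
    """Run step(); swallow only the 'already done' error, re-raise the rest."""
    try:
        step()
    except Exception as error:
        if already_done_marker in str(error):
            logger.warn("step already done, continuing")
        else:
            raise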
def plugin_main(args, **kwargs):
    mapfile_in = kwargs['mapfile_in']
    lotss_radius = kwargs['lotss_radius']
    lbcs_radius = kwargs['lbcs_radius']
    im_radius = float(kwargs['im_radius'])
    image_limit_Jy = float(kwargs['image_limit_Jy'])
    bright_limit_Jy = float(kwargs['bright_limit_Jy'])
    lotss_result_file = kwargs['lotss_result_file']
    lotss_catalogue = kwargs['lotss_catalogue']
    lbcs_catalogue = kwargs['lbcs_catalogue']
    delay_cals_file = kwargs['delay_cals_file']
    match_tolerance = float(kwargs['match_tolerance'])
    # interpret the string-valued flag as a boolean
    fail_lotss_ok = kwargs['continue_no_lotss'].lower() in ['true', 't', '1']
    mslist = DataMap.load(mapfile_in)
    MSname = mslist[0].file
    # For testing
    #MSname = kwargs['MSname']

    ## first check for a valid delay_calibrator file
    if os.path.isfile(delay_cals_file):
        print('Delay calibrators file {:s} exists! returning.'.format(delay_cals_file))
        return

    ## look for or download LBCS
    print("Attempting to find or download LBCS catalogue.")
    lbcs_catalogue = my_lbcs_catalogue(MSname, Radius=lbcs_radius, outfile=lbcs_catalogue)
    ## look for or download LoTSS
    print("Attempting to find or download LoTSS catalogue.")
    lotss_catalogue = my_lotss_catalogue(MSname, Radius=lotss_radius,
                                         bright_limit_Jy=bright_limit_Jy,
                                         outfile=lotss_catalogue)

    ## if LBCS exists, and either LoTSS exists or continue_no_lotss is True,
    ## process the catalogue(s); else log an error message and stop
    if len(lbcs_catalogue) == 0:
        logging.error('LBCS coverage does not exist, and catalogue not found on disk.')
        return
    if len(lotss_catalogue) == 0 and not fail_lotss_ok:
        logging.error('LoTSS coverage does not exist, and continue_no_lotss is set to False.')
        return

    ## if the LoTSS catalogue is empty, write out the delay cals only and stop
    if len(lotss_catalogue) == 0:
        print('Target field not in LoTSS coverage yet! Only writing {:s} based on LBCS'.format(delay_cals_file))
        ## Add the radius from phase centre to the catalogue
        RATar, DECTar = grab_coo_MS(input2strlist_nomapfile(MSname)[0])
        ptg_coords = SkyCoord(RATar, DECTar, frame='icrs', unit='deg')
        src_coords = SkyCoord(lbcs_catalogue['RA'], lbcs_catalogue['DEC'], frame='icrs', unit='deg')
        separations = src_coords.separation(ptg_coords)
        seps = Column(separations.deg, name='Radius')
        lbcs_catalogue.add_column(seps)
        ## rename the source_id column
        lbcs_catalogue.rename_column('Observation', 'Source_id')
        ## add in some dummy data
        Total_flux = Column(np.ones(len(lbcs_catalogue)), name='Total_flux')
        lbcs_catalogue.add_column(Total_flux)
        ## set to a default of 20 arcsec
        LGZ_Size = Column(np.ones(len(lbcs_catalogue)) * 20., name='LGZ_Size')
        lbcs_catalogue.add_column(LGZ_Size)
        ## order based on radius from the phase centre
        lbcs_catalogue.sort('Radius')
        ## write the catalogue
        lbcs_catalogue.write(delay_cals_file, format='csv')
        return

    ## else continue
    result = find_close_objs(lotss_catalogue, lbcs_catalogue, tolerance=match_tolerance)

    ## check if there are any matches
    if len(result) == 0:
        logging.error('LoTSS and LBCS coverage exists, but no matches found. '
                      'This indicates something went wrong, please check your catalogues.')
        return
    else:
        # add radius to the catalogue
        RATar, DECTar = grab_coo_MS(input2strlist_nomapfile(MSname)[0])
        ptg_coords = SkyCoord(RATar, DECTar, frame='icrs', unit='deg')
        src_coords = SkyCoord(result['RA'], result['DEC'], frame='icrs', unit='deg')
        separations = src_coords.separation(ptg_coords)
        seps = Column(separations.deg, name='Radius')
        result.add_column(seps)
        ## order by radius from the phase centre
        result.sort('Radius')

        ## Write catalogues
        ## 1 - delay calibrators -- from lbcs_catalogue
        result.write(delay_cals_file, format='csv')
        print('Writing delay calibrator candidate file {:s}'.format(delay_cals_file))

        ## sources to image -- first remove things that are already in the
        ## delay_cals_file
        good_index = [x for x, src_id in enumerate(lotss_catalogue['Source_id'])
                      if src_id not in result['Source_id']]
        tmp_cat = lotss_catalogue[good_index]

        ## make a flux cut
        image_index = np.where(tmp_cat['Peak_flux'] >= image_limit_Jy * 1e3)[0]
        flux_cut_sources = tmp_cat[image_index]

        ## make a radius cut
        src_coords = SkyCoord(flux_cut_sources['RA'], flux_cut_sources['DEC'], frame='icrs', unit='deg')
        separations = src_coords.separation(ptg_coords)
        seps = Column(separations.deg, name='Radius')
        flux_cut_sources.add_column(seps)
        good_idx = np.where(flux_cut_sources['Radius'] <= im_radius)[0]
        sources_to_image = flux_cut_sources[good_idx]

        nsrcs = float(len(sources_to_image))
        print("There are {0} sources above {1} Jy within {2} degrees of the phase centre.".format(
            nsrcs, image_limit_Jy, im_radius))
        sources_to_image.write(lotss_result_file, format='csv')
    return
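# Both branches above attach a 'Radius' column (separation from the phase
# centre) before sorting. A self-contained sketch with astropy; the pointing
# and source positions below are made-up values:
from astropy.coordinates import SkyCoord
from astropy.table import Column, Table

def add_radius_column(catalogue, ra_ptg_deg, dec_ptg_deg):
    """Add a 'Radius' column (deg from the pointing centre) and sort on it."""
    ptg = SkyCoord(ra_ptg_deg, dec_ptg_deg, frame='icrs', unit='deg')
    src = SkyCoord(catalogue['RA'], catalogue['DEC'], frame='icrs', unit='deg')
    catalogue.add_column(Column(src.separation(ptg).deg, name='Radius'))
    catalogue.sort('Radius')
    return catalogue

# e.g. add_radius_column(Table({'RA': [10.1, 10.5], 'DEC': [45.0, 45.2]}), 10.0, 45.0)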
def finalize(self): """ Finalize this operation """ # Add output datamaps to direction object for later use self.direction.input_files_single_mapfile = os.path.join( self.pipeline_mapfile_dir, 'input_files_single.mapfile') self.direction.shifted_model_data_mapfile = os.path.join( self.pipeline_mapfile_dir, 'corrupt_final_model.mapfile') self.direction.diff_models_field_mapfile = os.path.join( self.pipeline_mapfile_dir, 'shift_diff_model_to_field.mapfile') self.direction.dir_indep_parmdbs_mapfile = os.path.join( self.pipeline_mapfile_dir, 'dir_indep_instrument_parmdbs.mapfile') self.direction.dir_indep_skymodels_mapfile = os.path.join( self.pipeline_mapfile_dir, 'full_skymodels.mapfile') self.direction.dir_indep_facet_skymodels_mapfile = os.path.join( self.pipeline_mapfile_dir, 'make_facet_skymodels_all.mapfile') self.direction.dir_dep_parmdb_mapfile = os.path.join( self.pipeline_mapfile_dir, 'merge_selfcal_parmdbs.mapfile') self.direction.selfcal_plots_mapfile = os.path.join( self.pipeline_mapfile_dir, 'make_selfcal_plots.mapfile') self.direction.facet_image_mapfile = os.path.join( self.pipeline_mapfile_dir, 'final_image.mapfile') self.direction.facet_model_mapfile = os.path.join( self.pipeline_mapfile_dir, 'final_model_rootnames.mapfile') self.direction.facet_premask_mapfile = os.path.join( self.pipeline_mapfile_dir, 'premask.mapfile') self.direction.wsclean_modelimg_size_mapfile = os.path.join( self.pipeline_mapfile_dir, 'pad_model_images.padsize.mapfile') self.direction.verify_subtract_mapfile = os.path.join( self.pipeline_mapfile_dir, 'verify_subtract.break.mapfile') # Store results of verify_subtract check. This will work if the verification # was done using multiple bands although we use only one at the moment if os.path.exists(self.direction.verify_subtract_mapfile ) and not self.parset['skip_selfcal_check']: ok_mapfile = DataMap.load(self.direction.verify_subtract_mapfile) ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile] if all(ok_flags): self.direction.selfcal_ok = True else: self.direction.selfcal_ok = False elif self.parset['skip_selfcal_check']: self.direction.selfcal_ok = True else: self.direction.selfcal_ok = False # Delete all data used only for selfcal as they're no longer needed. 
# Note: we keep the data if selfcal failed verification, so that the user # can check them for problems self.direction.cleanup_mapfiles = [ os.path.join(self.pipeline_mapfile_dir, 'make_sourcedb_all_facet_sources.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'make_sourcedb_cal_facet_sources.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'concat_averaged_input.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'average_pre_compressed.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'average_post_compressed.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'corrupt_final_model.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'predict_all_model_data.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'shift_cal.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'concat_data.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'concat_corr.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'concat_blavg_data.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'concat0_input.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'concat1_input.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'concat2_input.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'concat3_input.mapfile'), os.path.join(self.pipeline_mapfile_dir, 'concat4_input.mapfile') ] if not self.parset[ 'keep_avg_facet_data'] and self.direction.name != 'target': # Add averaged calibrated data for the facet to files to be deleted. # These are only needed if the user wants to reimage by hand (e.g., # with a different weighting). They are always kept for the target self.direction.cleanup_mapfiles.append( os.path.join(self.pipeline_mapfile_dir, 'concat_averaged_compressed.mapfile')) if not self.parset['keep_unavg_facet_data']: # Add unaveraged calibrated data for the facet to files to be deleted. # These are only needed if the user wants to phase shift them to # another direction (e.g., to combine several facets together before # imaging them all at once) self.direction.cleanup_mapfiles.append( os.path.join(self.pipeline_mapfile_dir, 'shift_empty.mapfile')) if self.direction.selfcal_ok or not self.parset[ 'exit_on_selfcal_failure']: self.log.debug('Cleaning up files (direction: {})'.format( self.direction.name)) self.direction.cleanup()
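# The selfcal verification above reduces a mapfile of stringified booleans to
# a single pass/fail flag. A sketch of that reduction, assuming the mapfile
# entries hold strings like 'True' or 'False' (as written by the pipeline):
import ast

def selfcal_passed(ok_mapfile_entries):
    """ok_mapfile_entries: iterable of strings; True only if all are 'True'."""
    return all(ast.literal_eval(entry) for entry in ok_mapfile_entries)

# e.g. selfcal_passed(['True', 'True']) -> True
#      selfcal_passed(['True', 'False']) -> False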
def plugin_main(args, **kwargs):
    """
    Takes in a list of targets and returns the appropriate one in a mapfile.
    Knows which is the current target by storing the target ID in a mapfile.
    Outputs an expanded list of the current target.

    Parameters
    ----------
    infile : str
        Mapfile with the input measurement sets
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    wd : str
        Working directory
    counter : str
        Index of the current target in the direction list
    manual : str
        If true-like ('true', 't', '1'), read targets from target_file
        instead of selecting them from the LOBOS catalogue
    target_file : str, optional
        List of all targets (required when manual is true)
    nP : str, optional
        Minimum number of 'P' flags a LOBOS source must have (default 3)
    radius : str, optional
        Search radius in degrees around the phase centre (default 2.5)

    Returns
    -------
    result : dict
        Output datamap filename
    """
    infile_map = kwargs['infile']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    outdir = kwargs['wd']
    tick = int(kwargs['counter'])
    manual = kwargs['manual'].lower() in ['true', 't', '1']

    data = DataMap.load(infile_map)  # these are the actual MS files
    datalist = [data[i].file for i in xrange(len(data))]

    if manual:
        target_file = kwargs['target_file']
    try:
        # if the user has defined a different value, use it
        nP = int(kwargs['nP'])
    except (KeyError, ValueError):
        nP = 3
    try:
        # if the user has defined a different value, use it
        radius = float(kwargs['radius'])
    except (KeyError, ValueError):
        radius = 2.5

    # if tick == 0, we need to do the work to make the directions files
    # etc.; otherwise we just update the ticker
    # this file holds all the output measurement sets
    fileid = os.path.join(mapfile_dir, filename)
    # this big file holds all the directions
    bigfileid = os.path.join(mapfile_dir, filename + '_bigfield')

    if tick == 0:
        map_out_big = DataMap([])
        if manual:
            # if the user has provided a list of targets, use it; otherwise
            # use LOBOS to find good targets
            with open(target_file, 'r') as f:
                for line in f:
                    if 'RA' not in line:
                        coords = (line.rstrip('\n')).split(',')
                        map_out_big.data.append(DataProduct(
                            '[\"' + coords[0] + '\",\"' + coords[1] + '\"]',
                            coords[2], False))
        else:
            # get the actual filename from the map provided
            infile = ((DataMap.load(infile_map))[0]).file
            table = pyrap.tables.table(infile + '/FIELD', readonly=True)
            ra = math.degrees(
                float(table.getcol('PHASE_DIR')[0][0][0]) % (2 * math.pi))
            dec = math.degrees(float(table.getcol('PHASE_DIR')[0][0][-1]))
            table.close()
            hexcoords = SkyCoord(ra, dec, unit='degree', frame='fk5')
            hexcoords = hexcoords.to_string('hmsdms', sep=':')
            hexra = hexcoords.split(' ')[0]
            hexdec = hexcoords.split(' ')[1]
            if not os.path.isfile(outdir + '/lobos_stats.sum'):
                os.system('wget http://www.jb.man.ac.uk/~njj/lobos_stats.sum'
                          ' -P ' + outdir)
            lobos = np.loadtxt(outdir + '/lobos_stats.sum', dtype='S')
            lobos_coord = None
            for l in lobos:
                newcoords = SkyCoord(l[1], l[2], unit=(u.hourangle, u.deg),
                                     frame='fk5')
                new = np.array([newcoords.ra.degree, newcoords.dec.degree])
                if lobos_coord is None:
                    lobos_coord = np.copy(new)
                else:
                    lobos_coord = np.vstack((lobos_coord, new))
            a = correlate(np.array([[ra, dec]]), 0, 1, lobos_coord, 0, 1,
                          radius)
            for i in np.asarray(a[:, 1], dtype='int'):
                if lobos[i][5].count('P') >= nP:
                    namera = lobos[i, 1].replace(':', '').split('.')[0]
                    namedec = lobos[i, 2].replace(':', '').split('.')[0]
                    # phase-shift RA and DEC
                    dpppra = lobos[i, 1].replace(':', 'h', 1).replace(
                        ':', 'm', 1) + 's'
                    dpppdec = lobos[i, 2].replace(':', 'd', 1).replace(
                        ':', 'm', 1) + 's'
                    hemisphere = '-' if '-' in hexdec else '+'
                    # output filename
                    outfile = namera + hemisphere + namedec
                    map_out_big.data.append(DataProduct(
                        '[\"' + dpppra + '\",\"' + dpppdec + '\"]',
                        outfile, False))
        map_out_big.save(bigfileid)  # save all directions
        current_coords = map_out_big[0].host  # save current direction
        current_name = map_out_big[0].file  # save current filename
        n = len(map_out_big)
    else:
        data_big = DataMap.load(bigfileid)  # load all directions
        current_coords = data_big[tick].host  # save current direction
        current_name = data_big[tick].file
        n = len(data_big)  # current progress

    map_out = DataMap([])
    for msID, ms_file in enumerate(datalist):
        map_out.data.append(DataProduct(
            data[msID].host,
            '/'.join(data[msID].file.split('/')[:-1]) + '/' + current_name
            + '_' + data[msID].file.split('/')[-1],
            data[msID].skip))
    map_out.save(fileid)  # save all output measurement sets

    # check how far the progress is
    do_break = (tick + 1) == n

    result = {'targetlist': bigfileid, 'cords': current_coords,
              'ndir': int(n), 'break': do_break, 'mapfile': fileid}
    return result
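# Illustrative driver sketch for the plugin above: a generic-pipeline-style
# loop would call it once per direction with an increasing 'counter' until
# the returned 'break' flag signals the last target. All argument values
# here are hypothetical.
def _drive_targets_sketch():
    tick = 0
    while True:
        result = plugin_main([], infile='input.mapfile',
                             mapfile_dir='mapfiles',
                             filename='current_target.mapfile',
                             wd='/tmp', counter=str(tick), manual='false')
        # ... run the per-direction steps on result['mapfile'] here ...
        if result['break']:
            break
        tick += 1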
def go(self):
    """
    imager_bbs functionality. Called by the framework performing all the work
    """
    super(imager_bbs, self).go()
    self.logger.info("Starting imager_bbs run")

    # ********************************************************************
    # 1. Load and validate the data
    ms_map = MultiDataMap.load(self.inputs['args'][0])
    parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])

    # TODO: DataMap extension
    # # Check if the input has equal length and is on the same nodes
    # if not validate_data_maps(ms_map, parmdb_map):
    #     self.logger.error("The combination of mapfiles failed validation:")
    #     self.logger.error("ms_map: \n{0}".format(ms_map))
    #     self.logger.error("parmdb_map: \n{0}".format(parmdb_map))
    #     return 1

    # *********************************************************************
    # 2. Start the node scripts
    jobs = []
    node_command = " python %s" % (self.__file__.replace("master", "nodes"))
    map_dir = os.path.join(self.config.get("layout", "job_directory"),
                           "mapfiles")
    run_id = str(self.inputs.get("id"))

    # Update the skip fields of the three maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    for w, x, y in zip(ms_map, parmdb_map, sourcedb_map):
        w.skip = x.skip = y.skip = (w.skip or x.skip or y.skip)

    ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
        DataMap.SkipIterator
    for (idx, (ms, parmdb, sourcedb)) in enumerate(
            zip(ms_map, parmdb_map, sourcedb_map)):
        # host is the same for each entry (see validate_data_maps)
        host, ms_list = ms.host, ms.file

        # Write data maps to MultiDataMaps
        ms_list_path = os.path.join(
            map_dir, "%s-%s_map_%s.map" % (host, idx, run_id))
        MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

        parmdb_list_path = os.path.join(
            map_dir, "%s-%s_parmdb_%s.map" % (host, idx, run_id))
        MultiDataMap(
            [tuple([host, parmdb.file, False])]).save(parmdb_list_path)

        sourcedb_list_path = os.path.join(
            map_dir, "%s-%s_sky_%s.map" % (host, idx, run_id))
        MultiDataMap(
            [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path)

        arguments = [self.inputs['bbs_executable'], self.inputs['parset'],
                     ms_list_path, parmdb_list_path, sourcedb_list_path]
        jobs.append(ComputeJob(
            host, node_command, arguments,
            resources={"cores": self.inputs['nthreads']}))

    # start the jobs and wait till all are finished
    self._schedule_jobs(jobs)

    # **********************************************************************
    # 3. Validate the node output and construct the output mapfile.
    if self.error.isSet():  # if one of the nodes failed
        self.logger.error("One of the nodes failed while performing "
                          "a BBS run. Aborting: concat.ms corruption")
        return 1

    # return the output: the measurement sets that are calibrated
    # (calibrated data is placed in the ms sets)
    MultiDataMap(ms_map).save(self.inputs['mapfile'])
    self.logger.info("Wrote file with calibrated data")

    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0
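# Note on the per-job mapfile names written above (illustrative values):
# for host 'node071', job index 3 and run id '42' the three files would be
#   node071-3_map_42.map, node071-3_parmdb_42.map, node071-3_sky_42.map
# each holding a single-entry MultiDataMap for that node.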
def _add_name(inmap, suffix):
    """Append `suffix` to the 'file' field of every item in mapfile `inmap`."""
    dmap = DataMap.load(inmap)
    for item in dmap:
        item.file += suffix
    return dmap
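# Hypothetical usage sketch: tag every entry of 'images.mapfile' so that a
# later step can point at a derived product next to each original file.
tagged_map = _add_name('images.mapfile', '.restored')
tagged_map.save('images_restored.mapfile')  # example output path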
# parser.add_argument('-v', '--verbose', help='More detailed information', action='store_true')
# parser.add_argument('-f', '--faillog', help='Name of a file which will contain a list of failed commands from the list.', default=None)
# parser.add_argument('-N', '--NumberOfTasks', help='Number of concurrent commands.', type=int, default=0)
# parser.add_argument('-l', '--Logs', help='Individual log files for each process.', action='store_true')
# parser.add_argument('-R', '--retry', help='Number of times failed commands should be retried after all commands ran through', type=int, default=-1)
# parser.add_argument('-L', '--low', help='Low index of the commandlist. Start from here.', type=int, default=0)
# parser.add_argument('-H', '--high', help='High index of the commandlist. End execution at this index', type=int, default=None)
args = parser.parse_args()

mm = MapfileManager()
# print 'MAP: ', mm.map
# mm.expand(args.number)
# mm.from_parts(ntimes=args.number)
mm.from_parts(data=['d1', 'd2', 'd3'], ntimes=args.number)
dp = DataProduct('i am', 'last', False)
dmtest = DataMap([dp])
mm.insert(2, {'host': 'i am', 'file': 'number two', 'skip': False})
mm.append(dp)
print 'MAP: ', mm.data
mm.save(args.name)
dm = DataMap.load(args.name)
print 'LOADED: ', dm
md = MultiDataProduct('localhost', dm, False)
md2 = MultiDataProduct('foreignhost', dm, False)
print 'MULTIprod', md
mm.append(md)
print 'BLA: ', mm.data
mdm = MultiDataMap([md])
print 'MULTIMAP: ', mdm
mdm.split_list(1)
print 'MULTIMAP SPLIT: ', mdm
def go(self):
    super(get_metadata, self).go()

    # ********************************************************************
    # 1. Parse and validate inputs
    args = self.inputs['args']
    product_type = self.inputs['product_type']
    global_prefix = self.inputs['parset_prefix']
    # Add a trailing dot (.) if not present in the prefix.
    if global_prefix and not global_prefix.endswith('.'):
        global_prefix += '.'

    if product_type not in self.valid_product_types:
        self.logger.warn(
            "Unknown product type: %s\n\tValid product types are: %s" %
            (product_type, ', '.join(self.valid_product_types)))

    # ********************************************************************
    # 2. Load mapfiles
    self.logger.debug("Loading input-data mapfile: %s" % args[0])
    data = DataMap.load(args[0])

    # ********************************************************************
    # 3. Call the node side of the recipe
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    data.iterator = DataMap.SkipIterator
    jobs = []
    for inp in data:
        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[inp.file, self.inputs['product_type']]))
    self._schedule_jobs(jobs)
    for job, inp in zip(jobs, data):
        if job.results['returncode'] != 0:
            inp.skip = True

    # ********************************************************************
    # 4. Check the job results, and create the output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs")
    self.logger.debug("Updating data map file: %s" % args[0])
    data.save(args[0])

    # ********************************************************************
    # 5. Create the parset-file and return it to the caller
    parset = parameterset()
    # The underscore is needed because MoM / LTA cannot differentiate
    # input and output
    prefix = "Output_%s_" % product_type
    parset.replace('%snrOf%s' % (global_prefix, prefix), str(len(jobs)))
    prefix = global_prefix + prefix

    for idx, job in enumerate(jobs):
        self.logger.debug("job[%d].results = %s" % (idx, job.results))
        # The master/node communication adds a monitor_stats entry;
        # this must be removed manually here
        meta_data_parset = metadata.to_parset(job.results)
        try:
            meta_data_parset.remove("monitor_stats")
        except Exception:
            pass  # entry may be absent
        parset.adoptCollection(meta_data_parset,
                               '%s[%d].' % (prefix, idx))

    # Return result to caller
    parset.writeFile(self.inputs["metadata_file"])
    return 0
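# Sketch of the feedback-parset layout produced above. The key names follow
# directly from the prefix logic; the '.size' subkey and the values are
# hypothetical examples of what metadata.to_parset() might emit:
#
#   <global_prefix>nrOfOutput_SkyImage_ = 2
#   <global_prefix>Output_SkyImage_[0].size = ...
#   <global_prefix>Output_SkyImage_[1].size = ...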
class imaging_pipeline(control):
    """
    The imaging pipeline is used to generate images and find sources in the
    generated images. Generated images and lists of found sources are
    complemented with meta data and thus ready for consumption by the
    Long Term Storage (LTA)

    This pipeline differs from the MSSS imaging pipeline in two aspects:
    1. It does not, by default, perform any automated parameter
       determination for the awimager.
    2. It does not output images and sourcelists to the image server.

    *subband groups*
    The imager_pipeline is able to generate images on the frequency range of
    LOFAR in parallel, combining the frequency subbands together in so-called
    subbandgroups. Each subband group will result in an image and a
    sourcelist (typically 8, because ten subbands are combined per group).

    *Time Slices*
    Images are compiled from a number of so-called (time) slices. Each slice
    comprises a short (approx. 10 min) observation of a field (an area on
    the sky) containing typically 80 subbands. The number of slices will be
    different for LBA observations (typically 9) and HBA observations
    (typically 2), due to differences in sensitivity.

    Each image will be compiled on a different cluster node to balance the
    processing load. The input- and output- files and locations are
    determined by the scheduler and specified in the parset-file.

    **This pipeline performs the following operations:**

    1. Prepare Phase. Copy the preprocessed MS's from the different compute
       nodes to the nodes where the images will be compiled (the prepare
       phase). Combine the subbands in subband groups, concatenate the
       timeslices into a single large measurement set and perform flagging,
       RFI removal and bad station exclusion.
    2. Create db. Generate a local sky model (LSM) from the global sky
       model (GSM) for the sources that are in the field-of-view (FoV).
       The LSM is stored as sourcedb. In step 3 calibration of the
       measurement sets is performed on these sources, and in step 4 it is
       used to create a mask for the awimager. The calibration solution
       will be placed in an instrument table/db, also created in this step.
    3. BBS. Calibrate the measurement set with the sourcedb from the gsm.
       In later iterations sources found in the created images will be
       added to this list, resulting in a self-calibration cycle.
    4. Awimager. The combined measurement sets are now imaged. The imaging
       is performed using a mask: the sources in the sourcedb are used to
       create a casa image masking known sources. Together with the
       measurement set an image is created.
    5. Sourcefinding. The images created in step 4 are fed to pyBDSM to
       find and describe sources. In multiple iterations, subtracting the
       found sources, all sources are collected in a sourcelist. The
       sources found here are fed back into step 2, which allows the
       measurement sets to be calibrated with sources currently found in
       the image. This loop will continue until convergence (3 times for
       the time being).
    6. Finalize. Meta data with regards to the input, computations
       performed and results are collected and added to the casa image.
       The images created are converted from casa to HDF5 and copied to
       the correct output location.
    7. Export meta data: meta data is generated ready for consumption by
       the LTA and/or the LOFAR framework.

    **Per subband-group, the following output products will be delivered:**

    a. An image
    b. A source list
    c. (Calibration solutions and corrected visibilities)
    """

    def __init__(self):
        """
        Initialize member variables and call superclass init function
        """
        control.__init__(self)
        self.input_data = DataMap()
        self.target_data = DataMap()
        self.output_data = DataMap()
        self.scratch_directory = None
        self.parset_dir = None
        self.mapfile_dir = None

    @mail_log_on_exception
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        self.logger.info("Starting imager pipeline")

        # Define scratch directory to be used by the compute nodes.
        self.scratch_directory = os.path.join(
            self.inputs['working_directory'], self.inputs['job_name'])
        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # remove prepended parset identifiers, leave only pipelinecontrol
        full_parset = self.parset
        self.parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')

        # Create directories to store communication and data files
        job_dir = self.config.get("layout", "job_directory")

        self.parset_dir = os.path.join(job_dir, "parsets")
        create_directory(self.parset_dir)
        self.mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(self.mapfile_dir)

        # *********************************************************************
        # (INPUT) Get the input from external sources and create pipeline types
        # Input measurement sets
        input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
        self.input_data.save(input_mapfile)
        self.logger.debug(
            "Wrote input UV-data mapfile: {0}".format(input_mapfile))

        # Provides location for the scratch directory and concat.ms location
        target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
        self.target_data.save(target_mapfile)
        self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile))

        # images datafiles
        output_image_mapfile = os.path.join(self.mapfile_dir,
                                            "images.mapfile")
        self.output_data.save(output_image_mapfile)
        self.logger.debug(
            "Wrote output sky-image mapfile: {0}".format(
                output_image_mapfile))

        # ******************************************************************
        # (1) prepare phase: copy and collect the ms
        concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
            processed_ms_dir = self._prepare_phase(input_mapfile,
                                                   target_mapfile)

        number_of_major_cycles = self.parset.getInt(
            "Imaging.number_of_major_cycles")

        # We start with an empty source_list map. It should contain n_output
        # entries all set to empty strings
        source_list_map_path = os.path.join(self.mapfile_dir,
                                            "initial_sourcelist.mapfile")
        source_list_map = DataMap.load(target_mapfile)  # copy the output map
        for item in source_list_map:
            item.file = ""  # set all to empty string
        source_list_map.save(source_list_map_path)

        for idx_loop in range(number_of_major_cycles):
            # *****************************************************************
            # (2) Create dbs and sky model
            parmdbs_path, sourcedb_map_path = self._create_dbs(
                concat_ms_map_path, timeslice_map_path,
                source_list_map_path=source_list_map_path,
                skip_create_dbs=False)

            # *****************************************************************
            # (3) bbs_imager recipe.
            bbs_output = self._bbs(timeslice_map_path, parmdbs_path,
                                   sourcedb_map_path, skip=False)

            # TODO: Extra recipe: concat timeslices using pyrap.concatms
            # (see prepare)

            # *****************************************************************
            # (4) Get awimager parameters from the prepare_parset and inputs
            aw_image_mapfile, maxbaseline = self._aw_imager(
                concat_ms_map_path, idx_loop, sourcedb_map_path, skip=False)

            # *****************************************************************
            # (5) Source finding
            sourcelist_map, found_sourcedb_path = self._source_finding(
                aw_image_mapfile, idx_loop, skip=False)
            # should the output be a sourcedb? instead of a sourcelist

        # TODO: minbaseline should be a parset value, as is maxbaseline..
        minbaseline = 0

        # *********************************************************************
        # (6) Finalize:
        placed_data_image_map = self._finalize(
            aw_image_mapfile, processed_ms_dir, ms_per_image_map_path,
            sourcelist_map, minbaseline, maxbaseline, target_mapfile,
            output_image_mapfile, found_sourcedb_path)

        # *********************************************************************
        # (7) Get metadata
        # Create a parset containing the metadata for MAC/SAS
        metadata_file = "%s_feedback_SkyImage" % (self.parset_file,)
        self.run_task(
            "get_metadata", placed_data_image_map,
            parset_prefix=(full_parset.getString('prefix') +
                           full_parset.fullModuleName('DataProducts')),
            product_type="SkyImage",
            metadata_file=metadata_file)

        self.send_feedback_processing(
            parameterset({'feedback_version': feedback_version}))
        self.send_feedback_dataproducts(parameterset(metadata_file))
        return 0

    def _get_io_product_specs(self):
        """
        Get input- and output-data product specifications from the
        parset-file, and do some sanity checks.
        """
        dps = self.parset.makeSubset(
            self.parset.fullModuleName('DataProducts') + '.')
        # convert input dataproducts from parset value to DataMap
        self.input_data = DataMap([
            tuple(os.path.join(location, filename).split(':')) + (skip,)
            for location, filename, skip in zip(
                dps.getStringVector('Input_Correlated.locations'),
                dps.getStringVector('Input_Correlated.filenames'),
                dps.getBoolVector('Input_Correlated.skip'))
        ])
        self.logger.debug("%d Input_Correlated data products specified" %
                          len(self.input_data))

        self.output_data = DataMap([
            tuple(os.path.join(location, filename).split(':')) + (skip,)
            for location, filename, skip in zip(
                dps.getStringVector('Output_SkyImage.locations'),
                dps.getStringVector('Output_SkyImage.filenames'),
                dps.getBoolVector('Output_SkyImage.skip'))
        ])
        self.logger.debug("%d Output_SkyImage data products specified" %
                          len(self.output_data))

        # # Sanity checks on input- and output data product specifications
        # if not validate_data_maps(self.input_data, self.output_data):
        #     raise PipelineException(
        #         "Validation of input/output data product specification failed!"
        #     )  # Turned off until DataMap is extended..

        # Target data is basically scratch data, consisting of one
        # concatenated MS per image. It must be stored on the same host as
        # the final image.
        self.target_data = copy.deepcopy(self.output_data)
        for idx, item in enumerate(self.target_data):
            item.file = os.path.join(self.scratch_directory,
                                     'ms_per_image_%d' % idx, 'concat.ms')

    @xml_node
    def _finalize(self, awimager_output_map, processed_ms_dir,
                  ms_per_image_map, sourcelist_map, minbaseline,
                  maxbaseline, target_mapfile, output_image_mapfile,
                  sourcedb_map, skip=False):
        """
        Perform the final step of the imager:
        Convert the output image to hdf5 and copy to output location
        Collect meta data and add to the image
        """
        placed_image_mapfile = self._write_datamap_to_file(
            None, "placed_image")
        self.logger.debug("Touched mapfile for correctly placed"
                          " hdf images: {0}".format(placed_image_mapfile))

        if skip:
            return placed_image_mapfile
        else:
            # run the imager_finalize recipe
            placed_image_mapfile = self.run_task(
                "imager_finalize", target_mapfile,
                awimager_output_map=awimager_output_map,
                ms_per_image_map=ms_per_image_map,
                sourcelist_map=sourcelist_map,
                sourcedb_map=sourcedb_map,
                minbaseline=minbaseline,
                maxbaseline=maxbaseline,
                target_mapfile=target_mapfile,
                output_image_mapfile=output_image_mapfile,
                processed_ms_dir=processed_ms_dir,
                placed_image_mapfile=placed_image_mapfile
            )["placed_image_mapfile"]
        return placed_image_mapfile

    @xml_node
    def _source_finding(self, image_map_path, major_cycle, skip=True):
        """
        Perform the sourcefinding step
        """
        # Create the parsets for the different sourcefinder runs
        bdsm_parset_pass_1 = self.parset.makeSubset("BDSM[0].")
        parset_path_pass_1 = self._write_parset_to_file(
            bdsm_parset_pass_1, "pybdsm_first_pass.par",
            "Sourcefinder first pass parset.")

        bdsm_parset_pass_2 = self.parset.makeSubset("BDSM[1].")
        parset_path_pass_2 = self._write_parset_to_file(
            bdsm_parset_pass_2, "pybdsm_second_pass.par",
            "Sourcefinder second pass parset.")

        # touch a mapfile to be filled with created sourcelists
        source_list_map = self._write_datamap_to_file(
            None, "source_finding_outputs",
            "map to sourcefinding outputs (sourcelist)")
        sourcedb_map_path = self._write_datamap_to_file(
            None, "source_dbs_outputs",
            "Map to sourcedbs based on found sources")

        # construct the location to save the output products of the
        # sourcefinder
        cycle_path = os.path.join(self.scratch_directory,
                                  "awimage_cycle_{0}".format(major_cycle))
        catalog_path = os.path.join(cycle_path, "bdsm_catalog")
        sourcedb_path = os.path.join(cycle_path, "bdsm_sourcedb")

        # Run the sourcefinder
        if skip:
            return source_list_map, sourcedb_map_path
        else:
            self.run_task("imager_source_finding", image_map_path,
                          bdsm_parset_file_run1=parset_path_pass_1,
                          bdsm_parset_file_run2x=parset_path_pass_2,
                          working_directory=self.scratch_directory,
                          catalog_output_path=catalog_path,
                          mapfile=source_list_map,
                          sourcedb_target_path=sourcedb_path,
                          sourcedb_map_path=sourcedb_map_path)
            return source_list_map, sourcedb_map_path

    @xml_node
    def _bbs(self, timeslice_map_path, parmdbs_map_path, sourcedb_map_path,
             skip=False):
        """
        Perform a calibration step.
        First with a set of sources from the gsm and in later iterations
        also on the found sources
        """
        # create parset for the bbs run
        parset = self.parset.makeSubset("BBS.")
        parset_path = self._write_parset_to_file(
            parset, "bbs",
            "Parset for calibration with a local sky model")

        # create the output file path
        output_mapfile = self._write_datamap_to_file(
            None, "bbs_output", "Mapfile with calibrated measurement sets.")

        converted_sourcedb_map_path = self._write_datamap_to_file(
            None, "source_db", "correctly shaped mapfile for input sourcedbs")

        if skip:
            return output_mapfile

        # The create db step produces a mapfile with a single sourcelist for
        # the different timeslices. Generate a mapfile with copies of the
        # sourcelist location: this allows validation of the maps in
        # combination.

        # get the original map data
        sourcedb_map = DataMap.load(sourcedb_map_path)
        parmdbs_map = MultiDataMap.load(parmdbs_map_path)
        converted_sourcedb_map = []

        # sanity check for correct output from previous recipes
        if not validate_data_maps(sourcedb_map, parmdbs_map):
            self.logger.error("The input files for bbs do not contain "
                              "matching host names for each entry content:")
            self.logger.error(repr(sourcedb_map))
            self.logger.error(repr(parmdbs_map))
            raise PipelineException(
                "Invalid input data for imager_bbs recipe")

        self.run_task("imager_bbs", timeslice_map_path,
                      parset=parset_path,
                      instrument_mapfile=parmdbs_map_path,
                      sourcedb_mapfile=sourcedb_map_path,
                      mapfile=output_mapfile,
                      working_directory=self.scratch_directory)

        return output_mapfile

    @xml_node
    def _aw_imager(self, prepare_phase_output, major_cycle, sky_path,
                   skip=False):
        """
        Create an image based on the calibrated, filtered and combined data.
        """
        # Create parset for the awimage recipe
        parset = self.parset.makeSubset("AWimager.")
        # Get maxbaseline from the 'full' parset
        max_baseline = self.parset.getInt("Imaging.maxbaseline")
        patch_dictionary = {"maxbaseline": str(max_baseline)}
        try:
            temp_parset_filename = patch_parset(parset, patch_dictionary)
            aw_image_parset = get_parset(temp_parset_filename)
            aw_image_parset_path = self._write_parset_to_file(
                aw_image_parset, "awimager_cycle_{0}".format(major_cycle),
                "Awimager recipe parset")
        finally:
            # remove tempfile
            os.remove(temp_parset_filename)

        # Create path to write the awimage files
        intermediate_image_path = os.path.join(
            self.scratch_directory,
            "awimage_cycle_{0}".format(major_cycle), "image")

        output_mapfile = self._write_datamap_to_file(
            None, "awimager", "output map for awimager recipe")

        mask_patch_size = self.parset.getInt("Imaging.mask_patch_size")
        auto_imaging_specs = self.parset.getBool(
            "Imaging.auto_imaging_specs")
        fov = self.parset.getFloat("Imaging.fov")
        specify_fov = self.parset.getBool("Imaging.specify_fov")
        if skip:
            pass
        else:
            # run the awimager recipe
            self.run_task("imager_awimager", prepare_phase_output,
                          parset=aw_image_parset_path,
                          mapfile=output_mapfile,
                          output_image=intermediate_image_path,
                          mask_patch_size=mask_patch_size,
                          sourcedb_path=sky_path,
                          working_directory=self.scratch_directory,
                          autogenerate_parameters=auto_imaging_specs,
                          specify_fov=specify_fov, fov=fov)

        return output_mapfile, max_baseline

    @xml_node
    def _prepare_phase(self, input_ms_map_path, target_mapfile):
        """
        Copy ms to correct location, combine the ms in slices and combine
        the time slices into a large virtual measurement set
        """
        # Create the dir where found and processed ms are placed
        # ms_per_image_map_path contains all the original ms locations:
        # this list contains possible missing files
        processed_ms_dir = os.path.join(self.scratch_directory, "subbands")
        # get the parameters, create a subset for ndppp, save
        ndppp_parset = self.parset.makeSubset("DPPP.")
        ndppp_parset_path = self._write_parset_to_file(
            ndppp_parset, "prepare_imager_ndppp", "parset for ndppp recipe")

        # create the output file paths
        # [1] output -> prepare_output
        output_mapfile = self._write_datamap_to_file(None, "prepare_output")
        time_slices_mapfile = self._write_datamap_to_file(
            None, "prepare_time_slices")
        ms_per_image_mapfile = self._write_datamap_to_file(
            None, "ms_per_image")

        # get some parameters from the imaging pipeline parset:
        slices_per_image = self.parset.getInt("Imaging.slices_per_image")
        subbands_per_image = self.parset.getInt(
            "Imaging.subbands_per_image")

        outputs = self.run_task("imager_prepare", input_ms_map_path,
                                parset=ndppp_parset_path,
                                target_mapfile=target_mapfile,
                                slices_per_image=slices_per_image,
                                subbands_per_image=subbands_per_image,
                                mapfile=output_mapfile,
                                slices_mapfile=time_slices_mapfile,
                                ms_per_image_mapfile=ms_per_image_mapfile,
                                working_directory=self.scratch_directory,
                                processed_ms_dir=processed_ms_dir)

        # validate that the prepare phase produced the correct data
        output_keys = list(outputs.keys())
        for key in ('mapfile', 'slices_mapfile', 'ms_per_image_mapfile'):
            if key not in output_keys:
                error_msg = ("The imager_prepare master script did not "
                             "return correct data. missing: {0}".format(key))
                self.logger.error(error_msg)
                raise PipelineException(error_msg)

        # Return the mapfile paths with processed data
        return output_mapfile, outputs["slices_mapfile"], \
            ms_per_image_mapfile, processed_ms_dir

    @xml_node
    def _create_dbs(self, input_map_path, timeslice_map_path,
                    source_list_map_path, skip_create_dbs=False):
        """
        Create for each of the concatenated input measurement sets
        an instrument model and parmdb
        """
        # Create the parameters set
        parset = self.parset.makeSubset("GSM.")

        # create the files that will contain the output of the recipe
        parmdbs_map_path = self._write_datamap_to_file(
            None, "parmdbs", "parmdbs output mapfile")
        sourcedb_map_path = self._write_datamap_to_file(
            None, "sky_files", "source db output mapfile")

        # run the master script
        if skip_create_dbs:
            pass
        else:
            self.run_task(
                "imager_create_dbs", input_map_path,
                monetdb_hostname=parset.getString("monetdb_hostname"),
                monetdb_port=parset.getInt("monetdb_port"),
                monetdb_name=parset.getString("monetdb_name"),
                monetdb_user=parset.getString("monetdb_user"),
                monetdb_password=parset.getString("monetdb_password"),
                assoc_theta=parset.getString("assoc_theta"),
                sourcedb_suffix=".sourcedb",
                slice_paths_mapfile=timeslice_map_path,
                parmdb_suffix=".parmdb",
                parmdbs_map_path=parmdbs_map_path,
                sourcedb_map_path=sourcedb_map_path,
                source_list_map_path=source_list_map_path,
                working_directory=self.scratch_directory)

        return parmdbs_map_path, sourcedb_map_path

    # TODO: Move these helpers to the parent class
    def _write_parset_to_file(self, parset, parset_name, message):
        """
        Write the supplied parameterset to the parameter set directory in
        the jobs dir with the filename supplied in parset_name.
        Return the full path to the created file.
""" parset_dir = os.path.join(self.config.get("layout", "job_directory"), "parsets") # create the parset dir if it does not exist create_directory(parset_dir) # write the content to a new parset file parset_path = os.path.join(parset_dir, "{0}.parset".format(parset_name)) parset.writeFile(parset_path) # display a debug log entrie with path and message self.logger.debug("Wrote parset to path <{0}> : {1}".format( parset_path, message)) return parset_path def _write_datamap_to_file(self, datamap, mapfile_name, message=""): """ Write the suplied the suplied map to the mapfile. directory in the jobs dir with the filename suplied in mapfile_name. Return the full path to the created file. Id supllied data is None then the file is touched if not existing, but existing files are kept as is """ mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles") # create the mapfile_dir if it does not exist create_directory(mapfile_dir) # write the content to a new parset file mapfile_path = os.path.join(mapfile_dir, "{0}.map".format(mapfile_name)) # display a debug log entrie with path and message if datamap != None: datamap.save(mapfile_path) self.logger.debug("Wrote mapfile <{0}>: {1}".format( mapfile_path, message)) else: if not os.path.exists(mapfile_path): DataMap().save(mapfile_path) self.logger.debug("Touched mapfile <{0}>: {1}".format( mapfile_path, message)) return mapfile_path
def _create_mapfile_ato(inmap):
    """Wrap the entries of the DataMap mapfile `inmap` in a MultiDataMap."""
    return MultiDataMap(DataMap.load(inmap))
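# Hypothetical usage sketch: each entry of the plain DataMap in
# 'files.mapfile' is wrapped so that downstream steps expecting a
# MultiDataMap (lists of files per host) can consume it.
mdmap = _create_mapfile_ato('files.mapfile')  # example mapfile name
mdmap.save('files_multi.mapfile')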
def plugin_main(args, **kwargs):
    """
    Takes in a catalogue with a target and returns an appropriate mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of the input datamap
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    target_file : str
        File containing target info
    all_to_one : str
        If true-like, only the first MS is written to the output map

    Returns
    -------
    result : dict
        Output datamap filename
    """
    # parse the inputs
    infile_map = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    target_file = kwargs['target_file']
    all_to_one = kwargs['all_to_one'].lower().capitalize()

    # the input data
    data = DataMap.load(infile_map)
    datalist = [data[i].file for i in xrange(len(data))]

    # outfile information
    fileid = os.path.join(mapfile_dir, filename)
    coordfileid = os.path.join(mapfile_dir, 'coords_' + filename)

    # initialise the output data map for the coordinates
    map_out_coords = DataMap([])

    # read in the catalogue to get source_id, RA, and DEC
    t = Table.read(target_file, format='csv')
    RA_val = t['RA_LOTSS'].data[0]
    DEC_val = t['DEC_LOTSS'].data[0]
    Source_id = t['Source_id'].data[0]
    # prefix the source ID with 'S' unless it already starts with 'I' or 'S'
    if str(Source_id)[0:1] not in ('I', 'S'):
        Source_id = 'S' + str(Source_id)

    # make a string of coordinates for the NDPPP command
    ss = '["' + str(RA_val) + 'deg","' + str(DEC_val) + 'deg"]'

    # save the coordinate information
    map_out_coords.data.append(DataProduct(ss, Source_id, False))
    map_out_coords.save(coordfileid)
    # save the coords to a variable to return
    current_coords = map_out_coords[0].host
    # get the name (source_id)
    current_name = map_out_coords[0].file

    # initialise an output data map
    map_out = DataMap([])
    if all_to_one == 'True':
        msID = 0
        map_out.data.append(
            DataProduct(
                data[msID].host,
                '/'.join(data[msID].file.split('/')[:-1]) + '/'
                + current_name + '_' + data[msID].file.split('/')[-1],
                data[msID].skip))
    else:
        for msID, ms_file in enumerate(datalist):
            map_out.data.append(
                DataProduct(
                    data[msID].host,
                    '/'.join(data[msID].file.split('/')[:-1]) + '/'
                    + current_name + '_' + data[msID].file.split('/')[-1],
                    data[msID].skip))
    # save the file
    map_out.save(fileid)

    result = {'coordfile': coordfileid, 'coords': current_coords,
              'name': current_name, 'mapfile': fileid}
    return result
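# Worked example of the renaming above (made-up values): for source 'S123'
# and an input entry '/data/L456/L456_SB010.MS', the output map entry
# becomes '/data/L456/S123_L456_SB010.MS' on the same host, with the same
# skip flag.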
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None,
         NDPPPfill=True, target_path=None, stepname=None,
         mergeLastGroup=False, truncateLastSBs=True, firstSB=None):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename : str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean
        put all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output
        files into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True:
          not allowed
        mergeLastGroup = True, truncateLastSBs = False:
          put the files from the last group that doesn't have SBperGroup
          subbands into the second-to-last group (which will then have
          more than SBperGroup entries).
        mergeLastGroup = False, truncateLastSBs = True:
          ignore the last files that don't make for a full group (not all
          files are used).
        mergeLastGroup = False, truncateLastSBs = False:
          keep the incomplete last group, or - with NDPPPfill=True - fill
          the last group with dummies.
    firstSB : int, optional
        If set, then reference the grouping of files to this
        station-subband, as if a file with this station-subband were
        included in the input files. (For HBA-low, i.e. 0 -> 100MHz,
        55 -> 110.74MHz, 512 -> 200MHz)

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile
    """
    NDPPPfill = input2bool(NDPPPfill)
    mergeLastGroup = input2bool(mergeLastGroup)
    truncateLastSBs = input2bool(truncateLastSBs)
    firstSB = input2int(firstSB)
    numSB = int(numSB)

    if not filename or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: filename and mapfile_dir are '
            'needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError(
            'sort_times_into_freqGroups: Can either merge the last partial '
            'group or truncate at the last full group, not both!')

    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"')
                       for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # the mapfile entries hold the filenames in their 'file'
                # field; an entry may itself contain a list of files
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError(
            'sort_times_into_freqGroups: type of "ms_input" unknown!')

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on", len(ms_list), \
        "files (including flagged files)."
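    # Worked example of the ms_input parsing above (hypothetical values):
    # the string "['/data/L1.MS', '/data/L2.MS']" and the plain list
    # ['/data/L1.MS', '/data/L2.MS'] both yield
    # ms_list = ['/data/L1.MS', '/data/L2.MS']; a mapfile path is loaded
    # with DataMap.load and its 'file' fields are unpacked the same way.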
    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # work only on files selected by a previous step
        if ms.lower() != 'none':
            # use the slower but more reliable way:
            obstable = pt.table(ms, ack=False)
            timestamp = int(round(np.min(obstable.getcol('TIME'))))
            #obstable = pt.table(ms+'::OBSERVATION', ack=False)
            #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
            obstable.close()
            if timestamp in time_groups:
                time_groups[timestamp]['files'].append(ms)
            else:
                time_groups[timestamp] = {
                    'files': [ms],
                    'basename': os.path.splitext(ms)[0]
                }
    print "sort_times_into_freqGroups: found", len(time_groups), "time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()  # not needed now, but later
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                file_bandwidth = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                freqset = set([freq])
                first = False
            else:
                assert file_bandwidth == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                freqset.add(freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs,
                                              time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    freqliste = np.array(list(freqset))
    freqliste.sort()
    freq_width = np.min(freqliste[1:] - freqliste[:-1])
    if file_bandwidth > freq_width:
        raise ValueError(
            "Bandwidth of files is larger than minimum frequency step "
            "between two files!")
    if file_bandwidth < (freq_width / 2.):
        raise ValueError(
            "Bandwidth of files is smaller than half the minimum frequency "
            "step between two files! (More than half the data is missing.)")

    # the new output maps
    filemap = MultiDataMap()
    groupmap = DataMap()
    # add half the SB bandwidth plus 1% in case maxfreq sits "exactly" on a
    # group border
    maxfreq = np.max(freqliste) + freq_width * 0.51
    if firstSB is not None:
        minfreq = (float(firstSB) / 512. * 100e6) + 100e6 - freq_width / 2.
        if np.min(freqliste) < minfreq:
            raise ValueError(
                'sort_times_into_freqGroups: Frequency of lowest input '
                'data is lower than reference frequency!')
    else:
        minfreq = np.min(freqliste) - freq_width / 2.
    groupBW = freq_width * numSB
    if groupBW < 1e6:
        print 'sort_times_into_freqGroups: ***WARNING***: Bandwidth of concatenated MS is lower than 1 MHz. This may cause conflicts with the concatenated file names!'
    freqborders = np.arange(minfreq, maxfreq, groupBW)
    if mergeLastGroup:
        freqborders[-1] = maxfreq
    elif truncateLastSBs:
        pass  # nothing to do! left to make the logic more clear!
    elif not truncateLastSBs and NDPPPfill:
        freqborders = np.append(freqborders, (freqborders[-1] + groupBW))
    elif not truncateLastSBs and not NDPPPfill:
        freqborders = np.append(freqborders, maxfreq)

    freqborders = freqborders[freqborders > (np.min(freqliste) - groupBW)]
    ngroups = len(freqborders) - 1
    if ngroups == 0:
        raise ValueError(
            'sort_times_into_freqGroups: Not enough input subbands to '
            'create at least one full (frequency-)group!')

    print "sort_times_into_freqGroups: Will create", ngroups, "group(s) with", numSB, "file(s) each."
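    # Worked example of the border computation (made-up numbers): subbands
    # at 120.0, 120.2, ... MHz give freq_width = 0.2 MHz; with numSB = 10,
    # groupBW = 2 MHz, and np.arange(minfreq, maxfreq, groupBW) places a
    # border every 2 MHz. The asserts above guarantee the file bandwidth
    # matches the spacing, so each file falls into exactly one
    # [border, border + groupBW) bin.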
    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        for groupIdx in xrange(ngroups):
            files = []
            skip_this = True
            filefreqs_low = np.arange(freqborders[groupIdx],
                                      freqborders[groupIdx + 1], freq_width)
            for lower_freq in filefreqs_low:
                if freq > lower_freq and freq < lower_freq + freq_width:
                    assert freq != 1e12
                    files.append(fname)
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = \
                            time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
                    skip_this = False
                elif NDPPPfill:
                    files.append('dummy.ms')
            if not skip_this:
                filemap.append(
                    MultiDataProduct(hosts[hostID % numhosts], files,
                                     skip_this))
                freqID = int((freqborders[groupIdx] +
                              freqborders[groupIdx + 1]) / 2e6)
                groupname = time_groups[time]['basename'] \
                    + '_%Xt_%dMHz.ms' % (time, freqID)
                if type(stepname) is str:
                    groupname += stepname
                if type(target_path) is str:
                    groupname = os.path.join(target_path,
                                             os.path.basename(groupname))
                groupmap.append(
                    DataProduct(hosts[hostID % numhosts], groupname,
                                skip_this))
        orphan_files = len(time_groups[time]['freq_names'])
        if freq < 1e12:
            orphan_files += 1
        if orphan_files > 0:
            print "sort_times_into_freqGroups: Had %d unassigned files in time-group %xt." % (orphan_files, time)

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename + '_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemap, nchans)
    flagmapname = os.path.join(mapfile_dir, filename + '_flags')
    flagmap.save(flagmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname,
              'flagmapfile': flagmapname}
    return result
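# The helpers input2bool and input2int used in main() come from the
# surrounding plugin module; a minimal sketch of the assumed semantics
# (the real implementations may handle more cases):
def _input2bool_sketch(invar):
    """Hypothetical stand-in illustrating the assumed string-to-bool rules."""
    if isinstance(invar, bool):
        return invar
    if invar is None:
        return False
    return str(invar).lower() in ('true', 't', '1', 'yes')

# Naming example for the groups written above (hypothetical values): a
# time-group with basename 'L123_SAP000' and timestamp 0x4E5F2A31 whose
# group is centred at 124 MHz becomes 'L123_SAP000_4E5F2A31t_124MHz.ms'.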
def main(ms_input, outmapname=None, mapfile_dir=None,
         cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5, fieldsize_lowres=6.5):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname : str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        size of the high-res images in units of the beam FWHM
    fieldsize_lowres : float, optional
        size of the low-res images in units of the beam FWHM

    Returns
    -------
    result : dict
        Dict with the names of the generated mapfiles
    """
    if not outmapname or not mapfile_dir:
        raise ValueError(
            'InitSubtract_sort_and_compute.py: outmapname and mapfile_dir '
            'are needed!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"')
                       for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # entries hold the filenames in their 'file' field; an
                # entry may itself contain a list of files
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError(
            'InitSubtract_sort_and_compute.py: type of "ms_input" unknown!')

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    print "InitSubtract_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    numfiles = 0
    for band in bands:
        print "InitSubtract_sort_and_compute.py: Working on Band:", band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename,
                                               False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            float(cellsize_highres_deg), float(cellsize_lowres_deg),
            float(fieldsize_highres), float(fieldsize_lowres))
        high_size_map.append(
            DataProduct('localhost',
                        str(imsize_high_res) + " " + str(imsize_high_res),
                        False))
        low_size_map.append(
            DataProduct('localhost',
                        str(imsize_low_res) + " " + str(imsize_low_res),
                        False))

    print "InitSubtract_sort_and_compute.py: Computing averaging steps."
    (freqstep, timestep) = bands[0].get_averaging_steps()
    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    result = {'groupmap': groupmapname,
              'single_mapfile': file_single_mapname,
              'high_size_mapfile': high_sizename,
              'low_size_mapfile': low_sizename,
              'freqstep': freqstepname,
              'timestep': timestepname}
    return result
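# Usage sketch (hypothetical paths): the returned dict maps logical names
# to mapfiles on disk, e.g.
#
#   res = main(['/data/L1.MS', '/data/L2.MS'], outmapname='initsub',
#              mapfile_dir='mapfiles')
#   DataMap.load(res['single_mapfile'])  # one entry per input MS
#   DataMap.load(res['freqstep'])        # same length, averaging step values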