def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append : str
        String to append
    append_index : bool
        If True, append a unique index to each file
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    if 'append_index' in kwargs:
        append_index = kwargs['append_index']
        if type(append_index) is str:
            if append_index.lower() == 'true':
                append_index = True
            else:
                append_index = False
    else:
        append_index = False
    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)
    for i, item in enumerate(map_in):
        if append_index:
            map_out.data.append(
                DataProduct(item.host, item.file + append_str + '_{}'.format(i), item.skip))
        else:
            map_out.data.append(
                DataProduct(item.host, item.file + append_str, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
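# Hypothetical usage sketch (not part of the pipeline code): how the generic
# pipeline would call this plugin via keyword arguments. The paths and the
# '.corrected' suffix below are illustrative assumptions only.
#
#   result = plugin_main([],
#                        mapfile_in='/data/maps/input.mapfile',
#                        append='.corrected',
#                        append_index='True',
#                        mapfile_dir='/data/maps',
#                        filename='input_appended.mapfile')
#   # result['mapfile'] points at the new datamap; its entries end in
#   # '.corrected_0', '.corrected_1', ... because append_index was set.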
def from_folder(self, folder, pattern=None, exclude_pattern=False):
    measurements = os.listdir(folder)
    measurements.sort()
    if pattern:
        # Translate the shell-style pattern into an anchored regular expression
        rePattern = pattern.strip().replace('.', '\\.').replace('?', '.').replace('*', '.*') + '$'
        PatternReg = re.compile(rePattern)
    for ms in measurements:
        if pattern:
            if not exclude_pattern and PatternReg.match(ms):
                self._append(DataProduct('localhost', folder + '/' + ms, False))
            elif exclude_pattern and not PatternReg.match(ms):
                self._append(DataProduct('localhost', folder + '/' + ms, False))
        else:
            self._append(DataProduct('localhost', folder + '/' + ms, False))
def from_folder(self, folder, pattern=None, exclude_pattern=False):
    measurements = os.listdir(folder)
    measurements.sort()
    for ms in measurements:
        if pattern:
            if pattern in ms and not exclude_pattern:
                self._append(DataProduct('localhost', folder + '/' + ms, False))
            elif pattern in ms and exclude_pattern:
                pass
            elif pattern not in ms and exclude_pattern:
                self._append(DataProduct('localhost', folder + '/' + ms, False))
        else:
            self._append(DataProduct('localhost', folder + '/' + ms, False))
def from_parts(self, host='localhost', data='dummy', skip=False, ntimes=1):
    hostlist = self._input_to_list(host)
    datalist = self._input_to_list(data)
    skiplist = self._input_to_list(skip)
    # Use '!=' for value comparison ('is not' tests object identity, not equality)
    if len(hostlist) != len(datalist) or len(hostlist) != len(skiplist) \
            or len(hostlist) != ntimes:
        print('Length of parts is not equal. Will expand to max length given.')
        maxval = max(len(hostlist), len(datalist), len(skiplist), ntimes)
        lastval = hostlist[-1]
        if len(hostlist) != maxval:
            for x in range(len(hostlist), maxval):
                hostlist.append(lastval)
        lastval = datalist[-1]
        if len(datalist) != maxval:
            for x in range(len(datalist), maxval):
                datalist.append(lastval)
        lastval = skiplist[-1]
        if len(skiplist) != maxval:
            for x in range(len(skiplist), maxval):
                skiplist.append(lastval)
    prodlist = []
    for h, f, z in zip(hostlist, datalist, skiplist):
        prodlist.append(DataProduct(h, f, z))
    self._set_data(prodlist)
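# Illustrative sketch (assumption: 'dmap' is an instance of the datamap class
# that defines from_parts above). Shorter parts are padded with their last
# value up to the maximum length:
#
#   dmap.from_parts(host='localhost', data=['a.ms', 'b.ms', 'c.ms'],
#                   skip=False, ntimes=3)
#
# expands host to ['localhost'] * 3 and skip to [False] * 3, giving three
# DataProduct entries ('localhost', 'a.ms'/'b.ms'/'c.ms', False).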
def expand(self, number, hostlist=None, filelist=None):
    if hostlist:
        if len(hostlist) != number:
            print('Error: length of hostlist should correspond to number of expansions')
            exit(1)
    else:
        print('Info: no hostlist given. Will use "localhost" instead')
        hostlist = []
        for item in range(number):
            hostlist.append('localhost')
    if filelist:
        if len(filelist) != number:
            print('Error: length of filelist should correspond to number of expansions')
            exit(1)
    else:
        print('Info: no filelist given. Will use "dummy" instead')
        filelist = []
        for item in range(number):
            filelist.append('dummy')
    prodlist = []
    for h, f in zip(hostlist, filelist):
        prodlist.append(DataProduct(h, f, False))
    self._set_data(prodlist)
def _create_mapfile_from_folder(folder):
    maps = DataMap([])
    measurements = os.listdir(folder)
    measurements.sort()
    for ms in measurements:
        maps.data.append(DataProduct('localhost', folder + '/' + ms, False))
    return maps
def plugin_main(args, **kwargs):
    """
    Prunes entries from a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    prune_str : str
        Entries starting with this string will be removed.
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    prune_str = kwargs['prune_str'].lower()
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    prunelen = len(prune_str)

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)
    for i, item in enumerate(map_in):
        if item.file[:prunelen].lower() != prune_str:
            map_out.data.append(DataProduct(item.host, item.file, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs): """ Makes a mapfile for selfcal images (assuming standard naming conventions) Parameters ---------- selfcal_dir : str Full path of selfcal directory hosts : list or str List of hosts/nodes. May be given as a list or as a string (e.g., '[host1, host2]' mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile Returns ------- result : dict Output datamap filename """ selfcal_dir = kwargs['selfcal_dir'] if type(kwargs['hosts']) is str: hosts = kwargs['hosts'].strip('[]').split(',') hosts = [h.strip() for h in hosts] mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] if os.path.exists(selfcal_dir): selfcal_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[01]2-MFS-image.fits')) tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22_iter*-MFS-image.fits')) if len(tec_iter_images) == 0: tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22-MFS-image.fits')) selfcal_images += tec_iter_images selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[3]2-MFS-image.fits')) selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image42_iter*-MFS-image.fits')) if len(selfcal_images) == 0: selfcal_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[01]2-image.fits')) tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22_iter*-image.fits')) if len(tec_iter_images) == 0: tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22-image.fits')) selfcal_images += tec_iter_images selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[3]2-image.fits')) selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image42_iter*-image.fits')) selfcal_images.sort() else: selfcal_images = [] # Save image list as a string to the output mapfile image_list = '[{0}]'.format(','.join(selfcal_images)) map_out = DataMap([]) map_out.data.append(DataProduct(hosts[0], image_list, False)) fileid = os.path.join(mapfile_dir, filename) map_out.save(fileid) result = {'mapfile': fileid} return result
def _set_file(self, data):
    try:
        if isinstance(data, list):
            self.file = data
        if isinstance(data, DataProduct):
            self._from_dataproduct(data)
        if isinstance(data, DataMap):
            self._from_datamap(data)
    except TypeError:
        # Raise a proper exception (DataProduct is not an exception class) and
        # report the offending value rather than the built-in 'file'
        raise TypeError('No known method to set a filelist from %s' % str(data))
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by compressing input mapfile items into one item

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    list_format : bool, optional
        If True, the compressed item will use a Python list format
        (e.g., '[file1, file2, ...]'). If False, it will be a space-separated
        list (e.g., 'file1 file2 ...')

    Returns
    -------
    result : dict
        New compressed datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'list_format' in kwargs:
        list_format = kwargs['list_format']
    else:
        list_format = True
    if type(list_format) is str:
        if list_format.lower() == 'true':
            list_format = True
        else:
            list_format = False

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    map_in.iterator = DataMap.SkipIterator
    file_list = [item.file for item in map_in]
    if list_format:
        newlist = '[{0}]'.format(','.join(file_list))
    else:
        newlist = '{0}'.format(' '.join(file_list))

    # Just assign host of first file to compressed file
    hosts = [item.host for item in map_in]
    map_out.data.append(DataProduct(hosts[0], newlist, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
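# Hypothetical usage sketch (paths and filenames are assumptions): compress a
# datamap of MS files into a single list-valued entry.
#
#   result = plugin_main([],
#                        mapfile_in='/data/maps/ms_files.mapfile',
#                        mapfile_dir='/data/maps',
#                        filename='ms_files_compressed.mapfile',
#                        list_format='True')
#   # With list_format='True' the single output item reads
#   # '[/data/L1.ms,/data/L2.ms,...]'; with 'False' it is the space-separated
#   # string '/data/L1.ms /data/L2.ms ...'.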
def _create_mapfile_ato(inmap):
    maps = DataMap([])
    mapsin = DataMap.load(inmap)
    mapsin.iterator = DataMap.SkipIterator
    newlist = ''
    for i, item in enumerate(mapsin):
        newlist = newlist + item.file + ','
    newlist = newlist.rstrip(',')
    newlist = '[' + newlist + ']'
    maps.data.append(DataProduct('localhost', newlist, False))
    return maps
def plugin_main(args, **kwargs): """ Makes a mapfile for list of files Parameters ---------- files : list or str List of files or mapfile with such a list as the only entry. May be given as a list of strings or as a string (e.g., '[s1.skymodel, s2.skymodel]' hosts : list or str List of hosts/nodes. May be given as a list or as a string (e.g., '[host1, host2]' mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile Returns ------- result : dict Output datamap filename """ if type(kwargs['files']) is str: try: # Check if input is mapfile containing list as a string map_in = DataMap.load(kwargs['files']) in_files = [item.file for item in map_in] files = [] for f in in_files: files += f.strip('[]').split(',') except: files = kwargs['files'] files = files.strip('[]').split(',') files = [f.strip() for f in files] if type(kwargs['hosts']) is str: hosts = kwargs['hosts'].strip('[]').split(',') hosts = [h.strip() for h in hosts] mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] for i in range(len(files) - len(hosts)): hosts.append(hosts[i]) map_out = DataMap([]) for h, f in zip(hosts, files): map_out.data.append(DataProduct(h, f, False)) fileid = os.path.join(mapfile_dir, filename) map_out.save(fileid) result = {'mapfile': fileid} return result
def _create_mapfile_pythonlist(folder):
    maps = DataMap([])
    measurements = os.listdir(folder)
    measurements.sort()
    msfull = ''
    for ms in measurements:
        msfull = msfull + os.path.join(folder, ms) + ','
    msfull = msfull.rstrip(',')
    msfull = '[' + msfull + ']'
    maps.data.append(DataProduct('localhost', msfull, False))
    return maps
def plugin_main(args, **kwargs):
    """
    Trims a string from filenames in a mapfile

    Note that everything from the last instance of the matching string to the
    end is trimmed.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    trim : str
        String to remove
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    counter : int
        If counter is greater than 0, replace "image32" with "image42". This
        is a special argument for facetselfcal looping only

    Returns
    -------
    result : dict
        New datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    trim_str = kwargs['trim']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'counter' in kwargs:
        counter = int(kwargs['counter'])
    else:
        counter = 0

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)
    for i, item in enumerate(map_in):
        index = item.file.rfind(trim_str)
        if index >= 0:
            # Items whose filename does not contain trim_str are dropped
            item_trim = item.file[:index]
            if counter > 0:
                item_trim = item_trim.replace('image32', 'image42')
            map_out.data.append(DataProduct(item.host, item_trim, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
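# Hypothetical usage sketch (filenames are assumptions): with trim set to
# '-MFS-image.fits', an entry 'facet1.wsclean_image32-MFS-image.fits' becomes
# 'facet1.wsclean_image32'; if counter > 0 it additionally becomes
# 'facet1.wsclean_image42' (the facetselfcal looping convention noted above).
#
#   result = plugin_main([],
#                        mapfile_in='/data/maps/images.mapfile',
#                        trim='-MFS-image.fits',
#                        mapfile_dir='/data/maps',
#                        filename='images_trimmed.mapfile',
#                        counter='1')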
def _set_file(self, data):
    try:
        # Try parsing as a list
        if isinstance(data, list):
            self.file = data
        if isinstance(data, DataProduct):
            self._from_dataproduct(data)
        if isinstance(data, DataMap):
            self._from_datamap(data)
    except TypeError:
        # Raise a proper exception (DataProduct is not an exception class) and
        # report the offending value rather than the built-in 'file'
        raise TypeError("No known method to set a filelist from %s" % str(data))
def _create_mapfile_list(folder):
    maps = DataMap([])
    measurements = os.listdir(folder)
    measurements.sort()
    msfulll = []
    msfull = ''
    for ms in measurements:
        msfulll.append(os.path.join(folder, ms))
        #msfull.append(os.path.join(folder,ms).replace("'",""))
        msfull = msfull + os.path.join(folder, ms) + ' '
    #msfull = msfull.rstrip(',')
    #msfull = '[' + msfull + ']'
    maps.data.append(DataProduct('localhost', msfull, False))
    #maps.file = msfulll
    return maps
def _combine_local_map(inmap):
    map_out = DataMap([])
    map_in = DataMap.load(inmap)
    map_in.iterator = DataMap.SkipIterator
    local_files = {}
    for item in map_in:
        if item.host in local_files:
            local_files[item.host] += item.file + ','
        else:
            local_files[item.host] = item.file + ','
    # items() instead of the Python-2-only iteritems()
    for k, v in local_files.items():
        v = v.rstrip(',')
        v = '[' + v + ']'
        map_out.data.append(DataProduct(k, v, False))
    return map_out
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the
    corresponding group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels
        for the different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of groups
        needs to be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename
    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])
    if len(inmap) != len(groupmap):
        raise ValueError(
            'PipelineStep_mapfileSingleToGroup: length of {0} and {1} differ'.format(
                kwargs['mapfile_in'], kwargs['mapfile_groups']))

    map_out = DataMap([])
    inindex = 0
    # range() instead of the Python-2-only xrange()
    for groupID in range(len(groupmap)):
        for fileID in range(len(groupmap[groupID].file)):
            map_out.data.append(
                DataProduct(inmap[groupID].host, inmap[groupID].file,
                            (inmap[groupID].skip or groupmap[groupID].skip)))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs): """ Makes a mapfile by uncompressing input mapfile list item into separate items Parameters ---------- mapfile_in : str Filename of datamap containing list of MS files mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile hosts : str List of hosts/nodes. May be given as a list or as a string (e.g., '[host1, host2]' Returns ------- result : dict New parmdb datamap filename """ mapfile_in = kwargs['mapfile_in'] mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] if type(kwargs['hosts']) is str: hosts = kwargs['hosts'].strip('[]').split(',') hosts = [h.strip() for h in hosts] map_in = DataMap.load(mapfile_in) map_out = DataMap([]) files = map_in[0].file.strip('[]').split(',') files = [f.strip() for f in files] for i in range(len(files) - len(hosts)): hosts.append(hosts[i]) for file, host in zip(files, hosts): map_out.data.append(DataProduct(host, file, False)) fileid = os.path.join(mapfile_dir, filename) map_out.save(fileid) result = {'mapfile': fileid} return result
def plugin_main(args, **kwargs): """ Checks a "check" mapfile for values of 'None' and, if found, changes the input mapfile "file" to "empty". Note: the check and input mapfiles must have the same length Parameters ---------- mapfile_in : str Name of the input mapfile from which to select files. mapfile_check : str Name of the mapfile to check for None mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile Returns ------- result : dict Output datamap filename """ mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] inmap = DataMap.load(kwargs['mapfile_in']) checkmap = DataMap.load(kwargs['mapfile_check']) if len(inmap) != len(checkmap): raise ValueError('Input and check mapfiles must have the same length') map_out = DataMap([]) for checkitem, item in zip(checkmap, inmap): if checkitem.file.lower() == 'none': map_out.data.append(DataProduct(item.host, 'empty', item.skip)) else: map_out.append(item) fileid = os.path.join(mapfile_dir, filename) map_out.save(fileid) result = {'mapfile': fileid} return result
def _split_listmap(map_in, number):
    print('MAP_IN: ', map_in)
    map_out = DataMap([])
    for item in map_in:
        filelist = ((item.file.rstrip(']')).lstrip('[')).split(',')
        # range() instead of the Python-2-only xrange()
        chunks = [filelist[i:i + number] for i in range(0, len(filelist), number)]
        print('FILELIST: ', filelist)
        print('CHUNKS: ', chunks)
        for slist in chunks:
            for i, name in enumerate(slist):
                #print('NAMEB: ', name)
                slist[i] = '"' + name + '"'
                #print('NAMEA: ', name)
            print('SLIST: ', slist)
            map_out.data.append(DataProduct(item.host, slist, False))
    return map_out
def plugin_main(args, **kwargs): """ Makes a mapfile by repeating max size in input mapfile items Parameters ---------- mapfile_in : str Filename of datamap containing MS files mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile Returns ------- result : dict New datamap filename """ mapfile_in = kwargs['mapfile_in'] mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] map_in = DataMap.load(mapfile_in) map_out = DataMap([]) # Find max size in x and y xsize_list = [] ysize_list = [] for item in map_in: xsize, ysize = [int(s) for s in item.file.split(' ')] xsize_list.append(xsize) ysize_list.append(ysize) maxsize = '{0} {1}'.format(max(xsize_list), max(ysize_list)) for item in map_in: map_out.data.append(DataProduct(item.host, maxsize, item.skip)) fileid = os.path.join(mapfile_dir, filename) map_out.save(fileid) result = {'mapfile': fileid} return result
def _calc_edge_chans(inmap, numch, edgeFactor=32):
    """
    Generates a map with strings that can be used as input for NDPPP to flag the edges
    of the input MSs during (or after) concatenation.

    inmap      - MultiDataMap (not mapfilename!) with the files to be concatenated.
    numch      - Number of channels per input file (All files are assumed to have the same number of channels.)
    edgeFactor - Divisor to compute how many channels are to be flagged at beginning and end.
                 (numch=64 and edgeFactor=32 means "flag two channels at beginning and two at end")
    """
    outmap = DataMap([])
    for group in inmap:
        flaglist = []
        # integer division (//) so that range() receives an int, also under Python 3
        for i in range(len(group.file)):
            flaglist.extend(range(i * numch, i * numch + numch // edgeFactor))
            flaglist.extend(range((i + 1) * numch - numch // edgeFactor, (i + 1) * numch))
        outmap.append(DataProduct(group.host, str(flaglist).replace(' ', ''), group.skip))
        print(str(flaglist).replace(' ', ''))
    return outmap
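# Minimal standalone sketch of the edge-flagging arithmetic above (an
# illustration only; it does not touch any DataMap). For numch=64 and
# edgeFactor=32, every input file contributes 64 channels to the concatenated
# MS and its first and last 64 // 32 = 2 channels are flagged.
def _edge_chan_example(nfiles=2, numch=64, edgeFactor=32):
    flaglist = []
    for i in range(nfiles):
        flaglist.extend(range(i * numch, i * numch + numch // edgeFactor))
        flaglist.extend(range((i + 1) * numch - numch // edgeFactor, (i + 1) * numch))
    return flaglist

# _edge_chan_example() == [0, 1, 62, 63, 64, 65, 126, 127]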
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by filtering input mapfile items into one item (the
    middle one)

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    index : int, optional
        Index of the item to keep; defaults to the middle item

    Returns
    -------
    result : dict
        New datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    map_in.iterator = DataMap.SkipIterator
    files = [item.file for item in map_in]
    hosts = [item.host for item in map_in]
    if 'index' in kwargs:
        index = int(kwargs['index'])
    else:
        # integer division, so the result can be used as a list index
        index = len(files) // 2
    map_out.data.append(DataProduct(hosts[index], files[index], False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs): """ Appends a string to filenames in a mapfile Parameters ---------- mapfile_in : str Filename of datamap to append to append_str : str String to append mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile Returns ------- result : dict New datamap filename """ mapfile_in = kwargs['mapfile_in'] append_str = kwargs['append'] if append_str == 'None': append_str = '' mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] map_out = DataMap([]) map_in = DataMap.load(mapfile_in) for i, item in enumerate(map_in): map_out.data.append( DataProduct(item.host, item.file + append_str, item.skip)) fileid = os.path.join(mapfile_dir, filename) map_out.save(fileid) result = {'mapfile': fileid} return result
def plugin_main(args, **kwargs): """ Makes a mapfile by expanding single input mapfile item into many items Parameters ---------- mapfile_in : str Filename of datamap containing single item mapfile_to_match : str Filename of datamap containing multiple items mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile Returns ------- result : dict New parmdb datamap filename """ mapfile_in = kwargs['mapfile_in'] mapfile_to_match = kwargs['mapfile_to_match'] mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] map_in = DataMap.load(mapfile_in) map_match = DataMap.load(mapfile_to_match) map_out = DataMap([]) map_match.iterator = DataMap.SkipIterator for item in map_match: map_out.data.append(DataProduct(item.host, map_in[0].file, item.skip)) fileid = os.path.join(mapfile_dir, filename) map_out.save(fileid) result = {'mapfile': fileid} return result
def plugin_main(args, **kwargs):
    """
    Expands the single (first) entry of mapfile_in into one entry per item of
    mapfile_comp

    Parameters
    ----------
    mapfile_in : str
        Parmdbs containing phase solutions
    mapfile_dir : str
        mapfile directory
    filename : str
        output filename
    mapfile_comp : str
        target MSs

    Returns
    -------
    result : dict
        Output datamap filename
    """
    mapfile_dir = kwargs['mapfile_dir']
    mapfile_in = kwargs['mapfile_in']
    mapfile_comp = kwargs['mapfile_comp']
    filename = kwargs['filename']

    value = DataMap.load(mapfile_in)[0]  # this is the single entry to be expanded
    n = len(DataMap.load(mapfile_comp))  # these are actual MS files

    map_out = DataMap([])
    for i in range(n):
        map_out.data.append(DataProduct(value.host, value.file, value.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs): """ Takes in mapfile_in, containing many files, and returns only one Parameters ---------- mapfile_in : str Name of the input mapfile to be trimmed back. mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile Returns ------- result : dict Output datamap filename """ mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] inmap = DataMap.load(kwargs['mapfile_in']) if len(inmap) < 1: print('ERROR: GroupToSingle: mapfile_in has size < 1.') return (1) map_out = DataMap([]) map_out.data.append( DataProduct(inmap[0].host, inmap[0].file, inmap[0].skip)) fileid = os.path.join(mapfile_dir, filename) map_out.save(fileid) result = {'mapfile': fileid} return result
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None, NDPPPfill=True, target_path=None, stepname=None, mergeLastGroup=False, truncateLastSBs=True, firstSB=None): """ Check a list of MS files for missing frequencies Parameters ---------- ms_input : list or str List of MS filenames, or string with list, or path to a mapfile filename: str Name of output mapfile mapfile_dir : str Directory for output mapfile numSB : int, optional How many files should go into one frequency group. Values <= 0 mean put all files of the same time-step into one group. default = -1 hosts : list or str List of hostnames or string with list of hostnames NDPPPfill : bool, optional Add dummy file-names for missing frequencies, so that NDPPP can fill the data with flagged dummy data. default = True target_path : str, optional Change the path of the "groups" files to this. (I.e. write output files into this directory with the subsequent NDPPP call.) default = keep path of input files stepname : str, optional Add this step-name into the file-names of the output files. mergeLastGroup, truncateLastSBs : bool, optional mergeLastGroup = True, truncateLastSBs = True: not allowed mergeLastGroup = True, truncateLastSBs = False: put the files from the last group that doesn't have SBperGroup subbands into the second last group (which will then have more than SBperGroup entries). mergeLastGroup = False, truncateLastSBs = True: ignore last files, that don't make for a full group (not all files are used). mergeLastGroup = False, truncateLastSBs = False: keep inclomplete last group, or - with NDPPPfill=True - fill last group with dummies. firstSB : int, optional If set, then reference the grouping of files to this station-subband. As if a file with this station-subband would be included in the input files. (For HBA-low, i.e. 0 -> 100MHz, 55 -> 110.74MHz, 512 -> 200MHz) Returns ------- result : dict Dict with the name of the generated mapfile """ NDPPPfill = input2bool(NDPPPfill) mergeLastGroup = input2bool(mergeLastGroup) truncateLastSBs = input2bool(truncateLastSBs) firstSB = input2int(firstSB) numSB = int(numSB) if not filename or not mapfile_dir: raise ValueError( 'sort_times_into_freqGroups: filename and mapfile_dir are needed!') if mergeLastGroup and truncateLastSBs: raise ValueError( 'sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!' 
) # if mergeLastGroup: # raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!') if type(ms_input) is str: if ms_input.startswith('[') and ms_input.endswith(']'): ms_list = [ f.strip(' \'\"') for f in ms_input.strip('[]').split(',') ] else: map_in = DataMap.load(ms_input) map_in.iterator = DataMap.SkipIterator ms_list = [] for fname in map_in: if fname.startswith('[') and fname.endswith(']'): for f in fname.strip('[]').split(','): ms_list.append(f.strip(' \'\"')) else: ms_list.append(fname.strip(' \'\"')) elif type(ms_input) is list: ms_list = [str(f).strip(' \'\"') for f in ms_input] else: raise TypeError( 'sort_times_into_freqGroups: type of "ms_input" unknown!') if type(hosts) is str: hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')] if not hosts: hosts = ['localhost'] numhosts = len(hosts) print("sort_times_into_freqGroups: Working on", len(ms_list), "files (including flagged files).") time_groups = {} # sort by time for i, ms in enumerate(ms_list): # work only on files selected by a previous step if ms.lower() != 'none': # use the slower but more reliable way: obstable = pt.table(ms, ack=False) timestamp = int(round(np.min(obstable.getcol('TIME')))) #obstable = pt.table(ms+'::OBSERVATION', ack=False) #timestamp = int(round(obstable.col('TIME_RANGE')[0][0])) obstable.close() if timestamp in time_groups: time_groups[timestamp]['files'].append(ms) else: time_groups[timestamp] = { 'files': [ms], 'basename': os.path.splitext(ms)[0] } print("sort_times_into_freqGroups: found", len(time_groups), "time-groups") # sort time-groups by frequency timestamps = list(time_groups.keys()) timestamps.sort() # not needed now, but later first = True nchans = 0 for time in timestamps: freqs = [] for ms in time_groups[time]['files']: # Get the frequency info sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False) freq = sw.col('REF_FREQUENCY')[0] if first: file_bandwidth = sw.col('TOTAL_BANDWIDTH')[0] nchans = sw.col('CHAN_WIDTH')[0].shape[0] chwidth = sw.col('CHAN_WIDTH')[0][0] freqset = set([freq]) first = False else: assert file_bandwidth == sw.col('TOTAL_BANDWIDTH')[0] assert nchans == sw.col('CHAN_WIDTH')[0].shape[0] assert chwidth == sw.col('CHAN_WIDTH')[0][0] freqset.add(freq) freqs.append(freq) sw.close() time_groups[time]['freq_names'] = list( zip(freqs, time_groups[time]['files'])) time_groups[time]['freq_names'].sort(key=lambda pair: pair[0]) #time_groups[time]['files'] = [name for (freq,name) in freq_names] #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names] print( "sort_times_into_freqGroups: Collected the frequencies for the time-groups" ) freqliste = np.array(list(freqset)) freqliste.sort() freq_width = np.min(freqliste[1:] - freqliste[:-1]) if file_bandwidth > freq_width: raise ValueError( "Bandwidth of files is larger than minimum frequency step between two files!" ) if file_bandwidth < (freq_width * 0.51): #raise ValueError("Bandwidth of files is smaller than 51% of the minimum frequency step between two files! (More than about half the data is missing.)") logging.warning( "Bandwidth of files is smaller than 51% of the minimum frequency step between two files! (More than about half the data is missing.)" ) #the new output map filemap = MultiDataMap() groupmap = DataMap() # add 1% of the SB badwidth in case maxfreq might be "exactly" on a group-border maxfreq = np.max(freqliste) + freq_width * 0.51 if firstSB != None: if freqliste[0] < 100e6: # LBA Data minfreq = (float(firstSB) / 512. * 100e6) - freq_width / 2. 
elif freqliste[0] > 100e6 and freqliste[0] < 200e6: # HBA-Low minfreq = (float(firstSB) / 512. * 100e6) + 100e6 - freq_width / 2. elif freqliste[0] > 200e6 and freqliste[0] < 300e6: # HBA-high minfreq = (float(firstSB) / 512. * 100e6) + 200e6 - freq_width / 2. else: raise ValueError( 'sort_times_into_freqGroups: Frequency of lowest input data is higher than 300MHz!' ) if np.min(freqliste) < minfreq: raise ValueError( 'sort_times_into_freqGroups: Frequency of lowest input data is lower than reference frequency!' ) else: minfreq = np.min(freqliste) - freq_width / 2. groupBW = freq_width * numSB if groupBW < 1e6 and groupBW > 0: print( 'sort_times_into_freqGroups: ***WARNING***: Bandwidth of concatenated MS is lower than 1 MHz. This may cause conflicts with the concatenated file names!' ) if groupBW < 0: # this is the case for concatenating all subbands groupBW = maxfreq - minfreq truncateLastSBs = input2bool(False) NDPPPfill = input2bool(True) freqborders = np.arange(minfreq, maxfreq, groupBW) if mergeLastGroup: freqborders[-1] = maxfreq elif truncateLastSBs: pass #nothing to do! # left to make the logic more clear! elif not truncateLastSBs and NDPPPfill: freqborders = np.append(freqborders, (freqborders[-1] + groupBW)) elif not truncateLastSBs and not NDPPPfill: freqborders = np.append(freqborders, maxfreq) freqborders = freqborders[freqborders > (np.min(freqliste) - groupBW)] ngroups = len(freqborders) - 1 if ngroups == 0: raise ValueError( 'sort_times_into_freqGroups: Not enough input subbands to create at least one full (frequency-)group!' ) print("sort_times_into_freqGroups: Will create", ngroups, "group(s) with", numSB, "file(s) each.") hostID = 0 for time in timestamps: (freq, fname) = time_groups[time]['freq_names'].pop(0) for groupIdx in range(ngroups): files = [] skip_this = True filefreqs_low = np.arange(freqborders[groupIdx], freqborders[groupIdx + 1], freq_width) for lower_freq in filefreqs_low: if freq > lower_freq and freq < lower_freq + freq_width: assert freq != 1e12 files.append(fname) if len(time_groups[time]['freq_names']) > 0: (freq, fname) = time_groups[time]['freq_names'].pop(0) else: (freq, fname) = (1e12, 'This_shouldn\'t_show_up') skip_this = False elif NDPPPfill: files.append('dummy.ms') if not skip_this: filemap.append( MultiDataProduct(hosts[hostID % numhosts], files, skip_this)) freqID = int( (freqborders[groupIdx] + freqborders[groupIdx + 1]) / 2e6) groupname = time_groups[time]['basename'] + '_%Xt_%dMHz.ms' % ( time, freqID) if type(stepname) is str: groupname += stepname if type(target_path) is str: groupname = os.path.join(target_path, os.path.basename(groupname)) groupmap.append( DataProduct(hosts[hostID % numhosts], groupname, skip_this)) orphan_files = len(time_groups[time]['freq_names']) if freq < 1e12: orphan_files += 1 if orphan_files > 0: print( "sort_times_into_freqGroups: Had %d unassigned files in time-group %xt." % (orphan_files, time)) filemapname = os.path.join(mapfile_dir, filename) filemap.save(filemapname) groupmapname = os.path.join(mapfile_dir, filename + '_groups') groupmap.save(groupmapname) # genertate map with edge-channels to flag flagmap = _calc_edge_chans(filemap, nchans) flagmapname = os.path.join(mapfile_dir, filename + '_flags') flagmap.save(flagmapname) result = { 'mapfile': filemapname, 'groupmapfile': groupmapname, 'flagmapfile': flagmapname } return (result)
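# Hypothetical usage sketch (directory names, host list and numSB value are
# assumptions): group the time-sorted MS files into frequency groups of 10
# subbands each and write the mapfiles described in the docstring above.
#
#   result = main('/data/maps/input_files.mapfile',
#                 filename='freq_groups.mapfile',
#                 mapfile_dir='/data/maps',
#                 numSB=10,
#                 hosts='[node01, node02]',
#                 NDPPPfill=True)
#   # result contains the 'mapfile', 'groupmapfile' and 'flagmapfile' keys.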
def main(ms_input, outmapname=None, mapfile_dir=None, cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694, fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1., y_axis_stretch=1.): """ Check a list of MS files for missing frequencies Parameters ---------- ms_input : list or str List of MS filenames, or string with list, or path to a mapfile outmapname: str Name of output mapfile mapfile_dir : str Directory for output mapfile cellsize_highres_deg : float, optional cellsize for the high-res images in deg cellsize_lowres_deg : float, optional cellsize for the low-res images in deg fieldsize_highres : float, optional How many FWHM's shall the high-res images be. fieldsize_lowres : float, optional How many FWHM's shall the low-res images be. image_padding : float, optional How much padding shall we add to the padded image sizes. y_axis_stretch : float, optional How much shall the y-axis be stretched or compressed. Returns ------- result : dict Dict with the name of the generated mapfiles """ if not outmapname or not mapfile_dir: raise ValueError( 'sort_times_into_freqGroups: outmapname and mapfile_dir are needed!' ) if type(ms_input) is str: if ms_input.startswith('[') and ms_input.endswith(']'): ms_list = [ f.strip(' \'\"') for f in ms_input.strip('[]').split(',') ] else: map_in = DataMap.load(ms_input) map_in.iterator = DataMap.SkipIterator ms_list = [] for fname in map_in: if fname.startswith('[') and fname.endswith(']'): for f in fname.strip('[]').split(','): ms_list.append(f.strip(' \'\"')) else: ms_list.append(fname.strip(' \'\"')) elif type(ms_input) is list: ms_list = [str(f).strip(' \'\"') for f in ms_input] else: raise TypeError('sort_into_freqBands: type of "ms_input" unknown!') cellsize_highres_deg = float(cellsize_highres_deg) cellsize_lowres_deg = float(cellsize_lowres_deg) fieldsize_highres = float(fieldsize_highres) fieldsize_lowres = float(fieldsize_lowres) image_padding = float(image_padding) y_axis_stretch = float(y_axis_stretch) msdict = {} for ms in ms_list: # group all MSs by frequency sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False) msfreq = int(sw.col('REF_FREQUENCY')[0]) sw.close() if msfreq in msdict: msdict[msfreq].append(ms) else: msdict[msfreq] = [ms] bands = [] print "InitSubtract_sort_and_compute.py: Putting files into bands." 
for MSkey in msdict.keys(): bands.append(Band(msdict[MSkey])) group_map = MultiDataMap() file_single_map = DataMap([]) high_size_map = DataMap([]) low_size_map = DataMap([]) high_paddedsize_map = DataMap([]) low_paddedsize_map = DataMap([]) numfiles = 0 for band in bands: print "InitSubtract_sort_and_compute.py: Working on Band:", band.name group_map.append(MultiDataProduct('localhost', band.files, False)) numfiles += len(band.files) for filename in band.files: file_single_map.append(DataProduct('localhost', filename, False)) (imsize_high_res, imsize_low_res) = band.get_image_sizes( cellsize_highres_deg, cellsize_lowres_deg, fieldsize_highres, fieldsize_lowres) imsize_high_res_stretch = band.get_optimum_size( int(imsize_high_res * y_axis_stretch)) high_size_map.append( DataProduct( 'localhost', str(imsize_high_res) + " " + str(imsize_high_res_stretch), False)) imsize_low_res_stretch = band.get_optimum_size( int(imsize_low_res * y_axis_stretch)) low_size_map.append( DataProduct( 'localhost', str(imsize_low_res) + " " + str(imsize_low_res_stretch), False)) imsize_high_pad = band.get_optimum_size( int(imsize_high_res * image_padding)) imsize_high_pad_stretch = band.get_optimum_size( int(imsize_high_res * image_padding * y_axis_stretch)) high_paddedsize_map.append( DataProduct( 'localhost', str(imsize_high_pad) + " " + str(imsize_high_pad_stretch), False)) imsize_low_pad = band.get_optimum_size( int(imsize_low_res * image_padding)) imsize_low_pad_stretch = band.get_optimum_size( int(imsize_low_res * image_padding * y_axis_stretch)) low_paddedsize_map.append( DataProduct( 'localhost', str(imsize_low_pad) + " " + str(imsize_low_pad_stretch), False)) print "InitSubtract_sort_and_compute.py: Computing averaging steps." (freqstep, timestep) = bands[0].get_averaging_steps() # get mapfiles for freqstep and timestep with the length of single_map freqstep_map = DataMap([]) timestep_map = DataMap([]) for index in xrange(numfiles): freqstep_map.append(DataProduct('localhost', str(freqstep), False)) timestep_map.append(DataProduct('localhost', str(timestep), False)) groupmapname = os.path.join(mapfile_dir, outmapname) group_map.save(groupmapname) file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single') file_single_map.save(file_single_mapname) high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size') high_size_map.save(high_sizename) low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size') low_size_map.save(low_sizename) high_padsize_name = os.path.join(mapfile_dir, outmapname + '_high_padded_size') high_paddedsize_map.save(high_padsize_name) low_padsize_name = os.path.join(mapfile_dir, outmapname + '_low_padded_size') low_paddedsize_map.save(low_padsize_name) freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep') freqstep_map.save(freqstepname) timestepname = os.path.join(mapfile_dir, outmapname + '_timestep') timestep_map.save(timestepname) result = { 'groupmap': groupmapname, 'single_mapfile': file_single_mapname, 'high_size_mapfile': high_sizename, 'low_size_mapfile': low_sizename, 'high_padsize_mapfile': high_padsize_name, 'low_padsize_mapfile': low_padsize_name, 'freqstep': freqstepname, 'timestep': timestepname } return result