def plugin_main(args, **kwargs):
    """
    Matches a mapfile with one in which the MSs are distributed.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dist : str
        Filename of mapfile with distributed MS files
    mapfile_full : str
        Filename of mapfile with all MS files from which the distributed
        one was made
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        Dict with the filename of the new datamap under key 'mapfile'
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dist = kwargs['mapfile_dist']
    mapfile_full = kwargs['mapfile_full']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_dist = DataMap.load(mapfile_dist)
    map_dist.iterator = DataMap.SkipIterator
    map_full = DataMap.load(mapfile_full)
    map_full.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran, and input files are deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_matchDistFreqs: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Find matches: keep each map_in entry whose same-index map_full entry
    # is one of the distributed files.
    # NOTE(review): this assumes map_in and map_full are parallel (same
    # length and ordering) -- confirm against the caller.
    all_files_hosts = [(item.file, item.host) for item in map_full]
    # Use a set for O(1) membership tests instead of scanning a list per item.
    dist_files = set(item.file for item in map_dist)
    for i, (f, h) in enumerate(all_files_hosts):
        if f in dist_files:
            map_out.append(DataProduct(h, map_in[i].file, False))

    map_out.save(fileid)
    del map_in
    del map_out
    result = {'mapfile': fileid}
    return result
def plugin_main(args, **kwargs):
    """
    Matches a mapfile with one in which the MSs are distributed.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dist : str
        Filename of mapfile with distributed MS files
    mapfile_full : str
        Filename of mapfile with all MS files from which the distributed
        one was made
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        Dict with the filename of the new datamap under key 'mapfile'
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dist = kwargs['mapfile_dist']
    mapfile_full = kwargs['mapfile_full']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_dist = DataMap.load(mapfile_dist)
    map_dist.iterator = DataMap.SkipIterator
    map_full = DataMap.load(mapfile_full)
    map_full.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran, and input files are deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_matchDistFreqs: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Find matches: keep each map_in entry whose same-index map_full entry
    # is one of the distributed files.
    # NOTE(review): this assumes map_in and map_full are parallel (same
    # length and ordering) -- confirm against the caller.
    all_files_hosts = [(item.file, item.host) for item in map_full]
    # Use a set for O(1) membership tests instead of scanning a list per item.
    dist_files = set(item.file for item in map_dist)
    for i, (f, h) in enumerate(all_files_hosts):
        if f in dist_files:
            map_out.append(DataProduct(h, map_in[i].file, False))

    map_out.save(fileid)
    del map_in
    del map_out
    result = {'mapfile': fileid}
    return result
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with the MSs spread across the full bandwidth.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    num : int, optional
        Number of frequencies in output mapfile (default 6)

    Returns
    -------
    result : dict
        Dict with the filename of the new datamap under key 'mapfile'

    Raises
    ------
    ValueError
        If no frequency band could be selected.
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    num = int(kwargs.get('num', 6))
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran, and input files are deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_selectDistFreqs: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Sort the input MSs into frequency groups keyed by REF_FREQUENCY.
    freq_groups = {}
    hosts = []
    for item in map_in:
        # Get the frequency info from the MS file
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        freq_groups.setdefault(freq, []).append(item.file)
        if item.host not in hosts:
            hosts.append(item.host)

    # Select `num` frequencies distributed over the band.
    freqs = sorted(freq_groups.keys())
    num_freqs = len(freqs)
    if num > num_freqs:
        print('PipelineStep_selectDistFreqs: fewer than %d frequency groups found, continuing with %d groups.' % (num, num_freqs))
        num = num_freqs
    dist_ind = get_distributed_indices(0, num_freqs - 1, num)
    selfreqs = [freqs[ind] for ind in dist_ind]
    if len(selfreqs) < 1:
        print("PipelineStep_selectDistFreqs: Selected fewer than one frequency band.")
        raise ValueError("Selected fewer than one frequency band.")

    all_files = []
    for selfreq in selfreqs:
        all_files.extend(freq_groups[selfreq])

    # Extend the hosts list cyclically so zip() below covers every file.
    for i in range(len(all_files) - len(hosts)):
        hosts.append(hosts[i])

    # Fill the output map
    for host, fname in zip(hosts, all_files):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    del map_in
    del map_out
    result = {'mapfile': fileid}
    return result
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with the MSs spread across the full bandwidth.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    num : int, optional
        Number of frequencies in output mapfile (default 6)

    Returns
    -------
    result : dict
        Dict with the filename of the new datamap under key 'mapfile'

    Raises
    ------
    ValueError
        If no frequency band could be selected.
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    num = int(kwargs.get('num', 6))
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran, and input files are deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_selectDistFreqs: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Sort the input MSs into frequency groups keyed by REF_FREQUENCY.
    freq_groups = {}
    hosts = []
    for item in map_in:
        # Get the frequency info from the MS file
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        freq_groups.setdefault(freq, []).append(item.file)
        if item.host not in hosts:
            hosts.append(item.host)

    # Select `num` frequencies distributed over the band.
    freqs = sorted(freq_groups.keys())
    num_freqs = len(freqs)
    if num > num_freqs:
        print('PipelineStep_selectDistFreqs: fewer than %d frequency groups found, continuing with %d groups.' % (num, num_freqs))
        num = num_freqs
    dist_ind = get_distributed_indices(0, num_freqs - 1, num)
    selfreqs = [freqs[ind] for ind in dist_ind]
    if len(selfreqs) < 1:
        print("PipelineStep_selectDistFreqs: Selected fewer than one frequency band.")
        raise ValueError("Selected fewer than one frequency band.")

    all_files = []
    for selfreq in selfreqs:
        all_files.extend(freq_groups[selfreq])

    # Extend the hosts list cyclically so zip() below covers every file.
    for i in range(len(all_files) - len(hosts)):
        hosts.append(hosts[i])

    # Fill the output map
    for host, fname in zip(hosts, all_files):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    del map_in
    del map_out
    result = {'mapfile': fileid}
    return result
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with only the MSs at the middle frequency.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    include : str, optional
        If given, only MS files whose path contains this substring are
        considered.
    index : int, optional
        Index of the frequency band to use.

    Returns
    -------
    result : dict
        Dict with the filename of the new datamap under key 'mapfile'
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    include = kwargs.get('include', None)
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran, and input files are deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_selectMiddleFreq: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Sort the input MSs into frequency groups keyed by REF_FREQUENCY.
    # (The original duplicated this body for the include/no-include cases;
    # a guard clause keeps the behavior identical without the duplication.)
    freq_groups = {}
    hosts = []
    for item in map_in:
        if include is not None and include not in item.file:
            continue
        # Get the frequency info
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        freq_groups.setdefault(freq, []).append(item.file)
        if item.host not in hosts:
            hosts.append(item.host)

    # Find maximum number of files per frequency group.
    maxfiles = max([len(group) for group in freq_groups.values()])

    # Find the center frequency (floor division: identical to the original
    # Python 2 `/` on ints, and correct under Python 3).
    freqs = sorted(freq_groups.keys())
    selfreq = freqs[len(freqs) // 2]
    if 'index' in kwargs:
        # NOTE(review): 'index' is used directly as the frequency key here,
        # not as a positional index into freqs as the docstring suggests --
        # confirm the intended semantics with the callers.
        selfreq = int(kwargs['index'])
    else:
        # Make sure that the chosen frequency has maxfiles entries.
        while len(freq_groups[selfreq]) < maxfiles:
            freqs.remove(selfreq)
            selfreq = freqs[len(freqs) // 2]

    # Extend the hosts list cyclically so zip() below covers every file.
    for i in range(len(freq_groups[selfreq]) - len(hosts)):
        hosts.append(hosts[i])

    # Fill the output map
    for host, fname in zip(hosts, freq_groups[selfreq]):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    del map_in
    del map_out
    result = {'mapfile': fileid}
    return result
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with only the MSs at the middle frequency.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    include : str, optional
        If given, only MS files whose path contains this substring are
        considered.
    index : int, optional
        Index of the frequency band to use.

    Returns
    -------
    result : dict
        Dict with the filename of the new datamap under key 'mapfile'
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    include = kwargs.get('include', None)
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran, and input files are deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_selectMiddleFreq: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Sort the input MSs into frequency groups keyed by REF_FREQUENCY.
    # (The original duplicated this body for the include/no-include cases;
    # a guard clause keeps the behavior identical without the duplication.)
    freq_groups = {}
    hosts = []
    for item in map_in:
        if include is not None and include not in item.file:
            continue
        # Get the frequency info
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        freq_groups.setdefault(freq, []).append(item.file)
        if item.host not in hosts:
            hosts.append(item.host)

    # Find maximum number of files per frequency group.
    maxfiles = max([len(group) for group in freq_groups.values()])

    # Find the center frequency (floor division: identical to the original
    # Python 2 `/` on ints, and correct under Python 3).
    freqs = sorted(freq_groups.keys())
    selfreq = freqs[len(freqs) // 2]
    if 'index' in kwargs:
        # NOTE(review): 'index' is used directly as the frequency key here,
        # not as a positional index into freqs as the docstring suggests --
        # confirm the intended semantics with the callers.
        selfreq = int(kwargs['index'])
    else:
        # Make sure that the chosen frequency has maxfiles entries.
        while len(freq_groups[selfreq]) < maxfiles:
            freqs.remove(selfreq)
            selfreq = freqs[len(freqs) // 2]

    # Extend the hosts list cyclically so zip() below covers every file.
    for i in range(len(freq_groups[selfreq]) - len(hosts)):
        hosts.append(hosts[i])

    # Fill the output map
    for host, fname in zip(hosts, freq_groups[selfreq]):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    del map_in
    del map_out
    result = {'mapfile': fileid}
    return result