Example #1
    def test_append_item_non_skip(self):
        data_map = DataMap(self.new_style_map)
        data_map.append(("host","file", False))

        data_map.iterator = DataMap.TupleIterator
        tuples = [item for item in data_map]
        self.assertEqual(len(tuples), 5)
        self.assertTrue(all(isinstance(item, tuple) for item in tuples))
        self.assertTrue(all(len(item) == 2 for item in tuples))
        self.assertEqual(tuples[-1], ('host', 'file'))
Example #3
    def test_append_item_skip(self):
        data_map = DataMap(self.new_style_map)
        data_map.append(("host", "file", True))

        data_map.iterator = DataMap.SkipIterator
        dataProducts = [item for item in data_map]
        # default contains 2 nonskipped items
        self.assertEqual(len(dataProducts), 2)
        self.assertTrue(
            all(isinstance(item, DataProduct) for item in dataProducts))
        # The map already contains 2 skipped items, the final item is tested
        # here
        self.assertEqual(dataProducts[-1].host, 'locus004')
        self.assertEqual(dataProducts[-1].file, 'L12345_SB104.MS')
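Both tests above run against a self.new_style_map fixture with four entries, two of them skipped. A minimal sketch of what such a dict-based "new style" map could look like (hosts and file names are hypothetical, chosen only to match the values the skip test asserts):

    new_style_map = [
        {'host': 'locus001', 'file': 'L12345_SB101.MS', 'skip': True},
        {'host': 'locus002', 'file': 'L12345_SB102.MS', 'skip': False},
        {'host': 'locus003', 'file': 'L12345_SB103.MS', 'skip': True},
        {'host': 'locus004', 'file': 'L12345_SB104.MS', 'skip': False},
    ]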
Example #5
def plugin_main(args, **kwargs):
    """
    Checks a "check" mapfile for values of 'None' and, if found, changes the
    input mapfile "file" to "empty".

    Note: the check and input mapfiles must have the same length

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile from which to select files.
    mapfile_check : str
        Name of the mapfile to check for None
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    checkmap = DataMap.load(kwargs['mapfile_check'])

    if len(inmap) != len(checkmap):
        raise ValueError('Input and check mapfiles must have the same length')

    map_out = DataMap([])
    for checkitem, item in zip(checkmap, inmap):
        if checkitem.file.lower() == 'none':
            map_out.data.append(DataProduct(item.host, 'empty', item.skip))
        else:
            map_out.append(item)

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
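A hypothetical invocation of this plugin, assuming the two input mapfiles already exist on disk (the positional args are unused here; all names are made up):

    result = plugin_main([],
                         mapfile_in='input.mapfile',
                         mapfile_check='check.mapfile',
                         mapfile_dir='/data/mapfiles',
                         filename='checked.mapfile')
    # result['mapfile'] == '/data/mapfiles/checked.mapfile'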
Example #6
def plugin_main(args, **kwargs):
    """
    Selects those files from mapfile_in that have the same filename-base as the one in
    mapfile_reference.

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile from which to select files.
    mapfile_reference : str
        Name of the reference mapfile
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    refmap = DataMap.load(kwargs['mapfile_reference'])

    map_out = DataMap([])

    basenames = [
        os.path.splitext(os.path.basename(item.file))[0] for item in inmap
    ]
    for refitem in refmap:
        refbase = os.path.splitext(os.path.basename(refitem.file))[0]
        idx = basenames.index(refbase)
        map_out.append(inmap[idx])

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
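The matching key is the file name stripped of directory and extension; note that basenames.index() raises a ValueError if a reference item has no counterpart in mapfile_in. A quick worked example of the key (hypothetical path):

    base = os.path.splitext(os.path.basename('/data/L123456_SB001_uv.MS'))[0]
    # base == 'L123456_SB001_uv'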
Example #7
def _calc_edge_chans(inmap, numch, edgeFactor=32):
    """
    Generates a map with strings that can be used as input for NDPPP to flag the edges 
    of the input MSs during (or after) concatenation.
    
    inmap      - MultiDataMap (not mapfilename!) with the files to be concatenated.
    numch      - Number of channels per input file (All files are assumed to have the same number 
                 of channels.)
    edgeFactor - Divisor to compute how many channels are to be flagged at beginning and end. 
                 (numch=64 and edgeFactor=32 means "flag two channels at beginning and two at end")
    """
    outmap = DataMap([])
    for group in inmap:
        flaglist = []
        for i in xrange(len(group.file)):
            flaglist.extend(range(i*numch, i*numch + numch/edgeFactor))
            flaglist.extend(range((i+1)*numch - numch/edgeFactor, (i+1)*numch))
        outmap.append(DataProduct(group.host, str(flaglist).replace(' ', ''), group.skip))
        print str(flaglist).replace(' ', '')
    return outmap
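A worked example of the flag strings this produces, for a hypothetical group of two files with numch=64 and edgeFactor=32 (two channels flagged at each edge of every file):

    numch, edgeFactor = 64, 32
    flaglist = []
    for i in range(2):  # two files in the group
        flaglist.extend(range(i * numch, i * numch + numch // edgeFactor))
        flaglist.extend(range((i + 1) * numch - numch // edgeFactor, (i + 1) * numch))
    # str(flaglist).replace(' ', '') == '[0,1,62,63,64,65,126,127]'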
Example #9
def _sort_ObsSB(Mapfile, SBperGroup, filllNDPPPdummies=False, mergeLastGroup=False, startFromZero=False, 
                truncateLastSBs=False, cdir=''):
    """
    Sorts the MSs in "inmap" into groups with "SBperGroup" consecutive subband numbers for each 
    observation number. Expects files that contain a string like: "L123456_SAP007_SB890" 
    or: "L123456_SB890". The files in the groups will be sorted by subband number.
    The hostname is taken from the first file found for each group.

    Mapfile           - Name of the mapfile with the input data, should contain single MSs
    SBperGroup        - Group that many subbands into one group.
    filllNDPPPdummies - If True, add dummy file-names for missing subbands, so that NDPPP can
                        fill the data with flagged dummy data.
    mergeLastGroup    - True: put the files from the last group that doesn't have SBperGroup subbands
                        into the second last group (which will then have more than SBperGroup entries).
                        False: keep the incomplete last group, or - with filllNDPPPdummies=True - fill
                        the last group with dummies.
    startFromZero     - Start grouping with 0, even if there is no SB000 in the map.
    truncateLastSBs   - If True, drop the files of an incomplete last group (mutually exclusive
                        with mergeLastGroup).
    cdir              - String that gets inserted into the generated "firstfile" names.
    """
    if mergeLastGroup and truncateLastSBs:
        raise ValueError('_sort_ObsSB: Can either merge the last partial group or truncate at last full group, not both!')
    inmap = MiniMapfileManager.load(Mapfile)
    # I'm going to need that:
    PatternReg = re.compile(r'(L\d+)(_SAP\d+)?_(SB\d+)')
    # get smallest and largest subband number: sort by subband and get SB-number of first and last entry
    inmap.sortMS_by_SBnum()
    if not startFromZero:
        minSB = int(PatternReg.search(inmap.data[0].file).group(3)[2:])
    else:
        minSB = 0
    maxSB = int(PatternReg.search(inmap.data[-1].file).group(3)[2:])
    # sort data into dictionary
    sortDict = {}

    for item in inmap.data:
        if not item.skip:
            regerg = PatternReg.search(item.file)
            Obs = regerg.group(1)
            SBnum = int(regerg.group(3)[2:])
            SBgroup = int((SBnum-minSB)/SBperGroup)
            if Obs not in sortDict:
                sortDict[Obs] = {}
            if SBgroup not in sortDict[Obs]:
                replacestring = Obs + '_SBgr%03d-%d_%s' % (SBgroup, SBperGroup, cdir)
                reffile = PatternReg.sub(replacestring, item.file, 1)
                sortDict[Obs][SBgroup] = {'host': item.host, 'files': [], 'firstfile': reffile}
            #the data is sorted by SBnum, so if files with lower SBnum are not already 
            #in the list, then they are missing!
            while filllNDPPPdummies and len(sortDict[Obs][SBgroup]['files']) < ((SBnum-minSB) % SBperGroup) :
                sortDict[Obs][SBgroup]['files'].append('dummy_entry')
            sortDict[Obs][SBgroup]['files'].append(item.file)
    # now go through the dictionary and put the data into the new map

    newmap = MultiDataMap([])
    firstfileMap = DataMap([])
    numGroups = (maxSB-minSB+1)/SBperGroup
    SBs_in_last = (maxSB-minSB+1)%SBperGroup
    obsNames = sortDict.keys()
    obsNames.sort()
    for obs in obsNames:
        obsDict = sortDict[obs]
        for SBgroup in xrange(numGroups-1):
            if SBgroup in obsDict:
                while filllNDPPPdummies and len(obsDict[SBgroup]['files']) < SBperGroup :
                    obsDict[SBgroup]['files'].append('dummy_entry')
                newmap.append(MultiDataProduct(obsDict[SBgroup]['host'], obsDict[SBgroup]['files'], False))
                firstfileMap.append(DataProduct(obsDict[SBgroup]['host'], obsDict[SBgroup]['firstfile'], False))
        #work on last full group
        SBgroup = numGroups-1
        if SBgroup in obsDict:
            while filllNDPPPdummies and len(obsDict[SBgroup]['files']) < SBperGroup :
                obsDict[SBgroup]['files'].append('dummy_entry')
        if mergeLastGroup and SBs_in_last != 0:
            lastList = []
            if numGroups in obsDict:
                lastList = obsDict[numGroups]['files']
            while filllNDPPPdummies and len(lastList) < SBs_in_last :
                lastList.append('dummy_entry')
            obsDict[SBgroup]['files'].extend(lastList)
        newmap.append(MultiDataProduct(obsDict[SBgroup]['host'], obsDict[SBgroup]['files'], False))
        firstfileMap.append(DataProduct(obsDict[SBgroup]['host'], obsDict[SBgroup]['firstfile'], False))
        #need to process incomplete group
        if SBs_in_last != 0 and not mergeLastGroup and not truncateLastSBs:
            if numGroups in obsDict:
                while filllNDPPPdummies and len(obsDict[numGroups]['files']) < SBs_in_last :
                    obsDict[numGroups]['files'].append('dummy_entry')
                newmap.append(MultiDataProduct(obsDict[numGroups]['host'], obsDict[numGroups]['files'], False))
                firstfileMap.append(DataProduct(obsDict[numGroups]['host'], obsDict[numGroups]['firstfile'], False))
    #That should be it!
    return (newmap,firstfileMap)
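The grouping hinges on the PatternReg regular expression; a short illustration on a hypothetical file name:

    import re
    PatternReg = re.compile(r'(L\d+)(_SAP\d+)?_(SB\d+)')
    regerg = PatternReg.search('/data/L123456_SAP007_SB890_uv.MS')
    # regerg.group(1) == 'L123456'  (observation name)
    # regerg.group(3) == 'SB890'    (subband)
    # int(regerg.group(3)[2:]) == 890
    # PatternReg.sub('L123456_SBgr000-10_', '/data/L123456_SAP007_SB890_uv.MS', 1)
    # evaluates to '/data/L123456_SBgr000-10__uv.MS'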
Example #10
def plugin_main(args, **kwargs):
    """
    Reads in closure phase file and returns the best delay calibrator mapfile

    Parameters
    ----------
    mapfile_dir : str
        Directory for output mapfile
    mapfile_out : str
        Name of output mapfile
    closurePhaseMap : str
        Name of the mapfile that points to the closure-phase scatter file

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    mapfile_out = kwargs['mapfile_out']
    closurePhaseMap = kwargs['closurePhaseMap']

    fileid = os.path.join(
        mapfile_dir,
        mapfile_out)  # this file holds all the output measurement sets

    with open(closurePhaseMap, 'r') as f:
        lines = f.readlines()
    closurePhaseFile = lines[0].split(',')[1].split(':')[1].strip().strip("'")

    # read the file
    with open(closurePhaseFile, 'r') as f:
        lines = f.readlines()

    ## get lists of directions and scatter
    direction = []
    scatter = []
    for l in lines:
        direction.append(l.split()[4])
        scatter.append(float(l.split()[6]))  # np.float was removed from numpy; float is equivalent

    ## convert to numpy arrays
    direction = np.asarray(direction)
    scatter = np.asarray(scatter)

    ## find the minimum scatter
    if len(scatter) > 1:
        min_scatter_index = np.argmin(scatter)
        best_calibrator = direction[min_scatter_index]
    else:
        # direction[0] is the only entry (the original [0][0] would return its first character)
        best_calibrator = direction[0]

    job_dir = closurePhaseFile.replace('closure_phases.txt', '')

    delayCal = job_dir + best_calibrator + '*' + 'concat'

    delay_ms = glob.glob(delayCal)[0]

    map_out = DataMap([])
    map_out.append(DataProduct('localhost', delay_ms, False))

    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
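The first open/parse step above assumes the usual repr-style mapfile format; a worked example of that string surgery (hypothetical content):

    line = "{'host': 'localhost', 'file': '/jobdir/closure_phases.txt', 'skip': False}"
    path = line.split(',')[1].split(':')[1].strip().strip("'")
    # path == '/jobdir/closure_phases.txt'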
Example #11
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None, NDPPPfill=True, target_path=None, stepname=None,
         mergeLastGroup=False, truncateLastSBs=True, firstSB=None):
    """
    Sort a list of MS files into frequency groups for each time-step, checking for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional 
        How many files should go into one frequency group. Values <= 0 mean put 
        all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output files 
        into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True:
          not allowed
        mergeLastGroup = True, truncateLastSBs = False:
          put the files from the last group that doesn't have SBperGroup subbands 
          into the second last group (which will then have more than SBperGroup entries). 
        mergeLastGroup = False, truncateLastSBs = True:
          ignore the last files that don't make a full group (not all files are used).
        mergeLastGroup = False, truncateLastSBs = False:
          keep the incomplete last group, or - with NDPPPfill=True - fill the
          last group with dummies.
    firstSB : int, optional
        If set, then reference the grouping of files to this station-subband. As if a file 
        with this station-subband would be included in the input files.
        (For HBA-low, i.e. 0 -> 100MHz, 55 -> 110.74MHz, 512 -> 200MHz)

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """

    NDPPPfill = input2bool(NDPPPfill)
    mergeLastGroup = input2bool(mergeLastGroup)
    truncateLastSBs = input2bool(truncateLastSBs)
    firstSB = input2int(firstSB)
    numSB = int(numSB)

    if not filename or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError('sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!')
#    if mergeLastGroup:
#        raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            # SkipIterator yields DataProduct items (see the tests above), so
            # take the .file attribute before doing string operations
            for item in map_in:
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_times_into_freqGroups: type of "ms_input" unknown!')

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on",len(ms_list),"files (including flagged files)."

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # work only on files selected by a previous step
        if ms.lower() != 'none':
            # use the slower but more reliable way:
            obstable = pt.table(ms, ack=False)
            timestamp = int(round(np.min(obstable.getcol('TIME'))))
            #obstable = pt.table(ms+'::OBSERVATION', ack=False)
            #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
            obstable.close()
            if timestamp in time_groups:
                time_groups[timestamp]['files'].append(ms)
            else:
                time_groups[timestamp] = {'files': [ ms ], 'basename' : os.path.splitext(ms)[0] }
    print "sort_times_into_freqGroups: found",len(time_groups),"time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()   # not needed now, but later
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]            
            if first:
                file_bandwidth = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                freqset = set([freq])
                first = False
            else:
                assert file_bandwidth == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                freqset.add(freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs,time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    freqliste = np.array(list(freqset))
    freqliste.sort()
    freq_width = np.min(freqliste[1:]-freqliste[:-1])
    if file_bandwidth > freq_width:
        raise ValueError("Bandwidth of files is larger than minimum frequency step between two files!")
    if file_bandwidth < (freq_width/2.):
        raise ValueError("Bandwidth of files is smaller than half the minimum frequency step between two files! (More than half the data is missing.)")
    #the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    # add half the subband bandwidth plus 1% so that maxfreq cannot fall exactly on a group border
    maxfreq = np.max(freqliste)+freq_width*0.51
    if firstSB is not None:
        minfreq = (float(firstSB)/512.*100e6)+100e6-freq_width/2.
        if np.min(freqliste) < minfreq:
            raise ValueError('sort_times_into_freqGroups: Frequency of lowest input data is lower than reference frequency!')
    else:
        minfreq = np.min(freqliste)-freq_width/2.
    groupBW = freq_width*numSB
    if groupBW < 1e6:
        print 'sort_times_into_freqGroups: ***WARNING***: Bandwidth of concatenated MS is lower than 1 MHz. This may cause conflicts with the concatenated file names!'
    freqborders = np.arange(minfreq,maxfreq,groupBW)
    if mergeLastGroup:
        freqborders[-1] = maxfreq
    elif truncateLastSBs:
        pass #nothing to do! # left to make the logic more clear!
    elif not truncateLastSBs and NDPPPfill:
        freqborders = np.append(freqborders,(freqborders[-1]+groupBW))
    elif not truncateLastSBs and not NDPPPfill:
        freqborders = np.append(freqborders,maxfreq)

    freqborders = freqborders[freqborders>(np.min(freqliste)-groupBW)]
    ngroups = len(freqborders)-1
    if ngroups == 0:
        raise ValueError('sort_times_into_freqGroups: Not enough input subbands to create at least one full (frequency-)group!')
    
    print "sort_times_into_freqGroups: Will create",ngroups,"group(s) with",numSB,"file(s) each."

    hostID = 0
    for time in timestamps:
        (freq,fname) = time_groups[time]['freq_names'].pop(0)
        for groupIdx in xrange(ngroups):
            files = []
            skip_this = True
            filefreqs_low = np.arange(freqborders[groupIdx],freqborders[groupIdx+1],freq_width)
            for lower_freq in filefreqs_low:
                if freq > lower_freq and freq < lower_freq+freq_width:
                    assert freq!=1e12
                    files.append(fname)
                    if len(time_groups[time]['freq_names'])>0:
                        (freq,fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq,fname) = (1e12,'This_shouldn\'t_show_up')
                    skip_this = False
                elif NDPPPfill:
                    files.append('dummy.ms')
            if not skip_this:
                filemap.append(MultiDataProduct(hosts[hostID%numhosts], files, skip_this))
                freqID = int((freqborders[groupIdx]+freqborders[groupIdx+1])/2e6)
                groupname = time_groups[time]['basename']+'_%Xt_%dMHz.ms'%(time,freqID)
                if type(stepname) is str:
                    groupname += stepname
                if type(target_path) is str:
                    groupname = os.path.join(target_path,os.path.basename(groupname))
                groupmap.append(DataProduct(hosts[hostID%numhosts],groupname, skip_this))
        orphan_files = len(time_groups[time]['freq_names'])
        if freq < 1e12:
            orphan_files += 1
        if orphan_files > 0:
            print "sort_times_into_freqGroups: Had %d unassigned files in time-group %xt."%(orphan_files, time)
    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename+'_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemap, nchans)
    flagmapname = os.path.join(mapfile_dir, filename+'_flags')
    flagmap.save(flagmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname, 'flagmapfile': flagmapname}
    return result
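A toy run of the group-border arithmetic above (numpy imported as np, as in the module), assuming eight subbands of 0.2 MHz starting at 120 MHz and numSB=4:

    freq_width, numSB = 0.2e6, 4
    minfreq = 120e6 - freq_width / 2.
    maxfreq = 120e6 + 7 * freq_width + freq_width * 0.51
    freqborders = np.arange(minfreq, maxfreq, freq_width * numSB)
    # freqborders / 1e6 -> [119.9, 120.7, 121.5]: two groups of four subbands each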
Example #12
def main(ms_input,
         outmapname=None,
         mapfile_dir=None,
         cellsize_highres_deg=0.00208,
         cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5,
         fieldsize_lowres=6.5,
         image_padding=1.,
         y_axis_stretch=1.):
    """
    Sort a list of MS files into frequency bands, and compute image sizes and averaging steps

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHM's shall the high-res images be.
    fieldsize_lowres : float, optional
        How many FWHM's shall the low-res images be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much shall the y-axis be stretched or compressed. 

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles

    """

    if not outmapname or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: outmapname and mapfile_dir are needed!'
        )
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for fname in map_in:
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_into_freqBands: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    bandfreqs = []
    print "InitSubtract_deep_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        band = Band(msdict[MSkey])
        bands.append(band)
        bandfreqs.append(band.freq)

    ## min freq gives largest image size for deep image
    bandfreqs = np.array(bandfreqs)
    minfreq = np.min(bandfreqs)
    bandmin = np.argmin(bandfreqs)
    ## need to map the output from wsclean channels to the right frequencies
    ## just put the bands in the right freq order
    wsclean_channum = np.argsort(bandfreqs)
    bands = np.array(bands)
    bands = bands[wsclean_channum]

    #minfreq = 1e9
    #for ib, band in enumerate(bands):
    #if band.freq < minfreq:
    #minfreq = band.freq
    #bandmin = ib

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    nbands = len(bands)
    if nbands > 8:
        nchansout_clean1 = int(nbands / 4)
    elif nbands > 4:
        nchansout_clean1 = int(nbands / 2)
    else:
        nchansout_clean1 = int(nbands)

    (freqstep, timestep) = bands[0].get_averaging_steps()
    int_time_sec = bands[0].timestep_sec * timestep  # timestep_sec gets added to band object in get_averaging_steps()
    nwavelengths_high = bands[0].get_nwavelengths(cellsize_highres_deg,
                                                  int_time_sec)
    nwavelengths_low = bands[0].get_nwavelengths(cellsize_lowres_deg,
                                                 int_time_sec)

    for band in bands:
        print "InitSubtract_deep_sort_and_compute.py: Working on Band:", band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg, fieldsize_highres,
            fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(
            int(imsize_high_res * y_axis_stretch))
        high_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_res) + " " + str(imsize_high_res_stretch),
                False))
        imsize_low_res_stretch = band.get_optimum_size(
            int(imsize_low_res * y_axis_stretch))
        low_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_res) + " " + str(imsize_low_res_stretch),
                False))
        imsize_high_pad = band.get_optimum_size(
            int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(
            int(imsize_high_res * image_padding * y_axis_stretch))
        high_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_pad) + " " + str(imsize_high_pad_stretch),
                False))
        imsize_low_pad = band.get_optimum_size(
            int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(
            int(imsize_low_res * image_padding * y_axis_stretch))
        low_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_pad) + " " + str(imsize_low_pad_stretch),
                False))

        print band.freq / 1e6, imsize_high_res, imsize_high_res_stretch, imsize_high_pad, imsize_high_pad_stretch, imsize_low_res, imsize_low_res_stretch, imsize_low_pad, imsize_low_pad_stretch, nwavelengths_high, nwavelengths_low

        if band.freq == minfreq:
            deep_imsize_high_res = imsize_high_res
            deep_imsize_high_res_stretch = imsize_high_res_stretch
            deep_imsize_high_pad = imsize_high_pad
            deep_imsize_high_pad_stretch = imsize_high_pad_stretch
            deep_imsize_low_res = imsize_low_res
            deep_imsize_low_res_stretch = imsize_low_res_stretch
            deep_imsize_low_pad = imsize_low_pad
            deep_imsize_low_pad_stretch = imsize_low_pad_stretch

            print '*', band.freq / 1e6, imsize_high_res, imsize_high_res_stretch, imsize_high_pad, imsize_high_pad_stretch, imsize_low_res, imsize_low_res_stretch, imsize_low_pad, imsize_low_pad_stretch

    deep_high_size_map = DataMap([
        DataProduct(
            'localhost',
            str(deep_imsize_high_res) + " " +
            str(deep_imsize_high_res_stretch), False)
    ])
    deep_high_paddedsize_map = DataMap([
        DataProduct(
            'localhost',
            str(deep_imsize_high_pad) + " " +
            str(deep_imsize_high_pad_stretch), False)
    ])
    deep_low_size_map = DataMap([
        DataProduct(
            'localhost',
            str(deep_imsize_low_res) + " " + str(deep_imsize_low_res_stretch),
            False)
    ])
    deep_low_paddedsize_map = DataMap([
        DataProduct(
            'localhost',
            str(deep_imsize_low_pad) + " " + str(deep_imsize_low_pad_stretch),
            False)
    ])
    nbands_map = DataMap([DataProduct('localhost', str(nbands), False)])
    nchansout_clean1_map = DataMap(
        [DataProduct('localhost', str(nchansout_clean1), False)])
    print "InitSubtract_deep_sort_and_compute.py: Computing averaging steps."

    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    nwavelengths_high_map = DataMap([])
    nwavelengths_low_map = DataMap([])

    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))
        nwavelengths_high_map.append(
            DataProduct('localhost', str(nwavelengths_high), False))
        nwavelengths_low_map.append(
            DataProduct('localhost', str(nwavelengths_low), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)

    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir,
                                     outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir,
                                    outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)

    deep_high_sizename = os.path.join(mapfile_dir,
                                      outmapname + '_deep_high_size')
    deep_high_size_map.save(deep_high_sizename)
    deep_low_sizename = os.path.join(mapfile_dir,
                                     outmapname + '_deep_low_size')
    deep_low_size_map.save(deep_low_sizename)
    deep_high_padsize_name = os.path.join(
        mapfile_dir, outmapname + '_deep_high_padded_size')
    deep_high_paddedsize_map.save(deep_high_padsize_name)
    deep_low_padsize_name = os.path.join(mapfile_dir,
                                         outmapname + '_deep_low_padded_size')
    deep_low_paddedsize_map.save(deep_low_padsize_name)

    nbands_mapname = os.path.join(mapfile_dir, outmapname + '_nbands')
    nbands_map.save(nbands_mapname)
    nchansout_clean1_mapname = os.path.join(mapfile_dir,
                                            outmapname + '_nchansout_clean1')
    nchansout_clean1_map.save(nchansout_clean1_mapname)

    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    nwavelengths_high_name = os.path.join(mapfile_dir,
                                          outmapname + '_nwavelengths_high')
    nwavelengths_high_map.save(nwavelengths_high_name)
    nwavelengths_low_name = os.path.join(mapfile_dir,
                                         outmapname + '_nwavelengths_low')
    nwavelengths_low_map.save(nwavelengths_low_name)

    result = {
        'groupmap': groupmapname,
        'single_mapfile': file_single_mapname,
        'high_size_mapfile': high_sizename,
        'low_size_mapfile': low_sizename,
        'high_padsize_mapfile': high_padsize_name,
        'low_padsize_mapfile': low_padsize_name,
        'deep_high_size_mapfile': deep_high_sizename,
        'deep_low_size_mapfile': deep_low_sizename,
        'deep_high_padsize_mapfile': deep_high_padsize_name,
        'deep_low_padsize_mapfile': deep_low_padsize_name,
        'nbands': nbands_mapname,
        'nchansout_clean1': nchansout_clean1_mapname,
        'freqstep': freqstepname,
        'timestep': timestepname,
        'nwavelengths_high_mapfile': nwavelengths_high_name,
        'nwavelengths_low_mapfile': nwavelengths_low_name
    }
    return result
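The channels-out rule used above, restated in isolation as a sketch (not part of the original module):

    def nchans_out(nbands):
        if nbands > 8:
            return nbands // 4
        elif nbands > 4:
            return nbands // 2
        return nbands

    # nchans_out(16) == 4, nchans_out(6) == 3, nchans_out(2) == 2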
Example #13
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with only the MSs at the middle Frequency
    Quite a bit of a hack for a plugin, but right now I don't care.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    index: int, optional
        Index of the frequency band to use.

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()

    # do not re-run if we already ran and the input files have since been deleted
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print 'PipelineStep_selectMiddleFreq: Not re-running because output file exists, but input files don\'t!'
        return {'mapfile': fileid}

    #sort into frequency groups
    freq_groups = {}
    hosts = []
    for item in map_in:
        # Get the frequency info
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if freq in freq_groups:
            freq_groups[freq].append(item.file)
        else:
            freq_groups[freq] = [item.file]
        if not item.host in hosts:
            hosts.append(item.host)
    # find maximum number of files per frequency-group
    maxfiles = max([len(group) for group in freq_groups.values()])
    # find the center-frequency
    freqs = freq_groups.keys()
    freqs.sort()
    selfreq = freqs[len(freqs) / 2]
    if 'index' in kwargs:
        # 'index' is documented as the index of the frequency band to use
        selfreq = freqs[int(kwargs['index'])]
    else:
        # make sure that the chosen frequency has maxfiles entries
        while len(freq_groups[selfreq]) < maxfiles:
            freqs.remove(selfreq)
            selfreq = freqs[len(freqs) / 2]
    # extend the hosts-list
    for i in range(len(freq_groups[selfreq]) - len(hosts)):
        hosts.append(hosts[i])
    # fill the output-map
    for (host, fname) in zip(hosts, freq_groups[selfreq]):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
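The centre-frequency pick reduces to taking the middle entry of the sorted frequency list; with hypothetical values:

    freqs = [120, 125, 130, 135, 140]
    selfreq = freqs[len(freqs) // 2]
    # selfreq == 130; for an even count this picks the upper of the two middle entries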
Example #14
def main(ms_input,
         outmapname=None,
         mapfile_dir=None,
         cellsize_highres_deg=0.00208,
         cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5,
         fieldsize_lowres=6.5,
         image_padding=1.,
         y_axis_stretch=1.,
         calc_y_axis_stretch=False,
         apply_y_axis_stretch_highres=True,
         apply_y_axis_stretch_lowres=True):
    """
    Sort a list of MS files into frequency bands, and compute image sizes and averaging steps

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHM's shall the high-res images be.
    fieldsize_lowres : float, optional
        How many FWHM's shall the low-res images be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much shall the y-axis be stretched or compressed.
    calc_y_axis_stretch : bool, optional
        Adjust the image sizes returned by this script for the mean elevation.
        If True, the value of y_axis_stretch above is ignored
    apply_y_axis_stretch_highres : bool, optional
        Apply the y-axis stretch to the high-res image sizes
    apply_y_axis_stretch_lowres : bool, optional
        Apply the y-axis stretch to the low-res image sizes

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles

    """
    if not outmapname or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: outmapname and mapfile_dir are needed!'
        )
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            # SkipIterator yields DataProduct items (see the tests above), so
            # take the .file attribute before doing string operations
            for item in map_in:
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_into_freqBands: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)
    calc_y_axis_stretch = input2bool(calc_y_axis_stretch)
    apply_y_axis_stretch_highres = input2bool(apply_y_axis_stretch_highres)
    apply_y_axis_stretch_lowres = input2bool(apply_y_axis_stretch_lowres)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    print("InitSubtract_sort_and_compute.py: Putting files into bands.")
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    for i, band in enumerate(bands):
        print("InitSubtract_sort_and_compute.py: Working on Band:", band.name)
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg, fieldsize_highres,
            fieldsize_lowres)

        # Calculate y_axis_stretch if desired
        if calc_y_axis_stretch:
            if i == 0:
                y_axis_stretch = 1.0 / np.sin(band.mean_el_rad)
                print(
                    "InitSubtract_sort_and_compute.py: Using y-axis stretch of:",
                    y_axis_stretch)
                y_axis_stretch_lowres = y_axis_stretch
                y_axis_stretch_highres = y_axis_stretch
        else:
            # use the caller-supplied y_axis_stretch (per the docstring it is
            # only ignored when calc_y_axis_stretch is True)
            y_axis_stretch_lowres = y_axis_stretch
            y_axis_stretch_highres = y_axis_stretch

        # Adjust sizes so that we get the correct ones below
        if not apply_y_axis_stretch_highres:
            y_axis_stretch_highres = 1.0
        if not apply_y_axis_stretch_lowres:
            y_axis_stretch_lowres = 1.0
        imsize_low_res /= y_axis_stretch_lowres
        imsize_high_res /= y_axis_stretch_highres

        imsize_high_res = band.get_optimum_size(int(imsize_high_res))
        imsize_high_res_stretch = band.get_optimum_size(
            int(imsize_high_res * y_axis_stretch_highres))
        high_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_res) + " " + str(imsize_high_res_stretch),
                False))

        imsize_low_res = band.get_optimum_size(int(imsize_low_res))
        imsize_low_res_stretch = band.get_optimum_size(
            int(imsize_low_res * y_axis_stretch_lowres))
        low_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_res) + " " + str(imsize_low_res_stretch),
                False))

        imsize_high_pad = band.get_optimum_size(
            int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(
            int(imsize_high_res * image_padding * y_axis_stretch_highres))
        high_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_pad) + " " + str(imsize_high_pad_stretch),
                False))

        imsize_low_pad = band.get_optimum_size(
            int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(
            int(imsize_low_res * image_padding * y_axis_stretch_lowres))
        low_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_pad) + " " + str(imsize_low_pad_stretch),
                False))

    print("InitSubtract_sort_and_compute.py: Computing averaging steps.")
    (freqstep, timestep) = bands[0].get_averaging_steps()
    (nwavelengths_high,
     nwavelengths_low) = bands[0].nwavelengths(cellsize_highres_deg,
                                               cellsize_lowres_deg, timestep)

    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    nwavelengths_high_map = DataMap([])
    nwavelengths_low_map = DataMap([])
    for index in range(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))
        nwavelengths_high_map.append(
            DataProduct('localhost', str(nwavelengths_high), False))
        nwavelengths_low_map.append(
            DataProduct('localhost', str(nwavelengths_low), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir,
                                     outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir,
                                    outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    nwavelengths_high_name = os.path.join(mapfile_dir,
                                          outmapname + '_nwavelengths_high')
    nwavelengths_high_map.save(nwavelengths_high_name)
    nwavelengths_low_name = os.path.join(mapfile_dir,
                                         outmapname + '_nwavelengths_low')
    nwavelengths_low_map.save(nwavelengths_low_name)

    result = {
        'groupmap': groupmapname,
        'single_mapfile': file_single_mapname,
        'high_size_mapfile': high_sizename,
        'low_size_mapfile': low_sizename,
        'high_padsize_mapfile': high_padsize_name,
        'low_padsize_mapfile': low_padsize_name,
        'freqstep': freqstepname,
        'timestep': timestepname,
        'nwavelengths_high_mapfile': nwavelengths_high_name,
        'nwavelengths_low_mapfile': nwavelengths_low_name
    }
    return result
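The elevation-dependent stretch computed above is just the cosecant of the mean elevation; for example (numpy imported as np):

    mean_el_rad = np.deg2rad(40.0)
    y_axis_stretch = 1.0 / np.sin(mean_el_rad)
    # y_axis_stretch ~= 1.556 at 40 degrees mean elevation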
Example #15
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None, NDPPPfill=True, target_path=None, stepname=None, nband_pad=0):
    """
    Sort a list of MS files into frequency groups for each time-step, checking for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean put
        all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output files
        into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files
    nband_pad : int, optional
        Add this number of bands of dummy data to the high-frequency end
        of the list

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """
    if not filename or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')

    # convert input to needed types
    ms_list = input2strlist(ms_input)
    NDPPPfill = input2bool(NDPPPfill)
    nband_pad = int(nband_pad)

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on",len(ms_list),"files"

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # use the slower but more reliable way:
        obstable = pt.table(ms, ack=False)
        timestamp = int(round(np.min(obstable.getcol('TIME'))))
        #obstable = pt.table(ms+'::OBSERVATION', ack=False)
        #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
        obstable.close()
        if timestamp in time_groups:
            time_groups[timestamp]['files'].append(ms)
        else:
            time_groups[timestamp] = {'files': [ ms ], 'basename' : os.path.splitext(ms)[0] }
    print "sort_times_into_freqGroups: found",len(time_groups),"time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()   # not needed now, but later
    first = True
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                freq_width = sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = freq
                minfreq = freq
                first = False
            else:
                assert freq_width == sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = max(maxfreq,freq)
                minfreq = min(minfreq,freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs,time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    #the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    maxfreq = maxfreq+freq_width/2.
    minfreq = minfreq-freq_width/2.
    numFiles = round((maxfreq-minfreq)/freq_width)
    if numSB > 0:
        ngroups = int(np.ceil(numFiles/numSB))
    else:
        ngroups = 1
        numSB = int(numFiles)
    hostID = 0
    for time in timestamps:
        (freq,fname) = time_groups[time]['freq_names'].pop(0)
        for fgroup in range(ngroups):
            files = []
            skip_this = True
            for fIdx in range(numSB):
                if freq > (fIdx+fgroup*numSB+1)*freq_width+minfreq:
                    files.append('dummy.ms')
                else:
                    files.append(fname)
                    if len(time_groups[time]['freq_names'])>0:
                        (freq,fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq,fname) = (1e12,'This_shouldn\'t_show_up')
                    skip_this = False

            for i in range(nband_pad):
                files.append('dummy.ms')

            filemap.append(MultiDataProduct(hosts[hostID%numhosts], files, skip_this))
            groupname = time_groups[time]['basename']+'_%Xt_%dg.ms'%(time,fgroup)
            if type(stepname) is str:
                groupname += stepname
            if type(target_path) is str:
                groupname = os.path.join(target_path,os.path.basename(groupname))
            groupmap.append(DataProduct(hosts[hostID%numhosts],groupname, skip_this))
            hostID += 1
        assert freq==1e12

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename+'_groups')
    groupmap.save(groupmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname}
    return result
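The group names embed the group's timestamp in hexadecimal ('%X') plus the group index; a worked example with a hypothetical timestamp and basename:

    time, fgroup = 4294967296, 0  # 2**32, which is 100000000 in hex
    groupname = 'L123456' + '_%Xt_%dg.ms' % (time, fgroup)
    # groupname == 'L123456_100000000t_0g.ms'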
Example #16
def main(ms_input,
         outmapname=None,
         mapfile_dir=None,
         cellsize_highres_deg=0.00208,
         cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5,
         fieldsize_lowres=6.5,
         image_padding=1.,
         y_axis_stretch=1.):
    """
    Sort a list of MS files into frequency bands, and compute image sizes and averaging steps

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHM's shall the high-res images be.
    fieldsize_lowres : float, optional
        How many FWHM's shall the low-res images be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much shall the y-axis be stretched or compressed. 

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles

    """
    if not outmapname or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: outmapname and mapfile_dir are needed!'
        )
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            # SkipIterator yields DataProduct items (see the tests above), so
            # take the .file attribute before doing string operations
            for item in map_in:
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_into_freqBands: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    print "InitSubtract_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    for band in bands:
        print "InitSubtract_sort_and_compute.py: Working on Band:", band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg, fieldsize_highres,
            fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(
            int(imsize_high_res * y_axis_stretch))
        high_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_res) + " " + str(imsize_high_res_stretch),
                False))
        imsize_low_res_stretch = band.get_optimum_size(
            int(imsize_low_res * y_axis_stretch))
        low_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_res) + " " + str(imsize_low_res_stretch),
                False))
        imsize_high_pad = band.get_optimum_size(
            int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(
            int(imsize_high_res * image_padding * y_axis_stretch))
        high_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_pad) + " " + str(imsize_high_pad_stretch),
                False))
        imsize_low_pad = band.get_optimum_size(
            int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(
            int(imsize_low_res * image_padding * y_axis_stretch))
        low_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_pad) + " " + str(imsize_low_pad_stretch),
                False))

    print "InitSubtract_sort_and_compute.py: Computing averaging steps."
    (freqstep, timestep) = bands[0].get_averaging_steps()
    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir,
                                     outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir,
                                    outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    result = {
        'groupmap': groupmapname,
        'single_mapfile': file_single_mapname,
        'high_size_mapfile': high_sizename,
        'low_size_mapfile': low_sizename,
        'high_padsize_mapfile': high_padsize_name,
        'low_padsize_mapfile': low_padsize_name,
        'freqstep': freqstepname,
        'timestep': timestepname
    }
    return result
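
# Aside: a minimal sketch of how the returned mapfiles might be consumed
# downstream (the path and mapfile name are hypothetical; assumes lofarpipe
# is available). Each size-map entry stores "xsize ysize" as a single string.
from lofarpipe.support.data_map import DataMap

size_map = DataMap.load('/path/to/mapfile_dir/outmap_high_size')
for item in size_map:
    xsize, ysize = [int(s) for s in item.file.split()]
    print item.host, xsize, ysize
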
Ejemplo n.º 17
0
    def test_run_dppp(self):
        """
        This unittest borders on a functional test:
        the framework is mocked by using a mockable function
        """
        working_dir = ""

        time_slice_dir_path = tempfile.mkdtemp(suffix=".%s" %
                                               (os.path.basename(__file__), ))
        slices_per_image = 2
        input_map = [("lce072", "test_file_path1"),
                     ("lce072", "test_file_path2"),
                     ("lce072", "test_file_path3"),
                     ("lce072", "test_file_path4")]

        input_datamap = DataMap()
        for entry in input_map:
            input_datamap.append(entry)

        subbands_per_image = 2
        collected_ms_dir_name = ""
        fp = open(os.path.join(self.test_path, "parset"), 'w')
        fp.write("key=value\n")
        fp.close()
        parset = os.path.join(self.test_path, "parset")
        ndppp = ""
        init_script = ""

        sut = ImagerPrepareTestWrapper()
        output = sut._run_dppp(working_dir, time_slice_dir_path,
                               slices_per_image, input_datamap,
                               subbands_per_image, collected_ms_dir_name,
                               parset, ndppp)

        # The output should contain two time-slice MSs prefixed with time_slice_dir_path
        expected_output = [
            os.path.join(time_slice_dir_path, "time_slice_0.dppp.ms"),
            os.path.join(time_slice_dir_path, "time_slice_1.dppp.ms")
        ]
        self.assertTrue(
            output == expected_output,
            "_run_dppp did not return timeslice ms: {0} !=  {1}".format(
                output, expected_output))

        # Two parsets should be written in the time_slice_dir_path
        parset_1_content_expected = [
            ('replace', 'uselogger', 'True'),
            ('replace', 'avg1.freqstep', '4'),
            ('replace', 'msin', "['test_file_path1', 'test_file_path2']"),
            ('replace', 'msout', '{0}'.format(
                os.path.join(time_slice_dir_path, "time_slice_0.dppp.ms")))
        ]

        parset_1_output = eval(open(os.path.join(time_slice_dir_path, \
                "time_slice_0.dppp.ms.ndppp.par")).read())
        self.assertTrue(
            parset_1_output == parset_1_content_expected,
            "\n{0} != \n{1}".format(parset_1_output,
                                    parset_1_content_expected))

        # Two parsets should be written in the time_slice_dir_path
        parset_2_content_expected = [
            ('replace', 'uselogger', 'True'),
            ('replace', 'avg1.freqstep', '4'),
            ('replace', 'msin', "['test_file_path3', 'test_file_path4']"),
            ('replace', 'msout', '{0}'.format(
                os.path.join(time_slice_dir_path, "time_slice_1.dppp.ms")))
        ]

        parset_2_output = eval(open(os.path.join(time_slice_dir_path, \
                "time_slice_1.dppp.ms.ndppp.par")).read())
        self.assertTrue(
            parset_2_output == parset_2_content_expected,
            "\n{0} != \n{1}".format(parset_2_output,
                                    parset_2_content_expected))

        shutil.rmtree(time_slice_dir_path)
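
# Aside (a sketch, not part of the test): ast.literal_eval is a safer
# drop-in for eval() when a file contains only Python literals, as the
# parset dumps checked above do. The path below is hypothetical.
import ast

with open('time_slice_0.dppp.ms.ndppp.par') as fp:
    parset_output = ast.literal_eval(fp.read())
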
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with the MSs spread across the full bandwidth

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    num: int, optional
        Number of frequencies in output mapfile

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'num' in kwargs:
        num = int(kwargs['num'])
    else:
        num = 6
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []  # also reset the private backing list, to be safe across DataMap versions

    # do not re-run if we already ran and the input files have since been deleted
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print 'PipelineStep_selectDistFreqs: Not re-running because the output file exists, but the input files don\'t!'
        return {'mapfile': fileid}

    # sort into frequency groups
    freq_groups = {}
    hosts = []
    for item in map_in:
        # Get the frequency info from the MS file
        sw = pt.table(item.file+'::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if freq in freq_groups:
            freq_groups[freq].append(item.file)
        else:
            freq_groups[freq] = [item.file]
        if not item.host in hosts:
            hosts.append(item.host)

    # select frequencies
    freqs =  freq_groups.keys()
    freqs.sort()
    num_freqs = len(freqs)
    if num > num_freqs:
        print 'PipelineStep_selectDistFreqs: fewer than %d frequency groups found, continuig with %d groups.'%(num, num_freqs)
        num = num_freqs
    dist_ind = get_distributed_indices(0, num_freqs-1, num)
    selfreqs = [freqs[ind] for ind in dist_ind]
    if len(selfreqs) < 1:
        print "PipelineStep_selectDistFreqs: Selected fewer than one frequency band."
        raise ValueError("Selected fewer than one frequency band.")

    all_files = []
    for selfreq in selfreqs:
        all_files.extend(freq_groups[selfreq])

    # cycle the hosts-list until it is at least as long as the file list
    for i in range(len(all_files) - len(hosts)):
        hosts.append(hosts[i])

    # fill the output-map
    for (host, fname) in zip(hosts, all_files):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    del(map_in)
    del(map_out)
    result = {'mapfile': fileid}

    return result
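
# Aside: get_distributed_indices() is defined elsewhere in this module. A
# plausible minimal implementation (an assumption, not the original) picks
# "num" roughly evenly spaced integer indices between start and end inclusive:
import numpy as np

def get_distributed_indices(start, end, num):
    # evenly spaced positions, rounded to the nearest integer index
    return [int(round(x)) for x in np.linspace(start, end, num)]
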
Ejemplo n.º 19
0
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None, NDPPPfill=True, target_path=None, stepname=None,
         mergeLastGroup=False, truncateLastSBs=True):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional 
        How many files should go into one frequency group. Values <= 0 mean put 
        all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output files 
        into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True:
          not allowed
        mergeLastGroup = True, truncateLastSBs = False:
          put the files from the last group that doesn't have SBperGroup subbands
          into the second-to-last group (which will then have more than SBperGroup entries).
        mergeLastGroup = False, truncateLastSBs = True:
          ignore the last files that don't make up a full group (not all files are used).
        mergeLastGroup = False, truncateLastSBs = False:
          keep the incomplete last group, or - with NDPPPfill=True - fill the
          last group with dummies.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """
    if not filename or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError('sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!')
    if mergeLastGroup:
        raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # iterating a DataMap yields DataProduct objects; the
                # filename lives in the 'file' attribute
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_times_into_freqGroups: type of "ms_input" unknown!')

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on",len(ms_list),"files"

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # use the slower but more reliable way:
        obstable = pt.table(ms, ack=False)
        timestamp = int(round(np.min(obstable.getcol('TIME'))))
        #obstable = pt.table(ms+'::OBSERVATION', ack=False)
        #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
        obstable.close()
        if timestamp in time_groups:
            time_groups[timestamp]['files'].append(ms)
        else:
            time_groups[timestamp] = {'files': [ ms ], 'basename' : os.path.splitext(ms)[0] }
    print "sort_times_into_freqGroups: found",len(time_groups),"time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()   # not needed now, but later
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]            
            if first:
                freq_width = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                maxfreq = freq
                minfreq = freq
                first = False
            else:
                assert freq_width == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                maxfreq = max(maxfreq,freq)
                minfreq = min(minfreq,freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs, time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    # the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    maxfreq = maxfreq + freq_width / 2.
    minfreq = minfreq - freq_width / 2.
    numFiles = round((maxfreq - minfreq) / freq_width)
    if numSB > 0:
        if truncateLastSBs:
            ngroups = int(np.floor(numFiles/numSB))
        else:
            ngroups = int(np.ceil(numFiles/numSB))
    else:
        ngroups = 1
        numSB = int(numFiles)
    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        for fgroup in range(ngroups):
            files = []
            skip_this = True
            for fIdx in range(numSB):
                if freq > (fIdx + fgroup * numSB + 1) * freq_width + minfreq:
                    # this frequency slot is empty: pad with a dummy entry
                    if NDPPPfill:
                        files.append('dummy.ms')
                else:
                    files.append(fname)
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        # sentinel: forces the remaining slots to be dummies
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
                    skip_this = False
            filemap.append(MultiDataProduct(hosts[hostID % numhosts], files, skip_this))
            groupname = time_groups[time]['basename'] + '_%Xt_%dg.ms' % (time, fgroup)
            if type(stepname) is str:
                groupname += stepname
            if type(target_path) is str:
                groupname = os.path.join(target_path, os.path.basename(groupname))
            groupmap.append(DataProduct(hosts[hostID % numhosts], groupname, skip_this))
            hostID += 1  # round-robin the groups over the available hosts
        assert freq == 1e12  # all freq_names for this time-group must have been consumed

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename+'_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemapname, nchans)
    flagmapname = os.path.join(mapfile_dir, filename+'_flags')
    flagmap.save(flagmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname, 'flagmapfile': flagmapname}
    return result
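
# Aside: a worked example (made-up numbers) of the group-count arithmetic
# above. With 10 subband slots across the band and numSB = 4:
#   truncateLastSBs=True  -> ngroups = floor(10/4) = 2 (trailing files unused)
#   truncateLastSBs=False -> ngroups = ceil(10/4)  = 3 (last group padded with
#                            dummies when NDPPPfill=True)
import numpy as np
numFiles, numSB = 10.0, 4
print int(np.floor(numFiles / numSB)), int(np.ceil(numFiles / numSB))   # -> 2 3
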
Ejemplo n.º 20
0
def main(ms_input,
         filename=None,
         mapfile_dir=None,
         numSB=-1,
         enforce_numSB=True,
         hosts=None,
         NDPPPfill=True,
         target_path=None,
         stepname=None,
         nband_pad=0,
         make_dummy_files=False,
         skip_flagged_groups=True):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean put
        all files of the same time-step into one group.
        default = -1
    enforce_numSB : bool, optional
        If True and numSB > 0, then add flagged dummy data to ensure that the
        last block has exactly numSB files. If False, then the last block can
        have fewer files (as long as there are no gaps in frequency)
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output files
        into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files
    nband_pad : int, optional
        Add this number of bands of dummy data to the high-frequency end
        of the list
    make_dummy_files : bool, optional
        If True, make MS files for all dummy data
    skip_flagged_groups : bool, optional
        If True, groups that are missing have their skip flag set to True. If
        False, these groups are filled with dummy data and their skip flag set
        to False

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """
    if not filename or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: filename and mapfile_dir are needed!')

    # convert input to needed types
    ms_list = input2strlist(ms_input)
    NDPPPfill = input2bool(NDPPPfill)
    numSB = int(numSB)
    nband_pad = int(nband_pad)
    enforce_numSB = input2bool(enforce_numSB)
    make_dummy_files = input2bool(make_dummy_files)
    skip_flagged_groups = input2bool(skip_flagged_groups)

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on", len(ms_list), "files"

    dirname = os.path.dirname(ms_list[0])

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # use the slower but more reliable way:
        obstable = pt.table(ms, ack=False)
        timestamp = int(round(np.min(obstable.getcol('TIME'))))
        #obstable = pt.table(ms+'::OBSERVATION', ack=False)
        #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
        obstable.close()
        if timestamp in time_groups:
            time_groups[timestamp]['files'].append(ms)
        else:
            time_groups[timestamp] = {
                'files': [ms],
                'basename': os.path.splitext(ms)[0]
            }
    print "sort_times_into_freqGroups: found", len(time_groups), "time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()  # not needed now, but later
    first = True
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                freq_width = sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = freq
                minfreq = freq
                first = False
            else:
                assert freq_width == sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = max(maxfreq, freq)
                minfreq = min(minfreq, freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs,
                                              time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    #the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    maxfreq = maxfreq + freq_width / 2.
    minfreq = minfreq - freq_width / 2.
    numFiles = round((maxfreq - minfreq) / freq_width)
    if numSB > 0:
        ngroups = int(np.ceil(numFiles / numSB))
    else:
        ngroups = 1
        numSB = int(numFiles)
    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        nbands = 0
        all_group_files = []
        for fgroup in range(ngroups):
            files = []
            skip_this = True
            for fIdx in range(numSB):
                thisfreq = (fIdx + fgroup * numSB + 1) * freq_width + minfreq
                if freq > thisfreq:
                    if enforce_numSB or thisfreq - freq_width / 2. < maxfreq:
                        files.append('dummy.ms')
                else:
                    files.append(fname)
                    skip_this = False
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        # Set freq to high value to pad the rest of the group
                        # with dummy data
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')

            if fgroup == ngroups - 1:
                # Append dummy data to last frequency group only
                for i in range(nband_pad):
                    files.append('dummy.ms')
            if not skip_this:
                nbands += len(files)

            if make_dummy_files:
                for i, ms in enumerate(files):
                    if ms == 'dummy.ms':
                        # Replace dummy.ms in files list with new filename
                        files[i] = os.path.join(
                            dirname, '{0}_{1}.ms'.format(
                                os.path.splitext(ms)[0],
                                uuid.uuid4().urn.split('-')[-1]))

            if not skip_flagged_groups:
                # Don't set skip flag to True, even if group is missing all files
                if not make_dummy_files:
                    raise ValueError(
                        'skip_flagged_groups cannot be False if make_dummy_files is also False'
                    )
                else:
                    skip_this = False

            filemap.append(
                MultiDataProduct(hosts[hostID % numhosts], files, skip_this))
            groupname = time_groups[time]['basename'] + '_%Xt_%dg.ms' % (
                time, fgroup)
            if type(stepname) is str:
                groupname += stepname
            if type(target_path) is str:
                groupname = os.path.join(target_path,
                                         os.path.basename(groupname))
            groupmap.append(
                DataProduct(hosts[hostID % numhosts], groupname, skip_this))
            hostID += 1
            all_group_files.extend(files)

        assert freq == 1e12

        if make_dummy_files:
            # Find at least one existing MS for this timestamp to use as a
            # template (assumes each timestamp retains at least one real MS)
            ms_exists = None
            for ms in all_group_files:
                if os.path.exists(ms):
                    ms_exists = ms
                    sw = pt.table('{}::SPECTRAL_WINDOW'.format(ms))
                    ms_exists_ref_freq = sw.getcol('REF_FREQUENCY')[0]
                    sw.close()
                    break

            for i, ms in enumerate(all_group_files):
                if 'dummy' in ms:
                    # Alter SPECTRAL_WINDOW subtable as appropriate to fill gap
                    ref_freq = minfreq + freq_width * (i + 0.5)
                    pt.tableutil.tablecopy(ms_exists, ms)
                    sw = pt.table('{}::SPECTRAL_WINDOW'.format(ms),
                                  readonly=False)
                    chan_freq = sw.getcol(
                        'CHAN_FREQ') - ms_exists_ref_freq + ref_freq
                    sw.putcol('REF_FREQUENCY', ref_freq)
                    sw.putcol('CHAN_FREQ', chan_freq)
                    sw.close()

                    # Flag all data
                    t = pt.table(ms, readonly=False)
                    t.putcol('FLAG_ROW', np.ones(len(t), dtype=bool))
                    f = t.getcol('FLAG')
                    t.putcol('FLAG', np.ones(f.shape, dtype=bool))
                    t.close()

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename + '_groups')
    groupmap.save(groupmapname)
    result = {
        'mapfile': filemapname,
        'groupmapfile': groupmapname,
        'nbands': nbands
    }
    return result
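
# Aside: a quick check (illustrative numbers) of the dummy reference-frequency
# formula above -- slot i gets the centre frequency of the i-th freq_width-wide
# slot above minfreq.
minfreq, freq_width = 120.0e6, 0.195e6
for i in range(3):
    print i, minfreq + freq_width * (i + 0.5)
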
def main(ms_input, outmapname=None, mapfile_dir=None, cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1., y_axis_stretch=1.):
    """
    Group a list of MS files into frequency bands and compute image sizes and averaging steps

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHM's shall the high-res images be.
    fieldsize_lowres : float, optional
        How many FWHM's shall the low-res images be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much shall the y-axis be stretched or compressed. 

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles

    """
    if not outmapname or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: outmapname and mapfile_dir are needed!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # iterating a DataMap yields DataProduct objects; the
                # filename lives in the 'file' attribute
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_into_freqBands: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    print "InitSubtract_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        bands.append( Band(msdict[MSkey]) )

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    for band in bands:
        print "InitSubtract_sort_and_compute.py: Working on Band:",band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(cellsize_highres_deg, cellsize_lowres_deg,
                                                                 fieldsize_highres, fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(int(imsize_high_res*y_axis_stretch))
        high_size_map.append(DataProduct('localhost', str(imsize_high_res)+" "+str(imsize_high_res_stretch), False))
        imsize_low_res_stretch = band.get_optimum_size(int(imsize_low_res*y_axis_stretch))
        low_size_map.append(DataProduct('localhost', str(imsize_low_res)+" "+str(imsize_low_res_stretch), False))
        imsize_high_pad = band.get_optimum_size(int(imsize_high_res*image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(int(imsize_high_res*image_padding*y_axis_stretch))
        high_paddedsize_map.append(DataProduct('localhost', str(imsize_high_pad)+" "+str(imsize_high_pad_stretch), False))
        imsize_low_pad = band.get_optimum_size(int(imsize_low_res*image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(int(imsize_low_res*image_padding*y_axis_stretch))
        low_paddedsize_map.append(DataProduct('localhost', str(imsize_low_pad)+" "+str(imsize_low_pad_stretch), False))

    print "InitSubtract_sort_and_compute.py: Computing averaging steps."
    (freqstep, timestep) = bands[0].get_averaging_steps()
    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([]) 
    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))
    
    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname+'_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname+'_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname+'_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir, outmapname+'_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir, outmapname+'_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    freqstepname = os.path.join(mapfile_dir, outmapname+'_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname+'_timestep')
    timestep_map.save(timestepname)
    result = {'groupmap': groupmapname, 'single_mapfile' : file_single_mapname,
              'high_size_mapfile' : high_sizename, 'low_size_mapfile' : low_sizename,
              'high_padsize_mapfile' : high_padsize_name, 'low_padsize_mapfile' : low_padsize_name,
              'freqstep' : freqstepname, 'timestep' : timestepname}
    return result
def plugin_main(args, **kwargs):
    """
    Matches a mapfile with one in which the MSs are distributed

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dist : str
        Filename of mapfile with distributed MS files
    mapfile_full : str
        Filename of mapfile with all MS files from which distributed one was
        made
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    num: int, optional
        Number of frequencies in output mapfile

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dist = kwargs['mapfile_dist']
    mapfile_full = kwargs['mapfile_full']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_dist = DataMap.load(mapfile_dist)
    map_dist.iterator = DataMap.SkipIterator
    map_full = DataMap.load(mapfile_full)
    map_full.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # do not re-run if we already ran and the input files have since been deleted
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print 'PipelineStep_matchDistFreqs: Not re-running because output file exists, but input files don\'t!'
        return  {'mapfile': fileid}

    # find matches (assumes mapfile_in and mapfile_full are aligned entry-by-entry)
    all_files_hosts = [(item.file, item.host) for item in map_full]
    dist_files = [item.file for item in map_dist]
    for i, (f, h) in enumerate(all_files_hosts):
        if f in dist_files:
            map_out.append(DataProduct(h, map_in[i].file, False))

    map_out.save(fileid)
    del(map_in)
    del(map_out)
    result = {'mapfile': fileid}

    return result
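
# Aside: a toy illustration (made-up values) of the matching step above --
# keep the i-th entry of the "in" map whenever the i-th file of the full map
# also appears in the distributed map.
full = [('f1', 'h1'), ('f2', 'h2'), ('f3', 'h1')]
dist = ['f1', 'f3']
kept = [(h, 'in_%d' % i) for i, (f, h) in enumerate(full) if f in dist]
print kept   # -> [('h1', 'in_0'), ('h1', 'in_2')]
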
Ejemplo n.º 23
0
    def test_run_dppp(self):
        """
        This unittest borders on a functional test:
        the framework is mocked by using a mockable function
        """
        working_dir = ""

        time_slice_dir_path = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
        slices_per_image = 2
        input_map = [("lce072", "test_file_path1"),
                         ("lce072", "test_file_path2"),
                         ("lce072", "test_file_path3"),
                         ("lce072", "test_file_path4")]

        input_datamap = DataMap()
        for entry in input_map:
            input_datamap.append(entry)

        subbands_per_image = 2
        collected_ms_dir_name = ""
        fp = open(os.path.join(self.test_path, "parset"), 'w')
        fp.write("key=value\n")
        fp.close()
        parset = os.path.join(self.test_path, "parset")
        ndppp = ""
        init_script = ""

        sut = ImagerPrepareTestWrapper()
        output = sut._run_dppp(working_dir, time_slice_dir_path, slices_per_image,
                  input_datamap, subbands_per_image, collected_ms_dir_name, parset,
                  ndppp)

        # The output should contain two time-slice MSs prefixed with time_slice_dir_path
        expected_output = [os.path.join(time_slice_dir_path, "time_slice_0.dppp.ms"),
                           os.path.join(time_slice_dir_path, "time_slice_1.dppp.ms")]
        self.assertTrue(output == expected_output,
            "_run_dppp did not return timeslice ms: {0} !=  {1}".format(output,
                                 expected_output))

        # Two parsets should be written in the time_slice_dir_path
        parset_1_content_expected = [('replace', 'uselogger', 'True'),
                                     ('replace', 'avg1.freqstep', '4'),
                 ('replace', 'msin', "['test_file_path1', 'test_file_path2']"),
                 ('replace', 'msout', '{0}'.format(
                    os.path.join(time_slice_dir_path, "time_slice_0.dppp.ms")))]

        parset_1_output = eval(open(os.path.join(time_slice_dir_path, \
                "time_slice_0.dppp.ms.ndppp.par")).read())
        self.assertTrue(parset_1_output == parset_1_content_expected,
                "\n{0} != \n{1}".format(parset_1_output, parset_1_content_expected))

        # Two parsets should be written in the time_slice_dir_path
        parset_2_content_expected = [('replace', 'uselogger', 'True'),
                                     ('replace', 'avg1.freqstep', '4'),
                 ('replace', 'msin', "['test_file_path3', 'test_file_path4']"),
                 ('replace', 'msout', '{0}'.format(
                    os.path.join(time_slice_dir_path, "time_slice_1.dppp.ms")))]

        parset_2_output = eval(open(os.path.join(time_slice_dir_path, \
                "time_slice_1.dppp.ms.ndppp.par")).read())
        self.assertTrue(parset_2_output == parset_2_content_expected,
                "\n{0} != \n{1}".format(parset_2_output, parset_2_content_expected))
        
        shutil.rmtree(time_slice_dir_path)
Ejemplo n.º 24
0
def plugin_main(args, **kwargs):
    """
    Matches a mapfile with one in which the MSs are distributed

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dist : str
        Filename of mapfile with distributed MS files
    mapfile_full : str
        Filename of mapfile with all MS files from which distributed one was
        made
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    num: int, optional
        Number of frequencies in output mapfile

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dist = kwargs['mapfile_dist']
    mapfile_full = kwargs['mapfile_full']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_dist = DataMap.load(mapfile_dist)
    map_dist.iterator = DataMap.SkipIterator
    map_full = DataMap.load(mapfile_full)
    map_full.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # do not re-run if we already ran and the input files have since been deleted
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print 'PipelineStep_matchDistFreqs: Not re-running because output file exists, but input files don\'t!'
        return {'mapfile': fileid}

    # find matches (assumes mapfile_in and mapfile_full are aligned entry-by-entry)
    all_files_hosts = [(item.file, item.host) for item in map_full]
    dist_files = [item.file for item in map_dist]
    for i, (f, h) in enumerate(all_files_hosts):
        if f in dist_files:
            map_out.append(DataProduct(h, map_in[i].file, False))

    map_out.save(fileid)
    del (map_in)
    del (map_out)
    result = {'mapfile': fileid}

    return result
Ejemplo n.º 25
0
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with the MSs spread across the full bandwidth

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    num: int, optional
        Number of frequencies in output mapfile

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'num' in kwargs:
        num = int(kwargs['num'])
    else:
        num = 6
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # do not re-run if we already ran and the input files have since been deleted
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print 'PipelineStep_selectDistFreqs: Not re-running because output file exists, but input files don\'t!'
        return {'mapfile': fileid}

    #sort into frequency groups
    freq_groups = {}
    hosts = []
    for item in map_in:
        # Get the frequency info from the MS file
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if freq in freq_groups:
            freq_groups[freq].append(item.file)
        else:
            freq_groups[freq] = [item.file]
        if not item.host in hosts:
            hosts.append(item.host)

    # select frequencies
    freqs = freq_groups.keys()
    freqs.sort()
    num_freqs = len(freqs)
    if num > num_freqs:
        print 'PipelineStep_selectDistFreqs: fewer than %d frequency groups found, continuing with %d groups.' % (
            num, num_freqs)
        num = num_freqs
    dist_ind = get_distributed_indices(0, num_freqs - 1, num)
    selfreqs = [freqs[ind] for ind in dist_ind]
    if len(selfreqs) < 1:
        print "PipelineStep_selectDistFreqs: Selected fewer than one frequency band."
        raise ValueError("Selected fewer than one frequency band.")

    all_files = []
    for selfreq in selfreqs:
        all_files.extend(freq_groups[selfreq])

    # extend the hosts-list
    for i in range(len(all_files) - len(hosts)):
        hosts.append(hosts[i])

    # fill the output-map
    for (host, fname) in zip(hosts, all_files):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    del (map_in)
    del (map_out)
    result = {'mapfile': fileid}

    return result
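
# Aside: a toy illustration of the host round-robin extension used above --
# appending hosts[i] while iterating effectively cycles the original list.
hosts = ['node1', 'node2']
all_files = ['a', 'b', 'c', 'd', 'e']
for i in range(len(all_files) - len(hosts)):
    hosts.append(hosts[i])
print hosts   # -> ['node1', 'node2', 'node1', 'node2', 'node1']
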
def main(ms_input, outmapname=None, mapfile_dir=None, cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1., y_axis_stretch=1.):
    """
    Group a list of MS files into frequency bands and compute image sizes and averaging steps

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHM's shall the high-res images be.
    fieldsize_lowres : float, optional
        How many FWHM's shall the low-res images be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much shall the y-axis be stretched or compressed. 

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles

    """
    
    if not outmapname or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: outmapname and mapfile_dir are needed!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # iterating a DataMap yields DataProduct objects; the
                # filename lives in the 'file' attribute
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_into_freqBands: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    bandfreqs = []
    print "InitSubtract_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        # construct each Band once and reuse it for the frequency list
        band = Band(msdict[MSkey])
        bands.append(band)
        bandfreqs.append(band.freq)
        
    ## min freq gives largest image size for deep image
    bandfreqs = np.array(bandfreqs)
    minfreq = np.min(bandfreqs)
    bandmin = np.argmin(bandfreqs)
    ## need to map the output from wsclean channels to the right frequencies
    ## just put the bands in the right freq order
    wsclean_channum = np.argsort(bandfreqs)
    bands = np.array(bands)
    bands = bands[wsclean_channum]
        
    #minfreq = 1e9
    #for ib, band in enumerate(bands):
        #if band.freq < minfreq:
            #minfreq = band.freq
            #bandmin = ib

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    nbands = len(bands)
    if nbands > 8:
        nchansout_clean1 = np.int(nbands/4)
    elif nbands > 4:
        nchansout_clean1 = np.int(nbands/2)
    else:
        nchansout_clean1 = np.int(nbands)
        
    (freqstep, timestep) = bands[0].get_averaging_steps()
    # 'self' does not exist at module level; the integration time per sample
    # is assumed to be stored on the Band by get_averaging_steps()
    int_time_sec = bands[0].timestep_sec * timestep
    nwavelengths_high = bands[0].get_nwavelengths(cellsize_highres_deg, int_time_sec)
    nwavelengths_low = bands[0].get_nwavelengths(cellsize_lowres_deg, int_time_sec)
    
    for band in bands:
        print "InitSubtract_sort_and_compute.py: Working on Band:",band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(cellsize_highres_deg, cellsize_lowres_deg,
                                                                 fieldsize_highres, fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(int(imsize_high_res*y_axis_stretch))
        high_size_map.append(DataProduct('localhost', str(imsize_high_res)+" "+str(imsize_high_res_stretch), False))
        imsize_low_res_stretch = band.get_optimum_size(int(imsize_low_res*y_axis_stretch))
        low_size_map.append(DataProduct('localhost', str(imsize_low_res)+" "+str(imsize_low_res_stretch), False))
        imsize_high_pad = band.get_optimum_size(int(imsize_high_res*image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(int(imsize_high_res*image_padding*y_axis_stretch))
        high_paddedsize_map.append(DataProduct('localhost', str(imsize_high_pad)+" "+str(imsize_high_pad_stretch), False))
        imsize_low_pad = band.get_optimum_size(int(imsize_low_res*image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(int(imsize_low_res*image_padding*y_axis_stretch))
        low_paddedsize_map.append(DataProduct('localhost', str(imsize_low_pad)+" "+str(imsize_low_pad_stretch), False))
        
        print band.freq / 1e6, imsize_high_res, imsize_high_res_stretch, imsize_high_pad, imsize_high_pad_stretch, imsize_low_res, imsize_low_res_stretch, imsize_low_pad, imsize_low_pad_stretch, nwavelengths_high, nwavelengths_low

        if band.freq == minfreq:
            deep_imsize_high_res = imsize_high_res
            deep_imsize_high_res_stretch = imsize_high_res_stretch
            deep_imsize_high_pad = imsize_high_pad
            deep_imsize_high_pad_stretch = imsize_high_pad_stretch
            deep_imsize_low_res = imsize_low_res
            deep_imsize_low_res_stretch = imsize_low_res_stretch
            deep_imsize_low_pad = imsize_low_pad
            deep_imsize_low_pad_stretch = imsize_low_pad_stretch
            
            print '*', band.freq / 1e6, imsize_high_res, imsize_high_res_stretch, imsize_high_pad, imsize_high_pad_stretch, imsize_low_res, imsize_low_res_stretch, imsize_low_pad, imsize_low_pad_stretch

    deep_high_size_map = DataMap([DataProduct('localhost', str(deep_imsize_high_res)+" "+str(deep_imsize_high_res_stretch), False)])
    deep_high_paddedsize_map = DataMap([DataProduct('localhost', str(deep_imsize_high_pad)+" "+str(deep_imsize_high_pad_stretch), False)])
    deep_low_size_map = DataMap([DataProduct('localhost', str(deep_imsize_low_res)+" "+str(deep_imsize_low_res_stretch), False)])
    deep_low_paddedsize_map = DataMap([DataProduct('localhost', str(deep_imsize_low_pad)+" "+str(deep_imsize_low_pad_stretch), False)])
    nbands_map = DataMap([DataProduct('localhost', str(nbands), False)])
    nchansout_clean1_map = DataMap([DataProduct('localhost', str(nchansout_clean1), False)])
    print "InitSubtract_sort_and_compute.py: Computing averaging steps."
    
    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([]) 
    nwavelengths_high_map = DataMap([])
    nwavelengths_low_map = DataMap([])

    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))
    # note: the nwavelengths maps get a single entry each (they are per-band
    # constants), unlike the per-file freqstep/timestep maps above
    nwavelengths_high_map.append(DataProduct('localhost', str(nwavelengths_high), False))
    nwavelengths_low_map.append(DataProduct('localhost', str(nwavelengths_low), False))
    
    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname+'_single')
    file_single_map.save(file_single_mapname)
    
    high_sizename = os.path.join(mapfile_dir, outmapname+'_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname+'_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir, outmapname+'_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir, outmapname+'_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    
    deep_high_sizename = os.path.join(mapfile_dir, outmapname+'_deep_high_size')
    deep_high_size_map.save(deep_high_sizename)
    deep_low_sizename = os.path.join(mapfile_dir, outmapname+'_deep_low_size')
    deep_low_size_map.save(deep_low_sizename)
    deep_high_padsize_name = os.path.join(mapfile_dir, outmapname+'_deep_high_padded_size')
    deep_high_paddedsize_map.save(deep_high_padsize_name)
    deep_low_padsize_name = os.path.join(mapfile_dir, outmapname+'_deep_low_padded_size')
    deep_low_paddedsize_map.save(deep_low_padsize_name)
    
    nbands_mapname = os.path.join(mapfile_dir, outmapname+'_nbands')
    nbands_map.save(nbands_mapname)
    nchansout_clean1_mapname = os.path.join(mapfile_dir, outmapname+'_nchansout_clean1')
    nchansout_clean1_map.save(nchansout_clean1_mapname)
    
    freqstepname = os.path.join(mapfile_dir, outmapname+'_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname+'_timestep')
    timestep_map.save(timestepname)
    nwavelengths_high_name = os.path.join(mapfile_dir, outmapname+'_nwavelengths_high')
    nwavelengths_high_map.save(nwavelengths_high_name)
    nwavelengths_low_name = os.path.join(mapfile_dir, outmapname+'_nwavelengths_low')
    nwavelengths_low_map.save(nwavelengths_low_name)
    
    result = {'groupmap': groupmapname, 'single_mapfile' : file_single_mapname,
              'high_size_mapfile' : high_sizename, 'low_size_mapfile' : low_sizename,
              'high_padsize_mapfile' : high_padsize_name, 'low_padsize_mapfile' : low_padsize_name,
              'deep_high_size_mapfile' : deep_high_sizename, 'deep_low_size_mapfile' : deep_low_sizename,
              'deep_high_padsize_mapfile' : deep_high_padsize_name, 'deep_low_padsize_mapfile' : deep_low_padsize_name,
              'nbands' : nbands_mapname, 'nchansout_clean1' : nchansout_clean1_mapname,
              'freqstep' : freqstepname, 'timestep' : timestepname, 'nwavelengths_high_mapfile': nwavelengths_high_name, 
              'nwavelengths_low_mapfile': nwavelengths_low_name}
    return result
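
# Aside: the channels-out rule used above, restated with illustrative band
# counts (integer division, as in the original):
for nbands in (12, 6, 3):
    if nbands > 8:
        print nbands, nbands // 4   # 12 -> 3
    elif nbands > 4:
        print nbands, nbands // 2   # 6 -> 3
    else:
        print nbands, nbands        # 3 -> 3
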
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with only the MSs at the middle frequency.
    Quite a bit of a hack for a plugin, but right now I don't care.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    index: int, optional
        Index of the frequency band to use.

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()

    # do not re-run if we already ran and the input files have since been deleted
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print 'PipelineStep_selectMiddleFreq: Not re-running because the output file exists, but the input files don\'t!'
        return {'mapfile': fileid}

    # sort into frequency groups
    freq_groups = {}
    hosts = []
    for item in map_in:
        # Get the frequency info
        sw = pt.table(item.file+'::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if freq in freq_groups:
            freq_groups[freq].append(item.file)
        else:
            freq_groups[freq] = [item.file]
        if not item.host in hosts:
            hosts.append(item.host)
    # find maximum number of files per frequency-group
    maxfiles = max([len(group) for group in freq_groups.values()])
    # find the center-frequency
    freqs = freq_groups.keys()
    freqs.sort()
    selfreq = freqs[len(freqs) / 2]
    if 'index' in kwargs:
        # 'index' is the index of the frequency band to use (see docstring),
        # not a frequency value
        selfreq = freqs[int(kwargs['index'])]
    else:
        # make sure that the chosen frequency has maxfiles entries
        while len(freq_groups[selfreq]) < maxfiles:
            freqs.remove(selfreq)
            selfreq = freqs[len(freqs) / 2]
    # cycle the hosts-list until it is at least as long as the file list
    for i in range(len(freq_groups[selfreq]) - len(hosts)):
        hosts.append(hosts[i])
    # fill the output-map
    for (host, fname) in zip(hosts, freq_groups[selfreq]):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
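
# Aside: a toy illustration (made-up values) of the middle-frequency pick
# above; len(freqs) / 2 is integer division under Python 2.
freqs = [120, 124, 128, 132, 136]
print freqs[len(freqs) / 2]   # -> 128
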
Ejemplo n.º 28
0
def main(ms_input,
         filename=None,
         mapfile_dir=None,
         numSB=-1,
         hosts=None,
         NDPPPfill=True,
         target_path=None,
         stepname=None,
         mergeLastGroup=False,
         truncateLastSBs=True,
         firstSB=None):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional 
        How many files should go into one frequency group. Values <= 0 mean put 
        all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this, i.e. the subsequent
        NDPPP call will write its output files into this directory.
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True:
          not allowed
        mergeLastGroup = True, truncateLastSBs = False:
          put the files of the last group, which doesn't have SBperGroup subbands,
          into the second-to-last group (which will then have more than SBperGroup
          entries).
        mergeLastGroup = False, truncateLastSBs = True:
          ignore the trailing files that don't make up a full group (i.e. not all
          files are used).
        mergeLastGroup = False, truncateLastSBs = False:
          keep the incomplete last group, or - with NDPPPfill=True - fill the last
          group with dummies.
    firstSB : int, optional
        If set, reference the grouping of files to this station subband, as if a
        file with this station subband were included in the input files.
        (For HBA-low, i.e. 0 -> 100MHz, 55 -> 110.74MHz, 512 -> 200MHz)

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """

    NDPPPfill = input2bool(NDPPPfill)
    mergeLastGroup = input2bool(mergeLastGroup)
    truncateLastSBs = input2bool(truncateLastSBs)
    firstSB = input2int(firstSB)
    numSB = int(numSB)

    if not filename or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: filename and mapfile_dir are needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError(
            'sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!'
        )

    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError(
            'sort_times_into_freqGroups: type of "ms_input" unknown!')

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print("sort_times_into_freqGroups: Working on", len(ms_list),
          "files (including flagged files).")

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # work only on files selected by a previous step
        if ms.lower() != 'none':
            # use the slower but more reliable way:
            obstable = pt.table(ms, ack=False)
            timestamp = int(round(np.min(obstable.getcol('TIME'))))
            #obstable = pt.table(ms+'::OBSERVATION', ack=False)
            #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
            obstable.close()
            if timestamp in time_groups:
                time_groups[timestamp]['files'].append(ms)
            else:
                time_groups[timestamp] = {
                    'files': [ms],
                    'basename': os.path.splitext(ms)[0]
                }
    print("sort_times_into_freqGroups: found", len(time_groups), "time-groups")

    # sort time-groups by frequency
    timestamps = list(time_groups.keys())
    timestamps.sort()  # not needed now, but later
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                file_bandwidth = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                freqset = set([freq])
                first = False
            else:
                assert file_bandwidth == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                freqset.add(freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = list(
            zip(freqs, time_groups[time]['files']))
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print(
        "sort_times_into_freqGroups: Collected the frequencies for the time-groups"
    )

    freqliste = np.array(list(freqset))
    freqliste.sort()
    freq_width = np.min(freqliste[1:] - freqliste[:-1])
    if file_bandwidth > freq_width:
        raise ValueError(
            "Bandwidth of files is larger than minimum frequency step between two files!"
        )
    if file_bandwidth < (freq_width * 0.51):
        logging.warning(
            "Bandwidth of files is smaller than 51% of the minimum frequency step between two files! (More than about half the data is missing.)"
        )
    # the new output maps
    filemap = MultiDataMap()
    groupmap = DataMap()
    # add half the SB bandwidth plus 1% in case maxfreq lies "exactly" on a
    # group border
    maxfreq = np.max(freqliste) + freq_width * 0.51
    if firstSB is not None:
        if freqliste[0] < 100e6:
            # LBA data
            minfreq = (float(firstSB) / 512. * 100e6) - freq_width / 2.
        elif 100e6 < freqliste[0] < 200e6:
            # HBA-low
            minfreq = (float(firstSB) / 512. * 100e6) + 100e6 - freq_width / 2.
        elif 200e6 < freqliste[0] < 300e6:
            # HBA-high
            minfreq = (float(firstSB) / 512. * 100e6) + 200e6 - freq_width / 2.
        else:
            raise ValueError(
                'sort_times_into_freqGroups: Frequency of lowest input data is higher than 300MHz!'
            )
        if np.min(freqliste) < minfreq:
            raise ValueError(
                'sort_times_into_freqGroups: Frequency of lowest input data is lower than reference frequency!'
            )
    else:
        minfreq = np.min(freqliste) - freq_width / 2.
    groupBW = freq_width * numSB
    if groupBW < 1e6 and groupBW > 0:
        print(
            'sort_times_into_freqGroups: ***WARNING***: Bandwidth of concatenated MS is lower than 1 MHz. This may cause conflicts with the concatenated file names!'
        )
    if groupBW < 0:
        # this is the case for concatenating all subbands
        groupBW = maxfreq - minfreq
    freqborders = np.arange(minfreq, maxfreq, groupBW)
    if mergeLastGroup:
        freqborders[-1] = maxfreq
    elif truncateLastSBs:
        pass  # nothing to do; branch kept to make the logic explicit
    elif not truncateLastSBs and NDPPPfill:
        freqborders = np.append(freqborders, (freqborders[-1] + groupBW))
    elif not truncateLastSBs and not NDPPPfill:
        freqborders = np.append(freqborders, maxfreq)

    freqborders = freqborders[freqborders > (np.min(freqliste) - groupBW)]
    ngroups = len(freqborders) - 1
    if ngroups == 0:
        raise ValueError(
            'sort_times_into_freqGroups: Not enough input subbands to create at least one full (frequency-)group!'
        )

    print("sort_times_into_freqGroups: Will create", ngroups, "group(s) with",
          numSB, "file(s) each.")

    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        for groupIdx in range(ngroups):
            files = []
            skip_this = True
            filefreqs_low = np.arange(freqborders[groupIdx],
                                      freqborders[groupIdx + 1], freq_width)
            for lower_freq in filefreqs_low:
                if lower_freq < freq < lower_freq + freq_width:
                    assert freq != 1e12
                    files.append(fname)
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
                    skip_this = False
                elif NDPPPfill:
                    files.append('dummy.ms')
            if not skip_this:
                filemap.append(
                    MultiDataProduct(hosts[hostID % numhosts], files,
                                     skip_this))
                freqID = int(
                    (freqborders[groupIdx] + freqborders[groupIdx + 1]) / 2e6)
                groupname = time_groups[time]['basename'] + '_%Xt_%dMHz.ms' % (
                    time, freqID)
                if type(stepname) is str:
                    groupname += stepname
                if type(target_path) is str:
                    groupname = os.path.join(target_path,
                                             os.path.basename(groupname))
                groupmap.append(
                    DataProduct(hosts[hostID % numhosts], groupname,
                                skip_this))
        orphan_files = len(time_groups[time]['freq_names'])
        if freq < 1e12:
            orphan_files += 1
        if orphan_files > 0:
            print(
                "sort_times_into_freqGroups: Had %d unassigned files in time-group %Xt."
                % (orphan_files, time))
    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename + '_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemap, nchans)
    flagmapname = os.path.join(mapfile_dir, filename + '_flags')
    flagmap.save(flagmapname)
    result = {
        'mapfile': filemapname,
        'groupmapfile': groupmapname,
        'flagmapfile': flagmapname
    }
    return result
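To make the grouping arithmetic above concrete, here is a minimal sketch of the frequency-border computation, assuming the standard LOFAR subband width of 100 MHz / 512 ≈ 195.3125 kHz; firstSB, the highest observed frequency, and numSB are hypothetical values.

import numpy as np

freq_width = 100e6 / 512.                     # one station subband, ~195.3125 kHz
firstSB = 55                                  # HBA-low: 55 -> ~110.74 MHz
minfreq = (firstSB / 512.) * 100e6 + 100e6 - freq_width / 2.
maxfreq = 120e6 + freq_width * 0.51           # hypothetical highest REF_FREQUENCY
numSB = 10                                    # hypothetical files per group
groupBW = freq_width * numSB                  # bandwidth of one frequency group
freqborders = np.arange(minfreq, maxfreq, groupBW)
print('minfreq = %.2f MHz, %d group(s)' % (minfreq / 1e6, len(freqborders) - 1))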
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None, NDPPPfill=True, target_path=None, stepname=None):
    """
    Check a list of MS files for missing frequencies and sort them into
    frequency groups, one mapfile entry per group and time-step.

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean put
        all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output files
        into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """
    if not filename or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')

    # convert input to needed types
    ms_list = input2strlist(ms_input)
    NDPPPfill = input2bool(NDPPPfill)
    numSB = int(numSB)

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on",len(ms_list),"files"

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # use the slower but more reliable way:
        obstable = pt.table(ms, ack=False)
        timestamp = int(round(np.min(obstable.getcol('TIME'))))
        #obstable = pt.table(ms+'::OBSERVATION', ack=False)
        #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
        obstable.close()
        if timestamp in time_groups:
            time_groups[timestamp]['files'].append(ms)
        else:
            time_groups[timestamp] = {'files': [ms], 'basename': os.path.splitext(ms)[0]}
    print("sort_times_into_freqGroups: found", len(time_groups), "time-groups")

    # sort time-groups by frequency
    timestamps = sorted(time_groups.keys())   # sorting not needed now, but later
    first = True
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                freq_width = sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = freq
                minfreq = freq
                first = False
            else:
                assert freq_width == sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = max(maxfreq, freq)
                minfreq = min(minfreq, freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = list(zip(freqs, time_groups[time]['files']))
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    #the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    maxfreq = maxfreq + freq_width / 2.
    minfreq = minfreq - freq_width / 2.
    numFiles = round((maxfreq - minfreq) / freq_width)
    if numSB > 0:
        ngroups = int(np.ceil(numFiles / numSB))
    else:
        ngroups = 1
        numSB = int(numFiles)
    hostID = 0
    for time in timestamps:
        (freq,fname) = time_groups[time]['freq_names'].pop(0)
        for fgroup in range(ngroups):
            files = []
            skip_this = True
            for fIdx in range(numSB):
                if freq > (fIdx+fgroup*numSB+1)*freq_width+minfreq:
                    files.append('dummy.ms')
                else:
                    files.append(fname)
                    if len(time_groups[time]['freq_names'])>0:
                        (freq,fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq,fname) = (1e12,'This_shouldn\'t_show_up')
                    skip_this = False
            filemap.append(MultiDataProduct(hosts[hostID%numhosts], files, skip_this))
            groupname = time_groups[time]['basename']+'_%Xt_%dg.ms'%(time,fgroup)
            if type(stepname) is str:
                groupname += stepname
            if type(target_path) is str:
                groupname = os.path.join(target_path,os.path.basename(groupname))
            groupmap.append(DataProduct(hosts[hostID%numhosts],groupname, skip_this))
            hostID += 1
        assert freq==1e12

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename + '_groups')
    groupmap.save(groupmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname}
    return result
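For reference, a hypothetical invocation of this older variant: the MS names, host list, and output directory below are made up, and the call assumes readable MeasurementSets plus the pipeline helpers (input2strlist, MultiDataMap, etc.) on the import path.

result = main(['L12345_SB000.MS', 'L12345_SB001.MS', 'L12345_SB002.MS'],
              filename='freqgroups.mapfile',
              mapfile_dir='/data/mapfiles',   # hypothetical output directory
              numSB=2,
              hosts='[node01, node02]')
print(result['mapfile'], result['groupmapfile'])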