Example 1
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append : str
        String to append
    append_index : bool
        If True, append a unique index to each file
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']

    if 'append_index' in kwargs:
        append_index = kwargs['append_index']
        if type(append_index) is str:
            if append_index.lower() == 'true':
                append_index = True
            else:
                append_index = False
    else:
        append_index = False

    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if append_index:
            map_out.data.append(
                DataProduct(item.host,
                            item.file + append_str + '_{}'.format(i),
                            item.skip))
        else:
            map_out.data.append(
                DataProduct(item.host, item.file + append_str, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
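A minimal invocation sketch for the plugin above. The paths, the '_avg' suffix, and the mapfile names are hypothetical, and DataMap/DataProduct are assumed to come from the LOFAR pipeline framework as in the snippet; note how a parset-style string 'true' for append_index is coerced to a bool by the plugin:

# Hypothetical call; assumes /data/maps/input.datamap exists and is a valid datamap
result = plugin_main([],
                     mapfile_in='/data/maps/input.datamap',
                     append='_avg',
                     append_index='true',  # string form, as a parset would pass it
                     mapfile_dir='/data/maps',
                     filename='appended.datamap')
print(result['mapfile'])  # /data/maps/appended.datamap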
Example 2
    def from_folder(self, folder, pattern=None, exclude_pattern=False):
        measurements = os.listdir(folder)
        measurements.sort()
        if pattern:
            rePattern = pattern.strip().replace('.', '\\.').replace('?', '.').replace('*', '.*') + '$'
            PatternReg = re.compile(rePattern)
        for ms in measurements:
            if pattern:
                if not exclude_pattern and PatternReg.match(ms):
                    self._append(DataProduct('localhost', folder + '/' + ms, False))
                elif exclude_pattern and not PatternReg.match(ms):
                    self._append(DataProduct('localhost', folder + '/' + ms, False))
            else:
                self._append(DataProduct('localhost', folder + '/' + ms, False))
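The pattern argument above is a shell-style wildcard converted by hand into an anchored regular expression. A standalone sketch of just that conversion, with an illustrative pattern:

import re

pattern = '*.MS'  # illustrative wildcard
rePattern = pattern.strip().replace('.', '\\.').replace('?', '.').replace('*', '.*') + '$'
print(rePattern)  # .*\.MS$
print(bool(re.compile(rePattern).match('L123456_SB000.MS')))      # True
print(bool(re.compile(rePattern).match('L123456_SB000.MS.tmp')))  # False: the trailing $ anchors the match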
Example 3
    def from_folder(self, folder, pattern=None, exclude_pattern=False):
        measurements = os.listdir(folder)
        measurements.sort()
        for ms in measurements:
            if pattern:
                if pattern in ms and not exclude_pattern:
                    self._append(
                        DataProduct('localhost', folder + '/' + ms, False))
                elif pattern in ms and exclude_pattern:
                    pass
                elif pattern not in ms and exclude_pattern:
                    self._append(
                        DataProduct('localhost', folder + '/' + ms, False))
            else:
                self._append(DataProduct('localhost', folder + '/' + ms,
                                         False))
Example 4
    def from_parts(self, host='localhost', data='dummy', skip=False, ntimes=1):
        hostlist = self._input_to_list(host)
        datalist = self._input_to_list(data)
        skiplist = self._input_to_list(skip)
        if len(hostlist) != len(datalist) or len(hostlist) != len(
                skiplist) or len(hostlist) != ntimes:
            print('Length of parts is not equal. Will expand to max length given.')
            maxval = max(len(hostlist), len(datalist), len(skiplist), ntimes)
            lastval = hostlist[-1]
            if len(hostlist) != maxval:
                for x in range(len(hostlist), maxval):
                    hostlist.append(lastval)

            lastval = datalist[-1]
            if len(datalist) != maxval:
                for x in range(len(datalist), maxval):
                    datalist.append(lastval)

            lastval = skiplist[-1]
            if len(skiplist) != maxval:
                for x in range(len(skiplist), maxval):
                    skiplist.append(lastval)
        prodlist = []
        for h, f, z in zip(hostlist, datalist, skiplist):
            prodlist.append(DataProduct(h, f, z))
        self._set_data(prodlist)
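A standalone sketch of the expand-to-max-length behaviour implemented above, with illustrative inputs; each short list is padded by repeating its last element until all lists have the same length:

hostlist = ['node1']
datalist = ['a.MS', 'b.MS', 'c.MS']
skiplist = [False]
maxval = max(len(hostlist), len(datalist), len(skiplist))
for lst in (hostlist, datalist, skiplist):
    lst.extend([lst[-1]] * (maxval - len(lst)))  # pad with the last element
print(list(zip(hostlist, datalist, skiplist)))
# [('node1', 'a.MS', False), ('node1', 'b.MS', False), ('node1', 'c.MS', False)]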
Example 5
    def expand(self, number, hostlist=None, filelist=None):
        if hostlist:
            if len(hostlist) != number:
                print('Error: length of hostlist should correspond to number of expansions')
                exit(1)
        else:
            print('Info: no hostlist given. Will use "localhost" instead')
            hostlist = []
            for item in range(number):
                hostlist.append('localhost')

        if filelist:
            if len(filelist) != number:
                print('Error: length of filelist should correspond to number of expansions')
                exit(1)
        else:
            print('Info: no filelist given. Will use "dummy" instead')
            filelist = []
            for item in range(number):
                filelist.append('dummy')

        prodlist = []
        for h, f in zip(hostlist, filelist):
            prodlist.append(DataProduct(h, f, False))

        self._set_data(prodlist)
Example 6
def _create_mapfile_from_folder(folder):
    maps = DataMap([])
    measurements = os.listdir(folder)
    measurements.sort()
    for ms in measurements:
        maps.data.append(DataProduct('localhost', folder + '/' + ms, False))
    return maps
Example 7
def plugin_main(args, **kwargs):
    """
    Prunes entries from a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    prune_str : str
        Entries starting with this string will be removed.

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    prune_str = kwargs['prune_str'].lower()
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    prunelen = len(prune_str)

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if item.file[:prunelen].lower() != prune_str:
            map_out.data.append(DataProduct(item.host, item.file, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
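The prune test above is a case-insensitive prefix comparison on each entry's filename. A standalone sketch with illustrative names:

prune_str = 'dummy'
prunelen = len(prune_str)
files = ['Dummy.ms', 'L123_SB000.MS', 'dummy_001.ms']
kept = [f for f in files if f[:prunelen].lower() != prune_str]
print(kept)  # ['L123_SB000.MS']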
Example 8
def plugin_main(args, **kwargs):
    """
    Makes a mapfile for selfcal images (assuming standard naming conventions)

    Parameters
    ----------
    selfcal_dir : str
        Full path of selfcal directory
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string (e.g.,
        '[host1, host2]')
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    selfcal_dir = kwargs['selfcal_dir']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    else:
        hosts = kwargs['hosts']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    if os.path.exists(selfcal_dir):
        selfcal_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[01]2-MFS-image.fits'))
        tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22_iter*-MFS-image.fits'))
        if len(tec_iter_images) == 0:
            tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22-MFS-image.fits'))
        selfcal_images += tec_iter_images
        selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[3]2-MFS-image.fits'))
        selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image42_iter*-MFS-image.fits'))
        if len(selfcal_images) == 0:
            selfcal_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[01]2-image.fits'))
            tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22_iter*-image.fits'))
            if len(tec_iter_images) == 0:
                tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22-image.fits'))
            selfcal_images += tec_iter_images
            selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[3]2-image.fits'))
            selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image42_iter*-image.fits'))
        selfcal_images.sort()
    else:
        selfcal_images = []

    # Save image list as a string to the output mapfile
    image_list = '[{0}]'.format(','.join(selfcal_images))
    map_out = DataMap([])
    map_out.data.append(DataProduct(hosts[0], image_list, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example 9
    def _set_file(self, data):
        try:
            if isinstance(data, list):
                self.file = data
            if isinstance(data, DataProduct):
                self._from_dataproduct(data)
            if isinstance(data, DataMap):
                self._from_datamap(data)
        except TypeError:
            raise TypeError('No known method to set a filelist from %s' % str(data))
Example 10
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by compressing input mapfile items into one item

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    list_format : bool, optional
        If True, the compressed item will use a Python list format (e.g.,
        '[file1, file2, ...]'). If False, it will be a space-separated list
        (e.g., 'file1 file2 ...')

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'list_format' in kwargs:
        list_format = kwargs['list_format']
    else:
        list_format = True
    if type(list_format) is str:
        if list_format.lower() == 'true':
            list_format = True
        else:
            list_format = False

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    map_in.iterator = DataMap.SkipIterator
    file_list = [item.file for item in map_in]
    if list_format:
        newlist = '[{0}]'.format(','.join(file_list))
    else:
        newlist = '{0}'.format(' '.join(file_list))

    # Just assign host of first file to compressed file
    hosts = [item.host for item in map_in]
    map_out.data.append(DataProduct(hosts[0], newlist, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
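The two list_format variants above differ only in brackets and delimiter. A standalone sketch with illustrative file names:

file_list = ['a.MS', 'b.MS', 'c.MS']
print('[{0}]'.format(','.join(file_list)))  # [a.MS,b.MS,c.MS]
print(' '.join(file_list))                  # a.MS b.MS c.MS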
Example 11
def _create_mapfile_ato(inmap):
    maps = DataMap([])
    mapsin = DataMap.load(inmap)
    mapsin.iterator = DataMap.SkipIterator
    newlist = ''
    for i, item in enumerate(mapsin):
        newlist = newlist + item.file + ','
    newlist = newlist.rstrip(',')
    newlist = '[' + newlist + ']'
    maps.data.append(DataProduct('localhost', newlist, False))
    return maps
Example 12
def plugin_main(args, **kwargs):
    """
    Makes a mapfile for list of files

    Parameters
    ----------
    files : list or str
        List of files or mapfile with such a list as the only entry. May be
        given as a list of strings or as a string (e.g.,
        '[s1.skymodel, s2.skymodel]')
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string (e.g.,
        '[host1, host2]')
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    if type(kwargs['files']) is str:
        try:
            # Check if input is mapfile containing list as a string
            map_in = DataMap.load(kwargs['files'])
            in_files = [item.file for item in map_in]
            files = []
            for f in in_files:
                files += f.strip('[]').split(',')
        except Exception:
            files = kwargs['files']
            files = files.strip('[]').split(',')
        files = [f.strip() for f in files]
    else:
        files = kwargs['files']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    else:
        hosts = kwargs['hosts']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    for i in range(len(files) - len(hosts)):
        hosts.append(hosts[i])

    map_out = DataMap([])
    for h, f in zip(hosts, files):
        map_out.data.append(DataProduct(h, f, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example 13
def _create_mapfile_pythonlist(folder):
    maps = DataMap([])
    measurements = os.listdir(folder)
    measurements.sort()
    msfull = ''
    for ms in measurements:
        msfull = msfull + os.path.join(folder, ms) + ','
    msfull = msfull.rstrip(',')
    msfull = '[' + msfull + ']'
    maps.data.append(DataProduct('localhost', msfull, False))
    return maps
Example 14
def plugin_main(args, **kwargs):
    """
    Trims a string from filenames in a mapfile

    Note that everything from the last instance of the matching string to the
    end is trimmed.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    trim : str
        String to remove
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    counter : int
        If counter is greater than 0, replace "image32" with "image42". This is
        a special argument for facetselfcal looping only

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    trim_str = kwargs['trim']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'counter' in kwargs:
        counter = int(kwargs['counter'])
    else:
        counter = 0

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        index = item.file.rfind(trim_str)
        if index >= 0:
            item_trim = item.file[:index]
            if counter > 0:
                item_trim = item_trim.replace('image32', 'image42')
            map_out.data.append(DataProduct(item.host, item_trim,
                item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
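The trim above uses str.rfind, so everything from the last occurrence of trim_str onward is dropped; entries without a match are omitted entirely. A standalone sketch with an illustrative filename:

item_file = 'field3.wsclean_image32-MFS-image.fits'
trim_str = '-MFS'
index = item_file.rfind(trim_str)
if index >= 0:
    print(item_file[:index])  # field3.wsclean_image32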
Example 15
    def _set_file(self, data):
        try:
            # Try parsing as a list
            if isinstance(data, list):
                self.file = data
            if isinstance(data, DataProduct):
                self._from_dataproduct(data)
            if isinstance(data, DataMap):
                self._from_datamap(data)

        except TypeError:
            raise TypeError("No known method to set a filelist from %s" %
                            str(data))
Example 16
def _create_mapfile_list(folder):
    maps = DataMap([])
    measurements = os.listdir(folder)
    measurements.sort()
    msfull = ''
    for ms in measurements:
        msfull = msfull + os.path.join(folder, ms) + ' '
    maps.data.append(DataProduct('localhost', msfull, False))
    return maps
Example 17
def _combine_local_map(inmap):
    map_out = DataMap([])
    map_in = DataMap.load(inmap)
    map_in.iterator = DataMap.SkipIterator
    local_files = {}
    for item in map_in:
        if item.host in local_files:
            local_files[item.host] += item.file + ','
        else:
            local_files[item.host] = item.file + ','
    for k, v in local_files.items():
        v = v.rstrip(',')
        v = '[' + v + ']'
        map_out.data.append(DataProduct(k, v, False))
    return map_out
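_combine_local_map above buckets files by host before compressing each bucket into one bracketed item. A standalone sketch of the bucketing, with illustrative (host, file) pairs:

local_files = {}
for host, f in [('node1', 'a.MS'), ('node2', 'b.MS'), ('node1', 'c.MS')]:
    if host in local_files:
        local_files[host] += f + ','
    else:
        local_files[host] = f + ','
for k, v in local_files.items():
    print(k, '[' + v.rstrip(',') + ']')
# node1 [a.MS,c.MS]
# node2 [b.MS]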
Example 18
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the corresponding
    group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels for the 
        different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of groups
        needs to be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    if len(inmap) != len(groupmap):
        raise ValueError(
            'PipelineStep_mapfileSingleToGroup: length of {0} and {1} differ'.
            format(kwargs['mapfile_in'], kwargs['mapfile_groups']))

    map_out = DataMap([])
    for groupID in range(len(groupmap)):
        for fileID in range(len(groupmap[groupID].file)):
            map_out.data.append(
                DataProduct(inmap[groupID].host, inmap[groupID].file,
                            (inmap[groupID].skip or groupmap[groupID].skip)))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example 19
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by uncompressing input mapfile list item into separate items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing list of MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    hosts : str
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    else:
        hosts = kwargs['hosts']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])

    files = map_in[0].file.strip('[]').split(',')
    files = [f.strip() for f in files]
    for i in range(len(files) - len(hosts)):
        hosts.append(hosts[i])

    for file, host in zip(files, hosts):
        map_out.data.append(DataProduct(host, file, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
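When there are more files than hosts, the loop above extends the host list by cycling through it from the start. A standalone sketch:

hosts = ['node1', 'node2']
files = ['a', 'b', 'c', 'd', 'e']
for i in range(len(files) - len(hosts)):
    hosts.append(hosts[i])
print(hosts)  # ['node1', 'node2', 'node1', 'node2', 'node1']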
Example 20
def plugin_main(args, **kwargs):
    """
    Checks a "check" mapfile for values of 'None' and, if found, changes the
    input mapfile "file" to "empty".

    Note: the check and input mapfiles must have the same length

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile from which to select files.
    mapfile_check : str
        Name of the mapfile to check for None
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    checkmap = DataMap.load(kwargs['mapfile_check'])

    if len(inmap) != len(checkmap):
        raise ValueError('Input and check mapfiles must have the same length')

    map_out = DataMap([])
    for checkitem, item in zip(checkmap, inmap):
        if checkitem.file.lower() == 'none':
            map_out.data.append(DataProduct(item.host, 'empty', item.skip))
        else:
            map_out.append(item)

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example 21
def _split_listmap(map_in, number):
    print('MAP_IN: ', map_in)
    map_out = DataMap([])
    for item in map_in:
        filelist = ((item.file.rstrip(']')).lstrip('[')).split(',')
        chunks = [
            filelist[i:i + number] for i in range(0, len(filelist), number)
        ]
        print('FILELIST: ', filelist)
        print('CHUNKS: ', chunks)
        for slist in chunks:
            for i, name in enumerate(slist):
                slist[i] = '"' + name + '"'
            print('SLIST: ', slist)
            map_out.data.append(DataProduct(item.host, slist, False))
    return map_out
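The chunking above is a plain slice-based split into groups of at most 'number' entries. A standalone sketch:

filelist = ['a', 'b', 'c', 'd', 'e']
number = 2
chunks = [filelist[i:i + number] for i in range(0, len(filelist), number)]
print(chunks)  # [['a', 'b'], ['c', 'd'], ['e']]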
Example 22
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by repeating max size in input mapfile items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])

    # Find max size in x and y
    xsize_list = []
    ysize_list = []
    for item in map_in:
        xsize, ysize = [int(s) for s in item.file.split(' ')]
        xsize_list.append(xsize)
        ysize_list.append(ysize)
    maxsize = '{0} {1}'.format(max(xsize_list), max(ysize_list))

    for item in map_in:
        map_out.data.append(DataProduct(item.host, maxsize, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
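The max-size computation above parses each 'xsize ysize' string and keeps the per-axis maximum. A condensed sketch with illustrative sizes:

sizes = ['4096 4608', '5120 5120', '4608 4096']
xs, ys = zip(*[[int(s) for s in item.split(' ')] for item in sizes])
print('{0} {1}'.format(max(xs), max(ys)))  # 5120 5120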
Example 23
def _calc_edge_chans(inmap, numch, edgeFactor=32):
    """
    Generates a map with strings that can be used as input for NDPPP to flag the edges 
    of the input MSs during (or after) concatenation.
    
    inmap      - MultiDataMap (not mapfilename!) with the files to be concatenated.
    numch      - Number of channels per input file (All files are assumed to have the same number 
                 of channels.)
    edgeFactor - Divisor to compute how many channels are to be flagged at beginning and end. 
                 (numch=64 and edgeFactor=32 means "flag two channels at beginning and two at end")
    """
    outmap = DataMap([])
    for group in inmap:
        flaglist = []
        for i in range(len(group.file)):
            flaglist.extend(range(i * numch, i * numch + numch // edgeFactor))
            flaglist.extend(range((i + 1) * numch - numch // edgeFactor, (i + 1) * numch))
        outmap.append(DataProduct(group.host, str(flaglist).replace(' ', ''), group.skip))
        print(str(flaglist).replace(' ', ''))
    return outmap
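A worked example of the edge-channel computation above: for a group of two concatenated files with numch=64 and edgeFactor=32, two channels are flagged at each end of each input file:

numch, edgeFactor = 64, 32
flaglist = []
for i in range(2):  # two files in the group
    flaglist.extend(range(i * numch, i * numch + numch // edgeFactor))
    flaglist.extend(range((i + 1) * numch - numch // edgeFactor, (i + 1) * numch))
print(str(flaglist).replace(' ', ''))  # [0,1,62,63,64,65,126,127]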
Example 24
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by filtering input mapfile items into one item (the middle
    one)

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])

    map_in.iterator = DataMap.SkipIterator
    files = [item.file for item in map_in]
    hosts = [item.host for item in map_in]
    if 'index' in kwargs:
        index = int(kwargs['index'])
    else:
        index = len(files) // 2
    map_out.data.append(DataProduct(hosts[index], files[index], False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example 25
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append : str
        String to append
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        map_out.data.append(
            DataProduct(item.host, item.file + append_str, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example 26
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by expanding single input mapfile item into many items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing single item
    mapfile_to_match : str
        Filename of datamap containing multiple items
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_to_match = kwargs['mapfile_to_match']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_match = DataMap.load(mapfile_to_match)
    map_out = DataMap([])

    map_match.iterator = DataMap.SkipIterator
    for item in map_match:
        map_out.data.append(DataProduct(item.host, map_in[0].file, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example 27
def plugin_main(args, **kwargs):
    """
    Takes in mapfile_in, containing many files, and returns only one

    Parameters
    ----------
    mapfile_in : str
        Parmdbs containing phase solutions
    mapfile_dir : str
        mapfile directory
    filename : str
        Output filename
    mapfile_comp : str
        Target MSs

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    mapfile_in = kwargs['mapfile_in']
    mapfile_comp = kwargs['mapfile_comp']
    filename = kwargs['filename']

    value = DataMap.load(mapfile_in)[0]  # this is the single entry to be expanded
    n = len(DataMap.load(mapfile_comp))  # these are the actual MS files

    map_out = DataMap([])
    for i in range(n):
        map_out.data.append(DataProduct(value.host, value.file, value.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example 28
def plugin_main(args, **kwargs):
    """
    Takes in mapfile_in, containing many files, and returns only one

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be trimmed back.
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    if len(inmap) < 1:
        print('ERROR: GroupToSingle: mapfile_in has size < 1.')
        return (1)

    map_out = DataMap([])
    map_out.data.append(
        DataProduct(inmap[0].host, inmap[0].file, inmap[0].skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example 29
def main(ms_input,
         filename=None,
         mapfile_dir=None,
         numSB=-1,
         hosts=None,
         NDPPPfill=True,
         target_path=None,
         stepname=None,
         mergeLastGroup=False,
         truncateLastSBs=True,
         firstSB=None):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional 
        How many files should go into one frequency group. Values <= 0 mean put 
        all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output files 
        into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True:
          not allowed
        mergeLastGroup = True, truncateLastSBs = False:
          put the files from the last group that doesn't have SBperGroup subbands
          into the second-to-last group (which will then have more than SBperGroup entries).
        mergeLastGroup = False, truncateLastSBs = True:
          ignore the last files that don't make a full group (not all files are used).
        mergeLastGroup = False, truncateLastSBs = False:
          keep the incomplete last group, or - with NDPPPfill=True - fill the
          last group with dummies.
    firstSB : int, optional
        If set, reference the grouping of files to this station-subband, as if a
        file with this station-subband were included in the input files.
        (For HBA-low, i.e. 0 -> 100MHz, 55 -> 110.74MHz, 512 -> 200MHz)

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """

    NDPPPfill = input2bool(NDPPPfill)
    mergeLastGroup = input2bool(mergeLastGroup)
    truncateLastSBs = input2bool(truncateLastSBs)
    firstSB = input2int(firstSB)
    numSB = int(numSB)

    if not filename or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: filename and mapfile_dir are needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError(
            'sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!'
        )


#    if mergeLastGroup:
#        raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError(
            'sort_times_into_freqGroups: type of "ms_input" unknown!')

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print("sort_times_into_freqGroups: Working on", len(ms_list),
          "files (including flagged files).")

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # work only on files selected by a previous step
        if ms.lower() != 'none':
            # use the slower but more reliable way:
            obstable = pt.table(ms, ack=False)
            timestamp = int(round(np.min(obstable.getcol('TIME'))))
            #obstable = pt.table(ms+'::OBSERVATION', ack=False)
            #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
            obstable.close()
            if timestamp in time_groups:
                time_groups[timestamp]['files'].append(ms)
            else:
                time_groups[timestamp] = {
                    'files': [ms],
                    'basename': os.path.splitext(ms)[0]
                }
    print("sort_times_into_freqGroups: found", len(time_groups), "time-groups")

    # sort time-groups by frequency
    timestamps = list(time_groups.keys())
    timestamps.sort()  # not needed now, but later
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                file_bandwidth = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                freqset = set([freq])
                first = False
            else:
                assert file_bandwidth == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                freqset.add(freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = list(
            zip(freqs, time_groups[time]['files']))
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print(
        "sort_times_into_freqGroups: Collected the frequencies for the time-groups"
    )

    freqliste = np.array(list(freqset))
    freqliste.sort()
    freq_width = np.min(freqliste[1:] - freqliste[:-1])
    if file_bandwidth > freq_width:
        raise ValueError(
            "Bandwidth of files is larger than minimum frequency step between two files!"
        )
    if file_bandwidth < (freq_width * 0.51):
        #raise ValueError("Bandwidth of files is smaller than 51% of the minimum frequency step between two files! (More than about half the data is missing.)")
        logging.warning(
            "Bandwidth of files is smaller than 51% of the minimum frequency step between two files! (More than about half the data is missing.)"
        )
    #the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    # add 51% of the SB bandwidth in case maxfreq might be "exactly" on a group-border
    maxfreq = np.max(freqliste) + freq_width * 0.51
    if firstSB is not None:
        if freqliste[0] < 100e6:
            # LBA Data
            minfreq = (float(firstSB) / 512. * 100e6) - freq_width / 2.
        elif freqliste[0] > 100e6 and freqliste[0] < 200e6:
            # HBA-Low
            minfreq = (float(firstSB) / 512. * 100e6) + 100e6 - freq_width / 2.
        elif freqliste[0] > 200e6 and freqliste[0] < 300e6:
            # HBA-high
            minfreq = (float(firstSB) / 512. * 100e6) + 200e6 - freq_width / 2.
        else:
            raise ValueError(
                'sort_times_into_freqGroups: Frequency of lowest input data is higher than 300MHz!'
            )
        if np.min(freqliste) < minfreq:
            raise ValueError(
                'sort_times_into_freqGroups: Frequency of lowest input data is lower than reference frequency!'
            )
    else:
        minfreq = np.min(freqliste) - freq_width / 2.
    groupBW = freq_width * numSB
    if groupBW < 1e6 and groupBW > 0:
        print(
            'sort_times_into_freqGroups: ***WARNING***: Bandwidth of concatenated MS is lower than 1 MHz. This may cause conflicts with the concatenated file names!'
        )
    if groupBW < 0:
        # this is the case for concatenating all subbands
        groupBW = maxfreq - minfreq
    truncateLastSBs = input2bool(False)
    NDPPPfill = input2bool(True)
    freqborders = np.arange(minfreq, maxfreq, groupBW)
    if mergeLastGroup:
        freqborders[-1] = maxfreq
    elif truncateLastSBs:
        pass  #nothing to do! # left to make the logic more clear!
    elif not truncateLastSBs and NDPPPfill:
        freqborders = np.append(freqborders, (freqborders[-1] + groupBW))
    elif not truncateLastSBs and not NDPPPfill:
        freqborders = np.append(freqborders, maxfreq)

    freqborders = freqborders[freqborders > (np.min(freqliste) - groupBW)]
    ngroups = len(freqborders) - 1
    if ngroups == 0:
        raise ValueError(
            'sort_times_into_freqGroups: Not enough input subbands to create at least one full (frequency-)group!'
        )

    print("sort_times_into_freqGroups: Will create", ngroups, "group(s) with",
          numSB, "file(s) each.")

    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        for groupIdx in range(ngroups):
            files = []
            skip_this = True
            filefreqs_low = np.arange(freqborders[groupIdx],
                                      freqborders[groupIdx + 1], freq_width)
            for lower_freq in filefreqs_low:
                if freq > lower_freq and freq < lower_freq + freq_width:
                    assert freq != 1e12
                    files.append(fname)
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
                    skip_this = False
                elif NDPPPfill:
                    files.append('dummy.ms')
            if not skip_this:
                filemap.append(
                    MultiDataProduct(hosts[hostID % numhosts], files,
                                     skip_this))
                freqID = int(
                    (freqborders[groupIdx] + freqborders[groupIdx + 1]) / 2e6)
                groupname = time_groups[time]['basename'] + '_%Xt_%dMHz.ms' % (
                    time, freqID)
                if type(stepname) is str:
                    groupname += stepname
                if type(target_path) is str:
                    groupname = os.path.join(target_path,
                                             os.path.basename(groupname))
                groupmap.append(
                    DataProduct(hosts[hostID % numhosts], groupname,
                                skip_this))
        orphan_files = len(time_groups[time]['freq_names'])
        if freq < 1e12:
            orphan_files += 1
        if orphan_files > 0:
            print(
                "sort_times_into_freqGroups: Had %d unassigned files in time-group %xt."
                % (orphan_files, time))
    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename + '_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemap, nchans)
    flagmapname = os.path.join(mapfile_dir, filename + '_flags')
    flagmap.save(flagmapname)
    result = {
        'mapfile': filemapname,
        'groupmapfile': groupmapname,
        'flagmapfile': flagmapname
    }
    return result
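A numeric sketch of the group-border computation in the function above, under illustrative assumptions (ten subbands of 195.3125 kHz starting at 120 MHz, grouped four at a time, numpy imported as np as in the function). With truncateLastSBs the two leftover subbands would simply be dropped:

import numpy as np

freq_width = 195312.5  # minimum frequency step between files
numSB = 4
freqs = 120e6 + freq_width * np.arange(10)  # hypothetical subband frequencies
minfreq = np.min(freqs) - freq_width / 2.
maxfreq = np.max(freqs) + freq_width * 0.51
groupBW = freq_width * numSB
freqborders = np.arange(minfreq, maxfreq, groupBW)
print(len(freqborders) - 1, 'full group(s)')  # 2 full group(s)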
Example 30
def main(ms_input,
         outmapname=None,
         mapfile_dir=None,
         cellsize_highres_deg=0.00208,
         cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5,
         fieldsize_lowres=6.5,
         image_padding=1.,
         y_axis_stretch=1.):
    """
    Sort a list of MS files into frequency bands and compute image sizes and averaging steps

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHM wide the high-res images should be.
    fieldsize_lowres : float, optional
        How many FWHM wide the low-res images should be.
    image_padding : float, optional
        How much padding to add to the padded image sizes.
    y_axis_stretch : float, optional
        How much the y-axis should be stretched or compressed.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles

    """
    if not outmapname or not mapfile_dir:
        raise ValueError(
            'sort_into_freqBands: outmapname and mapfile_dir are needed!'
        )
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_into_freqBands: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    print("InitSubtract_sort_and_compute.py: Putting files into bands.")
    for MSkey in msdict:
        bands.append(Band(msdict[MSkey]))

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    for band in bands:
        print "InitSubtract_sort_and_compute.py: Working on Band:", band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg, fieldsize_highres,
            fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(
            int(imsize_high_res * y_axis_stretch))
        high_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_res) + " " + str(imsize_high_res_stretch),
                False))
        imsize_low_res_stretch = band.get_optimum_size(
            int(imsize_low_res * y_axis_stretch))
        low_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_res) + " " + str(imsize_low_res_stretch),
                False))
        imsize_high_pad = band.get_optimum_size(
            int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(
            int(imsize_high_res * image_padding * y_axis_stretch))
        high_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_pad) + " " + str(imsize_high_pad_stretch),
                False))
        imsize_low_pad = band.get_optimum_size(
            int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(
            int(imsize_low_res * image_padding * y_axis_stretch))
        low_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_pad) + " " + str(imsize_low_pad_stretch),
                False))

    print "InitSubtract_sort_and_compute.py: Computing averaging steps."
    (freqstep, timestep) = bands[0].get_averaging_steps()
    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    for index in range(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir,
                                     outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir,
                                    outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    result = {
        'groupmap': groupmapname,
        'single_mapfile': file_single_mapname,
        'high_size_mapfile': high_sizename,
        'low_size_mapfile': low_sizename,
        'high_padsize_mapfile': high_padsize_name,
        'low_padsize_mapfile': low_padsize_name,
        'freqstep': freqstepname,
        'timestep': timestepname
    }
    return result