# Ejemplo n.º 1 (example 1)
# 0
def plugin_main(args, **kwargs):
    """
    Matches a mapfile with one in which the MSs are distributed.

    For every entry of the full mapfile that also appears in the distributed
    mapfile, the corresponding entry of the input mapfile is emitted on the
    distributed entry's host.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dist : str
        Filename of mapfile with distributed MS files
    mapfile_full : str
        Filename of mapfile with all MS files from which the distributed one
        was made
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        Dict with key 'mapfile' giving the filename of the new mapfile

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dist = kwargs['mapfile_dist']
    mapfile_full = kwargs['mapfile_full']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_dist = DataMap.load(mapfile_dist)
    map_dist.iterator = DataMap.SkipIterator
    map_full = DataMap.load(mapfile_full)
    map_full.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran and the input files were deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_matchDistFreqs: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Find matches: a set gives O(1) membership tests inside the loop.
    all_files_hosts = [(item.file, item.host) for item in map_full]
    dist_files = set(item.file for item in map_dist)
    for i, (f, h) in enumerate(all_files_hosts):
        if f in dist_files:
            map_out.append(DataProduct(h, map_in[i].file, False))

    map_out.save(fileid)
    return {'mapfile': fileid}
def plugin_main(args, **kwargs):
    """
    Matches a mapfile with one in which the MSs are distributed.

    For every entry of the full mapfile that also appears in the distributed
    mapfile, the corresponding entry of the input mapfile is emitted on the
    distributed entry's host.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dist : str
        Filename of mapfile with distributed MS files
    mapfile_full : str
        Filename of mapfile with all MS files from which the distributed one
        was made
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        Dict with key 'mapfile' giving the filename of the new mapfile

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dist = kwargs['mapfile_dist']
    mapfile_full = kwargs['mapfile_full']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_dist = DataMap.load(mapfile_dist)
    map_dist.iterator = DataMap.SkipIterator
    map_full = DataMap.load(mapfile_full)
    map_full.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran and the input files were deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_matchDistFreqs: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Find matches: a set gives O(1) membership tests inside the loop.
    all_files_hosts = [(item.file, item.host) for item in map_full]
    dist_files = set(item.file for item in map_dist)
    for i, (f, h) in enumerate(all_files_hosts):
        if f in dist_files:
            map_out.append(DataProduct(h, map_in[i].file, False))

    map_out.save(fileid)
    return {'mapfile': fileid}
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with the MSs spread across the full bandwidth.

    The input MSs are grouped by their reference frequency, `num` groups are
    picked evenly across the frequency range, and every file in the selected
    groups is written to the output mapfile.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    num : int, optional
        Number of frequencies in output mapfile (default 6)

    Returns
    -------
    result : dict
        Dict with key 'mapfile' giving the filename of the new mapfile

    Raises
    ------
    ValueError
        If fewer than one frequency band ends up selected.

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    num = int(kwargs.get('num', 6))
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran and the input files were deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_selectDistFreqs: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Sort the MSs into frequency groups keyed by REF_FREQUENCY (Hz, as int).
    freq_groups = {}
    hosts = []
    for item in map_in:
        # Get the frequency info from the MS file.
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        freq_groups.setdefault(freq, []).append(item.file)
        if item.host not in hosts:
            hosts.append(item.host)

    # Select `num` frequencies evenly distributed over the sorted list.
    freqs = sorted(freq_groups)
    num_freqs = len(freqs)
    if num > num_freqs:
        print('PipelineStep_selectDistFreqs: fewer than %d frequency groups found, continuing with %d groups.' % (num, num_freqs))
        num = num_freqs
    dist_ind = get_distributed_indices(0, num_freqs - 1, num)
    selfreqs = [freqs[ind] for ind in dist_ind]
    if not selfreqs:
        print("PipelineStep_selectDistFreqs: Selected fewer than one frequency band.")
        raise ValueError("Selected fewer than one frequency band.")

    all_files = []
    for selfreq in selfreqs:
        all_files.extend(freq_groups[selfreq])

    # Extend the hosts list (cycling through it) so every file gets a host.
    for i in range(len(all_files) - len(hosts)):
        hosts.append(hosts[i])

    # Fill the output map.
    for host, fname in zip(hosts, all_files):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    return {'mapfile': fileid}
# Ejemplo n.º 4 (example 4)
# 0
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with the MSs spread across the full bandwidth.

    The input MSs are grouped by their reference frequency, `num` groups are
    picked evenly across the frequency range, and every file in the selected
    groups is written to the output mapfile.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    num : int, optional
        Number of frequencies in output mapfile (default 6)

    Returns
    -------
    result : dict
        Dict with key 'mapfile' giving the filename of the new mapfile

    Raises
    ------
    ValueError
        If fewer than one frequency band ends up selected.

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    num = int(kwargs.get('num', 6))
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran and the input files were deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_selectDistFreqs: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Sort the MSs into frequency groups keyed by REF_FREQUENCY (Hz, as int).
    freq_groups = {}
    hosts = []
    for item in map_in:
        # Get the frequency info from the MS file.
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        freq_groups.setdefault(freq, []).append(item.file)
        if item.host not in hosts:
            hosts.append(item.host)

    # Select `num` frequencies evenly distributed over the sorted list.
    freqs = sorted(freq_groups)
    num_freqs = len(freqs)
    if num > num_freqs:
        print('PipelineStep_selectDistFreqs: fewer than %d frequency groups found, continuing with %d groups.' % (num, num_freqs))
        num = num_freqs
    dist_ind = get_distributed_indices(0, num_freqs - 1, num)
    selfreqs = [freqs[ind] for ind in dist_ind]
    if not selfreqs:
        print("PipelineStep_selectDistFreqs: Selected fewer than one frequency band.")
        raise ValueError("Selected fewer than one frequency band.")

    all_files = []
    for selfreq in selfreqs:
        all_files.extend(freq_groups[selfreq])

    # Extend the hosts list (cycling through it) so every file gets a host.
    for i in range(len(all_files) - len(hosts)):
        hosts.append(hosts[i])

    # Fill the output map.
    for host, fname in zip(hosts, all_files):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    return {'mapfile': fileid}
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with only the MSs at the middle frequency.

    The input MSs are grouped by their reference frequency (optionally
    keeping only files whose name contains `include`), and the files of the
    middle frequency group are written to the output mapfile.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    index : int, optional
        Index of the frequency band to use.
    include : str, optional
        Substring an MS filename must contain to be considered.

    Returns
    -------
    result : dict
        Dict with key 'mapfile' giving the filename of the new mapfile

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    include = kwargs.get('include', None)
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran and the input files were deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_selectMiddleFreq: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Sort the MSs into frequency groups keyed by REF_FREQUENCY (Hz, as int),
    # skipping files that do not contain the `include` substring (if given).
    freq_groups = {}
    hosts = []
    for item in map_in:
        if include is not None and include not in item.file:
            continue
        # Get the frequency info from the MS file.
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        freq_groups.setdefault(freq, []).append(item.file)
        if item.host not in hosts:
            hosts.append(item.host)

    # Find the maximum number of files per frequency group.
    maxfiles = max(len(group) for group in freq_groups.values())
    # Find the center frequency (floor division keeps the index an int).
    freqs = sorted(freq_groups)
    selfreq = freqs[len(freqs) // 2]
    if 'index' in kwargs:
        # NOTE(review): 'index' is used directly as a freq_groups key here,
        # although the docstring calls it an index of the band — confirm.
        selfreq = int(kwargs['index'])
    else:
        # Make sure that the chosen frequency has maxfiles entries.
        while len(freq_groups[selfreq]) < maxfiles:
            freqs.remove(selfreq)
            selfreq = freqs[len(freqs) // 2]
    # Extend the hosts list (cycling through it) so every file gets a host.
    for i in range(len(freq_groups[selfreq]) - len(hosts)):
        hosts.append(hosts[i])
    # Fill the output map.
    for host, fname in zip(hosts, freq_groups[selfreq]):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    return {'mapfile': fileid}
# Ejemplo n.º 6 (example 6)
# 0
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with only the MSs at the middle frequency.

    The input MSs are grouped by their reference frequency (optionally
    keeping only files whose name contains `include`), and the files of the
    middle frequency group are written to the output mapfile.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    index : int, optional
        Index of the frequency band to use.
    include : str, optional
        Substring an MS filename must contain to be considered.

    Returns
    -------
    result : dict
        Dict with key 'mapfile' giving the filename of the new mapfile

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    include = kwargs.get('include', None)
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # Do not re-run if we already ran and the input files were deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print('PipelineStep_selectMiddleFreq: Not re-running because output file exists, but input files don\'t!')
        return {'mapfile': fileid}

    # Sort the MSs into frequency groups keyed by REF_FREQUENCY (Hz, as int),
    # skipping files that do not contain the `include` substring (if given).
    freq_groups = {}
    hosts = []
    for item in map_in:
        if include is not None and include not in item.file:
            continue
        # Get the frequency info from the MS file.
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        freq_groups.setdefault(freq, []).append(item.file)
        if item.host not in hosts:
            hosts.append(item.host)

    # Find the maximum number of files per frequency group.
    maxfiles = max(len(group) for group in freq_groups.values())
    # Find the center frequency (floor division keeps the index an int).
    freqs = sorted(freq_groups)
    selfreq = freqs[len(freqs) // 2]
    if 'index' in kwargs:
        # NOTE(review): 'index' is used directly as a freq_groups key here,
        # although the docstring calls it an index of the band — confirm.
        selfreq = int(kwargs['index'])
    else:
        # Make sure that the chosen frequency has maxfiles entries.
        while len(freq_groups[selfreq]) < maxfiles:
            freqs.remove(selfreq)
            selfreq = freqs[len(freqs) // 2]
    # Extend the hosts list (cycling through it) so every file gets a host.
    for i in range(len(freq_groups[selfreq]) - len(hosts)):
        hosts.append(hosts[i])
    # Fill the output map.
    for host, fname in zip(hosts, freq_groups[selfreq]):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    return {'mapfile': fileid}