Example #1
def plugin_main(args, **kwargs):
    """
    Matches the hosts in one datamap with those in another

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to adjust
    mapfile_to_match : str
        Filename of datamap to match

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_to_match = kwargs['mapfile_to_match']

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_to_match = DataMap.load(mapfile_to_match)
    map_to_match.iterator = DataMap.SkipIterator

    hosts_to_match = []
    for item in map_to_match:
        hosts_to_match.append(item.host)

    for item, host in zip(map_in, hosts_to_match):
        item.host = host

    map_in.save(mapfile_in)
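A minimal invocation sketch for context, assuming DataMap comes from lofarpipe.support.data_map as in the LOFAR pipeline framework; the mapfile names below are placeholders, not values from the example above:

from lofarpipe.support.data_map import DataMap  # import assumed by the plugin above

# Hypothetical call: rewrites the hosts in 'input.mapfile' to match 'reference.mapfile'.
plugin_main([], mapfile_in='input.mapfile', mapfile_to_match='reference.mapfile')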
Example #2
    def go(self):
        super(imager_create_dbs, self).go()

        # Get assoc_theta; convert an empty string to None if needed
        assoc_theta = self.inputs["assoc_theta"]
        if assoc_theta == "":
            assoc_theta = None

        # Load mapfile data from files
        self.logger.info(self.inputs["slice_paths_mapfile"])
        slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"])
        input_map = DataMap.load(self.inputs['args'][0])
        source_list_map = DataMap.load(self.inputs['source_list_map_path'])

        if self._validate_input_data(input_map, slice_paths_map):
            return 1

        # Run the nodes with now collected inputs
        jobs, output_map = self._run_create_dbs_node(
                 input_map, slice_paths_map, assoc_theta,
                 source_list_map)

        # Collect the output of the node scripts and write it to (map) files
        return self._collect_and_assign_outputs(jobs, output_map,
                                    slice_paths_map)
Example #3
    def _load_mapfiles(self):
        """
        Load data map file, instrument map file, and sky map file.
        Update the 'skip' fields in these map files: if 'skip' is True in any
        of the maps, then 'skip' must be set to True in all maps.
        """
        self.logger.debug("Loading map files:"
            "\n\tdata map: %s\n\tinstrument map: %s\n\tsky map: %s" % (
                self.inputs['args'][0], 
                self.inputs['instrument_mapfile'],
                self.inputs['sky_mapfile']
            )
        )
        self.data_map = DataMap.load(self.inputs['args'][0])
        self.inst_map = DataMap.load(self.inputs['instrument_mapfile'])
        self.sky_map = DataMap.load(self.inputs['sky_mapfile'])

        if not validate_data_maps(self.data_map, self.inst_map, self.sky_map):
            self.logger.error("Validation of input data mapfiles failed")
            return False

        # Update the skip fields of the three maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for x, y, z in zip(self.data_map, self.inst_map, self.sky_map):
            x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip)
        
        return True
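The skip-propagation idiom above can be tried on plain DataMap objects; a minimal sketch, assuming DataMap is importable from lofarpipe.support.data_map and using invented hosts and filenames:

from lofarpipe.support.data_map import DataMap

data_map = DataMap([('node01', 'a.MS', False), ('node01', 'b.MS', True)])
inst_map = DataMap([('node01', 'a.inst', True), ('node01', 'b.inst', False)])
for x, y in zip(data_map, inst_map):
    x.skip = y.skip = (x.skip or y.skip)
# Both entries of both maps now have skip == True.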
 def _get_io_product_specs(self):
     """
     Get input- and output-data product specifications from the
     parset-file, and do some sanity checks.
     """
     dps = self.parset.makeSubset(
         self.parset.fullModuleName('DataProducts') + '.'
     )
     self.input_data = DataMap([
         tuple(os.path.join(location, filename).split(':')) + (skip,)
             for location, filename, skip in zip(
                 dps.getStringVector('Input_Correlated.locations'),
                 dps.getStringVector('Input_Correlated.filenames'),
                 dps.getBoolVector('Input_Correlated.skip'))
     ])
     self.logger.debug("%d Input_Correlated data products specified" %
                       len(self.input_data))
     self.output_data = DataMap([
         tuple(os.path.join(location, filename).split(':')) + (skip,)
             for location, filename, skip in zip(
                 dps.getStringVector('Output_Correlated.locations'),
                 dps.getStringVector('Output_Correlated.filenames'),
                 dps.getBoolVector('Output_Correlated.skip'))
     ])
     self.logger.debug("%d Output_Correlated data products specified" %
                       len(self.output_data))
     # Sanity checks on input- and output data product specifications
     if not validate_data_maps(self.input_data, self.output_data):
         raise PipelineException(
             "Validation of input/output data product specification failed!"
         )
def plugin_main(args, **kwargs):
    """
    Prunes entries from a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    prune_str : str
        Entries starting with this string will be removed.

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    prune_str = kwargs['prune_str'].lower()
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    prunelen = len(prune_str)

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if item.file[:prunelen].lower() != prune_str:
            map_out.data.append(DataProduct(item.host, item.file, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
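A quick illustration of the case-insensitive prefix test used above; the filenames and prune string are made up:

prune_str = 'chunk'
prunelen = len(prune_str)
files = ['Chunk_0.ms', 'L12345_SB101.MS']
kept = [f for f in files if f[:prunelen].lower() != prune_str]
# kept == ['L12345_SB101.MS']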
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the corresponding
    group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels for the 
        different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of groups
        must be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    ignore_dummies: str (optional)
        If true, do not count dummy entries when expanding

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    try:
        ignore_dummies = str(kwargs['ignore_dummies'])	# if the user has defined a dummy preference, follow it, otherwise count dummies as usual
        ignore_dummies = ignore_dummies in ['true', 'True', '1', 'T', 't']
    except:
        ignore_dummies = False

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    if len(inmap) != len(groupmap):
        raise ValueError('PipelineStep_mapfileSingleToGroup: length of {0} and {1} differ'.format(kwargs['mapfile_in'],kwargs['mapfile_groups']))

    map_out = DataMap([])
    inindex = 0

    if ignore_dummies:
        for groupID in xrange(len(groupmap)):
            for fileID in xrange(len(groupmap[groupID].file)):
                if (groupmap[groupID].file)[fileID] != 'dummy_entry':
                        map_out.data.append(DataProduct(inmap[groupID].host, inmap[groupID].file, (inmap[groupID].skip or groupmap[groupID].skip) ))
    else:
        for groupID in xrange(len(groupmap)):
            for fileID in xrange(len(groupmap[groupID].file)):
                map_out.data.append(DataProduct(inmap[groupID].host, inmap[groupID].file, (inmap[groupID].skip or groupmap[groupID].skip) ))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
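The expansion above repeats the i-th entry of mapfile_in once per file in the i-th group (skipping 'dummy_entry' files when ignore_dummies is set). A tiny sketch of the bookkeeping, with invented group sizes:

# Hypothetical input: two entries in mapfile_in, groups of 3 and 2 files, no dummies.
group_sizes = [3, 2]
expanded = [i for i, size in enumerate(group_sizes) for _ in range(size)]
# expanded == [0, 0, 0, 1, 1]  -> the output map gets five entries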
def plugin_main(args, **kwargs):
    """
    Makes a mapfile for selfcal images (assuming standard naming conventions)

    Parameters
    ----------
    selfcal_dir : str
        Full path of selfcal directory
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string (e.g.,
        '[host1, host2]')
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    selfcal_dir = kwargs['selfcal_dir']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    if os.path.exists(selfcal_dir):
        selfcal_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[01]2-MFS-image.fits'))
        tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22_iter*-MFS-image.fits'))
        if len(tec_iter_images) == 0:
            tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22-MFS-image.fits'))
        selfcal_images += tec_iter_images
        selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[3]2-MFS-image.fits'))
        selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image42_iter*-MFS-image.fits'))
        if len(selfcal_images) == 0:
            selfcal_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[01]2-image.fits'))
            tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22_iter*-image.fits'))
            if len(tec_iter_images) == 0:
                tec_iter_images = glob.glob(os.path.join(selfcal_dir, '*.wsclean_image22-image.fits'))
            selfcal_images += tec_iter_images
            selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image[3]2-image.fits'))
            selfcal_images += glob.glob(os.path.join(selfcal_dir, '*.wsclean_image42_iter*-image.fits'))
        selfcal_images.sort()
    else:
        selfcal_images = []

    # Save image list as a string to the output mapfile
    image_list = '[{0}]'.format(','.join(selfcal_images))
    map_out = DataMap([])
    map_out.data.append(DataProduct(hosts[0], image_list, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #8
 def test_skip_iterator(self):
     data_map = DataMap(self.new_style_map)
     data_map.iterator = DataMap.SkipIterator
     unskipped = [item for item in data_map]
     self.assertEqual(len(unskipped), 2)
     self.assertTrue(all(isinstance(item, DataProduct) for item in unskipped))
     self.assertEqual(unskipped[0].host, 'locus002')
     self.assertEqual(unskipped[0].file, 'L12345_SB102.MS')
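For reference, a new_style_map fixture consistent with the assertions in this and the following tests would look like the list below; it is reconstructed from the expected values, not copied from the actual test setup:

new_style_map = [
    ('locus001', 'L12345_SB101.MS', True),   # skipped
    ('locus002', 'L12345_SB102.MS', False),
    ('locus003', 'L12345_SB103.MS', True),   # skipped
    ('locus004', 'L12345_SB104.MS', False),
]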
Example #9
 def test_tuple_iterator(self):
     data_map = DataMap(self.new_style_map)
     data_map.iterator = DataMap.TupleIterator
     tuples = [item for item in data_map]
     self.assertEqual(len(tuples), 4)
     self.assertTrue(all(isinstance(item, tuple) for item in tuples))
     self.assertTrue(all(len(item) == 2 for item in tuples))
     self.assertEqual(tuples[0], ('locus001', 'L12345_SB101.MS'))
Example #10
    def test_append_item_non_skip(self):
        data_map = DataMap(self.new_style_map)
        data_map.append(("host","file", False))

        data_map.iterator = DataMap.TupleIterator
        tuples = [item for item in data_map]
        self.assertEqual(len(tuples), 5)
        self.assertTrue(all(isinstance(item, tuple) for item in tuples))
        self.assertTrue(all(len(item) == 2 for item in tuples))
        self.assertEqual(tuples[-1], ('host', 'file'))
Example #11
 def __init__(self):
     """
     Initialize our data members.
     """
     super(bbs_reducer, self).__init__()
     self.bbs_map = list()
     self.jobs = list()
     self.data_map = DataMap()
     self.inst_map = DataMap()
     self.sky_map = DataMap()
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append : str
        String to append
    append_index : bool
        If True, append a unique index to each file
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']

    if 'append_index' in kwargs:
        append_index = kwargs['append_index']
        if type(append_index) is str:
            if append_index.lower() == 'true':
                append_index = True
            else:
                append_index = False
    else:
        append_index = False

    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if append_index:
            map_out.data.append(DataProduct(item.host, item.file+append_str+'_{}'.format(i), item.skip))
        else:
            map_out.data.append(DataProduct(item.host, item.file+append_str, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #13
 def __init__(self):
     """
     Initialize member variables and call superclass init function
     """
     control.__init__(self)
     self.input_data = DataMap()
     self.target_data = DataMap()
     self.output_data = DataMap()
     self.scratch_directory = None
     self.parset_dir = None
     self.mapfile_dir = None
def plugin_main(args, **kwargs):
    """
    Makes a mapfile for list of files

    Parameters
    ----------
    files : list or str
        List of files or mapfile with such a list as the only entry. May be
        given as a list of strings or as a string (e.g.,
        '[s1.skymodel, s2.skymodel]')
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string (e.g.,
        '[host1, host2]')
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    if type(kwargs['files']) is str:
        try:
            # Check if input is mapfile containing list as a string
            map_in = DataMap.load(kwargs['files'])
            in_files = [item.file for item in map_in]
            files = []
            for f in in_files:
                files += f.strip('[]').split(',')
        except:
            files = kwargs['files']
            files = files.strip('[]').split(',')
        files = [f.strip() for f in files]
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    for i in range(len(files)-len(hosts)):
        hosts.append(hosts[i])

    map_out = DataMap([])
    for h, f in zip(hosts, files):
        map_out.data.append(DataProduct(h, f, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
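The padding loop cycles through the given hosts until there is one per file; a worked example with invented names:

files = ['s1.skymodel', 's2.skymodel', 's3.skymodel', 's4.skymodel', 's5.skymodel']
hosts = ['host1', 'host2']
for i in range(len(files) - len(hosts)):
    hosts.append(hosts[i])
# hosts == ['host1', 'host2', 'host1', 'host2', 'host1']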
Example #15
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by compressing input mapfile items into one item

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    list_format : bool, optional
        If True, the compressed item will use a Python list format (e.g.,
        '[file1, file2, ...]'). If False, it will be a space-separated list (e.g.,
        'file1 file2 ...')

    Returns
    -------
    result : dict
        New compressed datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'list_format' in kwargs:
        list_format = kwargs['list_format']
    else:
        list_format = True
    if type(list_format) is str:
        if list_format.lower() == 'true':
            list_format = True
        else:
            list_format = False

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    map_in.iterator = DataMap.SkipIterator
    file_list = [item.file for item in map_in]
    if list_format:
        newlist = '[{0}]'.format(','.join(file_list))
    else:
        newlist = '{0}'.format(' '.join(file_list))

    # Just assign host of first file to compressed file
    hosts = [item.host for item in map_in]
    map_out.data.append(DataProduct(hosts[0], newlist, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
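For example, if the unskipped entries of the input map were chunk0.ms and chunk1.ms (invented names), the single compressed item would be built like this:

file_list = ['chunk0.ms', 'chunk1.ms']
as_list = '[{0}]'.format(','.join(file_list))   # list_format=True  -> '[chunk0.ms,chunk1.ms]'
as_plain = '{0}'.format(' '.join(file_list))    # list_format=False -> 'chunk0.ms chunk1.ms'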
def plugin_main(args, **kwargs):
    """
    Trims a string from filenames in a mapfile

    Note that everything from the last instance of the matching string to the
    end is trimmed.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    trim_str : str
        String to remove
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    counter : int
        If counter is greater than 0, replace "image32" with "image42". This is
        a special argument for facetselfcal looping only

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    trim_str = kwargs['trim']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'counter' in kwargs:
        counter = int(kwargs['counter'])
    else:
        counter = 0

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        index = item.file.rfind(trim_str)
        if index >= 0:
            item_trim = item.file[:index]
            if counter > 0:
                item_trim = item_trim.replace('image32', 'image42')
            map_out.data.append(DataProduct(item.host, item_trim,
                item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
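Trimming is based on str.rfind, so everything from the last occurrence of trim_str onward is dropped; a short illustration with a made-up filename, including the counter-driven image32 -> image42 substitution:

name = 'facet1.wsclean_image32-MFS-image.fits'
trim_str = '-MFS'
trimmed = name[:name.rfind(trim_str)]             # 'facet1.wsclean_image32'
trimmed = trimmed.replace('image32', 'image42')   # applied only when counter > 0
# trimmed == 'facet1.wsclean_image42'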
Example #17
    def test_append_item_skip(self):
        data_map = DataMap(self.new_style_map)
        data_map.append(("host","file", True))

        data_map.iterator = DataMap.SkipIterator
        dataProducts = [item for item in data_map]
        # default contains 2 nonskipped items
        self.assertEqual(len(dataProducts), 2) 
        self.assertTrue(all(isinstance(item, DataProduct) 
                        for item in dataProducts))
        # The map already contains 2 skipped items, the final item is tested 
        # here
        self.assertEqual(dataProducts[-1].host, 'locus004')
        self.assertEqual(dataProducts[-1].file, 'L12345_SB104.MS')
Example #18
def plugin_main(args, **kwargs):
    fileid = kwargs['mapfile_in']
    datamap = DataMap.load(fileid)
    hdf5File = os.path.join(kwargs['hdf5_dir'],kwargs['hdf5file'])
    if kwargs.has_key('instrument'):
        instrument = kwargs['instrument']
    else:
        instrument = '/instrument'
    if kwargs.has_key('compression'):
        compression = int(kwargs['compression'])
    else:
        compression = 5
    if kwargs.has_key('solset'):
        solsetName = kwargs['solset']
    else:
        solsetName = None


    # Check if all the necessary files are available
    antennaFile = os.path.join(datamap[0].file,'ANTENNA')
    if not os.path.isdir(antennaFile):
        logging.critical('Missing ANTENNA table.')
        sys.exit(1)
    fieldFile = os.path.join(datamap[0].file,'FIELD')
    if not os.path.isdir(fieldFile):
        logging.critical('Missing FIELD table.')
        sys.exit(1)
    skydbFile = os.path.join(datamap[0].file,'sky')
    if not os.path.isdir(skydbFile):
        logging.critical('Missing sky table.')
        sys.exit(1)
        
    #generate list of parmDB-filenames
    parmDBnames = [ MS.file+instrument for MS in datamap ]

    #create and fill the hdf5-file:
    solset = parmDBs2h5parm(hdf5File, parmDBnames, antennaFile, fieldFile, skydbFile, compression=compression, solsetName=solsetName)

    # Add CREATE entry to history 
    h5parmDB = h5parm(hdf5File, readonly = False)
    soltabs = h5parmDB.getSoltabs(solset=solset)
    for st in soltabs:
        sw = solWriter(soltabs[st])
        sw.addHistory('CREATE (by PipelineStep_losotoImporter from %s / %s - %s)' % (os.path.abspath(''), 
                                   os.path.basename(parmDBnames[0]), os.path.basename(parmDBnames[-1]) ) )
    h5parmDB.close()

    #generate mapfile and wrap up
    mapfileentry = {}
    mapfileentry['host'] = 'localhost'
    mapfileentry['file'] = hdf5File
    mapfileentry['skip'] = False            
    outfileid = os.path.join(kwargs['mapfile_dir'], kwargs['filename'])
    outmap = open(outfileid, 'w')
    outmap.write(repr([mapfileentry]))
    outmap.close()
    result = {}
    result['mapfile'] = outfileid
    return result
Example #19
 def __init__(self):
     """
     Constructor sets the python command used to call node scripts
     """
     super(copier, self).__init__(
         "python {0}".format(self.__file__.replace('master', 'nodes')))
     self.source_map = DataMap()
     self.target_map = DataMap()
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by uncompressing input mapfile list item into separate items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing list of MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    hosts : str
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])

    files = map_in[0].file.strip('[]').split(',')
    files = [f.strip() for f in files]
    for i in range(len(files)-len(hosts)):
        hosts.append(hosts[i])

    for file, host in zip(files, hosts):
        map_out.data.append(DataProduct(host, file, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the corresponding
    group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels for the
        different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of groups
        must be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    if len(inmap) != len(groupmap):
        raise ValueError('PipelineStep_mapfileSingleToGroup: length of {0} and {1} differ'.format(kwargs['mapfile_in'],kwargs['mapfile_groups']))

    map_out = DataMap([])
    inindex = 0
    for groupID in xrange(len(groupmap)):
        for fileID in xrange(len(groupmap[groupID].file)):
            map_out.data.append(DataProduct(inmap[groupID].host, inmap[groupID].file, (inmap[groupID].skip or groupmap[groupID].skip) ))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #22
def _create_mapfile_ato(inmap):
    maps = DataMap([])
    mapsin = DataMap.load(inmap)
    mapsin.iterator = DataMap.SkipIterator
    newlist = ''
    for i, item in enumerate(mapsin):
        newlist = newlist + item.file + ','
    newlist = newlist.rstrip(',')
    newlist = '[' + newlist + ']'
    maps.data.append(DataProduct('localhost', newlist, False))
    return maps
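The string assembly in _create_mapfile_ato is equivalent to joining the unskipped filenames; a more compact sketch of the same behaviour (not the original code):

def _create_mapfile_ato_compact(inmap):
    mapsin = DataMap.load(inmap)
    mapsin.iterator = DataMap.SkipIterator
    newlist = '[' + ','.join(item.file for item in mapsin) + ']'
    maps = DataMap([])
    maps.data.append(DataProduct('localhost', newlist, False))
    return maps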
def _calc_edge_chans(inmap, numch, edgeFactor=32):
    """
    Generates a map with strings that can be used as input for NDPPP to flag the edges 
    of the input MSs during (or after) concatenation.
    
    inmap      - MultiDataMap (not mapfilename!) with the files to be concatenated.
    numch      - Number of channels per input file (All files are assumed to have the same number 
                 of channels.)
    edgeFactor - Divisor to compute how many channels are to be flagged at beginning and end. 
                 (numch=64 and edgeFactor=32 means "flag two channels at beginning and two at end")
    """
    outmap = DataMap([])
    for group in inmap:
        flaglist = []
        for i in xrange(len(group.file)):
            flaglist.extend(range(i*numch,i*numch+numch/edgeFactor))
            flaglist.extend(range((i+1)*numch-numch/edgeFactor,(i+1)*numch))
        outmap.append(DataProduct(group.host,str(flaglist).replace(' ',''),group.skip))
        print str(flaglist).replace(' ','')
    return outmap
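A worked example of the flag list for the values quoted in the docstring (numch=64, edgeFactor=32, i.e. two channels flagged at each band edge) and a group of two concatenated files:

numch, edgeFactor = 64, 32
flaglist = []
for i in range(2):  # two files in the group
    flaglist.extend(range(i * numch, i * numch + numch // edgeFactor))
    flaglist.extend(range((i + 1) * numch - numch // edgeFactor, (i + 1) * numch))
# flaglist == [0, 1, 62, 63, 64, 65, 126, 127]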
Example #24
    def go(self):
        # TODO: Remove dependency on mapfile_dir 
        self.logger.info("Starting copier run")
        super(copier, self).go()

        # Load data from mapfiles
        self.source_map = DataMap.load(self.inputs['mapfile_source'])
        self.target_map = DataMap.load(self.inputs['mapfile_target'])

        # validate data in mapfiles
        if not self._validate_mapfiles(self.inputs['allow_rename']):
            return 1

        # Run the compute nodes with the node specific mapfiles
        for source, target in zip(self.source_map, self.target_map):
            args = [source.host, source.file, target.file]
            self.append_job(target.host, args)

        # start the jobs, return the exit status.
        return self.run_jobs()
Example #25
def update_state(dir_input):
    """
    Updates the paths in mapfiles or state files

    Parameters
    ----------
    dir_input : str
        Directory containing files to update

    """
    file_list = glob.glob(os.path.join(dir_input, '*'))

    if dir_input.endswith('mapfiles'):
        # Assume path is a pipeline mapfiles directory. In this case, we can
        # simply substitute the new working_dir for the old one in each of the
        # mapfiles
        working_dir = dir_input.split('results/')[0]
        for f in file_list:
            map = DataMap.load(f)
            for item in map:
                if '/' in item.file:
                    old_working_dir = item.file.split('results/')[0]
                    item.file = item.file.replace(old_working_dir, working_dir)
            map.save(f)
    elif dir_input.endswith('state'):
        # Assume path is the Factor state directory. In this case, we can try to
        # load files as pickled state files and look for paths inside. If found,
        # substitute new working_dir for the old one
        working_dir = os.path.dirname(dir_input)
        for f in file_list:
            try:
                with open(f, "rb") as fp:
                    d = pickle.load(fp)
                    for k, v in d.iteritems():
                        if type(v) is str:
                            if k == 'working_dir':
                                d[k] = working_dir
                            if '/' in v:
                                for infix in ['results/', 'state/', 'chunks/']:
                                    parts = v.split(infix)
                                    if len(parts) > 1:
                                        d[k] = os.path.join(working_dir, infix, parts[-1])
                        elif type(v) is list:
                            for i, l in enumerate(v):
                                if '/' in l:
                                    for infix in ['results/', 'state/', 'chunks/']:
                                        parts = l.split(infix)
                                        if len(parts) > 1:
                                            v[i] = os.path.join(working_dir, infix, parts[-1])
                            d[k] = v
                with open(f, "w") as fp:
                    pickle.dump(d, fp)
            except:
                pass
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by filtering input mapfile items into one item (the middle
    one)

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])

    map_in.iterator = DataMap.SkipIterator
    files = [item.file for item in map_in]
    hosts = [item.host for item in map_in]
    if 'index' in kwargs:
        index = int(kwargs['index'])
    else:
        index = len(files)/2
    map_out.data.append(DataProduct(hosts[index], files[index], False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
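Note that the default index = len(files)/2 relies on Python 2 integer division (consistent with the xrange and print usage elsewhere in these examples); a Python 3 port would need floor division to keep the index usable:

index = len(files) // 2  # floor division keeps the index an integer under Python 3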
Example #27
    def _get_io_product_specs(self):
        """
        Get input- and output-data product specifications from the
        parset-file, and do some sanity checks.
        """
        dps = self.parset.makeSubset(
            self.parset.fullModuleName('DataProducts') + '.'
        )
        # convert input dataproducts from parset value to DataMap
        self.input_data = DataMap([
            tuple(os.path.join(location, filename).split(':')) + (skip,)
                for location, filename, skip in zip(
                    dps.getStringVector('Input_Correlated.locations'),
                    dps.getStringVector('Input_Correlated.filenames'),
                    dps.getBoolVector('Input_Correlated.skip'))
        ])
        self.logger.debug("%d Input_Correlated data products specified" %
                          len(self.input_data))

        self.output_data = DataMap([
            tuple(os.path.join(location, filename).split(':')) + (skip,)
                for location, filename, skip in zip(
                    dps.getStringVector('Output_SkyImage.locations'),
                    dps.getStringVector('Output_SkyImage.filenames'),
                    dps.getBoolVector('Output_SkyImage.skip'))
        ])
        self.logger.debug("%d Output_SkyImage data products specified" %
                          len(self.output_data))

        # # Sanity checks on input- and output data product specifications
        # if not validate_data_maps(self.input_data, self.output_data):
        #    raise PipelineException(
        #        "Validation of input/output data product specification failed!"
        #    )  # Turned off until DataMap is extended.

        # Target data is basically scratch data, consisting of one concatenated
        # MS per image. It must be stored on the same host as the final image.
        self.target_data = copy.deepcopy(self.output_data)

        for idx, item in enumerate(self.target_data):
            item.file = os.path.join(self.scratch_directory, 'ms_per_image_%d' % idx, 'concat.ms')
Example #28
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by expanding single input mapfile item into many items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing single item
    mapfile_to_match : str
        Filename of datamap containing multiple items
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_to_match = kwargs['mapfile_to_match']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_match = DataMap.load(mapfile_to_match)
    map_out = DataMap([])

    map_match.iterator = DataMap.SkipIterator
    for item in map_match:
        map_out.data.append(DataProduct(item.host, map_in[0].file, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append_str : str
        String to append
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        map_out.data.append(DataProduct(item.host, item.file+append_str, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Expands the first entry of mapfile_in so that there is one copy per entry in mapfile_comp

    Parameters
    ----------
    mapfile_in : str
        Parmdbs containing phase solutions
    mapfile_dir : str
        mapfile directory
    filename : str
        Output filename
    mapfile_comp : str
        Target MSs

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    mapfile_in = kwargs['mapfile_in']
    mapfile_comp = kwargs['mapfile_comp']
    filename = kwargs['filename']

    value = DataMap.load(mapfile_in)[0]  # the single entry to be expanded
    n = len(DataMap.load(mapfile_comp))	# these are actual MS files

    map_out = DataMap([])
    for i in range(n):
        map_out.data.append(DataProduct(value.host,value.file, value.skip ))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #31
def plugin_main(args, **kwargs):
    """
    Find the measurement set closest to a given solution table, suitable for reading station names.

    Parameters
    ----------
    mapfile_ms : str
        Mapfile of the measurement sets
    mapfile_grpd : str
        Mapfile of the (grouped) calibration tables
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """

    mapfile_dir = kwargs['mapfile_dir']
    mapfile_in = kwargs['mapfile_ms']
    mapfile_grpd = kwargs['mapfile_grpd']
    filename = kwargs['filename']

    result = {}
    data = DataMap.load(mapfile_in)  # these are actual MS files
    groups = DataMap.load(mapfile_grpd)  # these are probably parmdbs

    datalist = [data[i].file for i in xrange(len(data))]
    grp_list = [groups[i].file for i in xrange(len(groups))]

    frequency_groups = []
    map_out = DataMap([])
    map_out_addIS = DataMap([])
    map_out_addIS_tables = DataMap([])
    tomap_addIS = 0

    for grp_file in grp_list:
        table = pyrap.tables.table(grp_file, readonly=True)
        frequency_range = list(np.zeros(2))
        frequency_range[0] = float(table.getcol('STARTX')[0])
        frequency_range[1] = float(table.getcol('ENDX')[0])
        frequency_groups.append(frequency_range)
        table.close()
        pass

    for msID, ms_file in enumerate(datalist):
        table = pyrap.tables.table(ms_file + '/SPECTRAL_WINDOW', readonly=True)
        ref_frequency = float(table.getcol('REF_FREQUENCY')[0])
        table.close()
        for groupID, freq_group in enumerate(frequency_groups):
            if freq_group[0] <= ref_frequency <= freq_group[1]:
                map_out.data.append(
                    DataProduct(groups[groupID].host, groups[groupID].file,
                                (groups[groupID].skip or data[msID].skip)))
                if tomap_addIS <= groupID:
                    map_out_addIS.data.append(
                        DataProduct(data[msID].host, data[msID].file,
                                    (data[msID].skip or groups[groupID].skip)))
                    map_out_addIS_tables.data.append(
                        DataProduct(groups[groupID].host, groups[groupID].file,
                                    (groups[groupID].skip or data[msID].skip)))
                    tomap_addIS += 1
                    pass
                break
                pass
            else:
                continue
                pass
            pass
        pass

    if len(data) != len(map_out):
        raise ValueError(
            'PipelineStep_FindCorrespondingMS: length of mapfiles mismatch. Probably there are some phase solution tables missing.'
        )

    fileid = os.path.join(mapfile_dir, filename + '_parmdbs')
    map_out.save(fileid)
    result['parmdbs'] = fileid

    fileid = os.path.join(mapfile_dir, filename + '_tables')
    map_out_addIS_tables.save(fileid)
    result['tables'] = fileid

    fileid = os.path.join(mapfile_dir, filename)
    map_out_addIS.save(fileid)
    result['mapfile'] = fileid

    return result
    pass
Example #32
    def go(self):
        """
        Entry point for recipe: Called by the pipeline framework
        """
        super(imager_prepare, self).go()
        self.logger.info("Starting imager_prepare run")
        job_directory = self.config.get("layout", "job_directory")
        # *********************************************************************
        # input data
        input_map = DataMap.load(self.inputs['args'][0])
        output_map = DataMap.load(self.inputs['target_mapfile'])
        slices_per_image = self.inputs['slices_per_image']
        subbands_per_image = self.inputs['subbands_per_image']
        # Validate input
        if not self._validate_input_map(input_map, output_map,
                                        slices_per_image, subbands_per_image):
            return 1

        # outputs
        output_ms_mapfile_path = self.inputs['mapfile']

        # *********************************************************************
        # schedule the actual work
        # TODO: Refactor this function into: load data, perform work,
        # create output
        node_command = " python3 %s" % (self.__file__.replace(
            "master", "nodes"))

        jobs = []
        paths_to_image_mapfiles = []
        n_subband_groups = len(output_map)  # needed for subsets in sb list

        globalfs = self.config.has_option(
            "remote", "globalfs") and self.config.getboolean(
                "remote", "globalfs")

        for idx_sb_group, item in enumerate(output_map):
            # create the input files for this node
            self.logger.debug("Creating input data subset for processing "
                              "on: {0}".format(item.host))
            inputs_for_image_map = \
                self._create_input_map_for_sbgroup(
                                slices_per_image, n_subband_groups,
                                subbands_per_image, idx_sb_group, input_map)

            # Save the mapfile
            inputs_for_image_mapfile_path = os.path.join(
                job_directory, "mapfiles",
                "ms_per_image_{0}.map".format(idx_sb_group))

            self._store_data_map(inputs_for_image_mapfile_path,
                                 inputs_for_image_map, "inputmap for location")

            # skip the current item if its skip flag is set; we cannot use the
            # SkipIterator here because enumerate depends on the index in the map
            if item.skip == True:
                # assure that the mapfile is correct
                paths_to_image_mapfiles.append(tuple([item.host, [], True]))
                continue

            # save the (input) MSs as a list of mapfiles
            paths_to_image_mapfiles.append(
                tuple([item.host, inputs_for_image_mapfile_path, False]))

            # use unique working directories per job, to prevent interference between jobs on a global fs
            working_dir = os.path.join(
                self.inputs['working_directory'],
                "imager_prepare_{0}".format(idx_sb_group))

            arguments = [
                self.environment, self.inputs['parset'], working_dir,
                self.inputs['processed_ms_dir'], self.inputs['ndppp_exec'],
                item.file, slices_per_image, subbands_per_image,
                inputs_for_image_mapfile_path,
                self.inputs['asciistat_executable'],
                self.inputs['statplot_executable'],
                self.inputs['msselect_executable'],
                self.inputs['rficonsole_executable'],
                self.inputs['do_rficonsole'], self.inputs['add_beam_tables'],
                globalfs
            ]

            jobs.append(
                ComputeJob(item.host,
                           node_command,
                           arguments,
                           resources={"cores": self.inputs['nthreads']}))

        # Hand over the job(s) to the pipeline scheduler
        self._schedule_jobs(jobs)

        # *********************************************************************
        # validate the output, cleanup, return output
        if self.error.isSet():  # if one of the nodes failed
            self.logger.warn("Failed prepare_imager run detected: Generating "
                             "new output_ms_mapfile_path without failed runs:"
                             " {0}".format(output_ms_mapfile_path))

        concat_ms = copy.deepcopy(output_map)
        slices = []
        finished_runs = 0
        # scan the return dict for completed key
        # loop over the potential jobs including the skipped
        # If we have a skipped item, add the item to the slices with skip set
        jobs_idx = 0
        for item in concat_ms:
            # If this is an item that is skipped via the skip parameter in
            # the parset, append a skipped
            if item.skip:
                slices.append(tuple([item.host, [], True]))
                continue

            # we cannot use the skip iterator so we need to manually get the
            # current job from the list
            job = jobs[jobs_idx]

            # only save the slices if the node has completed successfully
            if job.results["returncode"] == 0:
                finished_runs += 1
                slices.append(
                    tuple([item.host, job.results["time_slices"], False]))
            else:
                # Set the dataproduct to skipped!!
                item.skip = True
                slices.append(tuple([item.host, [], True]))
                msg = "Failed run on {0}. NOT Created: {1} ".format(
                    item.host, item.file)
                self.logger.warn(msg)

            # we have a non skipped workitem, increase the job idx
            jobs_idx += 1

        if finished_runs == 0:
            self.logger.error(
                "None of the started compute nodes finished: "
                "the current recipe produced no output, aborting")
            return 1

        # Write the output mapfiles:
        # concat.ms paths:
        self._store_data_map(output_ms_mapfile_path, concat_ms,
                             "mapfile with concat.ms")

        # timeslices
        MultiDataMap(slices).save(self.inputs['slices_mapfile'])
        self.logger.info(
            "Wrote MultiMapfile with produced time slices: {0}".format(
                self.inputs['slices_mapfile']))

        # map with actual input mss.
        self._store_data_map(self.inputs["ms_per_image_mapfile"],
                             DataMap(paths_to_image_mapfiles),
                             "mapfile containing (used) input ms per image:")

        # Set the return values
        self.outputs['mapfile'] = output_ms_mapfile_path
        self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
        self.outputs['ms_per_image_mapfile'] = \
            self.inputs["ms_per_image_mapfile"]
        return 0
Example #33
    def go(self):
        """
        This member contains all the functionality of the imager_awimager.
        Functionality is all located at the node side of the script.
        """
        super(imager_awimager, self).go()
        self.logger.info("Starting imager_awimager run")

        # *********************************************************************
        # 1. collect the inputs and validate
        input_map = DataMap.load(self.inputs['args'][0])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

        if not validate_data_maps(input_map, sourcedb_map):
            self.logger.error(
                        "the supplied input_ms mapfile and sourcedb mapfile "
                        "are incorrect. Aborting")
            self.logger.error(repr(input_map))
            self.logger.error(repr(sourcedb_map))
            return 1

        # *********************************************************************
        # 2. Start the node side of the awimager recipe
        # Compile the command to be executed on the remote machine
        node_command = "python3 %s" % (self.__file__.replace("master", "nodes"))
        jobs = []

        output_map = copy.deepcopy(input_map)        
        for w, x, y in zip(input_map, output_map, sourcedb_map):
            w.skip = x.skip = y.skip = (
                w.skip or x.skip or y.skip
            )

        sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
            DataMap.SkipIterator

        for idx, (measurement_item, source_item) in enumerate(zip(input_map, sourcedb_map)):
            if measurement_item.skip or source_item.skip:
                jobs.append(None)
                continue
            # both the sourcedb and the measurement are in a map
            # unpack both
            host , measurement_path = measurement_item.host, measurement_item.file
            host2 , sourcedb_path = source_item.host, source_item.file

            # use unique working directories per job, to prevent interference between jobs on a global fs
            working_dir = os.path.join(self.inputs['working_directory'], "imager_awimager_{0}".format(idx))

            # construct and save the output name
            arguments = [self.inputs['executable'],
                         self.environment,
                         self.inputs['parset'],
                         working_dir,
                         # put in unique dir, as node script wants to put private .par files next to it
                         "%s_%s/image" % (self.inputs['output_image'], idx), 
                         measurement_path,
                         sourcedb_path,
                         self.inputs['mask_patch_size'],
                         self.inputs['autogenerate_parameters'],
                         self.inputs['specify_fov'],
                         self.inputs['fov'],
                         ]

            jobs.append(ComputeJob(host, node_command, arguments,
                    resources={
                        "cores": self.inputs['nthreads']
                    }))
        self._schedule_jobs(jobs)

        # *********************************************************************
        # 3. Check output of the node scripts

        for job, output_item in  zip(jobs, output_map):
            # job ==  None on skipped job
            if not "image" in job.results:
                output_item.file = "failed"
                output_item.skip = True

            else:
                output_item.file = job.results["image"]
                output_item.skip = False

        # Check if there are finished runs
        succesfull_runs = None
        for item in output_map:
            if item.skip == False:
                succesfull_runs = True
                break

        if not succesfull_runs:
            self.logger.error(
                    "None of the started awimager runs finished correctly")
            self.logger.error(
                    "No work left to be done: exiting with error status")
            return 1

        # If partial success
        if self.error.isSet():
            self.logger.warn("Failed awimager node run detected. Continuing with "
                             "successful tasks.")

        self._store_data_map(self.inputs['mapfile'], output_map,
                             "mapfile containing produced awimages")

        self.outputs["mapfile"] = self.inputs['mapfile']
        return 0
Example #34
    # Check options
    if len(args) != 1:
        opt.print_help()
        sys.exit()

    # first argument: pattern for measurement-sets
    inMSs = glob.glob(args[0])
    if options.randomize:
        random.shuffle(inMSs)
    if options.decimate:
        for i in range((len(inMSs) - 1), -1, -10):
            inMSs.pop(i)

    ergdict = main(inMSs,
                   options.filename,
                   '.',
                   numSB=options.numbands,
                   hosts=None,
                   NDPPPfill=True)

    groupmap = DataMap.load(ergdict['groupmapfile'])
    filemap = MultiDataMap.load(ergdict['mapfile'])
    print "len(groupmap) : %d , len(filemap) : %d " % (len(groupmap),
                                                       len(filemap))
    if len(groupmap) != len(filemap):
        print "groupmap and filemap have different length!"
        sys.exit(1)
    for i in xrange(len(groupmap)):
        print "Group \"%s\" has %d entries." % (groupmap[i].file,
                                                len(filemap[i].file))
Example #35
def main(ms_input,
         filename=None,
         mapfile_dir=None,
         numSB=-1,
         enforce_numSB=True,
         hosts=None,
         NDPPPfill=True,
         target_path=None,
         stepname=None,
         nband_pad=0,
         make_dummy_files=False,
         skip_flagged_groups=True):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean put
        all files of the same time-step into one group.
        default = -1
    enforce_numSB : bool, optional
        If True and numSB > 0, then add flagged dummy data to ensure that the
        last block has exactly numSB files. If False, then the last block can
        have fewer files (as long as there are no gaps in frequency)
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output files
        into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files
    nband_pad : int, optional
        Add this number of bands of dummy data to the high-frequency end
        of the list
    make_dummy_files : bool, optional
        If True, make MS files for all dummy data
    skip_flagged_groups : bool, optional
        If True, groups that are missing have their skip flag set to True. If
        False, these groups are filled with dummy data and their skip flag set
        to False

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """
    if not filename or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: filename and mapfile_dir are needed!')

    # convert input to needed types
    ms_list = input2strlist(ms_input)
    NDPPPfill = input2bool(NDPPPfill)
    numSB = int(numSB)
    nband_pad = int(nband_pad)
    enforce_numSB = input2bool(enforce_numSB)
    make_dummy_files = input2bool(make_dummy_files)
    skip_flagged_groups = input2bool(skip_flagged_groups)

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on", len(ms_list), "files"

    dirname = os.path.dirname(ms_list[0])

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # use the slower but more reliable way:
        obstable = pt.table(ms, ack=False)
        timestamp = int(round(np.min(obstable.getcol('TIME'))))
        #obstable = pt.table(ms+'::OBSERVATION', ack=False)
        #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
        obstable.close()
        if timestamp in time_groups:
            time_groups[timestamp]['files'].append(ms)
        else:
            time_groups[timestamp] = {
                'files': [ms],
                'basename': os.path.splitext(ms)[0]
            }
    print "sort_times_into_freqGroups: found", len(time_groups), "time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()  # not needed now, but later
    first = True
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                freq_width = sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = freq
                minfreq = freq
                first = False
            else:
                assert freq_width == sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = max(maxfreq, freq)
                minfreq = min(minfreq, freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs,
                                              time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    #the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    maxfreq = maxfreq + freq_width / 2.
    minfreq = minfreq - freq_width / 2.
    numFiles = round((maxfreq - minfreq) / freq_width)
    if numSB > 0:
        ngroups = int(np.ceil(numFiles / numSB))
    else:
        ngroups = 1
        numSB = int(numFiles)
    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        nbands = 0
        all_group_files = []
        for fgroup in range(ngroups):
            files = []
            skip_this = True
            for fIdx in range(numSB):
                thisfreq = (fIdx + fgroup * numSB + 1) * freq_width + minfreq
                if freq > thisfreq:
                    if enforce_numSB or thisfreq - freq_width / 2. < maxfreq:
                        files.append('dummy.ms')
                else:
                    files.append(fname)
                    skip_this = False
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        # Set freq to high value to pad the rest of the group
                        # with dummy data
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')

            if fgroup == ngroups - 1:
                # Append dummy data to last frequency group only
                for i in range(nband_pad):
                    files.append('dummy.ms')
            if not skip_this:
                nbands += len(files)

            if make_dummy_files:
                for i, ms in enumerate(files):
                    if ms == 'dummy.ms':
                        # Replace dummy.ms in files list with new filename
                        files[i] = os.path.join(
                            dirname, '{0}_{1}.ms'.format(
                                os.path.splitext(ms)[0],
                                uuid.uuid4().urn.split('-')[-1]))

            if not skip_flagged_groups:
                # Don't set skip flag to True, even if group is missing all files
                if not make_dummy_files:
                    raise ValueError(
                        'skip_flagged_groups cannot be False if make_dummy_files is also False'
                    )
                else:
                    skip_this = False

            filemap.append(
                MultiDataProduct(hosts[hostID % numhosts], files, skip_this))
            groupname = time_groups[time]['basename'] + '_%Xt_%dg.ms' % (
                time, fgroup)
            if type(stepname) is str:
                groupname += stepname
            if type(target_path) is str:
                groupname = os.path.join(target_path,
                                         os.path.basename(groupname))
            groupmap.append(
                DataProduct(hosts[hostID % numhosts], groupname, skip_this))
            hostID += 1
            all_group_files.extend(files)

        assert freq == 1e12

        if make_dummy_files:
            # Find at least one existing ms for this timestamp
            ms_exists = None
            for ms in all_group_files:
                if os.path.exists(ms):
                    ms_exists = ms
                    sw = pt.table('{}::SPECTRAL_WINDOW'.format(ms))
                    ms_exists_ref_freq = sw.getcol('REF_FREQUENCY')[0]
                    sw.close()
                    break

            for i, ms in enumerate(all_group_files):
                if 'dummy' in ms:
                    # Alter SPECTRAL_WINDOW subtable as appropriate to fill gap
                    ref_freq = minfreq + freq_width * (i + 0.5)
                    pt.tableutil.tablecopy(ms_exists, ms)
                    sw = pt.table('{}::SPECTRAL_WINDOW'.format(ms),
                                  readonly=False)
                    chan_freq = sw.getcol(
                        'CHAN_FREQ') - ms_exists_ref_freq + ref_freq
                    sw.putcol('REF_FREQUENCY', ref_freq)
                    sw.putcol('CHAN_FREQ', chan_freq)
                    sw.close()

                    # Flag all data
                    t = pt.table(ms, readonly=False)
                    t.putcol('FLAG_ROW', np.ones(len(t), dtype=bool))
                    f = t.getcol('FLAG')
                    t.putcol('FLAG', np.ones(f.shape, dtype=bool))
                    t.close()

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename + '_groups')
    groupmap.save(groupmapname)
    result = {
        'mapfile': filemapname,
        'groupmapfile': groupmapname,
        'nbands': nbands
    }
    return result
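
A short usage sketch (an addition, not part of the original recipe): it assumes the lofarpipe data_map module provides the DataMap and MultiDataMap classes used above, and shows how the returned dict of mapfile paths could be inspected downstream. The summarise_result helper is hypothetical.

from lofarpipe.support.data_map import DataMap, MultiDataMap  # assumed import path

def summarise_result(result):
    """Inspect the dict returned above: one MultiDataMap of input MS lists
    and one DataMap with the concatenated group names."""
    filemap = MultiDataMap.load(result['mapfile'])
    groupmap = DataMap.load(result['groupmapfile'])
    for group_in, group_out in zip(filemap, groupmap):
        print('{0}: {1} inputs -> {2} (skip={3})'.format(
            group_out.host, len(group_in.file), group_out.file, group_out.skip))
    print('total bands kept:', result['nbands'])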
Ejemplo n.º 36
0
    def go(self):
        """
        Steps:
        
        1. Load and validate the input datamaps
        2. Run the node parts of the recipe  
        3. Validate node output and format the recipe output   
        """
        super(imager_finalize, self).go()
        # *********************************************************************
        # 1. Load the datamaps
        awimager_output_map = DataMap.load(self.inputs["awimager_output_map"])
        raw_ms_per_image_map = DataMap.load(
            self.inputs["raw_ms_per_image_map"])
        sourcelist_map = DataMap.load(self.inputs["sourcelist_map"])
        sourcedb_map = DataMap.load(self.inputs["sourcedb_map"])
        target_mapfile = DataMap.load(self.inputs["target_mapfile"])
        output_image_mapfile = DataMap.load(
            self.inputs["output_image_mapfile"])
        processed_ms_dir = self.inputs["processed_ms_dir"]
        fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"]

        # Align the skip fields
        align_data_maps(awimager_output_map, raw_ms_per_image_map,
                        sourcelist_map, target_mapfile, output_image_mapfile,
                        sourcedb_map)

        # Set the correct iterator
        sourcelist_map.iterator = awimager_output_map.iterator = \
            raw_ms_per_image_map.iterator = target_mapfile.iterator = \
            output_image_mapfile.iterator = sourcedb_map.iterator = \
                DataMap.SkipIterator

        # *********************************************************************
        # 2. Run the node side of the recipe
        command = " python %s" % (self.__file__.replace("master", "nodes"))
        jobs = []
        for (awimager_output_item, raw_ms_per_image_item, sourcelist_item,
             target_item, output_image_item,
             sourcedb_item) in zip(awimager_output_map, raw_ms_per_image_map,
                                   sourcelist_map, target_mapfile,
                                   output_image_mapfile, sourcedb_map):
            # collect the files as argument
            arguments = [
                awimager_output_item.file, raw_ms_per_image_item.file,
                sourcelist_item.file, target_item.file, output_image_item.file,
                self.inputs["minbaseline"], self.inputs["maxbaseline"],
                processed_ms_dir, fillrootimagegroup_exec, self.environment,
                sourcedb_item.file
            ]

            self.logger.info(
                "Starting finalize with the folowing args: {0}".format(
                    arguments))
            jobs.append(ComputeJob(target_item.host, command, arguments))

        self._schedule_jobs(jobs)

        # *********************************************************************
        # 3. Validate the performance of the node script and assign output
        successful_run = False
        for (job, output_image_item) in zip(jobs, output_image_mapfile):
            if "hdf5" not in job.results:
                # If the output failed, set skip to True
                output_image_item.skip = True
            else:
                successful_run = True
                # Signal that at least a single run finished ok;
                # no need to set skip in this case.

        if not successful_run:
            self.logger.warn("Failed finalizer node run detected")
            return 1

        output_image_mapfile.save(self.inputs['placed_image_mapfile'])
        self.logger.debug(
            "Wrote mapfile containing placed hdf5 images: {0}".format(
                self.inputs['placed_image_mapfile']))
        self.outputs["placed_image_mapfile"] = self.inputs[
            'placed_image_mapfile']

        return 0
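
The skip-field alignment used above can be illustrated with a small, self-contained sketch (an illustration of the effect only, not the lofarpipe align_data_maps implementation): whenever one map marks an entry as skipped, the matching entries in all other maps are skipped too, so the SkipIterator passes over the whole row.

from lofarpipe.support.data_map import DataMap, DataProduct  # assumed import path

def align_skip_fields(*maps):
    # Propagate skip=True across all maps, entry by entry.
    for items in zip(*maps):
        skip = any(item.skip for item in items)
        for item in items:
            item.skip = skip

images = DataMap([DataProduct('localhost', 'image_0.img', False),
                  DataProduct('localhost', 'image_1.img', True)])
sources = DataMap([DataProduct('localhost', 'sources_0.db', False),
                   DataProduct('localhost', 'sources_1.db', False)])
align_skip_fields(images, sources)
# sources[1].skip is now True as well, so a SkipIterator skips both entries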
Ejemplo n.º 37
0
    def go(self):
        """
        Entry point for recipe: Called by the pipeline framework
        """
        super(imager_prepare, self).go()
        self.logger.info("Starting imager_prepare run")
        # *********************************************************************
        # input data
        input_map = DataMap.load(self.inputs['args'][0])
        output_map = DataMap.load(self.inputs['target_mapfile'])
        slices_per_image = self.inputs['slices_per_image']
        subbands_per_image = self.inputs['subbands_per_image']
        # Validate input
        if not self._validate_input_map(input_map, output_map,
                                        slices_per_image, subbands_per_image):
            return 1

        # outputs
        output_ms_mapfile_path = self.inputs['mapfile']

        # *********************************************************************
        # schedule the actual work
        # TODO: Refactor this function into: load data, perform work,
        # create output
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))

        jobs = []
        paths_to_image_mapfiles = []
        n_subband_groups = len(output_map)
        for idx_sb_group, item in enumerate(output_map):
            #create the input files for this node
            self.logger.debug("Creating input data subset for processing"
                              "on: {0}".format(item.host))
            inputs_for_image_map = \
                self._create_input_map_for_sbgroup(
                                slices_per_image, n_subband_groups,
                                subbands_per_image, idx_sb_group, input_map)

            # Save the mapfile
            job_directory = self.config.get("layout", "job_directory")
            inputs_for_image_mapfile_path = os.path.join(
                job_directory, "mapfiles",
                "ms_per_image_{0}".format(idx_sb_group))
            self._store_data_map(inputs_for_image_mapfile_path,
                                 inputs_for_image_map, "inputmap for location")

            # save the (input) MSs as a list of mapfiles
            paths_to_image_mapfiles.append(
                tuple([item.host, inputs_for_image_mapfile_path, False]))

            arguments = [
                self.environment, self.inputs['parset'],
                self.inputs['working_directory'],
                self.inputs['processed_ms_dir'], self.inputs['ndppp_exec'],
                item.file, slices_per_image, subbands_per_image,
                inputs_for_image_mapfile_path,
                self.inputs['asciistat_executable'],
                self.inputs['statplot_executable'],
                self.inputs['msselect_executable'],
                self.inputs['rficonsole_executable'],
                self.inputs['add_beam_tables']
            ]

            jobs.append(ComputeJob(item.host, node_command, arguments))

        # Hand over the job(s) to the pipeline scheduler
        self._schedule_jobs(jobs)

        # *********************************************************************
        # validate the output, cleanup, return output
        if self.error.isSet():  #if one of the nodes failed
            self.logger.warn("Failed prepare_imager run detected: Generating "
                             "new output_ms_mapfile_path without failed runs:"
                             " {0}".format(output_ms_mapfile_path))

        concat_ms = copy.deepcopy(output_map)
        slices = []
        finished_runs = 0
        #scan the return dict for completed key
        for (item, job) in zip(concat_ms, jobs):
            # only save the slices if the node completed successfully
            if job.results["returncode"] == 0:
                finished_runs += 1
                slices.append(
                    tuple([item.host, job.results["time_slices"], False]))
            else:
                # Set the dataproduct to skipped!!
                item.skip = True
                slices.append(tuple([item.host, ["/Failed"], True]))
                msg = "Failed run on {0}. NOT Created: {1} ".format(
                    item.host, item.file)
                self.logger.warn(msg)

        if finished_runs == 0:
            self.logger.error(
                "None of the started compute node finished:"
                "The current recipe produced no output, aborting")
            return 1

        # Write the output mapfiles:
        # concat.ms paths:
        self._store_data_map(output_ms_mapfile_path, concat_ms,
                             "mapfile with concat.ms")

        # timeslices
        MultiDataMap(slices).save(self.inputs['slices_mapfile'])
        self.logger.info(
            "Wrote MultiMapfile with produces timeslice: {0}".format(
                self.inputs['slices_mapfile']))

        # map with the actual input MSs
        self._store_data_map(self.inputs["raw_ms_per_image_mapfile"],
                             DataMap(paths_to_image_mapfiles),
                             "mapfile containing (raw) input ms per image:")

        # Set the return values
        self.outputs['mapfile'] = output_ms_mapfile_path
        self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
        self.outputs['raw_ms_per_image_mapfile'] = \
            self.inputs["raw_ms_per_image_mapfile"]
        return 0
Ejemplo n.º 38
0
def main(ms_input,
         outmapname=None,
         mapfile_dir=None,
         cellsize_highres_deg=0.00208,
         cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5,
         fieldsize_lowres=6.5,
         image_padding=1.,
         y_axis_stretch=1.):
    """
    Sort a list of MS files into frequency bands and compute image sizes and
    averaging parameters

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        Size of the high-res images, in units of the FWHM.
    fieldsize_lowres : float, optional
        Size of the low-res images, in units of the FWHM.
    image_padding : float, optional
        Padding factor applied to the image sizes.
    y_axis_stretch : float, optional
        Factor by which the y-axis is stretched or compressed.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles

    """

    if not outmapname or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: outmapname and mapfile_dir are needed!'
        )
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for fname in map_in:
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_into_freqBands: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    bandfreqs = []
    print("InitSubtract_deep_sort_and_compute.py: Putting files into bands.")
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))
        bandfreqs.append(Band(msdict[MSkey]).freq)

    ## min freq gives largest image size for deep image
    bandfreqs = np.array(bandfreqs)
    minfreq = np.min(bandfreqs)
    bandmin = np.argmin(bandfreqs)
    ## need to map the output from wsclean channels to the right frequencies
    ## just put the bands in the right freq order
    wsclean_channum = np.argsort(bandfreqs)
    bands = np.array(bands)
    bands = bands[wsclean_channum]

    #minfreq = 1e9
    #for ib, band in enumerate(bands):
    #if band.freq < minfreq:
    #minfreq = band.freq
    #bandmin = ib

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    nbands = int(len(bands) / 10)
    if nbands > 8:
        nchansout_clean1 = int(nbands / 4)
    elif nbands > 4:
        nchansout_clean1 = int(nbands / 2)
    else:
        nchansout_clean1 = int(nbands)
    if nchansout_clean1 < 2:
        nchansout_clean1 = 2

    (freqstep, timestep) = bands[0].get_averaging_steps()
    # timestep_sec gets added to the band object in get_averaging_steps()
    int_time_sec = bands[0].timestep_sec * timestep
    nwavelengths_high = bands[0].get_nwavelengths(cellsize_highres_deg,
                                                  int_time_sec)
    nwavelengths_low = bands[0].get_nwavelengths(cellsize_lowres_deg,
                                                 int_time_sec)

    print("InitSubtract_deep_sort_and_compute.py: analyzing data...")
    for band in bands:
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg, fieldsize_highres,
            fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(
            int(imsize_high_res * y_axis_stretch))
        high_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_res) + " " + str(imsize_high_res_stretch),
                False))
        imsize_low_res_stretch = band.get_optimum_size(
            int(imsize_low_res * y_axis_stretch))
        low_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_res) + " " + str(imsize_low_res_stretch),
                False))
        imsize_high_pad = band.get_optimum_size(
            int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(
            int(imsize_high_res * image_padding * y_axis_stretch))
        high_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_pad) + " " + str(imsize_high_pad_stretch),
                False))
        imsize_low_pad = band.get_optimum_size(
            int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(
            int(imsize_low_res * image_padding * y_axis_stretch))
        low_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_pad) + " " + str(imsize_low_pad_stretch),
                False))

        if band.freq == minfreq:
            deep_imsize_high_res = imsize_high_res
            deep_imsize_high_res_stretch = imsize_high_res_stretch
            deep_imsize_high_pad = imsize_high_pad
            deep_imsize_high_pad_stretch = imsize_high_pad_stretch
            deep_imsize_low_res = imsize_low_res
            deep_imsize_low_res_stretch = imsize_low_res_stretch
            deep_imsize_low_pad = imsize_low_pad
            deep_imsize_low_pad_stretch = imsize_low_pad_stretch

    deep_high_size_map = DataMap([
        DataProduct(
            'localhost',
            str(deep_imsize_high_res) + " " +
            str(deep_imsize_high_res_stretch), False)
    ])
    deep_high_paddedsize_map = DataMap([
        DataProduct(
            'localhost',
            str(deep_imsize_high_pad) + " " +
            str(deep_imsize_high_pad_stretch), False)
    ])
    deep_low_size_map = DataMap([
        DataProduct(
            'localhost',
            str(deep_imsize_low_res) + " " + str(deep_imsize_low_res_stretch),
            False)
    ])
    deep_low_paddedsize_map = DataMap([
        DataProduct(
            'localhost',
            str(deep_imsize_low_pad) + " " + str(deep_imsize_low_pad_stretch),
            False)
    ])
    nbands_map = DataMap([DataProduct('localhost', str(nbands), False)])
    nchansout_clean1_map = DataMap(
        [DataProduct('localhost', str(nchansout_clean1), False)])

    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    nwavelengths_high_map = DataMap([])
    nwavelengths_low_map = DataMap([])

    for index in range(numfiles):
        # set time and frequency averaging values (in sec and Hz)
        freqstep_map.append(
            DataProduct('localhost', str(freqstep * bands[0].chan_width_hz),
                        False))
        timestep_map.append(
            DataProduct('localhost', str(timestep * bands[0].timestep_sec),
                        False))
    nwavelengths_high_map.append(
        DataProduct('localhost', str(nwavelengths_high), False))
    nwavelengths_low_map.append(
        DataProduct('localhost', str(nwavelengths_low), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)

    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir,
                                     outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir,
                                    outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)

    deep_high_sizename = os.path.join(mapfile_dir,
                                      outmapname + '_deep_high_size')
    deep_high_size_map.save(deep_high_sizename)
    deep_low_sizename = os.path.join(mapfile_dir,
                                     outmapname + '_deep_low_size')
    deep_low_size_map.save(deep_low_sizename)
    deep_high_padsize_name = os.path.join(
        mapfile_dir, outmapname + '_deep_high_padded_size')
    deep_high_paddedsize_map.save(deep_high_padsize_name)
    deep_low_padsize_name = os.path.join(mapfile_dir,
                                         outmapname + '_deep_low_padded_size')
    deep_low_paddedsize_map.save(deep_low_padsize_name)

    nbands_mapname = os.path.join(mapfile_dir, outmapname + '_nbands')
    nbands_map.save(nbands_mapname)
    nchansout_clean1_mapname = os.path.join(mapfile_dir,
                                            outmapname + '_nchansout_clean1')
    nchansout_clean1_map.save(nchansout_clean1_mapname)

    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    nwavelengths_high_name = os.path.join(mapfile_dir,
                                          outmapname + '_nwavelengths_high')
    nwavelengths_high_map.save(nwavelengths_high_name)
    nwavelengths_low_name = os.path.join(mapfile_dir,
                                         outmapname + '_nwavelengths_low')
    nwavelengths_low_map.save(nwavelengths_low_name)

    result = {
        'groupmap': groupmapname,
        'single_mapfile': file_single_mapname,
        'high_size_mapfile': high_sizename,
        'low_size_mapfile': low_sizename,
        'high_padsize_mapfile': high_padsize_name,
        'low_padsize_mapfile': low_padsize_name,
        'deep_high_size_mapfile': deep_high_sizename,
        'deep_low_size_mapfile': deep_low_sizename,
        'deep_high_padsize_mapfile': deep_high_padsize_name,
        'deep_low_padsize_mapfile': deep_low_padsize_name,
        'nbands': nbands_mapname,
        'nchansout_clean1': nchansout_clean1_mapname,
        'freqstep': freqstepname,
        'timestep': timestepname,
        'nwavelengths_high_mapfile': nwavelengths_high_name,
        'nwavelengths_low_mapfile': nwavelengths_low_name
    }
    return result
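
A hedged usage sketch of the main() above. The MS names and directories are hypothetical, and the call only works in an environment where the Band class, casacore tables and the lofarpipe data_map classes used above are importable.

ms_files = ['/data/scratch/L123456_SB000_uv.MS',   # hypothetical input MSs
            '/data/scratch/L123456_SB001_uv.MS']
maps = main(ms_files,
            outmapname='initsubtract.mapfile',
            mapfile_dir='/data/scratch/mapfiles',   # hypothetical output directory
            cellsize_highres_deg=0.00208,
            cellsize_lowres_deg=0.00694)
# 'maps' is a dict of mapfile paths, e.g. maps['groupmap'] and maps['single_mapfile']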
Ejemplo n.º 39
0
def _add_name(mapname, suffix):
    dmap = DataMap.load(mapname)
    for item in dmap:
        item.file = item.file + suffix

    return dmap
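
A minimal usage sketch for _add_name(); the mapfile paths are hypothetical. The helper does not save the modified DataMap itself, so the caller writes it out.

corrected_map = _add_name('/data/mapfiles/input.mapfile', '.corrected')  # hypothetical paths
corrected_map.save('/data/mapfiles/input_corrected.mapfile')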
Ejemplo n.º 40
0
def plugin_main(args, **kwargs):
    fileid = kwargs['mapfile_in']
    datamap = DataMap.load(fileid)
    hdf5File = os.path.join(kwargs['hdf5_dir'],kwargs['hdf5file'])
    if 'instrument' in kwargs:
        instrument = kwargs['instrument']
    else:
        instrument = '/instrument'
    if 'compression' in kwargs:
        compression = int(kwargs['compression'])
    else:
        compression = 5
    if 'solset' in kwargs:
        solsetName = kwargs['solset']
    else:
        solsetName = None


    # Check if all the necessary files are available
    antennaFile = os.path.join(datamap[0].file,'ANTENNA')
    if not os.path.isdir(antennaFile):
        logging.critical('Missing ANTENNA table.')
        sys.exit(1)
    fieldFile = os.path.join(datamap[0].file,'FIELD')
    if not os.path.isdir(fieldFile):
        logging.critical('Missing FIELD table.')
        sys.exit(1)
    skydbFile = os.path.join(datamap[0].file,'sky')
    if not os.path.isdir(skydbFile):
        logging.warning('No sky table found. (Direction-dependent parameters will not work.)')
        skydbFile = 'None'

    #generate list of parmDB-filenames
    parmDBnames = [ MS.file+instrument for MS in datamap ]

    ##create and fill the hdf5-file:
    #solset = parmDBs2h5parm(hdf5File, parmDBnames, antennaFile, fieldFile, skydbFile, compression=compression, solsetName=solsetName)
    # call the create_h5parm function from losoto (will put a stupid create message in the h5parm file)
    create_h5parm(parmDBnames, antennaFile, fieldFile, skydbFile, hdf5File, compression, solsetName)

    ## Add CREATE entry to history
    #h5parmDB = h5parm(hdf5File, readonly = False)
    #soltabs = h5parmDB.getSoltabs(solset=solset)
    #for st in soltabs:
    #    sw = solWriter(soltabs[st])
    #    sw.addHistory('CREATE (by PipelineStep_losotoImporter from %s / %s - %s)' % (os.path.abspath(''),
    #                               os.path.basename(parmDBnames[0]), os.path.basename(parmDBnames[-1]) ) )
    #h5parmDB.close()

    #generate mapfile and wrap up
    mapfileentry = {}
    mapfileentry['host'] = 'localhost'
    mapfileentry['file'] = hdf5File
    mapfileentry['skip'] = False
    outfileid = os.path.join(kwargs['mapfile_dir'], kwargs['filename'])
    outmap = open(outfileid, 'w')
    outmap.write(repr([mapfileentry]))
    outmap.close()
    result = {}
    result['mapfile'] = outfileid
    return result
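
A short sketch (an assumption about the on-disk format, consistent with how the plugin writes it above): the repr-of-dicts file produced here uses the same layout that DataMap.load() parses, so the resulting mapfile can be read back as a one-entry DataMap. The paths are hypothetical.

from lofarpipe.support.data_map import DataMap  # assumed import path

h5_map = DataMap.load('/data/mapfiles/losoto_importer.mapfile')  # hypothetical path
print(h5_map[0].host, h5_map[0].file, h5_map[0].skip)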
Ejemplo n.º 41
0
def plugin_main(args, **kwargs):
    """
    Re-groups a simple (flat) mapfile into a multi-mapfile that has the same shape
    as a given multi-mapfile.

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be re-grouped.
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. Total number of files needs
        to be the same as in mapfile_in.
    check_basename : Bool (str) , optional
        Check if the basenames (see os.path.basename()) minus extension match
        default = True
    join_groups : int (str), optional
        If it is set, then join so many groups into one new group. (Gives fewer
        groups but more files per group than in mapfile_groups.)
        default = keep same grouping as in mapfile_groups
    join_max_files : int (str), optional
        If it is set, then join as many groups together as possible without the
        number of files per group exceeding "join_max_files". Similar to
        "join_groups", but the number of joined groups is not fixed; it depends
        on the number of files per group. Mutually exclusive with "join_groups"!
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    check_names = True
    if 'check_basename' in kwargs:
        check_names = string2bool(kwargs['check_basename'])

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    map_out = MultiDataMap([])
    inindex = 0
    for group in groupmap:
        grouplist = []
        skip = False
        for fname in group.file:
            if check_names:
                refbase = os.path.splitext(os.path.basename(fname))[0]
                newbase = os.path.splitext(
                    os.path.basename(inmap[inindex].file))[0]
                if refbase != newbase:
                    raise ValueError(
                        'PipelineStep_reGroupMapfile: basenames {0} and {1} differ'
                        .format(refbase, newbase))
            grouplist.append(inmap[inindex].file)
            if inmap[inindex].skip:
                print('PipelineStep_reGroupMapfile: Skipping full group for file: '
                      + inmap[inindex].file)
                skip = True
            inindex += 1
        map_out.data.append(MultiDataProduct(group.host, grouplist, skip))
    assert inindex == len(inmap)

    if 'join_groups' in kwargs:
        if 'join_max_files' in kwargs:
            raise ValueError(
                "PipelineStep_reGroupMapfile: \"join_groups\" and \"join_max_files\" are mutually exclusive!"
            )
        groups_to_join = int(kwargs['join_groups'])
        if groups_to_join > 1:
            newmap = MultiDataMap([])
            for start_idx in range(0, len(map_out), groups_to_join):
                end_idx = min((start_idx + groups_to_join), len(map_out))
                grouplist = []
                for group in map_out[start_idx:end_idx]:
                    grouplist.extend(group.file)
                    if group.skip:
                        raise ValueError(
                            "PipelineStep_reGroupMapfile: Found group that should be skipped! "
                            "(I.e. there is probably something wrong with your data!)"
                        )
                newmap.data.append(
                    MultiDataProduct(map_out[start_idx].host, grouplist,
                                     False))
            map_out = newmap
    elif 'join_max_files' in kwargs:
        max_files = int(kwargs['join_max_files'])
        newmap = MultiDataMap([])
        grouplist = map_out[0].file
        grouphost = map_out[0].host
        for gindex in range(1, len(map_out)):
            if map_out[gindex].skip:
                raise ValueError(
                    "PipelineStep_reGroupMapfile: Found group that should be skipped! "
                    "(I.e. there is probably something wrong with your data!)")
            if (len(grouplist) + len(map_out[gindex].file)) > max_files:
                newmap.data.append(
                    MultiDataProduct(grouphost, grouplist, False))
                grouplist = map_out[gindex].file
                grouphost = map_out[gindex].host
            else:
                grouplist.extend(map_out[gindex].file)
        # add the final (partial?) group to the map
        newmap.data.append(MultiDataProduct(grouphost, grouplist, False))
        map_out = newmap

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
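
A hedged usage sketch for the re-grouping plugin above; the mapfile paths are hypothetical and the keyword values mirror the docstring.

result = plugin_main([],
                     mapfile_in='/data/mapfiles/single_files.mapfile',    # hypothetical
                     mapfile_groups='/data/mapfiles/groups.mapfile',      # hypothetical
                     mapfile_dir='/data/mapfiles',
                     filename='regrouped.mapfile',
                     check_basename='True',
                     join_max_files='20')
print(result['mapfile'])  # path of the newly written MultiDataMap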
Ejemplo n.º 42
0
    def go(self):
        if 'executable' in self.inputs:
            executable = self.inputs['executable']

        if self.inputs['nthreads']:
            self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        # *********************************************************************
        # try loading input/output data file, validate output vs the input location if
        #    output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be directed to the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        work_dir,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if self.inputs['inputkey'] and not self.inputs['inputkey'] in self.inputs['inputkeys']:
            self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
                if not mapname in self.inputs['mapfiles_as_string']:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(inp.file)
                else:
                    if key != mapname:
                        filedict[key] = []
                        for inp in filemap:
                            filedict[key].append(mapname)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        #command = "python3 %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
        recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
        recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
        pylist = os.getenv('PYTHONPATH').split(':')
        command = None
        for pl in pylist:
            if os.path.isfile(os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')
        for pl in recipe_directories:
            if os.path.isfile(os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')

        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            if filedict:
                for name, value in filedict.items():
                    replaced = False
                    if arglist_copy:
                        for arg in arglist:
                            if name == arg:
                                ind = arglist_copy.index(arg)
                                arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                                replaced = True
                    if parsetdict_copy:
                        if name in list(parsetdict_copy.values()):
                            for k, v in parsetdict_copy.items():
                                if v == name:
                                    parsetdict_copy[k] = value[i]
                        else:
                            if not replaced:
                                parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        work_dir,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
                if not self.inputs['error_tolerance']:
                    self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                    return 1
            for k, v in list(job.results.items()):
                if not k in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # temp solution. write all output dict entries to a mapfile
        #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        #check directory for stand alone mode
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in list(jobresultdict.items()):
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
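
The per-job placeholder substitution above (input/output keys replaced by the i-th file of the corresponding map) can be restated in a few self-contained lines. This is a simplified sketch of the idea, not the executable_args recipe itself; it replaces whole argument tokens rather than substrings.

def substitute_args(arglist, filedict, i):
    # Replace every argument that names an input/output key with that key's
    # i-th file, leaving all other arguments untouched.
    return [filedict[arg][i] if arg in filedict else arg for arg in arglist]

filedict = {'inputms': ['a_0.ms', 'a_1.ms'], 'outputms': ['b_0.ms', 'b_1.ms']}
print(substitute_args(['-in', 'inputms', '-out', 'outputms'], filedict, 1))
# ['-in', 'a_1.ms', '-out', 'b_1.ms']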
Ejemplo n.º 43
0
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None, NDPPPfill=True, target_path=None, stepname=None,
         mergeLastGroup=False, truncateLastSBs=True):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional 
        How many files should go into one frequency group. Values <= 0 mean put 
        all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output files 
        into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True:
          not allowed
        mergeLastGroup = True, truncateLastSBs = False:
          put the files from the last group that doesn't have SBperGroup subbands 
          into the second last group (which will then have more than SBperGroup entries). 
        mergeLastGroup = False, truncateLastSBs = True:
          ignore last files, that don't make for a full group (not all files are used).
        mergeLastGroup = False, truncateLastSBs = False:
          keep the incomplete last group, or - with NDPPPfill=True - fill the
          last group with dummies.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """
    if not filename or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError('sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!')
    if mergeLastGroup:
        raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for fname in map_in:
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))  
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_times_into_freqGroups: type of "ms_input" unknown!')

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on",len(ms_list),"files"

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # use the slower but more reliable way:
        obstable = pt.table(ms, ack=False)
        timestamp = int(round(np.min(obstable.getcol('TIME'))))
        #obstable = pt.table(ms+'::OBSERVATION', ack=False)
        #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
        obstable.close()
        if timestamp in time_groups:
            time_groups[timestamp]['files'].append(ms)
        else:
            time_groups[timestamp] = {'files': [ ms ], 'basename' : os.path.splitext(ms)[0] }
    print "sort_times_into_freqGroups: found",len(time_groups),"time-groups"

    # sort time-groups by frequency
    timestamps = sorted(time_groups.keys())   # not needed now, but later
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]            
            if first:
                freq_width = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                maxfreq = freq
                minfreq = freq
                first = False
            else:
                assert freq_width == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                maxfreq = max(maxfreq,freq)
                minfreq = min(minfreq,freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = sorted(
            zip(freqs, time_groups[time]['files']), key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    #the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    maxfreq = maxfreq+freq_width/2.
    minfreq = minfreq-freq_width/2.
    numFiles = round((maxfreq-minfreq)/freq_width)
    if numSB > 0:
        if truncateLastSBs:
            ngroups = int(np.floor(numFiles/numSB))
        else:
            ngroups = int(np.ceil(numFiles/numSB))
    else:
        ngroups = 1
        numSB = int(numFiles)
    hostID = 0
    for time in timestamps:
        (freq,fname) = time_groups[time]['freq_names'].pop(0)
        for fgroup in range(ngroups):
            files = []
            skip_this = True
            for fIdx in range(numSB):
                if freq > (fIdx+fgroup*numSB+1)*freq_width+minfreq:
                    if NDPPPfill:
                        files.append('dummy.ms')
                else:
                    files.append(fname)
                    if len(time_groups[time]['freq_names'])>0:
                        (freq,fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq,fname) = (1e12,'This_shouldn\'t_show_up')
                    skip_this = False
            filemap.append(MultiDataProduct(hosts[hostID%numhosts], files, skip_this))
            groupname = time_groups[time]['basename']+'_%Xt_%dg.ms'%(time,fgroup)
            if type(stepname) is str:
                groupname += stepname
            if type(target_path) is str:
                groupname = os.path.join(target_path,os.path.basename(groupname))
            groupmap.append(DataProduct(hosts[hostID%numhosts],groupname, skip_this))
        assert freq==1e12

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename+'_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemapname, nchans)
    flagmapname = os.path.join(mapfile_dir, filename+'_flags')
    flagmap.save(flagmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname, 'flagmapfile': flagmapname}
    return result
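
A hedged usage sketch of main() above; the MS names, host list and mapfile directory are hypothetical, and the MSs must exist on disk because their SPECTRAL_WINDOW tables are opened.

result = main(['/data/L654321_SB000_uv.MS', '/data/L654321_SB001_uv.MS'],  # hypothetical MSs
              filename='freqgroups.mapfile',
              mapfile_dir='/data/mapfiles',       # hypothetical directory
              numSB=10,
              hosts='[localhost]',
              stepname='_concat')
# result holds the 'mapfile', 'groupmapfile' and 'flagmapfile' paths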
Ejemplo n.º 44
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        self.logger.info("Starting imager pipeline")

        # Define scratch directory to be used by the compute nodes.
        self.scratch_directory = os.path.join(self.inputs['working_directory'],
                                              self.inputs['job_name'])
        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # remove prepending parset identifiers, leave only pipelinecontrol
        full_parset = self.parset
        self.parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')  # remove this

        # Create directories to store communication and data files

        job_dir = self.config.get("layout", "job_directory")

        self.parset_dir = os.path.join(job_dir, "parsets")
        create_directory(self.parset_dir)
        self.mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(self.mapfile_dir)

        # *********************************************************************
        # (INPUT) Get the input from external sources and create pipeline types
        # Input measurement sets
        input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
        self.input_data.save(input_mapfile)
        # storedata_map(input_mapfile, self.input_data)
        self.logger.debug(
            "Wrote input UV-data mapfile: {0}".format(input_mapfile))

        # Provides location for the scratch directory and concat.ms location
        target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
        self.target_data.save(target_mapfile)
        self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile))

        # images datafiles
        output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile")
        self.output_data.save(output_image_mapfile)
        self.logger.debug(
            "Wrote output sky-image mapfile: {0}".format(output_image_mapfile))

        # ******************************************************************
        # (1) prepare phase: copy and collect the ms
        concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
            processed_ms_dir = self._prepare_phase(input_mapfile,
                                    target_mapfile)

        number_of_major_cycles = self.parset.getInt(
            "Imaging.number_of_major_cycles")

        # We start with an empty source_list map. It should contain n_output
        # entries all set to empty strings
        source_list_map_path = os.path.join(self.mapfile_dir,
                                            "initial_sourcelist.mapfile")
        source_list_map = DataMap.load(target_mapfile)  # copy the output map
        for item in source_list_map:
            item.file = ""  # set all to empty string
        source_list_map.save(source_list_map_path)

        for idx_loop in range(number_of_major_cycles):
            # *****************************************************************
            # (2) Create dbs and sky model
            parmdbs_path, sourcedb_map_path = self._create_dbs(
                concat_ms_map_path,
                timeslice_map_path,
                source_list_map_path=source_list_map_path,
                skip_create_dbs=False)

            # *****************************************************************
            # (3)  bbs_imager recipe.
            bbs_output = self._bbs(timeslice_map_path,
                                   parmdbs_path,
                                   sourcedb_map_path,
                                   skip=False)

            # TODO: Extra recipe: concat timeslices using pyrap.concatms
            # (see prepare)

            # *****************************************************************
            # (4) Get parameters awimager from the prepare_parset and inputs
            aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path,
                                                            idx_loop,
                                                            sourcedb_map_path,
                                                            skip=False)

            # *****************************************************************
            # (5) Source finding
            sourcelist_map, found_sourcedb_path = self._source_finding(
                aw_image_mapfile, idx_loop, skip=False)
            # should the output be a sourcedb? instead of a sourcelist

        # TODO: minbaseline should be a parset value as is maxbaseline..
        minbaseline = 0

        # *********************************************************************
        # (6) Finalize:
        placed_data_image_map = self._finalize(
            aw_image_mapfile, processed_ms_dir, ms_per_image_map_path,
            sourcelist_map, minbaseline, maxbaseline, target_mapfile,
            output_image_mapfile, found_sourcedb_path)

        # *********************************************************************
        # (7) Get metadata
        # Create a parset containing the metadata for MAC/SAS
        metadata_file = "%s_feedback_SkyImage" % (self.parset_file, )
        self.run_task(
            "get_metadata",
            placed_data_image_map,
            parset_prefix=(full_parset.getString('prefix') +
                           full_parset.fullModuleName('DataProducts')),
            product_type="SkyImage",
            metadata_file=metadata_file)

        self.send_feedback_processing(
            parameterset({'feedback_version': feedback_version}))
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
Ejemplo n.º 45
0
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with only the MSs at the middle frequency

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    index: int, optional
        Index of the frequency band to use.

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'include' in kwargs:
        include = kwargs['include']
    else:
        include = None
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap([])

    # do not re-run if we already ran, and input files are deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print 'PipelineStep_selectMiddleFreq: Not re-running because output file exists, but input files don\'t!'
        return {'mapfile': fileid}

    # sort into frequency groups
    freq_groups = {}
    hosts = []
    for item in map_in:
        if include is not None and include not in item.file:
            continue
        # Get the frequency info
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if freq in freq_groups:
            freq_groups[freq].append(item.file)
        else:
            freq_groups[freq] = [item.file]
        if item.host not in hosts:
            hosts.append(item.host)

    # find maximum number of files per frequency-group
    maxfiles = max([len(group) for group in freq_groups.values()])
    # find the center-frequency
    freqs = freq_groups.keys()
    freqs.sort()
    selfreq = freqs[len(freqs) / 2]
    if 'index' in kwargs:
        selfreq = int(kwargs['index'])
    else:
        # make sure that the chosen frequency has maxfiles entries
        while len(freq_groups[selfreq]) < maxfiles:
            freqs.remove(selfreq)
            selfreq = freqs[len(freqs) / 2]
    # extend the hosts-list
    for i in range(len(freq_groups[selfreq]) - len(hosts)):
        hosts.append(hosts[i])
    # fill the output-map
    for (host, fname) in zip(hosts, freq_groups[selfreq]):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    del map_in
    del map_out
    result = {'mapfile': fileid}

    return result
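A minimal usage sketch for the plugin above. Everything here is illustrative: the paths and MS names are made up, the driver call is commented out because it needs real MeasurementSets on disk, and the import assumes the usual lofarpipe.support.data_map location of DataMap/DataProduct.

import os
from lofarpipe.support.data_map import DataMap, DataProduct

mapfile_dir = '/tmp/mapfiles'                      # placeholder directory
if not os.path.isdir(mapfile_dir):
    os.makedirs(mapfile_dir)

# Build a toy input mapfile with three (non-existent) MS paths.
map_in = DataMap([DataProduct('localhost', '/data/L123456_SB%03d.MS' % i, False)
                  for i in range(3)])
map_in.save(os.path.join(mapfile_dir, 'input.mapfile'))

# The generic pipeline would then call the plugin roughly like this
# (commented out, since the MSs above do not exist):
# result = plugin_main([], mapfile_in=os.path.join(mapfile_dir, 'input.mapfile'),
#                      mapfile_dir=mapfile_dir, filename='middle_freq.mapfile')
# print(result['mapfile'])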
Ejemplo n.º 46
0
def plugin_main(args, **kwargs):

    infile_map   = kwargs['infile']
    mapfile_dir  = kwargs['mapfile_dir']
    jobname      = kwargs['jobname']
    filename     = kwargs['filename']
    current_loop = str(int(kwargs['counter'])+1)
    data         = DataMap.load(infile_map)	# these are actual MS files
    datalist     = [data[i].file for i in xrange(len(data))]
    
    globaldb_map     = os.path.join(mapfile_dir, filename + '_globaldb')     # this file holds all the globaldbs
    globaldbtec_map  = os.path.join(mapfile_dir, filename + '_globaldbtec')  # this file holds all the globaldbs
    globaldbtec2_map = os.path.join(mapfile_dir, filename + '_globaldbtec2') # this file holds all the globaldbs
    globaldbFR_map   = os.path.join(mapfile_dir, filename + '_globaldbFR')   # this file holds all the globaldbs
    globaldbCD_map   = os.path.join(mapfile_dir, filename + '_globaldbCD')   # this file holds all the globaldbs
    globaldbamp_map  = os.path.join(mapfile_dir, filename + '_globaldbamp')  # this file holds all the globaldbs
    h5parmtec_map    = os.path.join(mapfile_dir, filename + '_h5parmtec')    # this file holds all the h5parms
    h5parmtec2_map   = os.path.join(mapfile_dir, filename + '_h5parmtec2')   # this file holds all the h5parms
    h5parmFR_map     = os.path.join(mapfile_dir, filename + '_h5parmFR')     # this file holds all the h5parms
    h5parmCD_map     = os.path.join(mapfile_dir, filename + '_h5parmCD')     # this file holds all the h5parms
    h5parmamp_map    = os.path.join(mapfile_dir, filename + '_h5parmamp')    # this file holds all the h5parms
    
    map_out_globaldb     = DataMap([])
    map_out_globaldbtec  = DataMap([])
    map_out_globaldbtec2 = DataMap([])
    map_out_globaldbFR   = DataMap([])
    map_out_globaldbCD   = DataMap([])
    map_out_globaldbamp  = DataMap([])
    map_out_h5parmtec    = DataMap([])
    map_out_h5parmtec2   = DataMap([])
    map_out_h5parmFR     = DataMap([])
    map_out_h5parmCD     = DataMap([])
    map_out_h5parmamp    = DataMap([])

    map_out_globaldb.data.append(DataProduct( data[0].host, jobname + '.globaldb_loop' + current_loop, False))
    map_out_globaldbtec.data.append(DataProduct( data[0].host, jobname + '.globaldbtec_loop' + current_loop, False))
    map_out_globaldbtec2.data.append(DataProduct( data[0].host, jobname + '.globaldbtec2_loop' + current_loop, False))
    map_out_globaldbFR.data.append(DataProduct( data[0].host, jobname + '.globaldbFR_loop' + current_loop, False))
    map_out_globaldbCD.data.append(DataProduct( data[0].host, jobname + '.globaldbCD_loop' + current_loop, False))
    map_out_globaldbamp.data.append(DataProduct( data[0].host, jobname + '.globaldbamp_loop' + current_loop, False))
    map_out_h5parmtec.data.append(DataProduct( data[0].host, jobname + '_loop' + current_loop + '.h5parmtec', False)) 
    map_out_h5parmtec2.data.append(DataProduct( data[0].host, jobname + '_loop' + current_loop + '.h5parmtec2', False)) 
    map_out_h5parmFR.data.append(DataProduct( data[0].host, jobname + '_loop' + current_loop + '.h5parmFR', False)) 
    map_out_h5parmCD.data.append(DataProduct( data[0].host, jobname + '_loop' + current_loop + '.h5parmCD', False))
    map_out_h5parmamp.data.append(DataProduct( data[0].host, jobname + '_loop' + current_loop + '.h5parmamp', False))
    
    globaldbFR_folder  = jobname + '.globaldbFR_loop'  + current_loop
    globaldbCD_folder  = jobname + '.globaldbCD_loop'  + current_loop
    globaldbamp_folder = jobname + '.globaldbamp_loop' + current_loop
    
    image_high1        = jobname + '_image_high1_loop'        + current_loop
    image_high2        = jobname + '_image_high2_loop'        + current_loop
    image_mask         = jobname + '_mask_high1_loop'           + current_loop
    filter_model       = jobname + '_filter_model_loop'         + current_loop
    sourcedb_target    = jobname + '-make_sourcedb_target_loop' + current_loop
    
    image_high1_pattern = image_high1 + '-MFS-image.fits'
    image_high2_sources = image_high2 + '-sources.txt'
        
    map_out_globaldb.save(globaldb_map)
    map_out_globaldbtec.save(globaldbtec_map)
    map_out_globaldbtec2.save(globaldbtec2_map)
    map_out_globaldbFR.save(globaldbFR_map)
    map_out_globaldbCD.save(globaldbCD_map)
    map_out_globaldbamp.save(globaldbamp_map)
    map_out_h5parmtec.save(h5parmtec_map)
    map_out_h5parmtec2.save(h5parmtec2_map)
    map_out_h5parmFR.save(h5parmFR_map)
    map_out_h5parmCD.save(h5parmCD_map)
    map_out_h5parmamp.save(h5parmamp_map)

    result = {'globaldb': globaldb_map,
              'globaldbtec': globaldbtec_map,
              'globaldbtec2': globaldbtec2_map,
              'globaldbFR': globaldbFR_map,
              'globaldbCD': globaldbCD_map,
              'globaldbamp': globaldbamp_map,
              'h5parmtec': h5parmtec_map,
              'h5parmtec2': h5parmtec2_map,
              'h5parmFR': h5parmFR_map,
              'h5parmCD': h5parmCD_map,
              'h5parmamp': h5parmamp_map,
              'plotstec': 'plots-tec' + current_loop,
              'plotstec2': 'plots-tec2' + current_loop,
              'plotsFR': 'plots-fr' + current_loop,
              'plotsCD': 'plots-cd' + current_loop,
              'plotsamp': 'plots-amp' + current_loop,
              'globaldbFR_folder': globaldbFR_folder,
              'globaldbCD_folder': globaldbCD_folder,
              'globaldbamp_folder': globaldbamp_folder,
              'image_high1': image_high1,
              'image_high1_pattern': image_high1_pattern,
              'image_mask': image_mask,
              'image_high2': image_high2,
              'image_high2_sources': image_high2_sources,
              'filter_model': filter_model,
              'sourcedb_target': sourcedb_target}
    return result
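The plugin above repeats the same pattern eleven times: build a single-entry DataMap, save it under mapfile_dir with a product-specific suffix, and return the path. A compact, hedged sketch of that idea follows; the helper name and the example products are invented, only the DataMap/DataProduct usage mirrors the code above.

import os
from lofarpipe.support.data_map import DataMap, DataProduct

def make_single_entry_maps(mapfile_dir, filename, host, products):
    """Save one single-entry mapfile per product and return {product: mapfile path}."""
    paths = {}
    for name, product_file in products.items():
        path = os.path.join(mapfile_dir, filename + '_' + name)
        DataMap([DataProduct(host, product_file, False)]).save(path)
        paths[name] = path
    return paths

# e.g. make_single_entry_maps('/tmp/mapfiles', 'cal', 'localhost',
#                             {'h5parmtec': 'job_loop1.h5parmtec',
#                              'globaldbtec': 'job.globaldbtec_loop1'})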
def main(ms_input,
         outmapname=None,
         mapfile_dir=None,
         cellsize_highres_deg=0.00208,
         cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5,
         fieldsize_lowres=6.5,
         image_padding=1.,
         y_axis_stretch=1.,
         calc_y_axis_stretch=False,
         apply_y_axis_stretch_highres=True,
         apply_y_axis_stretch_lowres=True):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHM's shall the high-res images be.
    fieldsize_lowres : float, optional
        How many FWHM's shall the low-res images be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much shall the y-axis be stretched or compressed.
    calc_y_axis_stretch : bool, optional
        Adjust the image sizes returned by this script for the mean elevation.
        If True, the value of y_axis_stretch above is ignored
    apply_y_axis_stretch_highres : bool, optional
        Apply the y-axis stretch to the high-res image sizes
    apply_y_axis_stretch_lowres : bool, optional
        Apply the y-axis stretch to the low-res image sizes

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles

    """
    if not outmapname or not mapfile_dir:
        raise ValueError(
            'InitSubtract_sort_and_compute: outmapname and mapfile_dir are needed!'
        )
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('InitSubtract_sort_and_compute: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)
    calc_y_axis_stretch = input2bool(calc_y_axis_stretch)
    apply_y_axis_stretch_highres = input2bool(apply_y_axis_stretch_highres)
    apply_y_axis_stretch_lowres = input2bool(apply_y_axis_stretch_lowres)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    print "InitSubtract_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    for i, band in enumerate(bands):
        print "InitSubtract_sort_and_compute.py: Working on Band:", band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg, fieldsize_highres,
            fieldsize_lowres)

        # Calculate y_axis_stretch if desired
        if calc_y_axis_stretch:
            if i == 0:
                y_axis_stretch = 1.0 / np.sin(band.mean_el_rad)
                print "InitSubtract_sort_and_compute.py: Using y-axis stretch of:", y_axis_stretch

        # Adjust sizes so that we get the correct ones below
        if apply_y_axis_stretch_highres:
            imsize_high_res /= y_axis_stretch
        if apply_y_axis_stretch_lowres:
            imsize_low_res /= y_axis_stretch

        imsize_high_res = band.get_optimum_size(int(imsize_high_res))
        imsize_high_res_stretch = band.get_optimum_size(
            int(imsize_high_res * y_axis_stretch))
        high_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_res) + " " + str(imsize_high_res_stretch),
                False))

        imsize_low_res = band.get_optimum_size(int(imsize_low_res))
        imsize_low_res_stretch = band.get_optimum_size(
            int(imsize_low_res * y_axis_stretch))
        low_size_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_res) + " " + str(imsize_low_res_stretch),
                False))

        imsize_high_pad = band.get_optimum_size(
            int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(
            int(imsize_high_res * image_padding * y_axis_stretch))
        high_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_high_pad) + " " + str(imsize_high_pad_stretch),
                False))

        imsize_low_pad = band.get_optimum_size(
            int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(
            int(imsize_low_res * image_padding * y_axis_stretch))
        low_paddedsize_map.append(
            DataProduct(
                'localhost',
                str(imsize_low_pad) + " " + str(imsize_low_pad_stretch),
                False))

    print "InitSubtract_sort_and_compute.py: Computing averaging steps."
    (freqstep, timestep) = bands[0].get_averaging_steps()
    (nwavelengths_high,
     nwavelengths_low) = bands[0].nwavelengths(cellsize_highres_deg,
                                               cellsize_lowres_deg, timestep)

    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    nwavelengths_high_map = DataMap([])
    nwavelengths_low_map = DataMap([])
    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))
        nwavelengths_high_map.append(
            DataProduct('localhost', str(nwavelengths_high), False))
        nwavelengths_low_map.append(
            DataProduct('localhost', str(nwavelengths_low), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir,
                                     outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir,
                                    outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    nwavelengths_high_name = os.path.join(mapfile_dir,
                                          outmapname + '_nwavelengths_high')
    nwavelengths_high_map.save(nwavelengths_high_name)
    nwavelengths_low_name = os.path.join(mapfile_dir,
                                         outmapname + '_nwavelengths_low')
    nwavelengths_low_map.save(nwavelengths_low_name)

    result = {
        'groupmap': groupmapname,
        'single_mapfile': file_single_mapname,
        'high_size_mapfile': high_sizename,
        'low_size_mapfile': low_sizename,
        'high_padsize_mapfile': high_padsize_name,
        'low_padsize_mapfile': low_padsize_name,
        'freqstep': freqstepname,
        'timestep': timestepname,
        'nwavelengths_high_mapfile': nwavelengths_high_name,
        'nwavelengths_low_mapfile': nwavelengths_low_name
    }
    return result
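The core of the function above is the grouping of MeasurementSets by their REF_FREQUENCY. A standalone sketch of that idiom, assuming python-casacore is available (imported here as casacore.tables; other examples in this document use the older pyrap alias) and that the listed MS paths actually exist:

from collections import defaultdict
import casacore.tables as pt

def group_by_ref_frequency(ms_list):
    """Group MS paths by the (integer) REF_FREQUENCY of their first spectral window."""
    groups = defaultdict(list)
    for ms in ms_list:
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        try:
            freq = int(sw.col('REF_FREQUENCY')[0])
        finally:
            sw.close()
        groups[freq].append(ms)
    return dict(groups)

# group_by_ref_frequency(['/data/L1_SB000.MS', '/data/L1_SB001.MS'])  # paths are placeholders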
Ejemplo n.º 48
0
    def run(self, awimager_output, ms_per_image, sourcelist, target,
            output_image, minbaseline, maxbaseline, processed_ms_dir,
            fillrootimagegroup_exec, environment, sourcedb, concat_ms,
            correlated_output_location, msselect_executable):
        """
        :param awimager_output: Path to the casa image produced by awimager
        :param ms_per_image: The X (90) measurement sets scheduled to
            create the image
        :param sourcelist: list of sources found in the image
        :param target: <unused>
        :param minbaseline: Minimum baseline used for the image
        :param maxbaseline: largest/maximum baseline used for the image
        :param processed_ms_dir: The X (90) measurement sets actually used to
            create the image
        :param fillrootimagegroup_exec: Executable used to add image data to
            the hdf5 image

        :rtype: self.outputs['hdf5'] set to "success" to signal node success
        :rtype: self.outputs['image'] path to the produced hdf5 image
        """
        self.environment.update(environment)
        with log_time(self.logger):
            ms_per_image_map = DataMap.load(ms_per_image)

            # *****************************************************************
            # 1. add image info
            # Get all the files in the processed measurement dir
            file_list = os.listdir(processed_ms_dir)

            processed_ms_paths = []
            ms_per_image_map.iterator = DataMap.SkipIterator
            for item in ms_per_image_map:
                ms_path = item.file
                processed_ms_paths.append(ms_path)

            # add the information to the image
            try:
                self.logger.debug("Start addImage Info")
                addimg.addImagingInfo(awimager_output, processed_ms_paths,
                                      sourcedb, minbaseline, maxbaseline)

            except Exception, error:
                self.logger.warn("addImagingInfo Threw Exception:")
                self.logger.warn(error)
                # Catch raising of already done error: allows for rerunning
                # of the recipe
                if "addImagingInfo already done" in str(error):
                    self.logger.warn("addImagingInfo already done, continue")
                    pass
                else:
                    raise Exception(error)
                # The majority of the tables is updated correctly

            # ***************************************************************
            # 2. convert to hdf5 image format
            output_directory = None
            pim_image = pim.image(awimager_output)
            try:
                self.logger.info(
                    "Saving image in HDF5 Format to: {0}".format(output_image))
                # Create the output directory
                output_directory = os.path.dirname(output_image)
                create_directory(output_directory)
                # save the image
                pim_image.saveas(output_image, hdf5=True)

            except Exception, error:
                self.logger.error(
                    "Exception raised inside pyrap.images: {0}".format(
                        str(error)))
                raise error
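The HDF5 conversion at the end of the node recipe above is essentially a two-line pyrap.images operation. A minimal sketch of just that step, under the assumption that a CASA image already exists at the input path (all paths below are placeholders):

import os
import pyrap.images as pim

def casa_image_to_hdf5(casa_image_path, output_image):
    """Save an existing CASA image in HDF5 format at the requested output path."""
    out_dir = os.path.dirname(output_image)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)                      # mirrors create_directory() above
    pim.image(casa_image_path).saveas(output_image, hdf5=True)

# casa_image_to_hdf5('/data/awimage.restored', '/data/out/skyimage.h5')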
Ejemplo n.º 49
0
def plugin_main( args, **kwargs ):

    mapfile_in = kwargs['mapfile_in']
    lotss_radius = kwargs['lotss_radius']
    lbcs_radius  = kwargs['lbcs_radius']
    im_radius = float(kwargs['im_radius'])
    image_limit_Jy = float(kwargs['image_limit_Jy'])
    bright_limit_Jy = float(kwargs['bright_limit_Jy'])
    lotss_result_file = kwargs['lotss_result_file']
    lotss_catalogue = kwargs['lotss_catalogue']
    lbcs_catalogue = kwargs['lbcs_catalogue']
    delay_cals_file = kwargs['delay_cals_file']
    match_tolerance = float(kwargs['match_tolerance'])
    # parse continue_no_lotss as a boolean so the check below behaves as intended
    fail_lotss_ok = kwargs['continue_no_lotss'].lower() in ['true', 't', '1']

    mslist = DataMap.load(mapfile_in)
    MSname = mslist[0].file
    # For testing
    #MSname = kwargs['MSname']
 
    ## first check for a valid delay_calibrator file
    if os.path.isfile(delay_cals_file):
        print( 'Delay calibrators file {:s} exists! returning.'.format(delay_cals_file) )
        return

    ## look for or download LBCS
    print("Attempting to find or download LBCS catalogue.")
    lbcs_catalogue = my_lbcs_catalogue( MSname, Radius=lbcs_radius, outfile=lbcs_catalogue )
    ## look for or download LoTSS
    print("Attempting to find or download LoTSS catalogue.")
    lotss_catalogue = my_lotss_catalogue( MSname, Radius=lotss_radius, bright_limit_Jy=bright_limit_Jy, outfile=lotss_catalogue )

    ## if lbcs exists, and either lotss exists or continue_without_lotss = True, process the catalogue(s).
    ## else provide an error message and stop
    if len(lbcs_catalogue) == 0:
        logging.error('LBCS coverage does not exist, and catalogue not found on disk.')
        return
    if len(lotss_catalogue) == 0 and not fail_lotss_ok:
        logging.error('LoTSS coverage does not exist, and continue_no_lotss is set to False.')
        return

    ## if the LoTSS catalogue is empty, write out the delay cals only and stop
    if len(lotss_catalogue) == 0:
        print('Target field not in LoTSS coverage yet! Only writing {:s} based on LBCS'.format(delay_cals_file))

        ## Add the radius from phase centre to the catalogue
        RATar, DECTar = grab_coo_MS(input2strlist_nomapfile(MSname)[0])
        ptg_coords = SkyCoord( RATar, DECTar, frame='icrs', unit='deg' )

        src_coords = SkyCoord( lbcs_catalogue['RA'], lbcs_catalogue['DEC'], frame='icrs', unit='deg' )
        separations = src_coords.separation(ptg_coords )
        seps = Column( separations.deg, name='Radius' )
        lbcs_catalogue.add_column( seps )

        ## rename the source_id column
        lbcs_catalogue.rename_column('Observation','Source_id')

        ## add in some dummy data
        Total_flux = Column( np.ones(len(lbcs_catalogue)), name='Total_flux' )
        lbcs_catalogue.add_column( Total_flux )
        LGZ_Size = Column( np.ones( len(lbcs_catalogue) )*20., name='LGZ_Size' ) ## set to a default of 20 arcsec
        lbcs_catalogue.add_column( LGZ_Size )

        ## order based on radius from the phase centre
        lbcs_catalogue.sort('Radius')

        ## write the catalogue
        lbcs_catalogue.write(delay_cals_file, format='csv')
        return

    ## else continue 
    result = find_close_objs( lotss_catalogue, lbcs_catalogue, tolerance=match_tolerance )

    ## check if there are any matches
    if len(result) == 0:
        logging.error('LoTSS and LBCS coverage exists, but no matches found. This indicates something went wrong, please check your catalogues.')
        return
    else:
        # add radius to the catalogue
        RATar, DECTar = grab_coo_MS(input2strlist_nomapfile(MSname)[0])
        ptg_coords = SkyCoord( RATar, DECTar, frame='icrs', unit='deg' )

        src_coords = SkyCoord( result['RA'], result['DEC'], frame='icrs', unit='deg' )
        separations = src_coords.separation( ptg_coords )
        seps = Column( separations.deg, name='Radius' )
        result.add_column( seps )

        ## order by radius from the phase centre
        result.sort( 'Radius' )

        ## Write catalogues
        ## 1 - delay calibrators -- from lbcs_catalogue
        result.write( delay_cals_file, format='csv' )
        print('Writing delay calibrator candidate file {:s}'.format(delay_cals_file))

        ## sources to image -- first remove things that are already in the delay_cals_file
        good_index = [ x for x, src_id in enumerate( lotss_catalogue['Source_id'] ) if src_id not in result['Source_id'] ]

        tmp_cat = lotss_catalogue[good_index]

        ## make a flux cut
        image_index = np.where( tmp_cat['Peak_flux'] >= image_limit_Jy*1e3 )[0]
        flux_cut_sources = tmp_cat[image_index]

        ## make a radius cut
        src_coords = SkyCoord( flux_cut_sources['RA'], flux_cut_sources['DEC'], frame='icrs', unit='deg' )
        separations = src_coords.separation( ptg_coords )
        seps = Column( separations.deg, name='Radius' )
        flux_cut_sources.add_column( seps )
        good_idx = np.where( flux_cut_sources['Radius'] <= im_radius )[0]
        sources_to_image = flux_cut_sources[good_idx]

        nsrcs = float( len( sources_to_image ) )
        print('There are {0} sources above {1} Jy within {2} degrees of the phase centre.'.format(nsrcs, image_limit_Jy, im_radius))
        sources_to_image.write( lotss_result_file, format='csv' )

    return
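The flux and radius cuts above follow a common astropy pattern. A small, self-contained sketch under the assumption that the catalogue is an astropy Table with RA/DEC in degrees and Peak_flux in mJy; the numbers are made up:

import numpy as np
from astropy.table import Table, Column
from astropy.coordinates import SkyCoord

cat = Table({'RA': [150.1, 150.5, 151.2],
             'DEC': [2.2, 2.3, 2.9],
             'Peak_flux': [120.0, 8.0, 45.0]})    # mJy (assumption)
ptg = SkyCoord(150.0, 2.0, unit='deg', frame='icrs')

src = SkyCoord(cat['RA'], cat['DEC'], unit='deg', frame='icrs')
cat.add_column(Column(src.separation(ptg).deg, name='Radius'))

# keep sources brighter than 10 mJy and within 1 degree of the pointing
keep = np.where((cat['Peak_flux'] >= 10.0) & (cat['Radius'] <= 1.0))[0]
print(cat[keep])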
Ejemplo n.º 50
0
    def finalize(self):
        """
        Finalize this operation
        """

        # Add output datamaps to direction object for later use
        self.direction.input_files_single_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'input_files_single.mapfile')
        self.direction.shifted_model_data_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'corrupt_final_model.mapfile')
        self.direction.diff_models_field_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'shift_diff_model_to_field.mapfile')
        self.direction.dir_indep_parmdbs_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'dir_indep_instrument_parmdbs.mapfile')
        self.direction.dir_indep_skymodels_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'full_skymodels.mapfile')
        self.direction.dir_indep_facet_skymodels_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'make_facet_skymodels_all.mapfile')
        self.direction.dir_dep_parmdb_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'merge_selfcal_parmdbs.mapfile')
        self.direction.selfcal_plots_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'make_selfcal_plots.mapfile')
        self.direction.facet_image_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'final_image.mapfile')
        self.direction.facet_model_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'final_model_rootnames.mapfile')
        self.direction.facet_premask_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'premask.mapfile')
        self.direction.wsclean_modelimg_size_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'pad_model_images.padsize.mapfile')
        self.direction.verify_subtract_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'verify_subtract.break.mapfile')

        # Store results of verify_subtract check. This will work if the verification
        # was done using multiple bands although we use only one at the moment
        if os.path.exists(self.direction.verify_subtract_mapfile
                          ) and not self.parset['skip_selfcal_check']:
            ok_mapfile = DataMap.load(self.direction.verify_subtract_mapfile)
            ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
            if all(ok_flags):
                self.direction.selfcal_ok = True
            else:
                self.direction.selfcal_ok = False
        elif self.parset['skip_selfcal_check']:
            self.direction.selfcal_ok = True
        else:
            self.direction.selfcal_ok = False

        # Delete all data used only for selfcal as they're no longer needed.
        # Note: we keep the data if selfcal failed verification, so that the user
        # can check them for problems
        self.direction.cleanup_mapfiles = [
            os.path.join(self.pipeline_mapfile_dir,
                         'make_sourcedb_all_facet_sources.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'make_sourcedb_cal_facet_sources.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'concat_averaged_input.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'average_pre_compressed.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'average_post_compressed.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'corrupt_final_model.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'predict_all_model_data.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'shift_cal.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat_data.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat_corr.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'concat_blavg_data.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat0_input.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat1_input.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat2_input.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat3_input.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat4_input.mapfile')
        ]
        if not self.parset[
                'keep_avg_facet_data'] and self.direction.name != 'target':
            # Add averaged calibrated data for the facet to files to be deleted.
            # These are only needed if the user wants to reimage by hand (e.g.,
            # with a different weighting). They are always kept for the target
            self.direction.cleanup_mapfiles.append(
                os.path.join(self.pipeline_mapfile_dir,
                             'concat_averaged_compressed.mapfile'))
        if not self.parset['keep_unavg_facet_data']:
            # Add unaveraged calibrated data for the facet to files to be deleted.
            # These are only needed if the user wants to phase shift them to
            # another direction (e.g., to combine several facets together before
            # imaging them all at once)
            self.direction.cleanup_mapfiles.append(
                os.path.join(self.pipeline_mapfile_dir, 'shift_empty.mapfile'))
        if self.direction.selfcal_ok or not self.parset[
                'exit_on_selfcal_failure']:
            self.log.debug('Cleaning up files (direction: {})'.format(
                self.direction.name))
            self.direction.cleanup()
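The verify_subtract check in finalize() boils down to parsing per-band boolean flags stored in the 'file' field of a mapfile. A minimal sketch of that step, with an in-memory DataMap standing in for the mapfile on disk:

import ast
from lofarpipe.support.data_map import DataMap, DataProduct

ok_mapfile = DataMap([DataProduct('localhost', 'True', False),
                      DataProduct('localhost', 'False', False)])
ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
selfcal_ok = all(ok_flags)      # False here: one band failed verification
print(selfcal_ok)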
Ejemplo n.º 51
0
def plugin_main(args, **kwargs):
    """
    Takes in list of targets and returns the appropriate one in a mapfile
    Knows which is the current target by storing target ID in a mapfile
    Outputs an expanded list of the current target

    Parameters
    ----------
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    target_list: str
        List of all targets
    target_id: str
        Current target ID

    Returns
    -------
    result : dict
        Output datamap filename

    """
    infile_map  = kwargs['infile']
    mapfile_dir = kwargs['mapfile_dir']
    filename    = kwargs['filename']
    outdir      = kwargs['wd']
    tick        = int(kwargs['counter'])
    manual      = kwargs['manual']
    manual      = manual.lower() in ['true','t','1']
    data        = DataMap.load(infile_map)	# these are actual MS files
    datalist    = [data[i].file for i in xrange(len(data))]
    
    if manual:
        target_file = kwargs['target_file']
    try:
        nP = int(kwargs['nP'])              # if the user has defined a different value, use it
    except:
        nP = 3
    try:
        radius = float(kwargs['radius'])    # if the user has defined a different value, use it
    except:
        radius = 2.5

    ## if tick = 0, need to do the work to make directions files etc., otherwise just update the ticker
   
    fileid    = os.path.join(mapfile_dir, filename)	           # this file holds all the output measurement sets
    bigfileid = os.path.join(mapfile_dir, filename + '_bigfield')  # this big file holds all the directions
    
    if tick == 0:
        map_out_big = DataMap([])

        if manual:
            # if the user has provided a list of targets, use it; otherwise use Lobos to find good targets
            with open(target_file, 'r') as f:
                for line in f:
                    if 'RA' not in line:
                        coords = (line.rstrip('\n')).split(',')
                        map_out_big.data.append(DataProduct('[\"' + coords[0] + '\",\"' + coords[1] + '\"]', coords[2], False))

        else:
            infile = ((DataMap.load(infile_map))[0]).file	# get the actual filename from the map provided
            table = pyrap.tables.table(infile + '/FIELD', readonly = True)
            ra    = math.degrees(float(table.getcol('PHASE_DIR')[0][0][0] ) % (2 * math.pi))
            dec   = math.degrees(float(table.getcol('PHASE_DIR')[0][0][-1]))
            table.close()
            hexcoords = SkyCoord(ra, dec, unit = 'degree', frame='fk5')
            hexcoords = hexcoords.to_string('hmsdms', sep=':')
            hexra  = hexcoords.split(' ')[0]
            hexdec = hexcoords.split(' ')[1]

            if not os.path.isfile (outdir+'/lobos_stats.sum'):
                os.system ('wget http://www.jb.man.ac.uk/~njj/lobos_stats.sum -P '+outdir)
            lobos = np.loadtxt(outdir+'/lobos_stats.sum',dtype='S')
            for l in lobos:
                newcoords = SkyCoord(l[1],l[2], unit=(u.hourangle, u.deg),frame='fk5')
                new = np.array([newcoords.ra.degree,newcoords.dec.degree])
                try:
                    lobos_coord = np.vstack((lobos_coord,new))
                except:
                    lobos_coord = np.copy(new)
            a = correlate(np.array([[ra,dec]]),0,1,lobos_coord,0,1,radius)
            for i in np.asarray(a[:,1],dtype='int'):
                if lobos[i][5].count('P')>=nP:
                    namera = lobos[i,1].replace(':','').split('.')[0]
                    namedec = lobos[i,2].replace(':','').split('.')[0]
                    dpppra = lobos[i,1].replace(':','h',1).replace(':','m',1)+'s' ## phase-shift RA
                    dpppdec = lobos[i,2].replace(':','d',1).replace(':','m',1)+'s'## phase-shift DEC
                    hemisphere = '-' if '-' in hexdec else '+'
                    outfile = namera+hemisphere+namedec             ## outfilename
                    map_out_big.data.append(DataProduct('[\"'+dpppra+'\",\"'+dpppdec+'\"]', outfile, False ))
                    
        map_out_big.save(bigfileid)	        # save all directions
        current_coords = map_out_big[0].host	# save current direction
        current_name = map_out_big[0].file      # save current filename
        n = len(map_out_big)
    else:
        data_big = DataMap.load(bigfileid)	# load all directions
        current_coords = data_big[tick].host	# save current direction
        current_name = data_big[tick].file
        n = len(data_big)                       # current progress

    map_out = DataMap([])
    
    for msID, ms_file in enumerate(datalist):
        map_out.data.append(DataProduct(data[msID].host,
                                        '/'.join(data[msID].file.split('/')[:-1]) + '/' + current_name + '_' + data[msID].file.split('/')[-1],
                                        data[msID].skip))
      
    map_out.save(fileid)			# save all output measurement sets
    
    if (tick + 1) == n:     			# check how far the progress is
        do_break = True
    else:
        do_break = False
    #result = {'targetlist':bigfileid,'cords':current_coords,'cdir':current_name,'cdir_pattern':'*'+current_name+'*','ndir':int(n),'break':do_break}
    result = {'targetlist':bigfileid,'cords':current_coords,'ndir':int(n),'break':do_break,'mapfile':fileid}
    return result
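The per-iteration output naming above prefixes each MS basename with the current direction name and keeps it in the same directory. The same construction, isolated as a tiny hedged sketch (the helper name and paths are invented):

import os

def prefixed_output_path(ms_path, current_name):
    """Place '<current_name>_<basename>' next to the original MS, as the plugin does."""
    dirname, basename = os.path.split(ms_path)
    return os.path.join(dirname, current_name + '_' + basename)

# prefixed_output_path('/data/run1/L123456_SB000.MS', 'J1234+5678')
# -> '/data/run1/J1234+5678_L123456_SB000.MS'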
Ejemplo n.º 52
0
    def go(self):
        """
        imager_bbs functionality. Called by framework performing all the work
        """
        super(imager_bbs, self).go()
        self.logger.info("Starting imager_bbs run")

        # ********************************************************************
        # 1. Load and validate the data

        ms_map = MultiDataMap.load(self.inputs['args'][0])
        parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])

        # TODO: DataMap extension
        #        #Check if the input has equal length and on the same nodes
        #        if not validate_data_maps(ms_map, parmdb_map):
        #            self.logger.error("The combination of mapfiles failed validation:")
        #            self.logger.error("ms_map: \n{0}".format(ms_map))
        #            self.logger.error("parmdb_map: \n{0}".format(parmdb_map))
        #            return 1

        # *********************************************************************
        # 2. Start the node scripts
        jobs = []
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))
        map_dir = os.path.join(self.config.get("layout", "job_directory"),
                               "mapfiles")
        run_id = str(self.inputs.get("id"))

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for w, x, y in zip(ms_map, parmdb_map, sourcedb_map):
            w.skip = x.skip = y.skip = (w.skip or x.skip or y.skip)

        ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
            DataMap.SkipIterator
        for (idx,
             (ms, parmdb,
              sourcedb)) in enumerate(zip(ms_map, parmdb_map, sourcedb_map)):
            # host is the same for each entry (validate_data_maps)
            host, ms_list = ms.host, ms.file

            # Write data maps to MultiDataMaps
            ms_list_path = os.path.join(
                map_dir, "%s-%s_map_%s.map" % (host, idx, run_id))
            MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

            parmdb_list_path = os.path.join(
                map_dir, "%s-%s_parmdb_%s.map" % (host, idx, run_id))
            MultiDataMap([tuple([host, parmdb.file,
                                 False])]).save(parmdb_list_path)

            sourcedb_list_path = os.path.join(
                map_dir, "%s-%s_sky_%s.map" % (host, idx, run_id))
            MultiDataMap([tuple([host, [sourcedb.file],
                                 False])]).save(sourcedb_list_path)

            arguments = [
                self.inputs['bbs_executable'], self.inputs['parset'],
                ms_list_path, parmdb_list_path, sourcedb_list_path
            ]
            jobs.append(
                ComputeJob(host,
                           node_command,
                           arguments,
                           resources={"cores": self.inputs['nthreads']}))

        # start and wait till all are finished
        self._schedule_jobs(jobs)

        # **********************************************************************
        # 3. validate the node output and construct the output mapfile.
        if self.error.isSet():  #if one of the nodes failed
            self.logger.error("One of the nodes failed while performing"
                              "a BBS run. Aborting: concat.ms corruption")
            return 1

        # return the output: The measurement set that are calibrated:
        # calibrated data is placed in the ms sets
        MultiDataMap(ms_map).save(self.inputs['mapfile'])
        self.logger.info("Wrote file with  calibrated data")

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
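The skip-synchronisation plus SkipIterator idiom used in step 2 above recurs in several recipes in this document; a minimal self-contained sketch, with made-up hosts and file names:

from lofarpipe.support.data_map import DataMap, DataProduct

ms_map = DataMap([DataProduct('nodeA', 'a.MS', False),
                  DataProduct('nodeB', 'b.MS', True)])
parmdb_map = DataMap([DataProduct('nodeA', 'a.parmdb', False),
                      DataProduct('nodeB', 'b.parmdb', False)])

# If 'skip' is True in any map, set it to True in all maps for that entry.
for x, y in zip(ms_map, parmdb_map):
    x.skip = y.skip = (x.skip or y.skip)

# SkipIterator then silently drops the skipped entries from both maps.
ms_map.iterator = parmdb_map.iterator = DataMap.SkipIterator
for ms, parmdb in zip(ms_map, parmdb_map):
    print('%s %s %s' % (ms.host, ms.file, parmdb.file))   # only the 'nodeA' entry is visited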
Ejemplo n.º 53
0
def _add_name(inmap, suffix):
    dmap = DataMap.load(inmap)
    for item in dmap:
        item.file += suffix
    return dmap
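Note that _add_name returns the modified DataMap without saving it, so the caller decides where to persist it. A hedged usage sketch (the mapfile paths and suffix are placeholders):

# dmap = _add_name('images.mapfile', '.restored')   # appends '.restored' to every file entry
# dmap.save('images_restored.mapfile')              # _add_name itself never writes to disk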
Ejemplo n.º 54
0
    #parser.add_argument('-v','--verbose',help='More detailed information',action='store_true')
    #parser.add_argument('-f','--faillog',help='Name of a file which will contain a list of failed commands from the list.',default=None)
    #parser.add_argument('-N','--NumberOfTasks',help='Number of concurrent commands.',type=int,default=0)
    #parser.add_argument('-l','--Logs',help='Individual log files for each process.',action='store_true')
    #parser.add_argument('-R','--retry',help='Number of times failed commands should be retried after all commands ran through',type=int,default=-1)
    #parser.add_argument('-L','--low',help='Low index of the commandlist. Start from here.',type=int,default=0)
    #parser.add_argument('-H','--high',help='High index of the commandlist. End execution at this index',type=int,default=None)
    args = parser.parse_args()

    mm = MapfileManager()
    #print 'MAP: ', mm.map
    #mm.expand(args.number)
    #mm.from_parts(ntimes=args.number)
    mm.from_parts(data=['d1', 'd2', 'd3'], ntimes=args.number)
    dp = DataProduct('i am', 'last', False)
    dmtest = DataMap([dp])
    mm.insert(2, {'host': 'i am', 'file': 'number two', 'skip': False})
    mm.append(dp)
    print 'MAP: ', mm.data
    mm.save(args.name)
    dm = DataMap.load(args.name)
    print 'LOADED: ', dm
    md = MultiDataProduct('localhost', dm, False)
    md2 = MultiDataProduct('foreignhost', dm, False)
    print 'MULTIprod', md
    mm.append(md)
    print 'BLA: ', mm.data
    mdm = MultiDataMap([md])
    print 'MULTIMAP: ', mdm
    mdm.split_list(1)
    print 'MULTIMAP SPLIT: ', mdm
Ejemplo n.º 55
0
    def go(self):
        super(get_metadata, self).go()
        # ********************************************************************
        # 1. Parse and validate inputs
        args = self.inputs['args']
        product_type = self.inputs['product_type']
        global_prefix = self.inputs['parset_prefix']
        # Add a trailing dot (.) if not present in the prefix.
        if global_prefix and not global_prefix.endswith('.'):
            global_prefix += '.'

        if product_type not in self.valid_product_types:
            self.logger.warn(
                "Unknown product type: %s\n\tValid product types are: %s" %
                (product_type, ', '.join(self.valid_product_types))
            )

        # ********************************************************************
        # 2. Load mapfiles
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        data = DataMap.load(args[0])

        # ********************************************************************
        # 3. call node side of the recipe
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        data.iterator = DataMap.SkipIterator
        jobs = []
        for inp in data:
            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        self.inputs['product_type']
                    ]
                )
            )
        self._schedule_jobs(jobs)
        for job, inp in zip(jobs, data):
            if job.results['returncode'] != 0:
                inp.skip = True

        # ********************************************************************
        # 4. Check job results, and create the output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        self.logger.debug("Updating data map file: %s" % args[0])
        data.save(args[0])

        # ********************************************************************
        # 5. Create the parset-file and return it to the caller
        parset = parameterset()
        prefix = "Output_%s_" % product_type  #Underscore is needed because
                             # Mom / LTA cannot differentiate input and output
        parset.replace('%snrOf%s' % (global_prefix, prefix), str(len(jobs)))

        prefix = global_prefix + prefix
        for idx, job in enumerate(jobs):
            self.logger.debug("job[%d].results = %s" % (idx, job.results))

            # the Master/node communication adds a monitor_stats entry,
            # which must be removed manually here
            meta_data_parset = metadata.to_parset(job.results)
            try:
                meta_data_parset.remove("monitor_stats")
            except:
                pass

            parset.adoptCollection(meta_data_parset,
                                   '%s[%d].' % (prefix, idx))

        # Return result to caller
        parset.writeFile(self.inputs["metadata_file"])
        return 0
Ejemplo n.º 56
0
class imaging_pipeline(control):
    """
    The imaging pipeline is used to generate images and find
    sources in the generated images. Generated images and lists of found
    sources are complemented with meta data and thus ready for consumption by
    the Long Term Storage (LTA)
    
    This pipeline differs from the MSSS imaging pipeline in two aspects:
    1. It does not, by default, perform any automated parameter determination
       for the awimager.
    2. It does not output images and source lists to the image server.

    *subband groups*
    The imaging pipeline is able to generate images over the frequency range of
    LOFAR in parallel, by combining the frequency subbands into so-called
    subband groups. Each subband group will result in an image and a source
    list (typically 8 groups, each combining ten subbands).

    *Time Slices*
    Images are compiled from a number of so-called (time) slices. Each
    slice comprises a short (approx. 10 min) observation of a field (an area on
    the sky) containing typically 80 subbands. The number of slices will be
    different for LBA observations (typically 9) and HBA observations
    (typically 2), due to differences in sensitivity.

    Each image will be compiled on a different cluster node to balance the
    processing load. The input- and output- files and locations are determined
    by the scheduler and specified in the parset-file.

    **This pipeline performs the following operations:**

    1. Prepare Phase. Copy the preprocessed MSs from the different compute
       nodes to the nodes where the images will be compiled (the prepare phase).
       Combine the subbands into subband groups, concatenate the timeslices into
       a single large measurement set, and perform flagging as well as RFI and
       bad-station exclusion.
    2. Create db. Generate a local sky model (LSM) from the global sky model
       (GSM) for the sources that are in the field-of-view (FoV). The LSM
       is stored as sourcedb.
       In step 3 calibration of the measurement sets is performed on these
       sources and in step 4 to create a mask for the awimager. The calibration
       solution will be placed in an instrument table/db also created in this
       step.
    3. BBS. Calibrate the measurement set with the sourcedb from the GSM.
       In later iterations, sources found in the created images will be added
       to this list, resulting in a self-calibration cycle.
    4. Awimager. The combined measurement sets are now imaged. The imaging
       is performed using a mask: the sources in the sourcedb are used to
       create a casa image masking known sources. Together with the
       measurement set, an image is created.
    5. Sourcefinding. The images created in step 4 are fed to pyBDSM to find
       and describe sources. In multiple iterations, subtracting the found
       sources, all sources are collected in a source list.
       Step I. The sources found in step 5 are fed back into step 2.
       This allows the Measurement sets to be calibrated with sources currently
       found in the image. This loop will continue until convergence (3 times
       for the time being).
    6. Finalize. Meta data regarding the input, the computations performed
       and the results are collected and added to the casa image. The images
       created are converted from casa to HDF5 and copied to the correct
       output location.
    7. Export meta data: meta data is generated ready for
       consumption by the LTA and/or the LOFAR framework.


    **Per subband-group, the following output products will be delivered:**

    a. An image
    b. A source list
    c. (Calibration solutions and corrected visibilities)

    """
    def __init__(self):
        """
        Initialize member variables and call superclass init function
        """
        control.__init__(self)
        self.input_data = DataMap()
        self.target_data = DataMap()
        self.output_data = DataMap()
        self.scratch_directory = None
        self.parset_dir = None
        self.mapfile_dir = None

    @mail_log_on_exception
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        self.logger.info("Starting imager pipeline")

        # Define scratch directory to be used by the compute nodes.
        self.scratch_directory = os.path.join(self.inputs['working_directory'],
                                              self.inputs['job_name'])
        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # remove prepending parset identifiers, leave only pipelinecontrol
        full_parset = self.parset
        self.parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')  # remove this

        # Create directories to store communication and data files

        job_dir = self.config.get("layout", "job_directory")

        self.parset_dir = os.path.join(job_dir, "parsets")
        create_directory(self.parset_dir)
        self.mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(self.mapfile_dir)

        # *********************************************************************
        # (INPUT) Get the input from external sources and create pipeline types
        # Input measurement sets
        input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
        self.input_data.save(input_mapfile)
        # storedata_map(input_mapfile, self.input_data)
        self.logger.debug(
            "Wrote input UV-data mapfile: {0}".format(input_mapfile))

        # Provides location for the scratch directory and concat.ms location
        target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
        self.target_data.save(target_mapfile)
        self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile))

        # images datafiles
        output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile")
        self.output_data.save(output_image_mapfile)
        self.logger.debug(
            "Wrote output sky-image mapfile: {0}".format(output_image_mapfile))

        # ******************************************************************
        # (1) prepare phase: copy and collect the ms
        concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
            processed_ms_dir = self._prepare_phase(input_mapfile,
                                    target_mapfile)

        number_of_major_cycles = self.parset.getInt(
            "Imaging.number_of_major_cycles")

        # We start with an empty source_list map. It should contain n_output
        # entries all set to empty strings
        source_list_map_path = os.path.join(self.mapfile_dir,
                                            "initial_sourcelist.mapfile")
        source_list_map = DataMap.load(target_mapfile)  # copy the output map
        for item in source_list_map:
            item.file = ""  # set all to empty string
        source_list_map.save(source_list_map_path)

        for idx_loop in range(number_of_major_cycles):
            # *****************************************************************
            # (2) Create dbs and sky model
            parmdbs_path, sourcedb_map_path = self._create_dbs(
                concat_ms_map_path,
                timeslice_map_path,
                source_list_map_path=source_list_map_path,
                skip_create_dbs=False)

            # *****************************************************************
            # (3)  bbs_imager recipe.
            bbs_output = self._bbs(timeslice_map_path,
                                   parmdbs_path,
                                   sourcedb_map_path,
                                   skip=False)

            # TODO: Extra recipe: concat timeslices using pyrap.concatms
            # (see prepare)

            # *****************************************************************
            # (4) Get parameters awimager from the prepare_parset and inputs
            aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path,
                                                            idx_loop,
                                                            sourcedb_map_path,
                                                            skip=False)

            # *****************************************************************
            # (5) Source finding
            sourcelist_map, found_sourcedb_path = self._source_finding(
                aw_image_mapfile, idx_loop, skip=False)
            # should the output be a sourcedb? instead of a sourcelist

        # TODO: minbaseline should be a parset value as is maxbaseline..
        minbaseline = 0

        # *********************************************************************
        # (6) Finalize:
        placed_data_image_map = self._finalize(
            aw_image_mapfile, processed_ms_dir, ms_per_image_map_path,
            sourcelist_map, minbaseline, maxbaseline, target_mapfile,
            output_image_mapfile, found_sourcedb_path)

        # *********************************************************************
        # (7) Get metadata
        # Create a parset containing the metadata for MAC/SAS
        metadata_file = "%s_feedback_SkyImage" % (self.parset_file, )
        self.run_task(
            "get_metadata",
            placed_data_image_map,
            parset_prefix=(full_parset.getString('prefix') +
                           full_parset.fullModuleName('DataProducts')),
            product_type="SkyImage",
            metadata_file=metadata_file)

        self.send_feedback_processing(
            parameterset({'feedback_version': feedback_version}))
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0

    def _get_io_product_specs(self):
        """
        Get input- and output-data product specifications from the
        parset-file, and do some sanity checks.
        """
        dps = self.parset.makeSubset(
            self.parset.fullModuleName('DataProducts') + '.')
        # convert input dataproducts from parset value to DataMap
        self.input_data = DataMap([
            tuple(os.path.join(location, filename).split(':')) + (skip, )
            for location, filename, skip in zip(
                dps.getStringVector('Input_Correlated.locations'),
                dps.getStringVector('Input_Correlated.filenames'),
                dps.getBoolVector('Input_Correlated.skip'))
        ])
        self.logger.debug("%d Input_Correlated data products specified" %
                          len(self.input_data))

        self.output_data = DataMap([
            tuple(os.path.join(location, filename).split(':')) + (skip, )
            for location, filename, skip in zip(
                dps.getStringVector('Output_SkyImage.locations'),
                dps.getStringVector('Output_SkyImage.filenames'),
                dps.getBoolVector('Output_SkyImage.skip'))
        ])
        self.logger.debug("%d Output_SkyImage data products specified" %
                          len(self.output_data))

        # # Sanity checks on input- and output data product specifications
        # if not validate_data_maps(self.input_data, self.output_data):
        #    raise PipelineException(
        #        "Validation of input/output data product specification failed!"
        #    )  # Turned off until DataMap is extended..

        # Target data is basically scratch data, consisting of one concatenated
        # MS per image. It must be stored on the same host as the final image.
        self.target_data = copy.deepcopy(self.output_data)

        for idx, item in enumerate(self.target_data):
            item.file = os.path.join(self.scratch_directory,
                                     'ms_per_image_%d' % idx, 'concat.ms')
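
    # Hedged illustration, not part of the original recipe: each entry of the
    # DataMaps built above comes from a "host:directory" location string joined
    # with a filename; a single (location, filename, skip) triple is unpacked
    # roughly like this. The example values in the comments are illustrative.
    @staticmethod
    def _split_product_entry_example(location, filename, skip):
        # e.g. location='locus001:/data/scratch/L123456',
        #      filename='L123456_SB000_uv.MS'
        # -> ('locus001', '/data/scratch/L123456/L123456_SB000_uv.MS', skip)
        host, path = os.path.join(location, filename).split(':')
        return host, path, skip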

    @xml_node
    def _finalize(self,
                  awimager_output_map,
                  processed_ms_dir,
                  ms_per_image_map,
                  sourcelist_map,
                  minbaseline,
                  maxbaseline,
                  target_mapfile,
                  output_image_mapfile,
                  sourcedb_map,
                  skip=False):
        """
        Perform the final step of the imager:
        Convert the output image to hdf5 and copy to output location
        Collect meta data and add to the image
        """

        placed_image_mapfile = self._write_datamap_to_file(
            None, "placed_image")
        self.logger.debug("Touched mapfile for correctly placed"
                          " hdf images: {0}".format(placed_image_mapfile))

        if skip:
            return placed_image_mapfile
        else:
            # run the imager_finalize recipe
            placed_image_mapfile = self.run_task(
                "imager_finalize",
                target_mapfile,
                awimager_output_map=awimager_output_map,
                ms_per_image_map=ms_per_image_map,
                sourcelist_map=sourcelist_map,
                sourcedb_map=sourcedb_map,
                minbaseline=minbaseline,
                maxbaseline=maxbaseline,
                target_mapfile=target_mapfile,
                output_image_mapfile=output_image_mapfile,
                processed_ms_dir=processed_ms_dir,
                placed_image_mapfile=placed_image_mapfile
            )["placed_image_mapfile"]

        return placed_image_mapfile

    @xml_node
    def _source_finding(self, image_map_path, major_cycle, skip=True):
        """
        Perform the sourcefinding step
        """
        # Create the parsets for the different sourcefinder runs
        bdsm_parset_pass_1 = self.parset.makeSubset("BDSM[0].")
        parset_path_pass_1 = self._write_parset_to_file(
            bdsm_parset_pass_1, "pybdsm_first_pass.par",
            "Sourcefinder first pass parset.")

        bdsm_parset_pass_2 = self.parset.makeSubset("BDSM[1].")
        parset_path_pass_2 = self._write_parset_to_file(
            bdsm_parset_pass_2, "pybdsm_second_pass.par",
            "sourcefinder second pass parset")

        # touch a mapfile to be filled with created sourcelists
        source_list_map = self._write_datamap_to_file(
            None, "source_finding_outputs",
            "map to sourcefinding outputs (sourcelist)")
        sourcedb_map_path = self._write_datamap_to_file(
            None, "source_dbs_outputs",
            "Map to sourcedbs based in found sources")

        # construct the location to save the output products of the
        # sourcefinder
        cycle_path = os.path.join(self.scratch_directory,
                                  "awimage_cycle_{0}".format(major_cycle))
        catalog_path = os.path.join(cycle_path, "bdsm_catalog")
        sourcedb_path = os.path.join(cycle_path, "bdsm_sourcedb")

        # Run the sourcefinder
        if skip:
            return source_list_map, sourcedb_map_path
        else:
            self.run_task("imager_source_finding",
                          image_map_path,
                          bdsm_parset_file_run1=parset_path_pass_1,
                          bdsm_parset_file_run2x=parset_path_pass_2,
                          working_directory=self.scratch_directory,
                          catalog_output_path=catalog_path,
                          mapfile=source_list_map,
                          sourcedb_target_path=sourcedb_path,
                          sourcedb_map_path=sourcedb_map_path)

            return source_list_map, sourcedb_map_path

    @xml_node
    def _bbs(self,
             timeslice_map_path,
             parmdbs_map_path,
             sourcedb_map_path,
             skip=False):
        """
        Perform a calibration step. First with a set of sources from the
        GSM, and in later iterations also with the found sources.
        """
        # create parset for bbs run
        parset = self.parset.makeSubset("BBS.")
        parset_path = self._write_parset_to_file(
            parset, "bbs", "Parset for calibration with a local sky model")

        # create the output file path
        output_mapfile = self._write_datamap_to_file(
            None, "bbs_output", "Mapfile with calibrated measurement sets.")

        converted_sourcedb_map_path = self._write_datamap_to_file(
            None, "source_db", "correctly shaped mapfile for input sourcedbs")

        if skip:
            return output_mapfile

        # The create db step produces a mapfile with a single sourcelist for
        # the different timeslices. Generate a mapfile with copies of the
        # sourcelist location: this allows the maps to be validated in
        # combination. First get the original map data.
        sourcedb_map = DataMap.load(sourcedb_map_path)
        parmdbs_map = MultiDataMap.load(parmdbs_map_path)
        converted_sourcedb_map = []

        # sanity check for correct output from previous recipes
        if not validate_data_maps(sourcedb_map, parmdbs_map):
            self.logger.error("The input files for bbs do not contain "
                              "matching host names for each entry content:")
            self.logger.error(repr(sourcedb_map))
            self.logger.error(repr(parmdbs_map))
            raise PipelineException("Invalid input data for imager_bbs recipe")

        self.run_task("imager_bbs",
                      timeslice_map_path,
                      parset=parset_path,
                      instrument_mapfile=parmdbs_map_path,
                      sourcedb_mapfile=sourcedb_map_path,
                      mapfile=output_mapfile,
                      working_directory=self.scratch_directory)

        return output_mapfile

    @xml_node
    def _aw_imager(self,
                   prepare_phase_output,
                   major_cycle,
                   sky_path,
                   skip=False):
        """
        Create an image based on the calibrated, filtered and combined data.
        """
        # Create parset for the awimage recipe
        parset = self.parset.makeSubset("AWimager.")
        # Get maxbaseline from 'full' parset
        max_baseline = self.parset.getInt("Imaging.maxbaseline")
        patch_dictionary = {"maxbaseline": str(max_baseline)}
        try:
            temp_parset_filename = patch_parset(parset, patch_dictionary)
            aw_image_parset = get_parset(temp_parset_filename)
            aw_image_parset_path = self._write_parset_to_file(
                aw_image_parset, "awimager_cycle_{0}".format(major_cycle),
                "Awimager recipe parset")
        finally:
            # remove tempfile
            os.remove(temp_parset_filename)

        # Create path to write the awimage files
        intermediate_image_path = os.path.join(
            self.scratch_directory, "awimage_cycle_{0}".format(major_cycle),
            "image")

        output_mapfile = self._write_datamap_to_file(
            None, "awimager", "output map for awimager recipe")

        mask_patch_size = self.parset.getInt("Imaging.mask_patch_size")
        auto_imaging_specs = self.parset.getBool("Imaging.auto_imaging_specs")
        fov = self.parset.getFloat("Imaging.fov")
        specify_fov = self.parset.getBool("Imaging.specify_fov")
        if skip:
            pass
        else:
            # run the awimager recipe
            self.run_task("imager_awimager",
                          prepare_phase_output,
                          parset=aw_image_parset_path,
                          mapfile=output_mapfile,
                          output_image=intermediate_image_path,
                          mask_patch_size=mask_patch_size,
                          sourcedb_path=sky_path,
                          working_directory=self.scratch_directory,
                          autogenerate_parameters=auto_imaging_specs,
                          specify_fov=specify_fov,
                          fov=fov)

        return output_mapfile, max_baseline

    @xml_node
    def _prepare_phase(self, input_ms_map_path, target_mapfile):
        """
        Copy ms to correct location, combine the ms in slices and combine
        the time slices into a large virtual measurement set
        """
        # Create the dir where found and processed ms are placed
        # ms_per_image_map_path contains all the original ms locations;
        # this list may contain missing files
        processed_ms_dir = os.path.join(self.scratch_directory, "subbands")

        # get the parameters, create a subset for ndppp, save
        ndppp_parset = self.parset.makeSubset("DPPP.")
        ndppp_parset_path = self._write_parset_to_file(
            ndppp_parset, "prepare_imager_ndppp", "parset for ndpp recipe")

        # create the output file paths
        # [1] output -> prepare_output
        output_mapfile = self._write_datamap_to_file(None, "prepare_output")
        time_slices_mapfile = self._write_datamap_to_file(
            None, "prepare_time_slices")
        ms_per_image_mapfile = self._write_datamap_to_file(
            None, "ms_per_image")

        # get some parameters from the imaging pipeline parset:
        slices_per_image = self.parset.getInt("Imaging.slices_per_image")
        subbands_per_image = self.parset.getInt("Imaging.subbands_per_image")

        outputs = self.run_task("imager_prepare",
                                input_ms_map_path,
                                parset=ndppp_parset_path,
                                target_mapfile=target_mapfile,
                                slices_per_image=slices_per_image,
                                subbands_per_image=subbands_per_image,
                                mapfile=output_mapfile,
                                slices_mapfile=time_slices_mapfile,
                                ms_per_image_mapfile=ms_per_image_mapfile,
                                working_directory=self.scratch_directory,
                                processed_ms_dir=processed_ms_dir)

        # validate that the prepare phase produced the correct data
        for required_key in ('mapfile', 'slices_mapfile',
                             'ms_per_image_mapfile'):
            if required_key not in outputs:
                error_msg = ("The imager_prepare master script did not "
                             "return correct data. missing: {0}".format(
                                 required_key))
                self.logger.error(error_msg)
                raise PipelineException(error_msg)

        # Return the mapfile paths with the processed data
        return output_mapfile, outputs["slices_mapfile"], ms_per_image_mapfile, \
            processed_ms_dir

    @xml_node
    def _create_dbs(self,
                    input_map_path,
                    timeslice_map_path,
                    source_list_map_path,
                    skip_create_dbs=False):
        """
        Create, for each of the concatenated input measurement sets,
        an instrument model (parmdb) and a sourcedb
        """
        # Create the parameter set
        parset = self.parset.makeSubset("GSM.")

        # create the files that will contain the output of the recipe
        parmdbs_map_path = self._write_datamap_to_file(
            None, "parmdbs", "parmdbs output mapfile")
        sourcedb_map_path = self._write_datamap_to_file(
            None, "sky_files", "source db output mapfile")

        # run the master script
        if skip_create_dbs:
            pass
        else:
            self.run_task(
                "imager_create_dbs",
                input_map_path,
                monetdb_hostname=parset.getString("monetdb_hostname"),
                monetdb_port=parset.getInt("monetdb_port"),
                monetdb_name=parset.getString("monetdb_name"),
                monetdb_user=parset.getString("monetdb_user"),
                monetdb_password=parset.getString("monetdb_password"),
                assoc_theta=parset.getString("assoc_theta"),
                sourcedb_suffix=".sourcedb",
                slice_paths_mapfile=timeslice_map_path,
                parmdb_suffix=".parmdb",
                parmdbs_map_path=parmdbs_map_path,
                sourcedb_map_path=sourcedb_map_path,
                source_list_map_path=source_list_map_path,
                working_directory=self.scratch_directory)

        return parmdbs_map_path, sourcedb_map_path

    # TODO: Move these helpers to the parent class
    def _write_parset_to_file(self, parset, parset_name, message):
        """
        Write the supplied parameterset to the parset directory in the
        jobs dir, using the filename supplied in parset_name.
        Return the full path to the created file.
        """
        parset_dir = os.path.join(self.config.get("layout", "job_directory"),
                                  "parsets")
        # create the parset dir if it does not exist
        create_directory(parset_dir)

        # write the content to a new parset file
        parset_path = os.path.join(parset_dir,
                                   "{0}.parset".format(parset_name))
        parset.writeFile(parset_path)

        # log a debug entry with the path and message
        self.logger.debug("Wrote parset to path <{0}> : {1}".format(
            parset_path, message))

        return parset_path

    def _write_datamap_to_file(self, datamap, mapfile_name, message=""):
        """
        Write the supplied datamap to the mapfile directory in the jobs dir,
        using the filename supplied in mapfile_name.
        Return the full path to the created file.
        If the supplied datamap is None, the file is touched if it does not
        exist yet; existing files are kept as they are.
        """

        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"),
                                   "mapfiles")
        # create the mapfile_dir if it does not exist
        create_directory(mapfile_dir)

        # construct the full path of the mapfile
        mapfile_path = os.path.join(mapfile_dir,
                                    "{0}.map".format(mapfile_name))

        # save or touch the mapfile and log a debug entry with path and message
        if datamap is not None:
            datamap.save(mapfile_path)

            self.logger.debug("Wrote mapfile <{0}>: {1}".format(
                mapfile_path, message))
        else:
            if not os.path.exists(mapfile_path):
                DataMap().save(mapfile_path)

                self.logger.debug("Touched mapfile <{0}>: {1}".format(
                    mapfile_path, message))

        return mapfile_path
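
# A minimal standalone sketch (not part of the class above) of the touch-or-save
# behaviour implemented by _write_datamap_to_file, assuming only the lofarpipe
# DataMap API already used in these examples; path handling is simplified.
import os

from lofarpipe.support.data_map import DataMap


def write_datamap_or_touch(datamap, mapfile_path):
    """Save `datamap` to `mapfile_path`; if it is None, touch an empty mapfile
    only when the file does not exist yet, keeping existing files as they are."""
    if datamap is not None:
        datamap.save(mapfile_path)
    elif not os.path.exists(mapfile_path):
        DataMap().save(mapfile_path)  # touch: write an empty mapfile
    return mapfile_path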
Ejemplo n.º 57
0
def _create_mapfile_ato(inmap):
    return MultiDataMap(DataMap.load(inmap))
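
# Hedged sketch of what the one-liner above does, written out explicitly; it
# assumes the lofarpipe MultiDataMap/MultiDataProduct classes used elsewhere in
# these examples, with each single-file entry wrapped into a one-element list.
from lofarpipe.support.data_map import DataMap, MultiDataMap, MultiDataProduct


def _create_mapfile_ato_explicit(inmap):
    dmap = DataMap.load(inmap)
    mmap = MultiDataMap()
    for item in dmap:
        mmap.append(MultiDataProduct(item.host, [item.file], item.skip))
    return mmap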
Ejemplo n.º 58
0
def plugin_main(args, **kwargs):
    """
    Takes in a catalogue with a target and returns an appropriate mapfile
    
    Parameters
    ----------
    mapfile_in: str
        
    mapfile_dir: str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    target_file: str
        file containing target info

    Returns
    -------
    result : dict
        Output datamap filename
    
    """
    # parse the inputs
    infile_map = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    target_file = kwargs['target_file']
    all_to_one = kwargs['all_to_one'].lower().capitalize()

    # the input data
    data = DataMap.load(infile_map)
    datalist = [data[i].file for i in xrange(len(data))]

    # outfile information
    fileid = os.path.join(mapfile_dir, filename)
    coordfileid = os.path.join(mapfile_dir, 'coords_' + filename)

    # initialise the output data map for the coordinates
    map_out_coords = DataMap([])
    # read in the catalogue to get source_id, RA, and DEC
    t = Table.read(target_file, format='csv')
    RA_val = t['RA_LOTSS'].data[0]
    DEC_val = t['DEC_LOTSS'].data[0]
    Source_id = t['Source_id'].data[0]
    # make sure the source ID carries an 'I' or 'S' prefix
    if str(Source_id)[0:1] not in ('I', 'S'):
        Source_id = 'S' + str(Source_id)
    # make a string of coordinates for the NDPPP command
    ss = '["' + str(RA_val) + 'deg","' + str(DEC_val) + 'deg"]'
    # save the coordinate information
    map_out_coords.data.append(DataProduct(ss, Source_id, False))
    map_out_coords.save(coordfileid)
    # save the coords to a variable to return
    current_coords = map_out_coords[0].host

    # get the name (source_id)
    current_name = map_out_coords[0].file
    # initialise an output data map
    map_out = DataMap([])
    if all_to_one == 'True':
        msID = 0
        ms_file = datalist[0]
        map_out.data.append(
            DataProduct(
                data[msID].host, '/'.join(data[msID].file.split('/')[:-1]) +
                '/' + current_name + '_' + data[msID].file.split('/')[-1],
                data[msID].skip))
    else:
        # create one output entry per input MS
        for msID, ms_file in enumerate(datalist):
            map_out.data.append(
                DataProduct(
                    data[msID].host,
                    '/'.join(data[msID].file.split('/')[:-1]) + '/' +
                    current_name + '_' + data[msID].file.split('/')[-1],
                    data[msID].skip))
    # save the file
    map_out.save(fileid)
    result = {
        'coordfile': coordfileid,
        'coords': current_coords,
        'name': current_name,
        'mapfile': fileid
    }
    return result
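
# Hedged usage sketch for the plugin above; the kwargs mirror its docstring,
# and the paths and catalogue name are illustrative only (the CSV must provide
# RA_LOTSS, DEC_LOTSS, and Source_id columns, as read by the code).
result = plugin_main(
    [],
    mapfile_in='/data/mapfiles/input_mss.mapfile',
    mapfile_dir='/data/mapfiles',
    filename='target_shifted.mapfile',
    target_file='/data/target_catalogue.csv',
    all_to_one='false',
)
print('%s %s %s' % (result['name'], result['coords'], result['mapfile']))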
Ejemplo n.º 59
0
def main(ms_input,
         filename=None,
         mapfile_dir=None,
         numSB=-1,
         hosts=None,
         NDPPPfill=True,
         target_path=None,
         stepname=None,
         mergeLastGroup=False,
         truncateLastSBs=True,
         firstSB=None):
    """
    Sort a list of MS files into frequency groups per time-step, checking for
    missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional 
        How many files should go into one frequency group. Values <= 0 mean put 
        all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output files 
        into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True:
          not allowed
        mergeLastGroup = True, truncateLastSBs = False:
          put the files from the last group that doesn't have SBperGroup subbands 
          into the second last group (which will then have more than SBperGroup entries). 
        mergeLastGroup = False, truncateLastSBs = True:
          ignore the last files that don't make a full group (not all files are used).
        mergeLastGroup = False, truncateLastSBs = False:
          keep the incomplete last group, or - with NDPPPfill=True - fill the
          last group with dummies.
    firstSB : int, optional
        If set, reference the grouping of files to this station-subband, as if a file
        with this station-subband were included in the input files.
        (For HBA-low, i.e. 0 -> 100MHz, 55 -> 110.74MHz, 512 -> 200MHz)

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """

    NDPPPfill = input2bool(NDPPPfill)
    mergeLastGroup = input2bool(mergeLastGroup)
    truncateLastSBs = input2bool(truncateLastSBs)
    firstSB = input2int(firstSB)
    numSB = int(numSB)

    if not filename or not mapfile_dir:
        raise ValueError(
            'sort_times_into_freqGroups: filename and mapfile_dir are needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError(
            'sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!'
        )


#    if mergeLastGroup:
#        raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            # iterating a DataMap yields DataProduct items; use their .file field
            for item in map_in:
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError(
            'sort_times_into_freqGroups: type of "ms_input" unknown!')

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on", len(
        ms_list), "files (including flagged files)."

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # work only on files selected by a previous step
        if ms.lower() != 'none':
            # use the slower but more reliable way:
            obstable = pt.table(ms, ack=False)
            timestamp = int(round(np.min(obstable.getcol('TIME'))))
            #obstable = pt.table(ms+'::OBSERVATION', ack=False)
            #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
            obstable.close()
            if timestamp in time_groups:
                time_groups[timestamp]['files'].append(ms)
            else:
                time_groups[timestamp] = {
                    'files': [ms],
                    'basename': os.path.splitext(ms)[0]
                }
    print "sort_times_into_freqGroups: found", len(time_groups), "time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()  # not needed now, but later
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                file_bandwidth = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                freqset = set([freq])
                first = False
            else:
                assert file_bandwidth == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                freqset.add(freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs,
                                              time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    freqliste = np.array(list(freqset))
    freqliste.sort()
    freq_width = np.min(freqliste[1:] - freqliste[:-1])
    if file_bandwidth > freq_width:
        raise ValueError(
            "Bandwidth of files is larger than minimum frequency step between two files!"
        )
    if file_bandwidth < (freq_width / 2.):
        raise ValueError(
            "Bandwidth of files is smaller than half the minimum frequency step between two files! (More than half the data is missing.)"
        )
    #the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    # add half a subband plus 1% of the SB bandwidth in case maxfreq falls
    # "exactly" on a group-border
    maxfreq = np.max(freqliste) + freq_width * 0.51
    if firstSB is not None:
        minfreq = (float(firstSB) / 512. * 100e6) + 100e6 - freq_width / 2.
        if np.min(freqliste) < minfreq:
            raise ValueError(
                'sort_times_into_freqGroups: Frequency of lowest input data is lower than reference frequency!'
            )
    else:
        minfreq = np.min(freqliste) - freq_width / 2.
    groupBW = freq_width * numSB
    if groupBW < 1e6:
        print 'sort_times_into_freqGroups: ***WARNING***: Bandwidth of concatenated MS is lower than 1 MHz. This may cause conflicts with the concatenated file names!'
    freqborders = np.arange(minfreq, maxfreq, groupBW)
    if mergeLastGroup:
        freqborders[-1] = maxfreq
    elif truncateLastSBs:
        pass  # nothing to do; kept to make the logic clearer
    elif not truncateLastSBs and NDPPPfill:
        freqborders = np.append(freqborders, (freqborders[-1] + groupBW))
    elif not truncateLastSBs and not NDPPPfill:
        freqborders = np.append(freqborders, maxfreq)

    freqborders = freqborders[freqborders > (np.min(freqliste) - groupBW)]
    ngroups = len(freqborders) - 1
    if ngroups == 0:
        raise ValueError(
            'sort_times_into_freqGroups: Not enough input subbands to create at least one full (frequency-)group!'
        )

    print "sort_times_into_freqGroups: Will create", ngroups, "group(s) with", numSB, "file(s) each."

    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        for groupIdx in xrange(ngroups):
            files = []
            skip_this = True
            filefreqs_low = np.arange(freqborders[groupIdx],
                                      freqborders[groupIdx + 1], freq_width)
            for lower_freq in filefreqs_low:
                if freq > lower_freq and freq < lower_freq + freq_width:
                    assert freq != 1e12
                    files.append(fname)
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
                    skip_this = False
                elif NDPPPfill:
                    files.append('dummy.ms')
            if not skip_this:
                filemap.append(
                    MultiDataProduct(hosts[hostID % numhosts], files,
                                     skip_this))
                freqID = int(
                    (freqborders[groupIdx] + freqborders[groupIdx + 1]) / 2e6)
                groupname = time_groups[time]['basename'] + '_%Xt_%dMHz.ms' % (
                    time, freqID)
                if type(stepname) is str:
                    groupname += stepname
                if type(target_path) is str:
                    groupname = os.path.join(target_path,
                                             os.path.basename(groupname))
                groupmap.append(
                    DataProduct(hosts[hostID % numhosts], groupname,
                                skip_this))
        orphan_files = len(time_groups[time]['freq_names'])
        if freq < 1e12:
            orphan_files += 1
        if orphan_files > 0:
            print "sort_times_into_freqGroups: Had %d unassigned files in time-group %xt." % (
                orphan_files, time)
    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename + '_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemap, nchans)
    flagmapname = os.path.join(mapfile_dir, filename + '_flags')
    flagmap.save(flagmapname)
    result = {
        'mapfile': filemapname,
        'groupmapfile': groupmapname,
        'flagmapfile': flagmapname
    }
    return result
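
# Hedged sketch of the frequency-group border arithmetic used above, isolated
# from the MS bookkeeping. The subband spacing and counts are illustrative
# (195.3125 kHz subbands, 25 input files, 10 subbands per group); with the
# default truncateLastSBs behaviour the leftover files are simply dropped.
import numpy as np

freqs = 120.0e6 + 195312.5 * np.arange(25)    # assumed REF_FREQUENCY values
freq_width = np.min(np.diff(np.sort(freqs)))  # minimum spacing between files
numSB = 10
minfreq = np.min(freqs) - freq_width / 2.0
maxfreq = np.max(freqs) + freq_width * 0.51   # half a subband plus a 1% margin
freqborders = np.arange(minfreq, maxfreq, freq_width * numSB)
ngroups = len(freqborders) - 1                # here: 2 full groups, 5 files unused
print("groups: %d, borders (MHz): %s" % (ngroups, str(freqborders / 1e6)))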
Ejemplo n.º 60
0
def main(ms_input,
         outmapname=None,
         mapfile_dir=None,
         cellsize_highres_deg=0.00208,
         cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5,
         fieldsize_lowres=6.5):
    """
    Sort the input MS files into frequency bands and compute image sizes and
    averaging steps for each band

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHM's shall the high-res images be.
    fieldsize_lowres : float, optional
        How many FWHM's shall the low-res images be.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles

    """
    if not outmapname or not mapfile_dir:
        raise ValueError(
            'InitSubtract_sort_and_compute: outmapname and mapfile_dir are needed!'
        )
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            # iterating a DataMap yields DataProduct items; use their .file field
            for item in map_in:
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError(
            'InitSubtract_sort_and_compute: type of "ms_input" unknown!')

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    print "InitSubtract_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    numfiles = 0
    for band in bands:
        print "InitSubtract_sort_and_compute.py: Working on Band:", band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res,
         imsize_low_res) = band.get_image_sizes(float(cellsize_highres_deg),
                                                float(cellsize_lowres_deg),
                                                float(fieldsize_highres),
                                                float(fieldsize_lowres))
        high_size_map.append(
            DataProduct('localhost',
                        str(imsize_high_res) + " " + str(imsize_high_res),
                        False))
        low_size_map.append(
            DataProduct('localhost',
                        str(imsize_low_res) + " " + str(imsize_low_res),
                        False))

    print "InitSubtract_sort_and_compute.py: Computing averaging steps."
    (freqstep, timestep) = bands[0].get_averaging_steps()
    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    result = {
        'groupmap': groupmapname,
        'single_mapfile': file_single_mapname,
        'high_size_mapfile': high_sizename,
        'low_size_mapfile': low_sizename,
        'freqstep': freqstepname,
        'timestep': timestepname
    }
    return result
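
# Hedged usage sketch for main() above; the MS names and directories are
# illustrative, and Band / get_image_sizes / get_averaging_steps come from the
# surrounding (not shown) module, so this only indicates the calling convention.
maps = main(
    ms_input='[/data/scratch/L123456_SB000_uv.MS, /data/scratch/L123456_SB001_uv.MS]',
    outmapname='initsubtract_input',
    mapfile_dir='/data/scratch/mapfiles',
    cellsize_highres_deg=0.00208,
    cellsize_lowres_deg=0.00694,
    fieldsize_highres=2.5,
    fieldsize_lowres=6.5,
)
print("group map: %s, single-file map: %s" % (maps['groupmap'], maps['single_mapfile']))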