Example #1
 def __init__(self, parsetDict):
     # The constructor gets the subset of the NDPPP parset containing
     # all key-value pairs for this step.
     # Note: the superclass constructor MUST be called.
     DPStep.__init__(self, parsetDict)
     parset = parameterset(parsetDict)
     self.itsIncr = parset.getDouble('incr', 1)
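A minimal usage sketch (not part of the original example) of the default-value lookup used above, assuming the LOFAR parameterset module is importable as lofar.parameterset and that parset values are plain strings:

from lofar.parameterset import parameterset

ps = parameterset({'incr': '2.5'})   # constructing from a dict, as in the example
print(ps.getDouble('incr', 1))       # -> 2.5
print(ps.getDouble('missing', 1))    # -> 1.0, the supplied default is used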
Example #2
 def __init__(self, parsetDict):
     # The constructor gets the subset of the NDPPP parset containing
     # all key-value pairs for this step.
     # Note: the superclass constructor MUST be called.
     DPStep.__init__(self, parsetDict)
     parset = parameterset(parsetDict)
     self.itsIncr = parset.getDouble('incr', 1)
Example #3
 def __init__(self, id, predecessors='', inputs='', outputs='', parset=''):
     self.id = id
     self.predecessors_as_str = predecessors
     self.inputs = inputs
     self.output = outputs
     self.parset_as_str = str(parset)
     self.parset = parameterset()
Example #4
    def _prepare_steps(self, **kwargs):
        """
        Prepare for running the NDPPP program. This means, for one thing,
        patching the parsetfile with the correct input/output MS names,
        start/end times if available, etc. If a demixing step must be performed,
        some extra work needs to be done.
        
        Returns: patch dictionary that must be applied to the parset.
        """
        self.logger.debug("Time interval: %s %s" %
                          (kwargs['start_time'], kwargs['end_time']))
        # Create output directory for output MS.
        create_directory(os.path.dirname(kwargs['tmpfile']))

        patch_dictionary = {
            'msin': kwargs['infile'],
            'msout': kwargs['tmpfile'],
            'uselogger': 'True'
        }
        if kwargs['start_time']:
            patch_dictionary['msin.starttime'] = kwargs['start_time']
        if kwargs['end_time']:
            patch_dictionary['msin.endtime'] = kwargs['end_time']

        # If we need to do a demixing step, we have to do some extra work.
        # We have to read the parsetfile to check this.
        parset = parameterset(kwargs['parsetfile'])
        for step in parset.getStringVector('steps'):
            if parset.getString(step + '.type', '').startswith('demix'):
                patch_dictionary.update(
                    self._prepare_demix_step(step, **kwargs))

        # Return the patch dictionary that must be applied to the parset.
        return patch_dictionary
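The patch dictionary returned above is applied to the parset elsewhere; a hedged sketch of how such a dictionary can be merged into a parset, using only calls that appear in the other examples in this listing (the file names are illustrative):

from lofar.parameterset import parameterset

parset = parameterset('NDPPP.parset')           # illustrative input parset file
patch = {'msin': 'input.MS', 'msout': 'output.MS', 'uselogger': 'True'}
for key, value in patch.items():
    parset.replace(key, str(value))             # add or overwrite each patched key
parset.writeFile('NDPPP.patched.parset')        # write the patched parset to disk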
Example #5
        def _send_filtered_event_message(self, otdb_id: int,
                                         modificationTime: datetime,
                                         state: str):
            try:
                with OTDBRPC.create(exchange=self.exchange,
                                    broker=self.broker,
                                    timeout=2) as otdbrpc:
                    parset = parameterset(
                        otdbrpc.taskGetSpecification(otdb_id=otdb_id).get(
                            "specification", ''))
                    task_type = parset.get("ObsSW.Observation.processType")
                    priority = 6 if task_type == "Observation" else 2
            except Exception as e:
                logger.warning(
                    'Could not determine task type for otdb_id=%s, using default priority=4: %s',
                    otdb_id, e)
                priority = 4

            try:
                content = {
                    "treeID": otdb_id,
                    "state": state,
                    "time_of_change": modificationTime
                }
                msg = EventMessage(
                    subject=DEFAULT_FILTERED_OTDB_NOTIFICATION_SUBJECT,
                    content=content,
                    priority=priority)
                logger.info(
                    'sending filtered event message subject:\'%s\' content: %s',
                    msg.subject, content)
                self.send(msg)
            except Exception as e:
                logger.error('Could not send event message: %s', e)
Example #6
 def __init__(self):
     super(preprocessing_pipeline, self).__init__()
     self.parset = parameterset()
     self.input_data = []
     self.output_data = []
     self.io_data_mask = []
     self.parset_feedback_file = None
Example #7
 def __init__(self):
     super(preprocessing_pipeline, self).__init__()
     self.parset = parameterset()
     self.input_data = []
     self.output_data = []
     self.io_data_mask = []
     self.parset_feedback_file = None
Example #8
 def __init__(self, id, predecessors='', inputs='', outputs='', parset=''):
     self.id = id
     self.predecessors_as_str = predecessors
     self.inputs = inputs
     self.output = outputs
     self.parset_as_str = str(parset)
     self.parset = parameterset()
Example #9
def to_parset(data, prefix=''):
    """
    Convert the data in the variable `data` to a LOFAR parameterset. Values
    may contain vectors (python lists) or records (python dicts) of scalars.
    Deeper nested structures must be unraveled into separate key/value pairs,
    where the name of the nested value is moved into the key. Keys for
    vector values will get an index attached to their name.
    
    For example, the dictionary entry
        'vec_rec' : [{1:'a', 2:'b'}, {3:'c'}]
    will be converted to the following parameterset key/value pairs
        vec_rec[0]={1: 'a', 2: 'b'}
        vec_rec[1]={3: 'c'}
    And, the dictionary entry
        'rec_vec' : {'a':[1, 2], 'b':[3]}
    will be converted to
        rec_vec.a=[1, 2]
        rec_vec.b=[3]
    """
    result = parameterset()
    if isinstance(data, dict):
        for key, value in data.iteritems():
            fullkey = prefix + '.' + key if prefix else key
            if isinstance(value, dict):
                if any(
                        isinstance(v, dict) or isinstance(v, list)
                        for v in value.values()):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            elif isinstance(value, list):
                if any(
                        isinstance(v, dict) or isinstance(v, list)
                        for v in value):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            else:
                result.replace(fullkey, str(value))
    elif isinstance(data, list):
        for index, value in enumerate(data):
            fullkey = prefix + '[%d]' % index
            if isinstance(value, dict):
                if any(
                        isinstance(v, dict) or isinstance(v, list)
                        for v in value.values()):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            elif isinstance(value, list):
                if any(
                        isinstance(v, dict) or isinstance(v, list)
                        for v in value):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            else:
                result.replace(fullkey, str(value))
    return result
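A short usage sketch for to_parset() (not part of the original code); the expected output simply restates the docstring above:

data = {
    'rec_vec': {'a': [1, 2], 'b': [3]},
    'vec_rec': [{1: 'a', 2: 'b'}, {3: 'c'}],
}
ps = to_parset(data)
# Expected parameterset contents, per the docstring:
#   rec_vec.a=[1, 2]
#   rec_vec.b=[3]
#   vec_rec[0]={1: 'a', 2: 'b'}
#   vec_rec[1]={3: 'c'}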
Example #10
    def _get_meta_data(self, number_of_major_cycles, placed_data_image_map,
                       placed_correlated_map, full_parset, max_cycles_reached):
        """
        Function combining all the meta data collection steps of the processing
        """
        parset_prefix = full_parset.getString('prefix') + \
                        full_parset.fullModuleName('DataProducts')

        toplevel_meta_data = parameterset({'feedback_version': feedback_version})
        toplevel_meta_data.replace(parset_prefix + ".numberOfMajorCycles",
                                   str(number_of_major_cycles))
        toplevel_meta_data_path = os.path.join(
            self.parset_dir, "toplevel_meta_data.parset")

        toplevel_meta_data.replace(parset_prefix + ".max_cycles_reached",
                                   str(max_cycles_reached))

        try:
            toplevel_meta_data.writeFile(toplevel_meta_data_path)
            self.logger.info("Wrote meta data to: " + 
                    toplevel_meta_data_path)
        except RuntimeError as err:
            self.logger.error(
              "Failed to write toplevel meta information parset: %s" % str(
                                    toplevel_meta_data_path))
            return 1

        skyimage_metadata = "%s_feedback_SkyImage" % (self.parset_file,)
        correlated_metadata = "%s_feedback_Correlated" % (self.parset_file,)

        # Create a parset-file containing the metadata for MAC/SAS at nodes
        self.run_task("get_metadata", placed_data_image_map,           
            parset_prefix = parset_prefix,
            product_type = "SkyImage",
            metadata_file = skyimage_metadata)

        self.run_task("get_metadata", placed_correlated_map,
            parset_prefix = parset_prefix,
            product_type = "Correlated",
            metadata_file = correlated_metadata)

        self.send_feedback_processing(toplevel_meta_data)
        self.send_feedback_dataproducts(parameterset(skyimage_metadata))
        self.send_feedback_dataproducts(parameterset(correlated_metadata))
Example #11
def load_parameters(filename):
    """
    Load parameters from file and return them as a hash.
    """
    if LOFAR_PARAMETERSET:
        data = parameterset(filename).dict()
    else:
        data = ConfigObj(filename, raise_errors=True, file_error=True)
    return data
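A minimal, hedged call sketch (the file name and key are illustrative; which backend is used depends on the LOFAR_PARAMETERSET flag):

params = load_parameters('job.parset')      # parset-style or ConfigObj-style file
print(params['Observation.startTime'])      # illustrative key; both backends return a dict-like object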
Example #12
def load_parameters(filename):
    """
    Load parameters from file and return them as a hash.
    """
    if LOFAR_PARAMETERSET:
        data = parameterset(filename).dict()
    else:
        data = ConfigObj(filename, raise_errors=True, file_error=True)
    return data
Example #13
        def do_qa(self, otdb_id):
            '''
            try to do all qa (quality assurance) steps for the given otdb_id
            resulting in an h5 MS-extract file and inspection plots
            :param int otdb_id: observation/pipeline otdb id for which the conversion needs to be done.
            :return: None
            '''

            hdf5_file_path = None

            with OTDBRPC.create(exchange=self.exchange,
                                broker=self.broker,
                                timeout=5) as otdbrpc:
                parset = parameterset(
                    otdbrpc.taskGetSpecification(otdb_id=otdb_id).get(
                        "specification", ''))

                if not parset:
                    logger.warning("could not find a parset for otdb_id %s.",
                                   otdb_id)
                    return

                if parset.getBool(
                        'ObsSW.Observation.DataProducts.Output_Correlated.enabled'
                ):
                    hdf5_file_path = self._convert_ms2hdf5(otdb_id)
                elif parset.getBool(
                        'ObsSW.Observation.DataProducts.Output_CoherentStokes.enabled'
                ):
                    hdf5_file_path = self._convert_bf2hdf5(otdb_id)
                else:
                    logger.info(
                        "No uv or cs dataproducts avaiblable to convert for otdb_id %s",
                        otdb_id)
                    return

            if hdf5_file_path:
                # keep a note of where the h5 file was stored for this unfinished otdb_id
                self._unfinished_otdb_id_map[otdb_id] = hdf5_file_path

                # cluster it
                self._cluster_h5_file(hdf5_file_path, otdb_id)

                self._copy_hdf5_to_nfs_dir(hdf5_file_path)

                plot_dir_path = self._create_plots_for_h5_file(
                    hdf5_file_path, otdb_id)
                plot_dir_path = self._move_plots_to_nfs_dir(plot_dir_path)

                # and notify that we're finished
                self._send_event_message(
                    'Finished', {
                        'otdb_id': otdb_id,
                        'hdf5_file_path': hdf5_file_path,
                        'plot_dir_path': plot_dir_path or ''
                    })
Example #14
def to_parset(data, prefix=''):
    """
    Convert the data in the variable `data` to a LOFAR parameterset. Values
    may contain vectors (python lists) or records (python dicts) of scalars.
    Deeper nested structures must be unraveled into separate key/value pairs,
    where the name of the nested value is moved into the key. Keys for
    vector values will get an index attached to their name.
    
    For example, the dictionary entry
        'vec_rec' : [{1:'a', 2:'b'}, {3:'c'}]
    will be converted to the following parameterset key/value pairs
        vec_rec[0]={1: 'a', 2: 'b'}
        vec_rec[1]={3: 'c'}
    And, the dictionary entry
        'rec_vec' : {'a':[1, 2], 'b':[3]}
    will be converted to
        rec_vec.a=[1, 2]
        rec_vec.b=[3]
    """
    result = parameterset()
    if isinstance(data, dict):
        for key, value in data.iteritems():
            fullkey = prefix + '.' + key if prefix else key
            if isinstance(value, dict):
                if any(isinstance(v, dict) or isinstance(v, list) 
                    for v in value.values()):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            elif isinstance(value, list):
                if any(isinstance(v, dict) or isinstance(v, list) 
                    for v in value):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            else:
                result.replace(fullkey, str(value))
    elif isinstance(data, list):
        for index, value in enumerate(data):
            fullkey = prefix + '[%d]' % index
            if isinstance(value, dict):
                if any(isinstance(v, dict) or isinstance(v, list) 
                    for v in value.values()):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            elif isinstance(value, list):
                if any(isinstance(v, dict) or isinstance(v, list) 
                    for v in value):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            else:
                result.replace(fullkey, str(value))
    return result
Example #15
 def verify_and_estimate(self, parset, input_files={}):
     """ Create estimates for a single process based on its parset and input files"""
     if self._checkParsetForRequiredKeys(parset):
         estimates = self._calculate(parameterset(parset), input_files)
     else:
         raise ValueError('The parset is incomplete')
     result = {}
     result[self.name] = {}
     result[self.name]['storage'] = estimates['storage']
     result[self.name]['bandwidth'] = estimates['bandwidth']
     return result
Example #16
 def verify_and_estimate(self, parset, input_files={}):
     """ Create estimates for a single process based on its parset and input files"""
     if self._checkParsetForRequiredKeys(parset):
         estimates = self._calculate(parameterset(parset), input_files)
     else:
         raise ValueError('The parset is incomplete')
     result = {}
     result[self.name] = {}
     result[self.name]['storage'] = estimates['storage']
     result[self.name]['bandwidth'] = estimates['bandwidth']
     return result
Example #17
 def _read_files(self):
     """Read data file locations from parset-file"""
     self.logger.debug("Reading data file locations from parset-file: %s" %
                       self.inputs['parset'])
     parset = parameterset(self.inputs['parset'])
     dps = parset.makeSubset(parset.fullModuleName('DataProducts') + '.')
     return [
         tuple(os.path.join(location, filename).split(':'))
             for location, filename in zip(
                 dps.getStringVector('Input_Correlated.locations'),
                 dps.getStringVector('Input_Correlated.filenames'))
     ]
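A hedged illustration of the host:path split performed above; locations in the DataProducts parset are typically of the form '<host>:<directory>' (the values below are made up):

import os

location, filename = 'locus001:/data/L12345', 'L12345_SB000_uv.MS'
print(tuple(os.path.join(location, filename).split(':')))
# -> ('locus001', '/data/L12345/L12345_SB000_uv.MS')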
Example #18
 def _read_files(self):
     """Read data file locations from parset-file"""
     self.logger.debug("Reading data file locations from parset-file: %s" %
                       self.inputs['parset'])
     parset = parameterset(self.inputs['parset'])
     dps = parset.makeSubset(parset.fullModuleName('DataProducts') + '.')
     return [
         tuple(os.path.join(location, filename).split(':'))
         for location, filename in zip(
             dps.getStringVector('Input_Correlated.locations'),
             dps.getStringVector('Input_Correlated.filenames'))
     ]
Example #19
    def __init__(self, filename):
        parset = parameterset(filename)
        self.filename = filename
        self.allocated = False

        self.positions = []
        self.subbands = []
        try:
            for beam in range(parset.getInt("Observation.nrBeams")):
                ra = parset.getFloat("Observation.Beam[%d].angle1" % beam)
                dec = parset.getFloat("Observation.Beam[%d].angle2" % beam)
                self.positions.append((ra, dec))
                try:
                    self.subbands.append(parset.get('Observation.Beam[%d].subbandList' % beam).expand().getIntVector())
                except RuntimeError:
                    self.subbands.append([])
        except RuntimeError:
            pass

        try:
            self.time = [
                parset.getString('Observation.startTime'),
                parset.getString('Observation.stopTime'),
            ]
        except RuntimeError:
            self.time = []

        try:
            self.stations = parset.get('Observation.VirtualInstrument.stationList').expand().getStringVector()
        except RuntimeError:
            self.stations = []
        try:
            self.clock = int(parset.getString("Observation.clockMode")[-3:])
        except RuntimeError:
            self.clock = None
        try:
            self.antennaset = parset.getString('Observation.antennaSet')
        except RuntimeError:
            self.antennaset = None
        try:
            self.filter = parset.getString("Observation.bandFilter")
        except RuntimeError:
            self.filter = None
        self.campaign = {}
        if "Observation.Campaign.name" in parset.keys():
            self.campaign['name'] = parset.getString("Observation.Campaign.name")
        else:
            self.campaign['name'] = None
        if "Observation.Campaign.title" in parset.keys():
            self.campaign['title'] = parset.getString("Observation.Campaign.title")
        else:
            self.campaign['title'] = None
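The constructor above repeats the same try/except RuntimeError pattern for nearly every key; a small helper capturing that pattern (an assumption, not part of the original class) could look like this:

def get_or_default(parset, key, default=None):
    """Return parset.getString(key), or `default` when the key is missing."""
    try:
        return parset.getString(key)
    except RuntimeError:
        return default

# e.g.: self.antennaset = get_or_default(parset, 'Observation.antennaSet')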
Example #20
 def __init__(self):
     """
     Initialize member variables and call superclass init function
     """
     control.__init__(self)
     self.parset = parameterset()
     self.input_data = DataMap()
     self.target_data = DataMap()
     self.output_data = DataMap()
     self.scratch_directory = None
     self.parset_feedback_file = None
     self.parset_dir = None
     self.mapfile_dir = None
Example #21
 def __init__(self):
     """
     Initialize member variables and call superclass init function
     """
     control.__init__(self)
     self.parset = parameterset()
     self.input_data = DataMap()
     self.target_data = DataMap()
     self.output_data = DataMap()
     self.scratch_directory = None
     self.parset_feedback_file = None
     self.parset_dir = None
     self.mapfile_dir = None
Example #22
    def verify_and_estimate(self, parset, predecessor_estimates=[]):
        """ Create estimates for an observation or pipeline step based on its parset and,
            in case of a pipeline step, all estimates of its direct predecessor(s).
        """

        self.verify(parset, predecessor_estimates)

        result = self._calculate(parameterset(parset), predecessor_estimates)

        logger.info('Estimates for %s:' % self.name)
        logger.info(pprint.pformat(result))

        return result
Example #23
def patch_parset(parset, data, output_dir=None):
    """
    Generate a parset file by adding the contents of the data dictionary to
    the specified parset object. Write it to file, and return the filename.

    `parset` may either be the filename of a parset-file or an instance of
    `lofar.parameterset.parameterset`.
    """
    if isinstance(parset, str):
        temp_parset = parameterset(parset)
    else:
        temp_parset = parset.makeSubset('')  # a sneaky way to copy the parset
    for key, value in data.items():
        temp_parset.replace(key, str(value))
    fd, output = mkstemp(dir=output_dir)
    temp_parset.writeFile(output)
    os.close(fd)
    return output
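A brief, hedged usage sketch (file name and keys are illustrative, mirroring the call in Example #30); the returned path is a temporary file that the caller removes when done:

patched_path = patch_parset('bbs_control.parset',
                            {'BBDB.Key': 'my_key', 'Observation': 'run.gvds'})
print(patched_path)   # path of the temporary file holding the patched parset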
Example #24
def patch_parset(parset, data, output_dir=None):
    """
    Generate a parset file by adding the contents of the data dictionary to
    the specified parset object. Write it to file, and return the filename.

    `parset` may either be the filename of a parset-file or an instance of
    `lofar.parameterset.parameterset`.
    """
    if isinstance(parset, str):
        temp_parset = parameterset(parset)
    else:
        temp_parset = parset.makeSubset('')  # a sneaky way to copy the parset
    for key, value in data.iteritems():
        temp_parset.replace(key, str(value))
    fd, output = mkstemp(dir=output_dir)
    temp_parset.writeFile(output)
    os.close(fd)
    return output
Example #25
    def __init__(self, parsetDict):
        # The constructor gets the subset of the NDPPP parset containing
        # all key-value pairs for this step.
        # Note: the superclass constructor MUST be called.
        DPStep.__init__(self, parsetDict)
        parset = parameterset(parsetDict)

        self.itsTimeFill = 0.
        self.itsTimeFlag = 0.
        self.itsTimeReorder = 0.
        self.itsTimeSolve = 0.

        self.itsSols = []
        self.itsTimeSlot = 0
        self.itsMinBlPerAnt = parset.getInt('minBlPerAnt', 4)
        self.itsSolInt = parset.getInt('solint', 1)
        if self.itsSolInt > 1:
            raise ("SolInt>1 is not yet supported")
        self.itsNChan = parset.getInt('nchan', 0)
Example #26
def processMessages(receiver, matchPrefix, execPath, msgSaveDir):
    while True:
        msg = None
        try:
            msg = receiver.get()  # blocking
            if msg is None:
                continue

            content = msg.content()
            # payload type can be unicode, but parameterset only converts str to std::string
            message = str(content.payload)
            ps = lofParset.parameterset()
            ps.adoptBuffer(message)
            hosts = getOutputHosts(ps, matchPrefix)
            if hosts:
                logger.info('Received message is applicable to us, so act on it')

                obsId = content.sasid
                messageFilename = msgSaveDir + 'L' + obsId + '.parset.xml'

                try:
                    saveData(messageFilename, message)

                    hosts = uniq(hosts)
                    hosts.sort()

                    runProcess(execPath, messageFilename, hosts)
                except IOError as exc:  # saveData()
                    logger.error('Skipped running executable: failed to save message to %s: %s',
                                 exc.filename, exc.strerror)
                except OSError as exc:  # runProcess()
                    logger.error('Failed to run executable: %s', exc.strerror)

            logger.info('Done with message')

        except lofMess.message.MessageException as exc:  # XMLDoc(), _get_data()
            logger.error('Failed to parse or retrieve node from XML message: %s', exc.message)

        finally:
            if msg is not None:
                receiver.ack(msg)  # optional for topics, needed for queues
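A minimal, hedged sketch of the adoptBuffer() call used above: it parses parset-formatted text (key=value lines) from an in-memory string; the key below is taken from another example in this listing, the value is made up:

ps = lofParset.parameterset()
ps.adoptBuffer('ObsSW.Observation.processType=Observation\n')
print(ps.getString('ObsSW.Observation.processType'))   # -> Observation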
Example #27
    def go(self):
        self.logger.info("Starting vdsreader run")
        super(vdsreader, self).go()

        # *********************************************************************
        # 1. Open the gvds file as a parameterset
        try:
            gvds = parameterset(self.inputs['gvds'])
        except:
            self.logger.error("Unable to read G(V)DS file")
            raise

        self.logger.info("Building list of measurementsets")

        # **********************************************************************
        # 2. Convert all Part<x>.FileName values to MS names
        ms_names = [
            gvds.getString("Part%d.FileName" % (part_no,))
            for part_no in xrange(gvds.getInt("NParts"))
        ]
        self.logger.debug(ms_names)

        self.outputs['data'] = ms_names

        # **********************************************************************\
        # 3. parse start and end time and pointing information
        try:
            self.outputs['start_time'] = gvds.getString('StartTime')
            self.outputs['end_time'] = gvds.getString('EndTime')
        except:
            self.logger.warn("Failed to read start/end time from GVDS file")
        try:
            self.outputs['pointing'] = {
                'type': gvds.getStringVector('Extra.FieldDirectionType')[0],
                'dec': gvds.getStringVector('Extra.FieldDirectionDec')[0],
                'ra': gvds.getStringVector('Extra.FieldDirectionRa')[0]
            }
        except:
            self.logger.warn("Failed to read pointing information from GVDS file")
        return 0
Example #28
    def go(self):
        self.logger.info("Starting vdsreader run")
        super(vdsreader, self).go()

        # *********************************************************************
        # 1. Open the gvds file as a parameterset
        try:
            gvds = parameterset(self.inputs['gvds'])
        except:
            self.logger.error("Unable to read G(V)DS file")
            raise

        self.logger.info("Building list of measurementsets")

        # **********************************************************************
        # 2. Convert all Part<x>.FileName values to MS names
        ms_names = [
            gvds.getString("Part%d.FileName" % (part_no,))
            for part_no in range(gvds.getInt("NParts"))
        ]
        self.logger.debug(ms_names)

        self.outputs['data'] = ms_names

        # **********************************************************************\
        # 3. parse start and end time and pointing information
        try:
            self.outputs['start_time'] = gvds.getString('StartTime')
            self.outputs['end_time'] = gvds.getString('EndTime')
        except:
            self.logger.warn("Failed to read start/end time from GVDS file")
        try:
            self.outputs['pointing'] = {
                'type': gvds.getStringVector('Extra.FieldDirectionType')[0],
                'dec': gvds.getStringVector('Extra.FieldDirectionDec')[0],
                'ra': gvds.getStringVector('Extra.FieldDirectionRa')[0]
            }
        except:
            self.logger.warn("Failed to read pointing information from GVDS file")
        return 0
Example #29
    def _prepare_steps(self, **kwargs):
        """
        Prepare for running the NDPPP program. This means, for one thing,
        patching the parsetfile with the correct input/output MS names,
        start/end times if available, etc. If a demixing step must be performed,
        some extra work needs to be done.
        
        Returns: patch dictionary that must be applied to the parset.
        """
        self.logger.debug(
            "Time interval: %s %s" % (kwargs['start_time'], kwargs['end_time'])
        )
        # Create output directory for output MS.
        create_directory(os.path.dirname(kwargs['tmpfile']))

        patch_dictionary = {
            'msin': kwargs['infile'],
            'msout': kwargs['tmpfile'],
            'uselogger': 'True'
        }
        if kwargs['start_time']:
            patch_dictionary['msin.starttime'] = kwargs['start_time']
        if kwargs['end_time']:
            patch_dictionary['msin.endtime'] = kwargs['end_time']

        # If we need to do a demixing step, we have to do some extra work.
        # We have to read the parsetfile to check this.
        parset = parameterset(kwargs['parsetfile'])
        for step in parset.getStringVector('steps'):
            if parset.getString(step + '.type', '').startswith('demix'):
                patch_dictionary.update(
                    self._prepare_demix_step(step, **kwargs)
                )

        # Return the patch dictionary that must be applied to the parset.
        return patch_dictionary
Example #30
    def go(self):
        self.logger.info("Starting BBS run")
        super(new_bbs, self).go()

        #                Check for relevant input parameters in the parset-file
        # ---------------------------------------------------------------------
        self.logger.debug("Reading parset from %s" % self.inputs['parset'])
        self.parset = parameterset(self.inputs['parset'])

        self._set_input('db_host', 'BBDB.Host')
        self._set_input('db_user', 'BBDB.User')
        self._set_input('db_name', 'BBDB.Name')
        self._set_input('db_key', 'BBDB.Key')

        #self.logger.debug("self.inputs = %s" % self.inputs)

        #                                         Clean the blackboard database
        # ---------------------------------------------------------------------
        self.logger.info(
            "Cleaning BBS database for key '%s'" % (self.inputs['db_key'])
        )
        command = ["psql",
                   "-h", self.inputs['db_host'],
                   "-U", self.inputs['db_user'],
                   "-d", self.inputs['db_name'],
                   "-c", "DELETE FROM blackboard.session WHERE key='%s';" %
                         self.inputs['db_key']
                  ]
        self.logger.debug(command)
        if subprocess.call(command) != 0:
            self.logger.warning(
                "Failed to clean BBS database for key '%s'" %
                self.inputs['db_key']
            )

        #                  Create a bbs_map describing the file mapping on disk
        # ---------------------------------------------------------------------
        if not self._make_bbs_map():
            return 1

        # Produce a GVDS file, describing the data that must be processed.
        gvds_file = self.run_task(
            "vdsmaker",
            self.inputs['data_mapfile'],
            gvds=self.inputs['gvds']
        )['gvds']

        #      Construct a parset for BBS GlobalControl by patching the GVDS
        #           file and database information into the supplied template
        # ------------------------------------------------------------------
        self.logger.debug("Building parset for BBS control")
        # Create a location for parsets
        job_directory = self.config.get(
                            "layout", "job_directory")
        parset_directory = os.path.join(job_directory, "parsets")
        create_directory(parset_directory)

        # patch the parset and copy result to target location remove tempfile
        try:
            bbs_parset = utilities.patch_parset(
                self.parset,
                {
                    'Observation': gvds_file,
                    'BBDB.Key': self.inputs['db_key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
                    #'BBDB.Port': self.inputs['db_name'],
                }
            )
            bbs_parset_path = os.path.join(parset_directory,
                                           "bbs_control.parset")
            shutil.copyfile(bbs_parset, bbs_parset_path)
            self.logger.debug("BBS control parset is %s" % (bbs_parset_path,))

        finally:
            # Always remove the file in the tempdir
            os.remove(bbs_parset)

        try:
            #        When one of our processes fails, we set the killswitch.
            #      Everything else will then come crashing down, rather than
            #                                         hanging about forever.
            # --------------------------------------------------------------
            self.killswitch = threading.Event()
            self.killswitch.clear()
            signal.signal(signal.SIGTERM, self.killswitch.set)

            #                           GlobalControl runs in its own thread
            # --------------------------------------------------------------
            run_flag = threading.Event()
            run_flag.clear()
            bbs_control = threading.Thread(
                target=self._run_bbs_control,
                args=(bbs_parset, run_flag)
            )
            bbs_control.start()
            run_flag.wait()    # Wait for control to start before proceeding

            #      We run BBS KernelControl on each compute node by directly
            #                             invoking the node script using SSH
            #      Note that we use a job_server to send out job details and
            #           collect logging information, so we define a bunch of
            #    ComputeJobs. However, we need more control than the generic
            #     ComputeJob.dispatch method supplies, so we'll control them
            #                                          with our own threads.
            # --------------------------------------------------------------
            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            jobpool = {}
            bbs_kernels = []
            with job_server(self.logger, jobpool, self.error) as(jobhost,
                                                                   jobport):
                self.logger.debug("Job server at %s:%d" % (jobhost, jobport))
                for job_id, details in enumerate(self.bbs_map):
                    host, files = details
                    jobpool[job_id] = ComputeJob(
                        host, command,
                        arguments=[
                            self.inputs['kernel_exec'],
                            files,
                            self.inputs['db_key'],
                            self.inputs['db_name'],
                            self.inputs['db_user'],
                            self.inputs['db_host']
                        ]
                    )
                    bbs_kernels.append(
                        threading.Thread(
                            target=self._run_bbs_kernel,
                            args=(host, command, job_id, jobhost, str(jobport))
                        )
                    )
                self.logger.info("Starting %d threads" % len(bbs_kernels))
                for thread in bbs_kernels:
                    thread.start()
                self.logger.debug("Waiting for all kernels to complete")
                for thread in bbs_kernels:
                    thread.join()

            #         When GlobalControl finishes, our work here is done
            # ----------------------------------------------------------
            self.logger.info("Waiting for GlobalControl thread")
            bbs_control.join()
        finally:
            os.unlink(bbs_parset)

        if self.killswitch.isSet():
            #  If killswitch is set, then one of our processes failed so
            #                                   the whole run is invalid
            # ----------------------------------------------------------
            return 1

        self.outputs['mapfile'] = self.inputs['data_mapfile']
        return 0
Example #31
 def __init__(self):
     super(new_bbs, self).__init__()
     self.bbs_map = list()
     self.parset = parameterset()
     self.killswitch = threading.Event()
Example #32
    def __init__(self):
        super(control, self).__init__()

        self.parset = parameterset()
        self.momID = 0
        self.sasID = 0
Example #33
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Get input from parset, validate and cast to pipeline 'data types'
        #    Only perform work on existing files
        #    Create needed directories
        # Create a parameter-subset containing only python-control stuff.
        py_parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")

        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        input_correlated_mapfile = os.path.join(
            mapfile_dir, "input_correlated.mapfile"
        )
        output_correlated_mapfile = os.path.join(
            mapfile_dir, "output_correlated.mapfile"
        )
        output_instrument_mapfile = os.path.join(
            mapfile_dir, "output_instrument.mapfile"
        )
        self.input_data['correlated'].save(input_correlated_mapfile)
        self.output_data['correlated'].save(output_correlated_mapfile)
        self.output_data['instrument'].save(output_instrument_mapfile)

        if len(self.input_data['correlated']) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug("Processing: %s" %
            ', '.join(str(f) for f in self.input_data['correlated']))

        # *********************************************************************
        # 2. Create the databases needed for performing the work:
        #    VDS, describing data on the nodes
        #    sourcedb, for the skymodel (A-team)
        #    parmdb, for outputting solutions
        # Produce a GVDS file describing the data on the compute nodes.
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task(
                "vdsmaker", input_correlated_mapfile
            )['gvds']

        # Read metadata (start, end times, pointing direction) from GVDS.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create an empty parmdb for DPPP
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task(
                "setupparmdb", input_correlated_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'),
                suffix='.dppp.parmdb'
            )['mapfile']

        # Create a sourcedb to be used by the demixing phase of DPPP
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share', 'pipeline', 'skymodels', skymodel + '.skymodel'
            )
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", input_correlated_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'),
                skymodel=skymodel,
                suffix='.dppp.sourcedb',
                type='blob'
            )['mapfile']

        # *********************************************************************
        # 3. Run NDPPP to demix the A-Team sources
        #    TODOW: Do flagging?
        # Create a parameter-subset for DPPP and write it to file.
        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            dppp_mapfile = self.run_task(
                "ndppp", input_correlated_mapfile,
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=
                    py_parset.getStringVector('PreProcessing.demix_always'),
                demix_if_needed=
                    py_parset.getStringVector('PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile
            )['mapfile']

        # *********************************************************************
        # 4. Run BBS with a model of the calibrator
        #    Create a parmdb for calibration solutions
        #    Create sourcedb with known calibration solutions
        #    Run bbs with both
        # Create an empty parmdb for BBS
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task(
                "setupparmdb", dppp_mapfile,
                mapfile=os.path.join(mapfile_dir, 'bbs.parmdb.mapfile'),
                suffix='.bbs.parmdb'
            )['mapfile']

        # Create a sourcedb based on sourcedb's input argument "skymodel"
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", input_correlated_mapfile,
                skymodel=os.path.join(
                    self.config.get('DEFAULT', 'lofarroot'),
                    'share', 'pipeline', 'skymodels',
                    py_parset.getString('Calibration.SkyModel') +
                        '.skymodel'),
                mapfile=os.path.join(mapfile_dir, 'bbs.sourcedb.mapfile'),
                suffix='.bbs.sourcedb')['mapfile']

        # Create a parameter-subset for BBS and write it to file.
        bbs_parset = os.path.join(parset_dir, "BBS.parset")
        py_parset.makeSubset('BBS.').writeFile(bbs_parset)

        # Run BBS to calibrate the calibrator source(s).
        with duration(self, "bbs_reducer"):
            bbs_mapfile = self.run_task(
                "bbs_reducer", dppp_mapfile,
                parset=bbs_parset,
                instrument_mapfile=parmdb_mapfile,
                sky_mapfile=sourcedb_mapfile
            )['data_mapfile']

        # *********************************************************************
        # 5. Perform gain outlier correction on the found calibration solutions
        #    Swapping outliers in the gains with the median 
        # Export the calibration solutions using gainoutliercorrection and store
        # the results in the files specified in the instrument mapfile.
        export_instrument_model = py_parset.getBool(
            'Calibration.exportCalibrationParameters', False)

        with duration(self, "gainoutliercorrection"):
            self.run_task("gainoutliercorrection",
                      (parmdb_mapfile, output_instrument_mapfile),
                      sigma=1.0,
                      export_instrument_model=export_instrument_model) # TODO: Parset parameter

        # *********************************************************************
        # 6. Copy corrected MS's to their final output destination.
        with duration(self, "copier"):
            self.run_task("copier",
                mapfile_source=bbs_mapfile,
                mapfile_target=output_correlated_mapfile,
                mapfiles_dir=mapfile_dir,
                mapfile=output_correlated_mapfile
            )

        # *********************************************************************
        # 7. Create feedback file for further processing by the LOFAR framework
        #    a. get metadata of the measurement sets
        #    b. get metadata of the instrument models
        #    c. join the two files and write the final feedback file
        correlated_metadata = os.path.join(parset_dir, "correlated.metadata")
        instrument_metadata = os.path.join(parset_dir, "instrument.metadata")
        with duration(self, "get_metadata"):
            self.run_task("get_metadata", output_correlated_mapfile,
                parset_file=correlated_metadata,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')),
                product_type="Correlated")

        with duration(self, "get_metadata"):
            self.run_task("get_metadata", output_instrument_mapfile,
                parset_file=instrument_metadata,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')),
                product_type="InstrumentModel")

        parset = parameterset(correlated_metadata)
        parset.adoptFile(instrument_metadata)
        parset.writeFile(self.parset_feedback_file)

        return 0
Example #34
 def __init__(self):
     control.__init__(self)
     self.parset = parameterset()
Example #35
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles
        # Create a parameter-subset containing only python-control stuff.
        py_parset = self.parset.makeSubset(
            'ObsSW.Observation.ObservationControl.PythonControl.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # Create some needed directories
        job_dir = self.config.get("layout", "job_directory")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(mapfile_dir)
        parset_dir = os.path.join(job_dir, "parsets")
        create_directory(parset_dir)

        # *********************************************************************
        # 2. Copy the instrument files to the correct node
        # The instrument files are currently located on the wrong nodes
        # Copy to correct nodes and assign the instrument table the now
        # correct data

        # Copy the instrument files to the correct nodes: failures might happen;
        # update both instrument and data map to contain only successes!
        self._copy_instrument_files(mapfile_dir)

        # Write input- and output data map-files.
        data_mapfile = os.path.join(mapfile_dir, "data.mapfile")
        self.input_data['data'].save(data_mapfile)
        copied_instrument_mapfile = os.path.join(mapfile_dir, "copied_instrument.mapfile")
        self.input_data['instrument'].save(copied_instrument_mapfile)
        self.logger.debug(
            "Wrote input data mapfile: %s" % data_mapfile
        )

        # Save copied files to a new mapfile
        corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile")
        self.output_data['data'].save(corrected_mapfile)
        self.logger.debug(
            "Wrote output corrected data mapfile: %s" % corrected_mapfile
        )

        # Validate number of copied files, abort on zero files copied
        if len(self.input_data['data']) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug("Processing: %s" %
            ', '.join(str(f) for f in self.input_data['data'])
        )

        # *********************************************************************
        # 3. Create database needed for performing work: 
        #    - GVDS, describing data on the compute nodes
        #    - SourceDB, for skymodel (A-team)
        #    - ParmDB for outputting solutions
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task("vdsmaker", data_mapfile)['gvds']

        # Read metadata (e.g., start- and end-time) from the GVDS file.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create an empty parmdb for DPPP
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task("setupparmdb", data_mapfile)['mapfile']

        # Create a sourcedb to be used by the demixing phase of DPPP
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share', 'pipeline', 'skymodels', skymodel + '.skymodel'
            )
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", data_mapfile,
                skymodel=skymodel,
                suffix='.dppp.sourcedb',
                type='blob'
            )['mapfile']

        # *********************************************************************
        # 4. Run NDPPP to demix the A-Team sources
        # Create a parameter-subset for DPPP and write it to file.
        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            dppp_mapfile = self.run_task("ndppp",
                data_mapfile,
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=
                    py_parset.getStringVector('PreProcessing.demix_always'),
                demix_if_needed=
                    py_parset.getStringVector('PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.mapfile')
            )['mapfile']

        # ********************************************************************
        # 5. Run BBS using the instrument file from the target observation
        # Create an empty sourcedb for BBS
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", data_mapfile
            )['mapfile']

        # Create a parameter-subset for BBS and write it to file.
        bbs_parset = os.path.join(parset_dir, "BBS.parset")
        py_parset.makeSubset('BBS.').writeFile(bbs_parset)

        # Run BBS to calibrate the target source(s).
        with duration(self, "bbs_reducer"):
            bbs_mapfile = self.run_task("bbs_reducer",
                dppp_mapfile,
                parset=bbs_parset,
                instrument_mapfile=copied_instrument_mapfile,
                sky_mapfile=sourcedb_mapfile
            )['data_mapfile']

        # *********************************************************************
        # 6. Copy the MS's to their final output destination.
        # When the copier recipe has run, the map-file named in
        # corrected_mapfile will contain an updated map of output files.
        with duration(self, "copier"):
            self.run_task("copier",
                mapfile_source=bbs_mapfile,
                mapfile_target=corrected_mapfile,
                mapfiles_dir=mapfile_dir,
                mapfile=corrected_mapfile,
                allow_move=True
            )

        # *********************************************************************
        # 7. Create feedback for further processing by the LOFAR framework
        metadata_file = "%s_feedback_Correlated" % (self.parset_file,)
        with duration(self, "get_metadata"):
            self.run_task("get_metadata", corrected_mapfile,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')
                ),
                product_type="Correlated",
                metadata_file=metadata_file)

        self.send_feedback_processing(parameterset({'feedback_version': feedback_version}))
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
Example #36
    def go(self):
        super(get_metadata, self).go()
        # ********************************************************************
        # 1. Parse and validate inputs
        args = self.inputs['args']
        product_type = self.inputs['product_type']
        global_prefix = self.inputs['parset_prefix']
        # Add a trailing dot (.) if not present in the prefix.
        if global_prefix and not global_prefix.endswith('.'):
            global_prefix += '.'

        if product_type not in self.valid_product_types:
            self.logger.warn(
                "Unknown product type: %s\n\tValid product types are: %s" %
                (product_type, ', '.join(self.valid_product_types))
        )

        # ********************************************************************
        # 2. Load mapfiles
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        data = DataMap.load(args[0])

        # ********************************************************************
        # 3. call node side of the recipe
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        data.iterator = DataMap.SkipIterator
        jobs = []
        for inp in data:
            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        self.inputs['product_type']
                    ]
                )
            )
        self._schedule_jobs(jobs)
        for job, inp in zip(jobs, data):
            if job.results['returncode'] != 0:
                inp.skip = True

        # ********************************************************************
        # 4. validate performance
        # 4. Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs"
                )
        self.logger.debug("Updating data map file: %s" % args[0])
        data.save(args[0])

        # ********************************************************************
        # 5. Create the parset-file and return it to the caller
        parset = parameterset()
        prefix = "Output_%s_" % product_type  #Underscore is needed because
                             # Mom / LTA cannot differentiate input and output
        parset.replace('%snrOf%s' % (global_prefix, prefix), str(len(jobs)))

        prefix = global_prefix + prefix
        for idx, job in enumerate(jobs):
            self.logger.debug("job[%d].results = %s" % (idx, job.results))

            # the Master/node communication adds a monitor_stats entry,
            # this must be removed manually here
            meta_data_parset = metadata.to_parset(job.results)
            try:
                meta_data_parset.remove("monitor_stats")
            except:
                pass

            parset.adoptCollection(meta_data_parset,
                                   '%s[%d].' % (prefix, idx))

        # Return result to caller
        parset.writeFile(self.inputs["metadata_file"])
        return 0
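For reference, a minimal sketch (with illustrative key names mirroring the code above, not taken from a real run) of how `parameterset.replace` and `adoptCollection` assemble the prefixed feedback keys that are written out here:

from lofar.parameterset import parameterset

feedback = parameterset()
prefix = "ObsSW.Observation.DataProducts.Output_Correlated_"  # assumed prefix layout

# one top-level counter key ...
feedback.replace("ObsSW.Observation.DataProducts.nrOfOutput_Correlated_", "1")

# ... plus one indexed sub-parset per dataproduct
per_file = parameterset()
per_file.add("filename", "L123456_SB000_uv.MS")  # hypothetical dataproduct
per_file.add("size", "1024")
feedback.adoptCollection(per_file, "%s[0]." % prefix)

feedback.writeFile("feedback.parset")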
Beispiel #37
0
    def doAssignment(self, specification_tree):
        logger.info('doAssignment: specification_tree=%s' % (specification_tree))

        otdb_id = specification_tree['otdb_id']
        taskType = specification_tree.get('task_type', '').lower()
        status = specification_tree.get('state', '').lower()

        if status not in ['approved', 'prescheduled']: # cep2 accepts both, cep4 only prescheduled, see below
            logger.info('skipping specification for otdb_id=%s because status=%s', otdb_id, status)
            return

        #parse main parset...
        mainParset = parameterset(specification_tree['specification'])

        momId = mainParset.getInt('Observation.momID', -1)
        try:
            startTime = datetime.strptime(mainParset.getString('Observation.startTime'), '%Y-%m-%d %H:%M:%S')
            endTime = datetime.strptime(mainParset.getString('Observation.stopTime'), '%Y-%m-%d %H:%M:%S')
        except ValueError:
            logger.warning('cannot parse start/end time from specification for otdb_id=%s', otdb_id)

        # insert new task and specification in the radb
        # any existing specification and task with same otdb_id will be deleted automatically
        logger.info('doAssignment: insertSpecification momId=%s, otdb_id=%s, status=%s, taskType=%s, startTime=%s, endTime=%s' %
                    (momId, otdb_id, status, taskType, startTime, endTime))
        result = self.radbrpc.insertSpecificationAndTask(momId, otdb_id, status, taskType, startTime, endTime, str(mainParset))

        if not result['inserted']:
            logger.error('could not insert specification and task')
            return

        specificationId = result['specification_id']
        taskId = result['task_id']
        logger.info('doAssignment: inserted specification (id=%s) and task (id=%s)' % (specificationId,taskId))

        # do not assign resources to task for other clusters than cep4
        if not self.checkClusterIsCEP4(mainParset):
            return

        if status != 'prescheduled':
            logger.info('skipping resource assignment for CEP4 task otdb_id=%s because status=%s' % (otdb_id, status))
            return

        needed = self.getNeededResouces(specification_tree)
        logger.info('doAssignment: getNeededResouces=%s' % (needed,))

        if str(otdb_id) not in needed:
            logger.error("no otdb_id %s found in estimator results %s" % (otdb_id, needed))
            return

        if taskType not in needed[str(otdb_id)]:
            logger.error("no task type %s found in estimator results %s" % (taskType, needed[str(otdb_id)]))
            return

        # make sure the availability in the radb is up to date
        # TODO: this should be updated regularly
        try:
            self.updateAvailableResources('cep4')
        except Exception as e:
            logger.warning("Exception while updating available resources: %s" % str(e))

        # claim the resources for this task
        # during the claim inserts the claims are automatically validated
        # and if not enough resources are available, then they are put to conflict status
        # also, if any claim is in conflict state, then the task is put to conflict status as well
        main_needed = needed[str(otdb_id)]
        task = self.radbrpc.getTask(taskId)
        claimed, claim_ids = self.claimResources(main_needed, task)
        if claimed:
            conflictingClaims = self.radbrpc.getResourceClaims(task_ids=taskId, status='conflict')

            if conflictingClaims:
                logger.warning('doAssignment: %s conflicting claims detected. Task cannot be scheduled. %s' %
                               (len(conflictingClaims), conflictingClaims))
            else:
                logger.info('doAssignment: all claims for task %s were successfully claimed. Setting task status to scheduled' % (taskId,))
                self.radbrpc.updateTaskAndResourceClaims(taskId, task_status='scheduled', claim_status='allocated')

        self.processPredecessors(specification_tree)
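The lookups `needed[str(otdb_id)][taskType]` above assume the resource estimator returns a nested dict keyed by otdb id and task type; a hedged illustration of that assumed shape:

# Assumed (illustrative) structure of the resource-estimator results consumed
# by doAssignment; the real estimator may add further resource-type keys.
needed = {
    "123456": {                        # str(otdb_id)
        "observation": {               # taskType, lower-cased
            "total_data_size": 1024,   # hypothetical figures
            "total_bandwidth": 8,
        },
    },
}

otdb_id, task_type = 123456, "observation"
main_needed = needed[str(otdb_id)][task_type]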
Beispiel #38
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.

        Note: return 0 on success, 1 on failure.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles.
        #
        # Note that PULP will read many of these fields directly. That makes
        # the following fields, and possibly others, part of the API towards
        # PULP:
        #
        # self.config
        # self.logger
        # self.input_data
        # self.output_data
        # self.parset_feedback_file
        # self.job_dir

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        self.job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(self.job_dir, "parsets")
        mapfile_dir = os.path.join(self.job_dir, "mapfiles")
        
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        # Coherent Stokes
        self.input_CS_mapfile = os.path.join(mapfile_dir, "input_CS_data.mapfile")
        self.input_data['coherent'].save(self.input_CS_mapfile)
        # Incoherent Stokes
        self.input_IS_mapfile = os.path.join(mapfile_dir, "input_IS_data.mapfile")
        self.input_data['incoherent'].save(self.input_IS_mapfile)
        # Output data
        self.output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile")
        self.output_data['data'].save(self.output_data_mapfile)

        if len(self.input_data) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.pulsar_parms = self.parset.makeSubset(self.parset.fullModuleName('Pulsar') + '.')
        pulsar_parset = os.path.join(parset_dir, "Pulsar.parset")
        self.pulsar_parms.writeFile(pulsar_parset)
            
        self.logger.debug("Processing: %s" %
          ', '.join(str(f) for f in self.input_data))
        
        # Rebuilding sys.argv without the options given automatically by framework
        # --auto = automatic run from framework
        # -q = quiet mode, no user interaction
        sys.argv = ['pulp.py', '--auto', '-q']
      
        if (not self.coherentStokesEnabled):
          sys.argv.extend(["--noCS", "--noCV", "--noFE"])
          
        if (not self.incoherentStokesEnabled):
          sys.argv.append("--noIS")       

        # Tell PULP where to write the feedback to
        self.parset_feedback_file =  "%s_feedback" % (self.parset_file,)
       
        # Run the pulsar pipeline
        self.logger.debug("Starting pulp with: " + join(sys.argv))
        p = pulp.pulp(self) # TODO: MUCK self to capture the API

        # NOTE: PULP returns 0 on SUCCESS!!
        if p.go():
          self.logger.error("PULP did not succeed. Bailing out!")
          return 1

        # Read and forward the feedback
        try:
          metadata = parameterset(self.parset_feedback_file)
        except IOError as e:
          self.logger.error("Could not read feedback from %s: %s" % (self.parset_feedback_file, e))
          return 1
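As an aside, the sys.argv rebuilding above can be summarized in a small standalone helper; this is only an illustration of the flag logic in the snippet (the helper itself is hypothetical, the flag names are taken from the code):

def build_pulp_argv(coherent_enabled, incoherent_enabled):
    # --auto: automatic run from the framework, -q: quiet, no user interaction
    argv = ['pulp.py', '--auto', '-q']
    if not coherent_enabled:
        argv.extend(['--noCS', '--noCV', '--noFE'])
    if not incoherent_enabled:
        argv.append('--noIS')
    return argv

assert build_pulp_argv(True, False) == ['pulp.py', '--auto', '-q', '--noIS']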
    def run(self, input_image, bdsm_parameter_run1_path,
            bdsm_parameter_run2x_path, catalog_output_path, image_output_path,
            sourcedb_target_path, environment, working_directory,
            create_sourcdb_exec):
        """
        :param input_image: image to look for sources in
        :param bdsm_parameter_run1_path: parset with bdsm parameters for the
               first run
        :param bdsm_parameter_run2x_path: parset with bdsm parameters for the
               second and later runs
        :param catalog_output_path: Path to the full list of sources found
        :param image_output_path: Path to the fits image with all sources
               subtracted
        :param sourcedb_target_path: Path to store the sourcedb containing
               all the found sources
        :param environment: environment for runwithlog4cplus
        :param working_directory: Working directory
        :param create_sourcdb_exec: Path to the create sourcedb executable
        
        :rtype: self.outputs['source_db'] sourcedb_target_path
        
        """

        #******************************************************************
        # 0. Create the directories used in this recipe
        create_directory(working_directory)

        import lofar.bdsm as bdsm  # @UnresolvedImport
        self.logger.info("Starting imager_source_finding")
        self.environment.update(environment)
        # The default frequency is None (read from the image); save it for
        # later cycles, because the pybdsm output image loses the frequency
        # of the source image.
        frequency = None
        # Outputs of the for loop: the number of iterations performed and
        # whether any source was found
        n_itter_sourcefind = None
        sources_found = False
        max_sourcefind_itter = 5  # TODO: the maximum number of iterations is a magic value
        for idx in range(max_sourcefind_itter):
            # ******************************************************************
            # 1. Select correct input image
            # The first iteration uses the input image; the second and later
            # iterations use the output of the previous iteration and a
            # separate parameter set.
            if idx == 0:
                input_image_local = input_image # input_image_cropped
                image_output_path_local = image_output_path + "_0"
                bdsm_parameter_local = parameterset(bdsm_parameter_run1_path)
            else:
                input_image_local = image_output_path + "_{0}".format(
                                                                str(idx - 1))
                image_output_path_local = image_output_path + "_{0}".format(
                                                                    str(idx))
                bdsm_parameter_local = parameterset(bdsm_parameter_run2x_path)

            # *****************************************************************
            # 2. Parse the parameters and convert them to Python types where
            #    possible; this is needed for pybdsm
            bdsm_parameters = {}
            for key in bdsm_parameter_local.keys():
                parameter_value = bdsm_parameter_local.getStringVector(key)[0]
                try:
                    parameter_value = eval(parameter_value)
                except Exception:
                    pass  # keep the raw string (see the literal_eval sketch after this example)
                bdsm_parameters[key] = parameter_value

            # pybdsm needs its filename here, to derive the log location
            bdsm_parameters["filename"] = input_image_local


            # *****************************************************************
            # 3. Start pybdsm
            self.logger.debug(
                "Starting sourcefinder bdsm on {0} using parameters:".format(
                                                        input_image_local))
            self.logger.debug(repr(bdsm_parameters))
            img = bdsm.process_image(bdsm_parameters, frequency = frequency)

            # Always export the catalog 
            img.write_catalog(
                outfile = catalog_output_path + "_{0}".format(str(idx)),
                catalog_type = 'gaul', clobber = True,
                format = "bbs", force_output = True)

            # If no more sources can be matched with Gaussians (nsrc == 0),
            # break the loop
            if img.nsrc == 0:
                n_itter_sourcefind = idx
                break

            # We have at least found a single source!
            self.logger.debug("Number of source found: {0}".format(
                                                                img.nsrc))
            # *****************************************************************
            # 4. export the image 

            self.logger.debug("Wrote list of sources to file at: {0})".format(
                                                            catalog_output_path))
            img.export_image(outfile = image_output_path_local,
                                 img_type = 'gaus_resid', clobber = True,
                                 img_format = "fits")
            self.logger.debug("Wrote fits image with substracted sources"
                                  " at: {0})".format(image_output_path_local))

            # Save the frequency from the image header of the original input
            # file; this information is not written by pybdsm to the exported
            # image
            frequency = img.frequency


        # If not set, the maximum number of iterations was performed
        if n_itter_sourcefind is None:
            n_itter_sourcefind = max_sourcefind_itter

        # ********************************************************************
        # 5. The produced catalogs now need to be combined into a single list
        # Call with the number of loops and the path to the files, only combine
        # if we found sources
        self.logger.debug(
                "Writing source list to file: {0}".format(catalog_output_path))
        self._combine_source_lists(n_itter_sourcefind, catalog_output_path)

        # *********************************************************************
        # 6. Convert sourcelist to sourcedb
        self._create_source_db(catalog_output_path, sourcedb_target_path,
            working_directory, create_sourcdb_exec, False)
        # Assign the outputs
        self.outputs["catalog_output_path"] = catalog_output_path
        self.outputs["source_db"] = sourcedb_target_path
        return 0
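Related to step 2 above: a hedged sketch of the same parameter conversion done with `ast.literal_eval` instead of `eval`. This is not the recipe's actual code; `literal_eval` only accepts Python literals and leaves everything else as a plain string.

import ast
from lofar.parameterset import parameterset

def parset_to_bdsm_kwargs(parset_path):
    # Convert parset string values to Python objects for pybdsm without eval();
    # non-literal values stay plain strings.
    bdsm_parset = parameterset(parset_path)
    kwargs = {}
    for key in bdsm_parset.keys():
        value = bdsm_parset.getStringVector(key)[0]
        try:
            value = ast.literal_eval(value)
        except (ValueError, SyntaxError):
            pass  # keep the raw string
        kwargs[key] = value
    return kwargs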
Beispiel #40
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Get input from parset, validate and cast to pipeline 'data types'
        #    Only perform work on existing files
        #    Created needed directories
        # Create a parameter-subset containing only python-control stuff.
        py_parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")

        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        input_correlated_mapfile = os.path.join(mapfile_dir,
                                                "input_correlated.mapfile")
        output_correlated_mapfile = os.path.join(mapfile_dir,
                                                 "output_correlated.mapfile")
        output_instrument_mapfile = os.path.join(mapfile_dir,
                                                 "output_instrument.mapfile")
        self.input_data['correlated'].save(input_correlated_mapfile)
        self.output_data['correlated'].save(output_correlated_mapfile)
        self.output_data['instrument'].save(output_instrument_mapfile)

        if len(self.input_data['correlated']) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug(
            "Processing: %s" %
            ', '.join(str(f) for f in self.input_data['correlated']))

        # *********************************************************************
        # 2. Create database needed for performing work:
        #    GVDS, describing the data on the nodes
        #    sourcedb, for the skymodel (A-team)
        #    parmdb for outputting solutions
        # Produce a GVDS file describing the data on the compute nodes.
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task("vdsmaker",
                                      input_correlated_mapfile)['gvds']

        # Read metadata (start, end times, pointing direction) from GVDS.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create an empty parmdb for DPPP
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task("setupparmdb",
                                           input_correlated_mapfile,
                                           mapfile=os.path.join(
                                               mapfile_dir,
                                               'dppp.parmdb.mapfile'),
                                           suffix='.dppp.parmdb')['mapfile']

        # Create a sourcedb to be used by the demixing phase of DPPP
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share',
                'pipeline',
                'skymodels',
                skymodel + '.skymodel')
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task("setupsourcedb",
                                             input_correlated_mapfile,
                                             mapfile=os.path.join(
                                                 mapfile_dir,
                                                 'dppp.sourcedb.mapfile'),
                                             skymodel=skymodel,
                                             suffix='.dppp.sourcedb',
                                             type='blob')['mapfile']

        # *********************************************************************
        # 3. Run NDPPP to demix the A-Team sources
        #    TODOW: Do flagging?
        # Create a parameter-subset for DPPP and write it to file.
        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            dppp_mapfile = self.run_task(
                "ndppp",
                input_correlated_mapfile,
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=py_parset.getStringVector(
                    'PreProcessing.demix_always'),
                demix_if_needed=py_parset.getStringVector(
                    'PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile)['mapfile']

        # *********************************************************************
        # 4. Run BBS with a model of the calibrator
        #    Create a parmdb for calibration solutions
        #    Create sourcedb with known calibration solutions
        #    Run bbs with both
        # Create an empty parmdb for BBS
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task("setupparmdb",
                                           dppp_mapfile,
                                           mapfile=os.path.join(
                                               mapfile_dir,
                                               'bbs.parmdb.mapfile'),
                                           suffix='.bbs.parmdb')['mapfile']

        # Create a sourcedb based on sourcedb's input argument "skymodel"
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb",
                input_correlated_mapfile,
                skymodel=os.path.join(
                    self.config.get('DEFAULT', 'lofarroot'), 'share',
                    'pipeline', 'skymodels',
                    py_parset.getString('Calibration.SkyModel') + '.skymodel'),
                mapfile=os.path.join(mapfile_dir, 'bbs.sourcedb.mapfile'),
                suffix='.bbs.sourcedb')['mapfile']

        # Create a parameter-subset for BBS and write it to file.
        bbs_parset = os.path.join(parset_dir, "BBS.parset")
        py_parset.makeSubset('BBS.').writeFile(bbs_parset)

        # Run BBS to calibrate the calibrator source(s).
        with duration(self, "bbs_reducer"):
            bbs_mapfile = self.run_task(
                "bbs_reducer",
                dppp_mapfile,
                parset=bbs_parset,
                instrument_mapfile=parmdb_mapfile,
                sky_mapfile=sourcedb_mapfile)['data_mapfile']

        # *********************************************************************
        # 5. Perform gain outlier correction on the found calibration solutions
        #    Swapping outliers in the gains with the median
        # Export the calibration solutions using gainoutliercorrection and store
        # the results in the files specified in the instrument mapfile.
        export_instrument_model = py_parset.getBool(
            'Calibration.exportCalibrationParameters', False)

        with duration(self, "gainoutliercorrection"):
            self.run_task("gainoutliercorrection",
                          (parmdb_mapfile, output_instrument_mapfile),
                          sigma=1.0,
                          export_instrument_model=export_instrument_model
                          )  # TODO: Parset parameter

        # *********************************************************************
        # 6. Copy corrected MS's to their final output destination.
        with duration(self, "copier"):
            self.run_task("copier",
                          mapfile_source=bbs_mapfile,
                          mapfile_target=output_correlated_mapfile,
                          mapfiles_dir=mapfile_dir,
                          mapfile=output_correlated_mapfile)

        # *********************************************************************
        # 7. Create feedback file for further processing by the LOFAR framework
        #    a. get metadata of the measurement sets
        #    b. get metadata of the instrument models
        #    c. join the two files and write the final feedback file
        correlated_metadata = os.path.join(parset_dir, "correlated.metadata")
        instrument_metadata = os.path.join(parset_dir, "instrument.metadata")
        with duration(self, "get_metadata"):
            self.run_task(
                "get_metadata",
                output_correlated_mapfile,
                parset_file=correlated_metadata,
                parset_prefix=(self.parset.getString('prefix') +
                               self.parset.fullModuleName('DataProducts')),
                product_type="Correlated")

        with duration(self, "get_metadata"):
            self.run_task(
                "get_metadata",
                output_instrument_mapfile,
                parset_file=instrument_metadata,
                parset_prefix=(self.parset.getString('prefix') +
                               self.parset.fullModuleName('DataProducts')),
                product_type="InstrumentModel")

        parset = parameterset(correlated_metadata)
        parset.adoptFile(instrument_metadata)
        parset.writeFile(self.parset_feedback_file)

        return 0
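The skymodel resolution in step 2 above follows a simple name-or-path convention; a minimal standalone sketch of that lookup (the default search path under lofarroot mirrors the comments, the helper itself is hypothetical, and the pipeline raises PipelineException rather than IOError):

import os

def resolve_skymodel(skymodel, lofarroot):
    """Return an absolute path to <name>.skymodel, or the given path itself."""
    if not os.path.isabs(skymodel):
        # Bare names are looked up in the default skymodel directory.
        skymodel = os.path.join(lofarroot, 'share', 'pipeline',
                                'skymodels', skymodel + '.skymodel')
    if not os.path.isfile(skymodel):
        raise IOError("Skymodel %s does not exist" % skymodel)
    return skymodel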
Beispiel #41
0
#!/usr/bin/env python
from lofar.parameterset import parameterset

# Test task.feedback.dataproducts
from lofar.messagebus.protocols import TaskFeedbackDataproducts

parset = parameterset()
parset.add("foo", "bar")

msg = TaskFeedbackDataproducts("from", "forUser", "summary", 1, 2, parset)

# Test task.feedback.processing
from lofar.messagebus.protocols import TaskFeedbackProcessing

parset = parameterset()
parset.add("foo", "bar")

msg = TaskFeedbackProcessing("from", "forUser", "summary", 1, 2, parset)

# Test task.feedback.state
from lofar.messagebus.protocols import TaskFeedbackState

msg = TaskFeedbackState("from", "forUser", "summary", 1, 2, True)
Beispiel #42
0
 def __init__(self):
   super(control, self).__init__()
   
   self.parset = parameterset()
   self.momID = 0
   self.sasID = 0
Beispiel #43
0
    "/opt/pipeline/dependencies/lib/python2.5/site-packages:/opt/pipeline/framework/lib/python2.5/site-packages:/opt/LofIm/daily/pyrap/lib:/opt/LofIm/daily/lofar/lib/python2.5/site-packages:/opt/pythonlibs/lib/python/site-packages",
    "LD_LIBRARY_PATH":
    "/opt/pipeline/dependencies/lib:/opt/LofIm/daily/pyrap/lib:/opt/LofIm/daily/casacore/lib:/opt/LofIm/daily/lofar/lib:/opt/wcslib/lib/:/opt/hdf5/lib:/opt/LofIm/daily/casarest/lib:/data/sys/opt/lofar/external/log4cplus/lib",
    "PATH":
    "/opt/pipeline/dependencies/bin:/home/swinbank/sw/bin:/opt/pipeline/dependencies/bin:/usr/local/bin:/usr/bin:/usr/X11R6/bin:/bin:/usr/games:/opt/LofIm/daily/casarest/bin:/opt/LofIm/daily/casarest/bin",
}

# -----------------------------------------------------------------------------

# To ensure consistency in the configuration between this wrapper and the
# pipeline, we will set the start time here.
start_time = datetime.datetime.utcnow().replace(microsecond=0).isoformat()

# We should always be called with standard command line arguments:
# tree ID, parset, ... others?
input_parset = parameterset(sys.argv[1])
tree_id = sys.argv[2]  # check this!

# Extract runtime, working, results directories from input parset
runtime_directory = input_parset.getString(
    "ObsSW.Observation.ObservationControl.PythonControl.runtimeDirectory")
working_directory = input_parset.getString(
    "ObsSW.Observation.ObservationControl.PythonControl.workingDirectory")
results_directory = input_parset.getString(
    "ObsSW.Observation.ObservationControl.PythonControl.resultDirectory")

# Set up configuration for later processing stages
config = ConfigParser({
    "job_name": tree_id,
    "cwd": os.getcwd(),
    "start_time": start_time,
Beispiel #44
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        self.logger.info("Starting imager pipeline")

        # Define scratch directory to be used by the compute nodes.
        self.scratch_directory = os.path.join(self.inputs['working_directory'],
                                              self.inputs['job_name'])
        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # remove prepended parset identifiers, leave only the PythonControl subset
        full_parset = self.parset
        self.parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')  # remove this

        # Create directories to store communication and data files

        job_dir = self.config.get("layout", "job_directory")

        self.parset_dir = os.path.join(job_dir, "parsets")
        create_directory(self.parset_dir)
        self.mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(self.mapfile_dir)

        # *********************************************************************
        # (INPUT) Get the input from external sources and create pipeline types
        # Input measurement sets
        input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
        self.input_data.save(input_mapfile)
        # storedata_map(input_mapfile, self.input_data)
        self.logger.debug(
            "Wrote input UV-data mapfile: {0}".format(input_mapfile))

        # Provides location for the scratch directory and concat.ms location
        target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
        self.target_data.save(target_mapfile)
        self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile))

        # images datafiles
        output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile")
        self.output_data.save(output_image_mapfile)
        self.logger.debug(
            "Wrote output sky-image mapfile: {0}".format(output_image_mapfile))

        # ******************************************************************
        # (1) prepare phase: copy and collect the ms
        concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
            processed_ms_dir = self._prepare_phase(input_mapfile,
                                    target_mapfile)

        number_of_major_cycles = self.parset.getInt(
            "Imaging.number_of_major_cycles")

        # We start with an empty source_list map. It should contain n_output
        # entries all set to empty strings
        source_list_map_path = os.path.join(self.mapfile_dir,
                                            "initial_sourcelist.mapfile")
        source_list_map = DataMap.load(target_mapfile)  # copy the output map
        for item in source_list_map:
            item.file = ""  # set all to empty string
        source_list_map.save(source_list_map_path)

        for idx_loop in range(number_of_major_cycles):
            # *****************************************************************
            # (2) Create dbs and sky model
            parmdbs_path, sourcedb_map_path = self._create_dbs(
                concat_ms_map_path,
                timeslice_map_path,
                source_list_map_path=source_list_map_path,
                skip_create_dbs=False)

            # *****************************************************************
            # (3)  bbs_imager recipe.
            bbs_output = self._bbs(timeslice_map_path,
                                   parmdbs_path,
                                   sourcedb_map_path,
                                   skip=False)

            # TODO: Extra recipe: concat timeslices using pyrap.concatms
            # (see prepare)

            # *****************************************************************
            # (4) Get the awimager parameters from the prepare parset and inputs
            aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path,
                                                            idx_loop,
                                                            sourcedb_map_path,
                                                            skip=False)

            # *****************************************************************
            # (5) Source finding
            sourcelist_map, found_sourcedb_path = self._source_finding(
                aw_image_mapfile, idx_loop, skip=False)
            # should the output be a sourcedb? instead of a sourcelist

        # TODO: minbaseline should be a parset value as is maxbaseline..
        minbaseline = 0

        # *********************************************************************
        # (6) Finalize:
        placed_data_image_map = self._finalize(
            aw_image_mapfile, processed_ms_dir, ms_per_image_map_path,
            sourcelist_map, minbaseline, maxbaseline, target_mapfile,
            output_image_mapfile, found_sourcedb_path)

        # *********************************************************************
        # (7) Get metadata
        # Create a parset containing the metadata for MAC/SAS
        metadata_file = "%s_feedback_SkyImage" % (self.parset_file, )
        self.run_task(
            "get_metadata",
            placed_data_image_map,
            parset_prefix=(full_parset.getString('prefix') +
                           full_parset.fullModuleName('DataProducts')),
            product_type="SkyImage",
            metadata_file=metadata_file)

        self.send_feedback_processing(parameterset())
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        self.logger.info("Starting longbaseline pipeline")

        # Define scratch directory to be used by the compute nodes.
        self.scratch_directory = os.path.join(self.inputs['working_directory'],
                                              self.inputs['job_name'])
        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # remove prepended parset identifiers, leave only the PythonControl subset
        full_parset = self.parset
        self.parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')  # remove this

        # Create directories to store communication and data files

        job_dir = self.config.get("layout", "job_directory")

        self.parset_dir = os.path.join(job_dir, "parsets")
        create_directory(self.parset_dir)
        self.mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(self.mapfile_dir)

        # *********************************************************************
        # (INPUT) Get the input from external sources and create pipeline types
        # Input measurement sets
        input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
        self.input_data.save(input_mapfile)

        ## ***************************************************************
        #output_mapfile_path = os.path.join(self.mapfile_dir, "output.mapfile")
        #self.output_mapfile.save(output_mapfile_path)

        # storedata_map(input_mapfile, self.input_data)
        self.logger.debug(
            "Wrote input UV-data mapfile: {0}".format(input_mapfile))

        # Provides location for the scratch directory and concat.ms location
        target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
        self.target_data.save(target_mapfile)
        self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile))

        # output data files
        output_ms_mapfile = os.path.join(self.mapfile_dir, "output.mapfile")
        self.output_data.save(output_ms_mapfile)
        self.logger.debug(
            "Wrote output data mapfile: {0}".format(output_ms_mapfile))

        # TODO: This is a backdoor option to manually add beamtables when these
        # are missing on the provided ms. There is NO use case for users of the
        # pipeline
        add_beam_tables = self.parset.getBool("Imaging.addBeamTables", False)

        # ******************************************************************
        # (1) prepare phase: copy and collect the ms
        concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
            processed_ms_dir = self._long_baseline(input_mapfile,
                         target_mapfile, add_beam_tables, output_ms_mapfile)

        # *********************************************************************
        # (7) Get metadata
        # create a parset with information that is available on the toplevel
        toplevel_meta_data = parameterset(
            {'feedback_version': feedback_version})

        # get some parameters from the imaging pipeline parset:
        subbandgroups_per_ms = self.parset.getInt(
            "LongBaseline.subbandgroups_per_ms")
        subbands_per_subbandgroup = self.parset.getInt(
            "LongBaseline.subbands_per_subbandgroup")

        toplevel_meta_data.replace("subbandsPerSubbandGroup",
                                   str(subbands_per_subbandgroup))
        toplevel_meta_data.replace("subbandGroupsPerMS",
                                   str(subbandgroups_per_ms))

        # Create a parset-file containing the metadata for MAC/SAS at nodes
        metadata_file = "%s_feedback_Correlated" % (self.parset_file, )
        self.run_task(
            "get_metadata",
            output_ms_mapfile,
            parset_prefix=(full_parset.getString('prefix') +
                           full_parset.fullModuleName('DataProducts')),
            product_type="Correlated",
            metadata_file=metadata_file)

        self.send_feedback_processing(toplevel_meta_data)
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.

        Note: return 0 on success, 1 on failure.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles.
        #
        # Note that PULP will read many of these fields directly. That makes
        # the following fields, and possibly others, part of the API towards
        # PULP:
        #
        # self.config
        # self.logger
        # self.input_data
        # self.output_data
        # self.parset_feedback_file
        # self.job_dir

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        self.job_dir = self.config.get("layout", "job_directory")
        self.globalfs = self.config.has_option("remote", "globalfs") and self.config.getboolean("remote", "globalfs")
        parset_dir = os.path.join(self.job_dir, "parsets")
        mapfile_dir = os.path.join(self.job_dir, "mapfiles")
        
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        # Coherent Stokes
        self.input_CS_mapfile = os.path.join(mapfile_dir, "input_CS_data.mapfile")
        self.input_data['coherent'].save(self.input_CS_mapfile)
        # Incoherent Stokes
        self.input_IS_mapfile = os.path.join(mapfile_dir, "input_IS_data.mapfile")
        self.input_data['incoherent'].save(self.input_IS_mapfile)
        # Output data
        self.output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile")
        self.output_data['data'].save(self.output_data_mapfile)

        if len(self.input_data) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.pulsar_parms = self.parset.makeSubset(self.parset.fullModuleName('Pulsar') + '.')
        pulsar_parset = os.path.join(parset_dir, "Pulsar.parset")

        if self.globalfs:
          # patch for Pulp in case of DOCKER
          for k in [x for x in self.pulsar_parms.keys() if x.endswith("_extra_opts")]:
            self.pulsar_parms.replace(k, self.pulsar_parms[k].getString().replace(" ","\\\\ "))

        self.pulsar_parms.writeFile(pulsar_parset)
            
        self.logger.debug("Processing: %s" %
          ', '.join(str(f) for f in self.input_data))
        
        # Rebuilding sys.argv without the options given automatically by framework
        # --auto = automatic run from framework
        # -q = quiet mode, no user interaction
        sys.argv = ['pulp.py', '--auto', '-q']

        if self.globalfs:
          project = self.parset.getString(self.parset.fullModuleName('Campaign') + '.name')
          sys.argv.extend(['--slurm', '--globalfs', '--docker', '--docker-container=lofar-pulp:%s' % os.environ.get("LOFAR_TAG"), '--raw=/data/projects/%s' % project])
        else:
          sys.argv.append("--auto")
      
        if (not self.coherentStokesEnabled):
          sys.argv.extend(["--noCS", "--noCV", "--noFE"])
          
        if (not self.incoherentStokesEnabled):
          sys.argv.append("--noIS")       

        # Tell PULP where to write the feedback to
        self.parset_feedback_file =  "%s_feedback" % (self.parset_file,)
       
        # Run the pulsar pipeline
        self.logger.debug("Starting pulp with: " + join(sys.argv))
        self.logger.debug("Calling pulp.pulp(self) with self = %s", pprint.pformat(vars(self)))
        p = pulp.pulp(self) # TODO: MUCK self to capture the API

        # NOTE: PULP returns 0 on SUCCESS!!
        if p.go():
          self.logger.error("PULP did not succeed. Bailing out!")
          return 1

        # Read and forward the feedback
        try:
          metadata = parameterset(self.parset_feedback_file)
        except IOError as e:
          self.logger.error("Could not read feedback from %s: %s" % (self.parset_feedback_file, e))
          return 1
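The globalfs/docker patch above escapes spaces in every `*_extra_opts` value; a small illustration of what that replacement produces (the stated reason is an assumption based on the snippet):

# Illustration only (assumed reason): each space in an extra_opts value is
# prefixed with two literal backslashes so it survives the extra quoting
# layers added by the slurm/docker wrappers.
opts = "-nsub 16 -fine"                 # hypothetical *_extra_opts value
escaped = opts.replace(" ", "\\\\ ")
# escaped now contains: -nsub\\ 16\\ -fine   (two backslashes before each space)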
Beispiel #47
0
 def get_tasks(self):
     """
     Convert the pipeline into DPU tasks. We assume that the pipeline can be parallelized by
     creating independent tasks for all its input files. Furthermore, we do take into account
     that there might be dependencies between different pipelines. In that case, task number i
     for input file i of the next pipeline will start when task number i for input file i of the 
     previous pipeline has finished.
     
     As an example, the following shows how a calibration pipeline followed by a target pipeline
     (which should wait for the calibration pipeline to finish) are parallelized:
     
                             Tasks
                     0      1     ...   N
     Pipeline 0:   SB000  SB001       SB00N  (all executed independently)
     (calibration)
     
     Pipeline 1:   SB000  SB001       SB00N  (horizontally independent, but vertically depending on the previous task)
     (target)
     
     The dependencies between the pipelines will be handled at a later stage.
     """
     
     # First, interpret the parset and get all the information about the
     # input and output files as was defined in the XML.
     self.read_parset()
     inputs_filenames_keys  = map(lambda input:  str( input['filenames']), self.inputs.values())
     inputs_locations_keys  = map(lambda input:  str( input['locations']), self.inputs.values())
     inputs_skip_keys       = map(lambda input:  str( input['skip']),      self.inputs.values())
     outputs_filenames_keys = map(lambda output: str(output['filenames']), self.outputs.values())
     outputs_locations_keys = map(lambda output: str(output['locations']), self.outputs.values())
     outputs_skip_keys      = map(lambda output: str(output['skip']),      self.outputs.values())
         
     input_map_list = []
     output_map_list = []
     # Combine the information about each input and output into tuples.
     # Note that the order of these keys is used when creating the individual jobs:
     # filenames, locations, skip values
     input_map_keys = zip(inputs_filenames_keys, inputs_locations_keys, inputs_skip_keys )
     output_map_keys = zip(outputs_filenames_keys, outputs_locations_keys, outputs_skip_keys )
     
     # Create a DataMap for each input and each output.
     for filename, location, skip in input_map_keys:
         input_map_list.append(
           DataMap([
                    tuple(os.path.join(location, filename).split(':')) + (skip,)
                              for filename, location, skip in zip(
                                  self.parset.getStringVector(filename),
                                  self.parset.getStringVector(location),
                                  self.parset.getBoolVector(skip))
                  ])
         )
         
     for filename, location, skip in output_map_keys:
         output_map_list.append(
           DataMap([
                    tuple(os.path.join(location, filename).split(':')) + (skip,)
                              for filename, location, skip in zip(
                                  self.parset.getStringVector(filename),
                                  self.parset.getStringVector(location),
                                  self.parset.getBoolVector(skip))
                  ])
         )
     
     # Align the data maps in order to validate them and set the skip values
     # in the same way for each input and output.
     align_data_maps(*(input_map_list+output_map_list))
     
     # Finally, convert everything into individual tasks.
     pipeline_jobs = []
     job_data_product_keys = input_map_keys + output_map_keys
     for idx, job_data_products in enumerate(zip(*(input_map_list+ output_map_list))):
         job = cep_pipeline_job()
         # Clone the parset by creating another instance.
         job_parset = parameterset()
         job_parset.adoptArgv(str(self.parset_as_str).split('\n'))
         job_should_be_skipped = False
         
         # Now replace all input and output information by the (single) data
         # element that should be processed by this task.
         for [job_data_product, job_data_product_key] in zip(job_data_products, job_data_product_keys):
             job_should_be_skipped = job_should_be_skipped or job_data_product.skip
             job.host = job_data_product.host
             # We assume that the job will be launched on the node where the
             # data is stored.
             host = 'localhost'
             filename = os.path.basename(job_data_product.file)
             file_location = os.path.dirname(job_data_product.file)
             skip = job_data_product.skip
             # Remember that the key order is determined in a previous zip.
             job_parset.replace(job_data_product_key[0], str([filename]))
             job_parset.replace(job_data_product_key[1], str([host + ":" + file_location]))
             job_parset.replace(job_data_product_key[2], str([skip]))
         
         if job_should_be_skipped:
             # If skip was True for either one of the input/output elements,
             # we should skip this job but increase the job index.
             continue
         
         job.parset_as_dict = job_parset.dict()
         job.command = self.get_command()
         job.name = self.id + "_" + str(idx)
         pipeline_jobs.append(job)
     
     return pipeline_jobs
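To make the per-job parset rewrite above concrete, a hedged illustration of what one task's replaced keys end up looking like; key names and values are made up, and in the real code they come from the XML-defined inputs/outputs:

from lofar.parameterset import parameterset

# Every filenames/locations/skip value becomes a single-element list so that
# the job processes exactly one data element.
job_parset = parameterset()
job_parset.replace("ObsSW.Observation.DataProducts.Input_Correlated.filenames",
                   str(["L123456_SB000_uv.MS"]))
job_parset.replace("ObsSW.Observation.DataProducts.Input_Correlated.locations",
                   str(["localhost:/data/L123456"]))
job_parset.replace("ObsSW.Observation.DataProducts.Input_Correlated.skip",
                   str([False]))
job_parset_dict = job_parset.dict()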
 def __init__(self):
     control.__init__(self)
     self.parset = parameterset()
     self.input_data = {}
     self.output_data = {}
     self.parset_feedback_file = None
# Set up environment for pipeline run
pipeline_environment = {
    "PYTHONPATH": "/opt/pipeline/dependencies/lib/python2.5/site-packages:/opt/pipeline/framework/lib/python2.5/site-packages:/opt/LofIm/daily/pyrap/lib:/opt/LofIm/daily/lofar/lib/python2.5/site-packages:/opt/pythonlibs/lib/python/site-packages",
    "LD_LIBRARY_PATH": "/opt/pipeline/dependencies/lib:/opt/LofIm/daily/pyrap/lib:/opt/LofIm/daily/casacore/lib:/opt/LofIm/daily/lofar/lib:/opt/wcslib/lib/:/opt/hdf5/lib:/opt/LofIm/daily/casarest/lib:/data/sys/opt/lofar/external/log4cplus/lib", 
    "PATH": "/opt/pipeline/dependencies/bin:/home/swinbank/sw/bin:/opt/pipeline/dependencies/bin:/usr/local/bin:/usr/bin:/usr/X11R6/bin:/bin:/usr/games:/opt/LofIm/daily/casarest/bin:/opt/LofIm/daily/casarest/bin",
}

# -----------------------------------------------------------------------------

# To ensure consistency in the configuration between this wrapper and the
# pipeline, we will set the start time here.
start_time = datetime.datetime.utcnow().replace(microsecond=0).isoformat()

# We should always be called with standard command line arguments:
# tree ID, parset, ... others?
input_parset = parameterset(sys.argv[1])
tree_id      = sys.argv[2] # check this!

# Extract runtime, working, results directories from input parset
runtime_directory = input_parset.getString("ObsSW.Observation.ObservationControl.PythonControl.runtimeDirectory")
working_directory = input_parset.getString("ObsSW.Observation.ObservationControl.PythonControl.workingDirectory")
results_directory = input_parset.getString("ObsSW.Observation.ObservationControl.PythonControl.resultDirectory")

# Set up configuration for later processing stages
config = ConfigParser({
    "job_name": tree_id,
    "cwd": os.getcwd(),
    "start_time": start_time,
})
config.read(config_file)
config.set('DEFAULT', 'runtime_directory', runtime_directory)
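A small convenience sketch of the PythonControl directory lookups above; the prefix mirrors the keys used in the snippet, the helper itself is not part of the wrapper, and it simply reuses the `input_parset` created earlier:

PYCTRL_PREFIX = "ObsSW.Observation.ObservationControl.PythonControl."

def get_pipeline_directories(parset):
    """Return (runtime, working, results) directories from an input parset."""
    return (parset.getString(PYCTRL_PREFIX + "runtimeDirectory"),
            parset.getString(PYCTRL_PREFIX + "workingDirectory"),
            parset.getString(PYCTRL_PREFIX + "resultDirectory"))

runtime_directory, working_directory, results_directory = \
    get_pipeline_directories(input_parset)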
Beispiel #50
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles
        # Create a parameter-subset containing only python-control stuff.
        py_parset = self.parset.makeSubset(
            'ObsSW.Observation.ObservationControl.PythonControl.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # Create some needed directories
        job_dir = self.config.get("layout", "job_directory")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(mapfile_dir)
        parset_dir = os.path.join(job_dir, "parsets")
        create_directory(parset_dir)

        # *********************************************************************
        # 2. Copy the instrument files to the correct node
        # The instrument files are currently located on the wrong nodes.
        # Copy them to the correct nodes and update the instrument map to
        # point at the now-correct data.

        # Copy the instrument files to the correct nodes: failures might happen;
        # update both the instrument- and data maps to contain only successes!
        self._copy_instrument_files(mapfile_dir)

        # Write input- and output data map-files.
        data_mapfile = os.path.join(mapfile_dir, "data.mapfile")
        self.input_data['data'].save(data_mapfile)
        copied_instrument_mapfile = os.path.join(mapfile_dir,
                                                 "copied_instrument.mapfile")
        self.input_data['instrument'].save(copied_instrument_mapfile)
        self.logger.debug("Wrote input data mapfile: %s" % data_mapfile)

        # Save copied files to a new mapfile
        corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile")
        self.output_data['data'].save(corrected_mapfile)
        self.logger.debug("Wrote output corrected data mapfile: %s" %
                          corrected_mapfile)

        # Validate number of copied files, abort on zero files copied
        if len(self.input_data['data']) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug("Processing: %s" %
                          ', '.join(str(f) for f in self.input_data['data']))

        # *********************************************************************
        # 3. Create database needed for performing work:
        #    - GVDS, describing data on the compute nodes
        #    - SourceDB, for skymodel (A-team)
        #    - ParmDB for outputting solutions
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task("vdsmaker", data_mapfile)['gvds']

        # Read metadata (e.g., start- and end-time) from the GVDS file.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create an empty parmdb for DPPP
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task("setupparmdb",
                                           data_mapfile)['mapfile']

        # Create a sourcedb to be used by the demixing phase of DPPP
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share',
                'pipeline',
                'skymodels',
                skymodel + '.skymodel')
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task("setupsourcedb",
                                             data_mapfile,
                                             skymodel=skymodel,
                                             suffix='.dppp.sourcedb',
                                             type='blob')['mapfile']

        # *********************************************************************
        # 4. Run NDPPP to demix the A-Team sources
        # Create a parameter-subset for DPPP and write it to file.
        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            dppp_mapfile = self.run_task(
                "ndppp",
                data_mapfile,
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=py_parset.getStringVector(
                    'PreProcessing.demix_always'),
                demix_if_needed=py_parset.getStringVector(
                    'PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.mapfile'))['mapfile']

        # ********************************************************************
        # 5. Run BBS using the instrument file from the target observation
        # Create an empty sourcedb for BBS
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task("setupsourcedb",
                                             data_mapfile)['mapfile']

        # Create a parameter-subset for BBS and write it to file.
        bbs_parset = os.path.join(parset_dir, "BBS.parset")
        py_parset.makeSubset('BBS.').writeFile(bbs_parset)

        # Run BBS to calibrate the target source(s).
        with duration(self, "bbs_reducer"):
            bbs_mapfile = self.run_task(
                "bbs_reducer",
                dppp_mapfile,
                parset=bbs_parset,
                instrument_mapfile=copied_instrument_mapfile,
                sky_mapfile=sourcedb_mapfile)['data_mapfile']

        # *********************************************************************
        # 6. Copy the MS's to their final output destination.
        # When the copier recipe has run, the map-file named in
        # corrected_mapfile will contain an updated map of output files.
        with duration(self, "copier"):
            self.run_task("copier",
                          mapfile_source=bbs_mapfile,
                          mapfile_target=corrected_mapfile,
                          mapfiles_dir=mapfile_dir,
                          mapfile=corrected_mapfile)

        # *********************************************************************
        # 7. Create feedback for further processing by the LOFAR framework
        metadata_file = "%s_feedback_Correlated" % (self.parset_file, )
        with duration(self, "get_metadata"):
            self.run_task(
                "get_metadata",
                corrected_mapfile,
                parset_prefix=(self.parset.getString('prefix') +
                               self.parset.fullModuleName('DataProducts')),
                product_type="Correlated",
                metadata_file=metadata_file)

        self.send_feedback_processing(parameterset())
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
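Throughout these pipelines each task is wrapped in `with duration(self, "..."):`; its implementation is not shown in the snippets, but a minimal timing context manager along those lines could look like the following. This is purely a sketch, assuming the real helper only logs elapsed wall-clock time per task:

import time
from contextlib import contextmanager

@contextmanager
def duration(pipeline, task_name):
    # Time the enclosed task and log how long it took, even on failure.
    start = time.time()
    try:
        yield
    finally:
        pipeline.logger.info("task %s took %.1f s", task_name, time.time() - start)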
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles.
        py_parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")

        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        input_data_mapfile = os.path.join(mapfile_dir, "input_data.mapfile")
        self.input_data.save(input_data_mapfile)
        output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile")
        self.output_data.save(output_data_mapfile)

        if len(self.input_data) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug("Processing: %s" %
            ', '.join(str(f) for f in self.input_data))

        # *********************************************************************
        # 2. Create VDS-file and databases. The latter are needed when doing
        #    demixing within DPPP.
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task("vdsmaker", input_data_mapfile)['gvds']

        # Read metadata (start, end times, pointing direction) from GVDS.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create a parameter database that will be used by the NDPPP demixing
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task(
                "setupparmdb", input_data_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'),
                suffix='.dppp.parmdb'
            )['mapfile']
                
        # Create a source database from a user-supplied sky model
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share', 'pipeline', 'skymodels', skymodel + '.skymodel'
            )
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", input_data_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'),
                skymodel=skymodel,
                suffix='.dppp.sourcedb',
                type='blob'
            )['mapfile']


        # *********************************************************************
        # 3. Average and flag data, using NDPPP.

        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            output_data_mapfile = self.run_task("ndppp",
                (input_data_mapfile, output_data_mapfile),
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=
                    py_parset.getStringVector('PreProcessing.demix_always'),
                demix_if_needed=
                    py_parset.getStringVector('PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile
            )['mapfile']

        # *********************************************************************
        # 6. Create feedback file for further processing by the LOFAR framework
        # Create a parset containing the metadata
        metadata_file = "%s_feedback_Correlated" % (self.parset_file,)
        with duration(self, "get_metadata"):
            self.run_task("get_metadata", output_data_mapfile,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')),
                product_type="Correlated",
                metadata_file=metadata_file)

        self.send_feedback_processing(parameterset({'feedback_version': feedback_version}))
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
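
The NDPPP step above hands DPPP its configuration by carving the DPPP.* keys out of the Python-control parset and writing them to a file. Below is a minimal, self-contained sketch of that pattern; the import path lofar.parameterset, the key names, and the file locations are assumptions for illustration, since the examples do not show their imports.

# Minimal sketch (not from the pipeline source) of the parset-subsetting
# pattern used above. The lofar.parameterset import path is an assumption.
import os
import tempfile
from lofar.parameterset import parameterset

py_parset = parameterset({
    'DPPP.steps': '[avg]',                         # illustrative keys only
    'DPPP.avg.type': 'average',
    'PreProcessing.demix_always': '[CasA, CygA]',
})

# Write only the DPPP.* keys (prefix stripped) to a parset file for NDPPP.
parset_dir = tempfile.mkdtemp()
ndppp_parset = os.path.join(parset_dir, 'NDPPP.parset')
py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

# Vector-valued keys come back as Python lists.
print(py_parset.getStringVector('PreProcessing.demix_always'))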
Beispiel #52
0
    def run(self, executable, infiles, db_key, db_name, db_user, db_host):
        """
        Deprecated functionality
        """
        # executable : path to KernelControl executable
        # infiles    : tuple of MS, instrument- and sky-model files
        # db_*       : database connection parameters
        # ----------------------------------------------------------------------
        self.logger.debug("executable = %s" % executable)
        self.logger.debug("infiles = %s" % str(infiles))
        self.logger.debug("db_key = %s" % db_key)
        self.logger.debug("db_name = %s" % db_name)
        self.logger.debug("db_user = %s" % db_user)
        self.logger.debug("db_host = %s" % db_host)

        (ms, parmdb_instrument, parmdb_sky) = infiles

        with log_time(self.logger):
            if os.path.exists(ms):
                self.logger.info("Processing %s" % (ms))
            else:
                self.logger.error("Dataset %s does not exist" % (ms))
                return 1

            #        Build a configuration parset specifying database parameters
            #                                                     for the kernel
            # ------------------------------------------------------------------
            self.logger.debug("Setting up BBSKernel parset")
            # Getting the filesystem must be done differently, using the
            # DataProduct keys in the parset provided by the scheduler.
            filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(ms))
            fd, parset_file = mkstemp()
            kernel_parset = parameterset()
            for key, value in {
                    "ObservationPart.Filesystem": filesystem,
                    "ObservationPart.Path": ms,
                    "BBDB.Key": db_key,
                    "BBDB.Name": db_name,
                    "BBDB.User": db_user,
                    "BBDB.Host": db_host,
                    "ParmDB.Sky": parmdb_sky,
                    "ParmDB.Instrument": parmdb_instrument
            }.items():
                kernel_parset.add(key, value)
            kernel_parset.writeFile(parset_file)
            os.close(fd)
            self.logger.debug("BBSKernel parset written to %s" % parset_file)

            #                                                     Run the kernel
            #               Catch & log output from the kernel logger and stdout
            # ------------------------------------------------------------------
            working_dir = mkdtemp(suffix=".%s" %
                                  (os.path.basename(__file__), ))
            try:
                self.logger.info("******** {0}".format(
                    open(parset_file).read()))
                cmd = [executable, parset_file, "0"]
                self.logger.debug("Executing BBS kernel")
                with CatchLog4CPlus(
                        working_dir,
                        self.logger.name + "." + os.path.basename(ms),
                        os.path.basename(executable),
                ):
                    bbs_kernel_process = Popen(cmd,
                                               stdout=PIPE,
                                               stderr=PIPE,
                                               cwd=working_dir)
                    sout, serr = bbs_kernel_process.communicate()
                log_process_output("BBS kernel", sout, serr, self.logger)
                if bbs_kernel_process.returncode != 0:
                    raise CalledProcessError(bbs_kernel_process.returncode,
                                             executable)
            except CalledProcessError as e:
                self.logger.error(str(e))
                return 1
            finally:
                os.unlink(parset_file)
                shutil.rmtree(working_dir)
            return 0
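
Beispiel #52 builds its kernel parset key by key and immediately writes it to a temporary file. The sketch below isolates that round trip: build a parameterset with add(), write it with writeFile(), and read it back from the file. The lofar.parameterset import path and the BBDB values are assumptions for illustration only.

# Standalone sketch of the kernel-parset round trip above; not part of the
# recipe. The import path and the placeholder values are assumptions.
import os
from tempfile import mkstemp
from lofar.parameterset import parameterset

fd, parset_file = mkstemp()
kernel_parset = parameterset()
for key, value in {
        "BBDB.Key": "default",
        "BBDB.Name": "bbs",
        "BBDB.Host": "localhost",
}.items():
    kernel_parset.add(key, value)
kernel_parset.writeFile(parset_file)
os.close(fd)

# A parameterset can be reconstructed from the file it was written to.
restored = parameterset(parset_file)
print(restored.getString("BBDB.Host"))
os.unlink(parset_file)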
Beispiel #53
0
    def run(self, executable, infiles, db_key, db_name, db_user, db_host):
        """
        Deprecated functionality
        """
        # executable : path to KernelControl executable
        # infiles    : tuple of MS, instrument- and sky-model files
        # db_*       : database connection parameters
        # ----------------------------------------------------------------------
        self.logger.debug("executable = %s" % executable)
        self.logger.debug("infiles = %s" % str(infiles))
        self.logger.debug("db_key = %s" % db_key)
        self.logger.debug("db_name = %s" % db_name)
        self.logger.debug("db_user = %s" % db_user)
        self.logger.debug("db_host = %s" % db_host)

        (ms, parmdb_instrument, parmdb_sky) = infiles

        with log_time(self.logger):
            if os.path.exists(ms):
                self.logger.info("Processing %s" % (ms))
            else:
                self.logger.error("Dataset %s does not exist" % (ms))
                return 1

            #        Build a configuration parset specifying database parameters
            #                                                     for the kernel
            # ------------------------------------------------------------------
            self.logger.debug("Setting up BBSKernel parset")
            # Getting the filesystem must be done differently, using the
            # DataProduct keys in the parset provided by the scheduler.
            filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(ms))
            fd, parset_file = mkstemp()
            kernel_parset = parameterset()
            for key, value in {
                "ObservationPart.Filesystem": filesystem,
                "ObservationPart.Path": ms,
                "BBDB.Key": db_key,
                "BBDB.Name": db_name,
                "BBDB.User": db_user,
                "BBDB.Host": db_host,
                "ParmDB.Sky": parmdb_sky,
                "ParmDB.Instrument": parmdb_instrument
            }.items():
                kernel_parset.add(key, value)
            kernel_parset.writeFile(parset_file)
            os.close(fd)
            self.logger.debug("BBSKernel parset written to %s" % parset_file)

            #                                                     Run the kernel
            #               Catch & log output from the kernel logger and stdout
            # ------------------------------------------------------------------
            working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
            try:
                self.logger.info("******** {0}".format(open(parset_file).read()))
                cmd = [executable, parset_file, "0"]
                self.logger.debug("Executing BBS kernel")
                with CatchLog4CPlus(
                    working_dir,
                    self.logger.name + "." + os.path.basename(ms),
                    os.path.basename(executable),
                ):
                    bbs_kernel_process = Popen(
                        cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir
                    )
                    sout, serr = bbs_kernel_process.communicate()
                log_process_output("BBS kernel", sout, serr, self.logger)
                if bbs_kernel_process.returncode != 0:
                    raise CalledProcessError(
                        bbs_kernel_process.returncode, executable
                    )
            except CalledProcessError as e:
                self.logger.error(str(e))
                return 1
            finally:
                os.unlink(parset_file)
                shutil.rmtree(working_dir)
            return 0
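
Both kernel examples follow the same subprocess pattern: start the executable, capture stdout and stderr, log them, and raise on a non-zero exit code. A stdlib-only sketch of that pattern, detached from the recipe class, might look as follows; the helper name is hypothetical.

# Stdlib-only sketch of the "run the kernel and log its output" pattern used
# in the two examples above; the helper name is hypothetical.
from subprocess import Popen, PIPE, CalledProcessError

def run_and_log(cmd, cwd, logger):
    """Run cmd in cwd, log its output, and raise if it exits non-zero."""
    process = Popen(cmd, stdout=PIPE, stderr=PIPE, cwd=cwd)
    sout, serr = process.communicate()
    logger.info("stdout: %s", sout.decode(errors="replace"))
    logger.info("stderr: %s", serr.decode(errors="replace"))
    if process.returncode != 0:
        raise CalledProcessError(process.returncode, cmd[0])
    return sout, serr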
Beispiel #54
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        self.logger.info("Starting imager pipeline")

        # Define scratch directory to be used by the compute nodes.
        self.scratch_directory = os.path.join(
            self.inputs['working_directory'], self.inputs['job_name'])
        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # Strip the prepended parset prefixes; keep only the PythonControl subset
        full_parset = self.parset
        self.parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')  # remove this

        # Create directories to store communication and data files

        job_dir = self.config.get("layout", "job_directory")

        self.parset_dir = os.path.join(job_dir, "parsets")
        create_directory(self.parset_dir)
        self.mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(self.mapfile_dir)

        # *********************************************************************
        # (INPUT) Get the input from external sources and create pipeline types
        # Input measurement sets
        input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
        self.input_data.save(input_mapfile)
        # storedata_map(input_mapfile, self.input_data)
        self.logger.debug(
            "Wrote input UV-data mapfile: {0}".format(input_mapfile))

        # Provides the scratch-directory and concat.ms locations
        target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
        self.target_data.save(target_mapfile)
        self.logger.debug(
            "Wrote target mapfile: {0}".format(target_mapfile))

        # images datafiles
        output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile")
        self.output_data.save(output_image_mapfile)
        self.logger.debug(
            "Wrote output sky-image mapfile: {0}".format(output_image_mapfile))

        # TODO: This is a backdoor option to manually add beam tables when
        # these are missing in the provided MS. There is NO use case for users
        # of the pipeline
        add_beam_tables = self.parset.getBool(
                                    "Imaging.addBeamTables", False)

        # ******************************************************************
        # (1) prepare phase: copy and collect the ms
        concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
            processed_ms_dir = self._prepare_phase(input_mapfile,
                                    target_mapfile, add_beam_tables)

        number_of_major_cycles = self.parset.getInt(
                                    "Imaging.number_of_major_cycles")

        # We start with an empty source_list map. It should contain n_output
        # entries all set to empty strings
        source_list_map_path = os.path.join(self.mapfile_dir,
                                        "initial_sourcelist.mapfile")
        source_list_map = DataMap.load(target_mapfile) # copy the output map
        for item in source_list_map:
            item.file = ""             # set all to empty string
        source_list_map.save(source_list_map_path)

        for idx_loop in range(number_of_major_cycles):
            # *****************************************************************
            # (2) Create dbs and sky model
            parmdbs_path, sourcedb_map_path = self._create_dbs(
                        concat_ms_map_path, timeslice_map_path,
                        source_list_map_path = source_list_map_path,
                        skip_create_dbs = False)

            # *****************************************************************
            # (3)  bbs_imager recipe.
            bbs_output = self._bbs(timeslice_map_path, parmdbs_path,
                        sourcedb_map_path, skip = False)

            # TODO: Extra recipe: concat timeslices using pyrap.concatms
            # (see prepare)

            # *****************************************************************
            # (4) Get parameters awimager from the prepare_parset and inputs
            aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path,
                        idx_loop, sourcedb_map_path,
                        skip = False)

            # *****************************************************************
            # (5) Source finding
            sourcelist_map, found_sourcedb_path = self._source_finding(
                    aw_image_mapfile, idx_loop, skip = False)
            # Should the output be a sourcedb instead of a sourcelist?

        # TODO: minbaseline should be a parset value as is maxbaseline..
        minbaseline = 0

        # *********************************************************************
        # (6) Finalize:
        placed_data_image_map = self._finalize(aw_image_mapfile,
            processed_ms_dir, ms_per_image_map_path, sourcelist_map,
            minbaseline, maxbaseline, target_mapfile, output_image_mapfile,
            found_sourcedb_path)

        # *********************************************************************
        # (7) Get metadata
        # create a parset with information that is available on the toplevel
        toplevel_meta_data = parameterset()
        toplevel_meta_data.replace("numberOfMajorCycles", 
                                           str(number_of_major_cycles))

        # Create a parset containing the metadata for MAC/SAS at nodes
        metadata_file = "%s_feedback_SkyImage" % (self.parset_file,)
        self.run_task("get_metadata", placed_data_image_map,
            parset_prefix = (
                full_parset.getString('prefix') +
                full_parset.fullModuleName('DataProducts')
            ),
            product_type = "SkyImage",
            metadata_file = metadata_file)

        self.send_feedback_processing(toplevel_meta_data)
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
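
Beispiel #54 seeds its first major cycle with a source-list mapfile whose entries are all empty strings. Factored into a helper, that pattern could be written as below; the DataMap import path (lofarpipe.support.data_map) and the function name are assumptions based on how the example uses the class.

# Illustrative helper, not part of the pipeline source: the "empty initial
# source list" pattern from the major-cycle loop above. The DataMap import
# path is an assumption; only load()/save() and the item.file attribute are
# taken from the example.
from lofarpipe.support.data_map import DataMap

def make_empty_sourcelist_map(template_mapfile, sourcelist_mapfile):
    """Copy a mapfile and blank every file entry, so the first major cycle
    starts without an input source list."""
    source_list_map = DataMap.load(template_mapfile)   # copy the output map
    for item in source_list_map:
        item.file = ""                                 # no source list yet
    source_list_map.save(sourcelist_mapfile)
    return sourcelist_mapfile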
Beispiel #55
0
 def __init__(self):
     control.__init__(self)
     self.parset = parameterset()
Beispiel #56
0
 def __init__(self):
     control.__init__(self)
     self.parset = parameterset()
     self.input_data = {}
     self.output_data = {}
     self.parset_feedback_file = None
Beispiel #57
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles.
        py_parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")

        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        input_correlated_mapfile = os.path.join(
            mapfile_dir, "input_correlated.mapfile"
        )
        output_correlated_mapfile = os.path.join(
            mapfile_dir, "output_correlated.mapfile"
        )
        output_instrument_mapfile = os.path.join(
            mapfile_dir, "output_instrument.mapfile"
        )
        self.input_data['correlated'].save(input_correlated_mapfile)
        self.output_data['correlated'].save(output_correlated_mapfile)
        self.output_data['instrument'].save(output_instrument_mapfile)

        if len(self.input_data['correlated']) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug("Processing: %s" %
            ', '.join(str(f) for f in self.input_data['correlated']))

        # *********************************************************************
        # 2. Create VDS-file and databases. The latter are needed when doing
        #    demixing within DPPP.
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task(
                "vdsmaker", input_correlated_mapfile
            )['gvds']

        # Read metadata (start, end times, pointing direction) from GVDS.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create a parameter database that will be used by the NDPPP demixing
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task(
                "setupparmdb", input_correlated_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'),
                suffix='.dppp.parmdb'
            )['mapfile']
                
        # Create a source database from a user-supplied sky model
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share', 'pipeline', 'skymodels', skymodel + '.skymodel'
            )
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", input_correlated_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'),
                skymodel=skymodel,
                suffix='.dppp.sourcedb',
                type='blob'
            )['mapfile']

        # *********************************************************************
        # 3. Average and flag data, using NDPPP.
        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            dppp_mapfile = self.run_task(
                "ndppp", input_correlated_mapfile,
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=
                    py_parset.getStringVector('PreProcessing.demix_always'),
                demix_if_needed=
                    py_parset.getStringVector('PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile
            )['mapfile']

        # *********************************************************************
        # 4. Create a sourcedb from the user-supplied sky model, 
        #    and an empty parmdb.
        skymodel = py_parset.getString('Calibration.SkyModel')

        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share', 'pipeline', 'skymodels', skymodel + '.skymodel'
            )
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", dppp_mapfile,
                skymodel=skymodel,
                suffix='.bbs.sourcedb'
            )['mapfile']

        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task(
                "setupparmdb", dppp_mapfile,
                suffix='.bbs.parmdb'
            )['mapfile']

        # *********************************************************************
        # 5. Run BBS to calibrate the data.

        # Create a parameter subset for BBS
        bbs_parset = os.path.join(parset_dir, "BBS.parset")
        py_parset.makeSubset('BBS.').writeFile(bbs_parset)
        with duration(self, "bbs_reducer"):
            bbs_mapfile = self.run_task(
                "bbs_reducer", dppp_mapfile,
                parset=bbs_parset,
                instrument_mapfile=parmdb_mapfile,
                sky_mapfile=sourcedb_mapfile
            )['data_mapfile']

        # *********************************************************************
        # 6. Copy output products to their final destination.
        #    a. copy the measurement sets
        #    b. copy the calculated instrument models
        #  When the copier recipe has run, the map-files named in
        #  output_correlated_mapfile and output_instrument_mapfile will
        #  contain an updated map of output files.
        with duration(self, "copier"):
            self.run_task("copier",
                mapfile_source=bbs_mapfile,
                mapfile_target=output_correlated_mapfile,
                mapfiles_dir=mapfile_dir,
                mapfile=output_correlated_mapfile
            )

        with duration(self, "copier"):
            self.run_task("copier",
                mapfile_source=parmdb_mapfile,
                mapfile_target=output_instrument_mapfile,
                mapfiles_dir=mapfile_dir,
                mapfile=output_instrument_mapfile
            )

        # *********************************************************************
        # 7. Create feedback for further processing by the LOFAR framework
        #    a. get metadata of the measurement sets
        #    b. get metadata of the instrument models
        #    c. join the two and write the final feedback
        correlated_metadata_file = "%s_feedback_Correlated" % (self.parset_file,)
        with duration(self, "get_metadata"):
            self.run_task("get_metadata", output_correlated_mapfile,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')),
                product_type="Correlated",
                metadata_file=correlated_metadata_file)

        instrument_metadata_file = "%s_feedback_InstrumentModel" % (self.parset_file,)
        with duration(self, "get_metadata"):
            self.run_task("get_metadata", output_instrument_mapfile,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')),
                product_type="InstrumentModel",
                metadata_file=instrument_metadata_file)

        self.send_feedback_processing(parameterset())
        self.send_feedback_dataproducts(parameterset(correlated_metadata_file))
        self.send_feedback_dataproducts(parameterset(instrument_metadata_file))

        return 0
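
The sky-model lookup appears three times across these pipelines: a bare name is resolved against $LOFARROOT/share/pipeline/skymodels, an absolute path is used as-is, and a missing file is an error. A standalone sketch of that logic follows; the helper name is hypothetical, and a plain RuntimeError stands in for the pipeline's PipelineException to keep the sketch self-contained.

# Standalone sketch of the sky-model resolution logic repeated above; not
# part of the pipeline source. RuntimeError stands in for PipelineException.
import os

def resolve_skymodel(skymodel, lofarroot):
    """Return the absolute path of a sky model, resolving bare names against
    $LOFARROOT/share/pipeline/skymodels/<name>.skymodel."""
    if not os.path.isabs(skymodel):
        skymodel = os.path.join(
            lofarroot, 'share', 'pipeline', 'skymodels',
            skymodel + '.skymodel')
    if not os.path.isfile(skymodel):
        raise RuntimeError("Skymodel %s does not exist" % skymodel)
    return skymodel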