Example #1
    def _run_rficonsole(self, rficonsole_executable, time_slice_dir,
                        time_slices):
        """
        _run_rficonsole runs the rficonsole application on the supplied
        timeslices in time_slices.

        """

        # loop all measurement sets
        rfi_temp_dir = os.path.join(time_slice_dir, "rfi_temp_dir")
        create_directory(rfi_temp_dir)

        try:
            rfi_console_proc_group = SubProcessGroup(self.logger)
            for time_slice in time_slices:
                # Each rfi console needs own working space for temp files
                temp_slice_path = os.path.join(rfi_temp_dir,
                                               os.path.basename(time_slice))
                create_directory(temp_slice_path)

                # construct copy command
                self.logger.info(time_slice)
                command = [rficonsole_executable, "-indirect-read", time_slice]
                self.logger.info("executing rficonsole command: {0}".format(
                    " ".join(command)))

                # Add the command to the process group
                rfi_console_proc_group.run(command, cwd=temp_slice_path)

            # wait for all to finish
            if rfi_console_proc_group.wait_for_finish() != None:
                raise Exception("an rfi_console_proc_group run failed!")

        finally:
            shutil.rmtree(rfi_temp_dir)
Example #2
    def _write_datamap_to_file(self, datamap, mapfile_name, message = ""):
        """
        Write the supplied map to a mapfile in the mapfile
        directory in the jobs dir, with the filename supplied in mapfile_name.
        Return the full path to the created file.
        If the supplied datamap is None then the file is touched if it does
        not exist, but existing files are kept as is.
        """

        mapfile_dir = os.path.join(
            self.config.get("layout", "job_directory"), "mapfiles")
        # create the mapfile_dir if it does not exist
        create_directory(mapfile_dir)

        # write the content to a new parset file
        mapfile_path = os.path.join(mapfile_dir,
                         "{0}.map".format(mapfile_name))

        # display a debug log entry with path and message
        if datamap != None:
            datamap.save(mapfile_path)

            self.logger.debug(
            "Wrote mapfile <{0}>: {1}".format(mapfile_path, message))
        else:
            if not os.path.exists(mapfile_path):
                DataMap().save(mapfile_path)

                self.logger.debug(
                    "Touched mapfile <{0}>: {1}".format(mapfile_path, message))

        return mapfile_path
Example #3
    def _write_datamap_to_file(self, datamap, mapfile_name, message=""):
        """
        Write the supplied map to a mapfile in the mapfile
        directory in the jobs dir, with the filename supplied in mapfile_name.
        Return the full path to the created file.
        If the supplied datamap is None then the file is touched if it does
        not exist, but existing files are kept as is.
        """

        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"),
                                   "mapfiles")
        # create the mapfile_dir if it does not exist
        create_directory(mapfile_dir)

        # write the content to a new parset file
        mapfile_path = os.path.join(mapfile_dir,
                                    "{0}.map".format(mapfile_name))

        # display a debug log entry with path and message
        if datamap != None:
            datamap.save(mapfile_path)

            self.logger.debug("Wrote mapfile <{0}>: {1}".format(
                mapfile_path, message))
        else:
            if not os.path.exists(mapfile_path):
                DataMap().save(mapfile_path)

                self.logger.debug("Touched mapfile <{0}>: {1}".format(
                    mapfile_path, message))

        return mapfile_path
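
A minimal standalone sketch of the write-or-touch behaviour shown in the two examples above. The import locations for DataMap and create_directory are assumed to be the usual lofarpipe ones, and the helper name write_or_touch_mapfile is made up for illustration.

import os

from lofarpipe.support.data_map import DataMap            # assumed import path
from lofarpipe.support.utilities import create_directory  # assumed import path


def write_or_touch_mapfile(datamap, mapfile_dir, mapfile_name):
    """Save datamap to <mapfile_dir>/<mapfile_name>.map, or touch an empty
    mapfile when datamap is None and no file exists yet."""
    create_directory(mapfile_dir)
    mapfile_path = os.path.join(mapfile_dir, "{0}.map".format(mapfile_name))
    if datamap is not None:
        datamap.save(mapfile_path)        # overwrite with the supplied content
    elif not os.path.exists(mapfile_path):
        DataMap().save(mapfile_path)      # touch: write an empty mapfile
    return mapfile_path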
Example #4
    def _prepare_steps(self, **kwargs):
        """
        Prepare for running the NDPPP program. This means, for one thing,
        patching the parsetfile with the correct input/output MS names,
        start/end times if available, etc. If a demixing step must be performed,
        some extra work needs to be done.
        
        Returns: patch dictionary that must be applied to the parset.
        """
        self.logger.debug("Time interval: %s %s" %
                          (kwargs['start_time'], kwargs['end_time']))
        # Create output directory for output MS.
        create_directory(os.path.dirname(kwargs['tmpfile']))

        patch_dictionary = {
            'msin': kwargs['infile'],
            'msout': kwargs['tmpfile'],
            'uselogger': 'True'
        }
        if kwargs['start_time']:
            patch_dictionary['msin.starttime'] = kwargs['start_time']
        if kwargs['end_time']:
            patch_dictionary['msin.endtime'] = kwargs['end_time']

        # If we need to do a demixing step, we have to do some extra work.
        # We have to read the parsetfile to check this.
        parset = parameterset(kwargs['parsetfile'])
        for step in parset.getStringVector('steps'):
            if parset.getString(step + '.type', '').startswith('demix'):
                patch_dictionary.update(
                    self._prepare_demix_step(step, **kwargs))

        # Return the patch dictionary that must be applied to the parset.
        return patch_dictionary
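
For reference, a hedged illustration of the patch dictionary that _prepare_steps() above would return for a call without a demixing step; all paths and times below are invented, only the keys follow the code above.

kwargs = {
    'infile': '/data/L12345_SB000_uv.MS',        # hypothetical input MS
    'tmpfile': '/tmp/L12345_SB000_uv.dppp.MS',   # hypothetical output MS
    'start_time': '2012-01-01/00:00:00.000',
    'end_time': '2012-01-01/01:00:00.000',
}
# With these inputs the returned patch dictionary would be:
patch_dictionary = {
    'msin': kwargs['infile'],
    'msout': kwargs['tmpfile'],
    'uselogger': 'True',
    'msin.starttime': kwargs['start_time'],
    'msin.endtime': kwargs['end_time'],
}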
Example #5
    def run(self, infile, outfile, executable, environment, sigma,
            use_parmexportcal):
        self.environment.update(environment)
        if os.path.exists(infile):
            self.logger.info("Processing {0}".format(infile))
        else:
            self.logger.error(
                "Instrument model file %s does not exist" % infile
                )
            return 1

        # Create output directory (if it doesn't already exist)
        create_directory(os.path.dirname(outfile))

        # Remove the target outfile if present: parmexportcal fails otherwise
        if os.path.exists(outfile):
            shutil.rmtree(outfile)

        # ********************************************************************
        # 1. Select correction method
        if not use_parmexportcal:
            # ****************************************************************
            # 3. use gainoutliercorrect from Swinbank
            self.logger.info(
                "Using the gainoutlier correction based on editparmdb")
            self._filter_stations_parmdb(infile, outfile, sigma)
            return 0

        # else:
        if not os.access(executable, os.X_OK):
            self.logger.error(
                "Could not find parmexport call executable at: {0}".format(
                                    executable))
            self.logger.error("bailing out!")
            return 1

        # ********************************************************************
        # 2. Call parmexportcal for gain correction
        self.logger.info(
            "Using the gainoutlier correction based on parmexportcal")
        try:
            temp_dir = tempfile.mkdtemp()
            with CatchLog4CPlus(
                temp_dir,
                self.logger.name + '.' + os.path.basename(infile),
                os.path.basename(executable)
            ) as logger:
                cmd = [executable, '-in', infile, '-out', outfile]
                self.logger.debug(
                    "Parmexportcal call: {0} ".format(" ".join(cmd)))
                catch_segfaults(
                    cmd,
                    temp_dir,
                    self.environment,
                    logger
                )
        except Exception, excp:
            self.logger.error(str(excp))
            return 1
Example #6
    def run(self, infile, outfile, executable, environment, sigma,
            use_parmexportcal):
        self.environment.update(environment)
        if os.path.exists(infile):
            self.logger.info("Processing {0}".format(infile))
        else:
            self.logger.error(
                "Instrument model file %s does not exist" % infile
                )
            return 1

        # Create output directory (if it doesn't already exist)
        create_directory(os.path.dirname(outfile))

        # Remove the target outfile if present: parmexportcal fails otherwise
        if os.path.exists(outfile):
            shutil.rmtree(outfile)

        # ********************************************************************
        # 1. Select correction method
        if not use_parmexportcal:
            # ****************************************************************
            # 3. use gainoutliercorrect from Swinbank
            self.logger.info(
                "Using the gainoutlier correction based on editparmdb")
            self._filter_stations_parmdb(infile, outfile, sigma)
            return 0

        # else:
        if not os.access(executable, os.X_OK):
            self.logger.error(
                "Could not find parmexport call executable at: {0}".format(
                                    executable))
            self.logger.error("bailing out!")
            return 1

        # ********************************************************************
        # 2. Call parmexportcal for gain correction
        self.logger.info(
            "Using the gainoutlier correction based on parmexportcal")
        try:
            temp_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
            with CatchLog4CPlus(
                temp_dir,
                self.logger.name + '.' + os.path.basename(infile),
                os.path.basename(executable)
            ) as logger:
                cmd = [executable, '-in', infile, '-out', outfile]
                self.logger.debug(
                    "Parmexportcal call: {0} ".format(" ".join(cmd)))
                catch_segfaults(
                    cmd,
                    temp_dir,
                    self.environment,
                    logger
                )
        except Exception, excp:
            self.logger.error(str(excp))
            return 1
Example #7
    def test_filter_stations_parmdb(self):
        file_path_in = os.path.join(self.tempDir, "input")
        create_directory(file_path_in)

        file_path_out = os.path.join(self.tempDir, "fullName")

        GainOutlierDetection = GainOutlierCorrectionWrapper()

        # Call the major  function
        # No errors should be thrown...
        parmdb = GainOutlierDetection._filter_stations_parmdb(file_path_in,
                                    file_path_out, 2)
Example #8
    def test_filter_stations_parmdb(self):
        file_path_in = os.path.join(self.tempDir, "input")
        create_directory(file_path_in)

        file_path_out = os.path.join(self.tempDir, "fullName")

        GainOutlierDetection = GainOutlierCorrectionWrapper()

        # Call the major  function
        # No errors should be thrown...
        parmdb = GainOutlierDetection._filter_stations_parmdb(
            file_path_in, file_path_out, 2)
Example #9
 def _copy_single_file_using_rsync(self, source_node, source_path,
                                   target_path):
     # assure that the target dir exists (rsync would create it, but an
     # error raised in the python code gives a nicer message)
     message = "No write access to target path: {0}".format(
         os.path.dirname(target_path))
     # If not existing try to create dir catch no permission
     try:
         create_directory(os.path.dirname(target_path))
     except OSError, e:
         if e.errno == 13:  # No permission
             self.logger.error(message)
             raise IOError(message)
         else:
             raise e
Example #10
 def _copy_single_file_using_rsync(self, source_node, source_path,
                                   target_path):
     # assure that the target dir exists (rsync would create it, but an
     # error raised in the python code gives a nicer message)
     message = "No write access to target path: {0}".format(
                                 os.path.dirname(target_path))
     # If not existing try to create dir catch no permission
     try:
         create_directory(os.path.dirname(target_path))
     except OSError, e:
         if e.errno == 13:  # No permission
             self.logger.error(message)
             raise IOError(message)
         else:
             raise e
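
A small standard-library-only variant of the permission check above; os.makedirs stands in for create_directory, and the symbolic errno constants replace the bare 13.

import errno
import os


def ensure_target_dir(target_path):
    """Create the parent directory of target_path, raising a clear error
    when there is no write access to it."""
    target_dir = os.path.dirname(target_path)
    try:
        os.makedirs(target_dir)
    except OSError as exc:
        if exc.errno == errno.EEXIST:    # directory already exists: fine
            pass
        elif exc.errno == errno.EACCES:  # no permission (errno 13)
            raise IOError(
                "No write access to target path: {0}".format(target_dir))
        else:
            raise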
Example #11
    def run(self, concatenated_measurement_set, sourcedb_target_path,
            monet_db_hostname, monet_db_port, monet_db_name, monet_db_user,
            monet_db_password, assoc_theta, parmdb_executable, slice_paths,
            parmdb_suffix, environment, working_directory, makesourcedb_path,
            source_list_path_extern, major_cycle):

        self.logger.info("Starting imager_create_dbs Node")
        self.environment.update(environment)

        #******************************************************************
        # 0. Create the directories used in this recipe
        create_directory(working_directory)

        #*******************************************************************
        # 1. get a sourcelist: from gsm or from file
        source_list, append = self._create_source_list(
            source_list_path_extern,sourcedb_target_path, 
            concatenated_measurement_set,monet_db_hostname, 
            monet_db_port, monet_db_name, monet_db_user,
            monet_db_password, assoc_theta)       

        #*******************************************************************
        # 2. convert it to a sourcedb (casa table)
        if self._create_source_db(source_list, sourcedb_target_path,
                                  working_directory, makesourcedb_path,
                                  append) == None:
            self.logger.error("failed creating sourcedb")
            return 1

        #*******************************************************************
        # 3. Create an empty parmdb for each timeslice
        parmdbs = self._create_parmdb_for_timeslices(parmdb_executable,
                                    slice_paths, parmdb_suffix)
        if parmdbs == None:
            self.logger.error("failed creating paramdb for slices")
            return 1

        # *******************************************************************
        # 4. Add the created databases to the measurement set
        self._add_dbs_to_ms(concatenated_measurement_set, sourcedb_target_path,
                            parmdbs, major_cycle)


        #*******************************************************************
        # 5. Assign the outputs
        self.outputs["sourcedb"] = sourcedb_target_path
        self.outputs["parmdbs"] = parmdbs
        return 0
Example #12
def copy(path_from, dir_to, clobber, use_symlinks=False):
    """
    Copy a file or directory

    Parameters
    ----------
    path_from : str
        Input file or directory
    dir_to : str
        Output directory
    clobber : bool
        Clobber existing file or directory?
    use_symlinks : bool, optional
        Use symlinks instead of copying files?

    """
    if not os.path.exists(path_from):
        log.warning('{} not found. Please check the '
                    'working directory'.format(path_from))
        return

    path_to = os.path.join(dir_to, os.path.basename(path_from))
    if os.path.exists(path_to):
        if not clobber:
            log.warning(' Destination "{}" exists and clobber = False. '
                        'Skipping it...'.format(path_to))
            return
    else:
        create_directory(dir_to)

    if use_symlinks:
        if os.path.exists(path_to):
            p = subprocess.Popen('rm -rf {0}'.format(path_to),
                                 shell=True,
                                 stdout=subprocess.PIPE)
            r = p.communicate()
        os.symlink(path_from, path_to)
    else:
        p = subprocess.Popen('rsync -a {0} {1}'.format(path_from, dir_to),
                             shell=True,
                             stdout=subprocess.PIPE)
        r = p.communicate()
        if p.returncode != 0:
            log.critical(
                'rsync exited abnormally when attempting to archive {}'.format(
                    path_from))
            sys.exit(1)
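
Hypothetical calls to the copy() function above; the paths are invented and only illustrate the clobber and use_symlinks switches.

# Archive a measurement set, overwriting any earlier copy in the archive:
copy('/data/scratch/L12345_SB000_uv.MS', '/archive/L12345', clobber=True)

# The same, but place a symlink in the archive instead of copying the data:
copy('/data/scratch/L12345_SB000_uv.MS', '/archive/L12345', clobber=True,
     use_symlinks=True)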
Example #13
    def run(self, concatenated_measurement_set, sourcedb_target_path,
            monet_db_hostname, monet_db_port, monet_db_name, monet_db_user,
            monet_db_password, assoc_theta, parmdb_executable, slice_paths,
            parmdb_suffix, environment, working_directory, makesourcedb_path,
            source_list_path_extern, major_cycle):

        self.logger.info("Starting imager_create_dbs Node")
        self.environment.update(environment)

        #******************************************************************
        # 0. Create the directories used in this recipe
        create_directory(working_directory)

        #*******************************************************************
        # 1. get a sourcelist: from gsm or from file
        source_list, append = self._create_source_list(
            source_list_path_extern, sourcedb_target_path,
            concatenated_measurement_set, monet_db_hostname, monet_db_port,
            monet_db_name, monet_db_user, monet_db_password, assoc_theta)

        #*******************************************************************
        # 2. convert it to a sourcedb (casa table)
        if self._create_source_db(source_list, sourcedb_target_path,
                                  working_directory, makesourcedb_path,
                                  append) == None:
            self.logger.error("failed creating sourcedb")
            return 1

        #*******************************************************************
        # 3. Create an empty parmdb for each timeslice
        parmdbs = self._create_parmdb_for_timeslices(parmdb_executable,
                                                     slice_paths,
                                                     parmdb_suffix)
        if parmdbs == None:
            self.logger.error("failed creating paramdb for slices")
            return 1

        # *******************************************************************
        # 4. Add the created databases to the measurement set
        self._add_dbs_to_ms(concatenated_measurement_set, sourcedb_target_path,
                            parmdbs, major_cycle)

        #*******************************************************************
        # 5. Assign the outputs
        self.outputs["sourcedb"] = sourcedb_target_path
        self.outputs["parmdbs"] = parmdbs
        return 0
Example #14
    def test__create_parmdb_missing_exec(self):
        """
        Test the correct functioning of the create parmdbs function
        
        """
        path_to_create = os.path.join(self.test_path, "testParmdb")
        create_directory(path_to_create)

        parmdb_output = os.path.join(path_to_create, "parmdbs")
        parmdb_executable = "/opt/cep/LofIm/daily/lofar/bin/incorrectExecutable"
        self.assertTrue(1 == self.imager_create_dbs._create_parmdb(parmdb_executable,
                                                            parmdb_output),
                        self.imager_create_dbs.logger.last())


        self.assertFalse(os.path.exists(parmdb_output), "target dir to be "
                        "created by parmdb does exist, while it should not")

        shutil.rmtree(path_to_create)
Example #15
def copy(path_from, dir_to, clobber, use_symlinks=False):
    """
    Copy a file or directory

    Parameters
    ----------
    path_from : str
        Input file or directory
    dir_to : str
        Output directory
    clobber : bool
        Clobber existing file or directory?
    use_symlinks : bool, optional
        Use symlinks instead of copying files?

    """
    if not os.path.exists(path_from):
        log.warning('{} not found. Please check the '
            'working directory'.format(path_from))
        return

    path_to = os.path.join(dir_to, os.path.basename(path_from))
    if os.path.exists(path_to):
        if not clobber:
            log.warning(' Destination "{}" exists and clobber = False. '
                'Skipping it...'.format(path_to))
            return
    else:
        create_directory(dir_to)

    if use_symlinks:
        if os.path.exists(path_to):
            p = subprocess.Popen('rm -rf {0}'.format(path_to), shell=True,
                stdout=subprocess.PIPE)
            r = p.communicate()
        os.symlink(path_from, path_to)
    else:
        p = subprocess.Popen('rsync -a {0} {1}'.format(path_from, dir_to),
            shell=True, stdout=subprocess.PIPE)
        r = p.communicate()
        if p.returncode != 0:
            log.critical('rsync exited abnormally when attempting to archive {}'.format(path_from))
            sys.exit(1)
Example #16
    def _save_active_mapfiles(self, cycle_idx, mapfile_dir, mapfiles = {}):
        """
        Receives a dict with the active mapfiles, mapping var name to path.
        Each mapfile is copied to a separate directory and saved.
        This allows us to return to the last successful run.
        """
        # create a directory for storing the saved mapfiles, use cycle idx
        mapfile_for_cycle_dir = os.path.join(mapfile_dir, "cycle_" + str(cycle_idx))
        create_directory(mapfile_for_cycle_dir)

        saved_mapfiles = {}
        for (var_name,mapfile_path) in list(mapfiles.items()):
            shutil.copy(mapfile_path, mapfile_for_cycle_dir)
            # save the newly created file, get the filename, and append it
            # to the directory name
            saved_mapfiles[var_name] = os.path.join(mapfile_for_cycle_dir,
                                          os.path.basename(mapfile_path))

        return saved_mapfiles
Example #17
    def test__create_parmdb_missing_exec(self):
        """
        Test the correct functioning of the create parmdbs function
        
        """
        path_to_create = os.path.join(self.test_path, "testParmdb")
        create_directory(path_to_create)

        parmdb_output = os.path.join(path_to_create, "parmdbs")
        parmdb_executable = "/opt/cep/LofIm/daily/lofar/bin/incorrectExecutable"
        self.assertTrue(
            1 == self.imager_create_dbs._create_parmdb(parmdb_executable,
                                                       parmdb_output),
            self.imager_create_dbs.logger.last())

        self.assertFalse(
            os.path.exists(parmdb_output), "target dir to be"
            "created by parmdb does exist, while it should not")

        shutil.rmtree(path_to_create)
Example #18
    def _write_parset_to_file(self, parset, parset_name, message):
        """
        Write the supplied parameterset to the parset
        directory in the jobs dir, with the filename supplied in parset_name.
        Return the full path to the created file.
        """
        parset_dir = os.path.join(self.config.get("layout", "job_directory"),
                                  "parsets")
        # create the parset dir if it does not exist
        create_directory(parset_dir)

        # write the content to a new parset file
        parset_path = os.path.join(parset_dir,
                                   "{0}.parset".format(parset_name))
        parset.writeFile(parset_path)

        # display a debug log entry with path and message
        self.logger.debug("Wrote parset to path <{0}> : {1}".format(
            parset_path, message))

        return parset_path
Example #19
    def _write_parset_to_file(self, parset, parset_name, message):
        """
        Write the supplied parameterset to the parset
        directory in the jobs dir, with the filename supplied in parset_name.
        Return the full path to the created file.
        """
        parset_dir = os.path.join(
            self.config.get("layout", "job_directory"), "parsets")
        # create the parset dir if it does not exist
        create_directory(parset_dir)

        # write the content to a new parset file
        parset_path = os.path.join(parset_dir,
                         "{0}.parset".format(parset_name))
        parset.writeFile(parset_path)

        # display a debug log entry with path and message
        self.logger.debug("Wrote parset to path <{0}> : {1}".format(
                               parset_path, message))

        return parset_path
Example #20
    def go(self):
        self.logger.info("Starting CEP-II datamapper run")
        super(cep2_datamapper, self).go()

        if self.inputs['parset']:
            datamap = self._read_files()
        elif self.inputs['observation_dir']:
            datamap = self._search_files()
        else:
            self.logger.error("Either observation_dir or parset must be given")
            return 1

        self.logger.info("Found %i datasets to process." % len(datamap))
        self.logger.debug("datamap = %s" % datamap)

        # Write datamap-file
        create_directory(os.path.dirname(self.inputs['mapfile']))
        store_data_map(self.inputs['mapfile'], datamap)
        self.logger.debug("Wrote mapfile: %s" % self.inputs['mapfile'])

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Example #21
    def test__create_parmdb_for_timeslices_except(self):
        """
        Test the erroneous functioning of _create_parmdb_for_timeslices:
        with a missing executable it should return None and create no
        directories.
        """
        path_to_create = os.path.join(self.test_path, "testParmdb")
        parmdb_ms_output = os.path.join(path_to_create, "parmdbs")
        create_directory(parmdb_ms_output)
        parmdb_executable = "/opt/cep/LofIm/daily/lofar/bin/missingExcecutable"

        #Create a number of paths to supply to the create function
        ms_paths = []
        for idx in range(5):
            ms_paths.append(os.path.join(parmdb_ms_output, str(idx)))

        self.assertTrue(
            self.imager_create_dbs._create_parmdb_for_timeslices(
                parmdb_executable, ms_paths, ".parmdb") == None,
            self.imager_create_dbs.logger.last())
        final_ms_path = os.path.join(parmdb_ms_output,
                                     "time_slice_8.dppp.ms.parmdb")
        self.assertFalse(os.path.exists(final_ms_path))
Example #22
    def test__create_parmdb_for_timeslices_except(self):
        """
        Test the erroneous functioning of _create_parmdb_for_timeslices:
        with a missing executable it should return None and create no
        directories.
        """
        path_to_create = os.path.join(self.test_path, "testParmdb")
        parmdb_ms_output = os.path.join(path_to_create, "parmdbs")
        create_directory(parmdb_ms_output)
        parmdb_executable = "/opt/cep/LofIm/daily/lofar/bin/missingExcecutable"

        #Create a number of paths to supply to the create function
        ms_paths = []
        for idx in range(5):
            ms_paths.append(os.path.join(parmdb_ms_output, str(idx)))


        self.assertTrue(
            self.imager_create_dbs._create_parmdb_for_timeslices(parmdb_executable,
                 ms_paths, ".parmdb") == None,
            self.imager_create_dbs.logger.last())
        final_ms_path = os.path.join(parmdb_ms_output, "time_slice_8.dppp.ms.parmdb")
        self.assertFalse(os.path.exists(final_ms_path))
Example #23
    def go(self):
        self.logger.info("Starting CEP-II datamapper run")
        super(cep2_datamapper, self).go()

        if self.inputs['parset']:
            datamap = self._read_files()
        elif self.inputs['observation_dir']:
            datamap = self._search_files()
        else:
            self.logger.error("Either observation_dir or parset must be given")
            return 1

        self.logger.info("Found %i datasets to process." % len(datamap))
        self.logger.debug("datamap = %s" % datamap)

        # Write datamap-file
        create_directory(os.path.dirname(self.inputs['mapfile']))
        store_data_map(self.inputs['mapfile'], datamap)
        self.logger.debug("Wrote mapfile: %s" % self.inputs['mapfile'])

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Example #24
def run_rficonsole(rficonsole_executable, temp_dir,
                    input_ms_list, logger, resourceMonitor):
    """
    _run_rficonsole runs the rficonsole application on the supplied
    timeslices in time_slices.
    This functionality has also been implemented in BBS. 
    """

    # loop all measurement sets
    rfi_temp_dir = os.path.join(temp_dir, "rfi_temp_dir")
    create_directory(rfi_temp_dir)

    try:
        rfi_console_proc_group = SubProcessGroup(logger=logger,
                                       usageStats=resourceMonitor)
        for time_slice in input_ms_list:
            # Each rfi console needs own working space for temp files
            temp_slice_path = os.path.join(rfi_temp_dir,
                os.path.basename(time_slice))
            create_directory(temp_slice_path)

            # construct copy command
            logger.info(time_slice)
            command = [rficonsole_executable, "-indirect-read",
                        time_slice]
            logger.info("executing rficonsole command: {0}".format(
                                                            " ".join(command)))

            # Add the command to the process group
            rfi_console_proc_group.run(command, cwd = temp_slice_path)
                

        # wait for all to finish
        if rfi_console_proc_group.wait_for_finish() != None:
            raise Exception("an rfi_console_proc_group run failed!")

    finally:
        shutil.rmtree(rfi_temp_dir)
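
A minimal sketch of the SubProcessGroup pattern used above: run several commands in parallel, each in its own scratch directory, and fail if any of them failed. The import path for SubProcessGroup is an assumption (it has moved between LOFAR releases), and the rficonsole command and MS paths are placeholders.

import logging
import os
import shutil
import tempfile

from lofarpipe.support.subprocessgroup import SubProcessGroup  # assumed path
from lofarpipe.support.utilities import create_directory

logger = logging.getLogger("rfi_example")
scratch = tempfile.mkdtemp()
try:
    group = SubProcessGroup(logger=logger)
    for idx, ms in enumerate(["/data/slice0.MS", "/data/slice1.MS"]):  # placeholders
        workdir = os.path.join(scratch, str(idx))
        create_directory(workdir)
        group.run(["rficonsole", "-indirect-read", ms], cwd=workdir)
    # wait_for_finish() returns None when every run succeeded
    if group.wait_for_finish() is not None:
        raise Exception("one of the rficonsole runs failed")
finally:
    shutil.rmtree(scratch)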
Example #25
    def go(self):
        self.logger.info("Starting storagemapper run")
        super(storagemapper, self).go()

        #                          We read the storage node name out of the path
        #     and append the local filename (ie, on the storage node) to the map
        # ----------------------------------------------------------------------
        data = defaultdict(list)
        for filename in self.inputs['args']:
            host = filename.split(os.path.sep)[3]
            data[host].append(filename.split(host)[-1])

        #                                 Dump the generated mapping to a parset
        # ----------------------------------------------------------------------
        parset = Parset()
        for host, filenames in data.iteritems():
            parset.addStringVector(host, filenames)

        create_directory(os.path.dirname(self.inputs['mapfile']))
        parset.writeFile(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']

        return 0
Example #26
    def go(self):
        self.logger.info("Starting storagemapper run")
        super(storagemapper, self).go()

        #                          We read the storage node name out of the path
        #     and append the local filename (ie, on the storage node) to the map
        # ----------------------------------------------------------------------
        data = defaultdict(list)
        for filename in self.inputs['args']:
            host = filename.split(os.path.sep)[3]
            data[host].append(filename.split(host)[-1])

        #                                 Dump the generated mapping to a parset
        # ----------------------------------------------------------------------
        parset = Parset()
        for host, filenames in data.iteritems():
            parset.addStringVector(host, filenames)

        create_directory(os.path.dirname(self.inputs['mapfile']))
        parset.writeFile(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']

        return 0
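
A worked illustration of the host extraction above, using a made-up CEP-style storage path; element 3 of the split path is taken to be the storage node name.

import os

filename = "/net/sub5/lse013/data4/L2010_12345/SB001_uv.MS"  # hypothetical
host = filename.split(os.path.sep)[3]   # -> "lse013" (the storage node)
local = filename.split(host)[-1]        # -> "/data4/L2010_12345/SB001_uv.MS"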
Example #27
    def _copy_instrument_files(self, mapfile_dir):
        # For the copy recipe a target mapfile is needed
        # create target map based on the node and the dir in the input data map
        # with the filename based on the
        copier_map_path = os.path.join(mapfile_dir, "copier")
        create_directory(copier_map_path)
        target_map = self._create_target_map_for_instruments()

        #Write the two needed maps to file
        source_path = os.path.join(copier_map_path, "source_instruments.map")
        self.input_data['instrument'].save(source_path)

        target_path = os.path.join(copier_map_path, "target_instruments.map")
        target_map.save(target_path)

        copied_files_path = os.path.join(copier_map_path, "copied_instruments.map")

        # The output of the copier is a mapfile containing the host and path
        # of all successfully copied files.
        copied_instruments_mapfile = self.run_task("copier",
                      mapfile_source=source_path,
                      mapfile_target=target_path,
                      mapfiles_dir=copier_map_path,
                      mapfile=copied_files_path,
                      allow_move=False)['mapfile_target_copied']

        # Some copy actions might fail; the skip fields in the other map-files
        # need to be updated to reflect this.
        self.input_data['instrument'] = DataMap.load(copied_instruments_mapfile)
        for data, inst, outp in zip(
            self.input_data['data'],
            self.input_data['instrument'],
            self.output_data['data']
        ):
            data.skip = inst.skip = outp.skip = (
                data.skip or inst.skip or outp.skip
            )
Example #28
    def _copy_instrument_files(self, mapfile_dir):
        # For the copy recipe a target mapfile is needed
        # create target map based on the node and the dir in the input data map
        # with the filename based on the
        copier_map_path = os.path.join(mapfile_dir, "copier")
        create_directory(copier_map_path)
        target_map = self._create_target_map_for_instruments()

        #Write the two needed maps to file
        source_path = os.path.join(copier_map_path, "source_instruments.map")
        self.input_data['instrument'].save(source_path)

        target_path = os.path.join(copier_map_path, "target_instruments.map")
        target_map.save(target_path)

        copied_files_path = os.path.join(copier_map_path, "copied_instruments.map")

        # The output of the copier is a mapfile containing the host and path
        # of all successfully copied files.
        copied_instruments_mapfile = self.run_task("copier",
                      mapfile_source=source_path,
                      mapfile_target=target_path,
                      mapfiles_dir=copier_map_path,
                      mapfile=copied_files_path,
                      allow_move=False)['mapfile_target_copied']

        # Some copy actions might fail; the skip fields in the other map-files
        # need to be updated to reflect this.
        self.input_data['instrument'] = DataMap.load(copied_instruments_mapfile)
        for data, inst, outp in zip(
            self.input_data['data'],
            self.input_data['instrument'],
            self.output_data['data']
        ):
            data.skip = inst.skip = outp.skip = (
                data.skip or inst.skip or outp.skip
            )
Example #29
def run_rficonsole(rficonsole_executable, temp_dir, input_ms_list, logger,
                   resourceMonitor):
    """
    _run_rficonsole runs the rficonsole application on the supplied
    timeslices in time_slices.
    This functionality has also been implemented in BBS. 
    """

    # loop all measurement sets
    rfi_temp_dir = os.path.join(temp_dir, "rfi_temp_dir")
    create_directory(rfi_temp_dir)

    try:
        rfi_console_proc_group = SubProcessGroup(logger=logger,
                                                 usageStats=resourceMonitor)
        for time_slice in input_ms_list:
            # Each rfi console needs own working space for temp files
            temp_slice_path = os.path.join(rfi_temp_dir,
                                           os.path.basename(time_slice))
            create_directory(temp_slice_path)

            # construct copy command
            logger.info(time_slice)
            command = [rficonsole_executable, "-indirect-read", time_slice]
            logger.info("executing rficonsole command: {0}".format(
                " ".join(command)))

            # Add the command to the process group
            rfi_console_proc_group.run(command, cwd=temp_slice_path)

        # wait for all to finish
        if rfi_console_proc_group.wait_for_finish() != None:
            raise Exception("an rfi_console_proc_group run failed!")

    finally:
        shutil.rmtree(rfi_temp_dir)
Example #30
    def _run_rficonsole(self, rficonsole_executable, time_slice_dir,
                        time_slices):
        """
        _run_rficonsole runs the rficonsole application on the supplied
        timeslices in time_slices.

        """

        # loop all measurement sets
        rfi_temp_dir = os.path.join(time_slice_dir, "rfi_temp_dir")
        create_directory(rfi_temp_dir)

        try:
            rfi_console_proc_group = SubProcessGroup(self.logger)
            for time_slice in time_slices:
                # Each rfi console needs own working space for temp files
                temp_slice_path = os.path.join(rfi_temp_dir,
                    os.path.basename(time_slice))
                create_directory(temp_slice_path)

                # construct copy command
                self.logger.info(time_slice)
                command = [rficonsole_executable, "-indirect-read",
                            time_slice]
                self.logger.info("executing rficonsole command: {0}".format(
                            " ".join(command)))

                # Add the command to the process group
                rfi_console_proc_group.run(command, cwd = temp_slice_path)

            # wait for all to finish
            if rfi_console_proc_group.wait_for_finish() != None:
                raise Exception("an rfi_console_proc_group run failed!")

        finally:
            shutil.rmtree(rfi_temp_dir)
Example #31
    def _prepare_steps(self, **kwargs):
        """
        Prepare for running the NDPPP program. This means, for one thing,
        patching the parsetfile with the correct input/output MS names,
        start/end times if available, etc. If a demixing step must be performed,
        some extra work needs to be done.
        
        Returns: patch dictionary that must be applied to the parset.
        """
        self.logger.debug(
            "Time interval: %s %s" % (kwargs['start_time'], kwargs['end_time'])
        )
        # Create output directory for output MS.
        create_directory(os.path.dirname(kwargs['tmpfile']))

        patch_dictionary = {
            'msin': kwargs['infile'],
            'msout': kwargs['tmpfile'],
            'uselogger': 'True'
        }
        if kwargs['start_time']:
            patch_dictionary['msin.starttime'] = kwargs['start_time']
        if kwargs['end_time']:
            patch_dictionary['msin.endtime'] = kwargs['end_time']

        # If we need to do a demixing step, we have to do some extra work.
        # We have to read the parsetfile to check this.
        parset = parameterset(kwargs['parsetfile'])
        for step in parset.getStringVector('steps'):
            if parset.getString(step + '.type', '').startswith('demix'):
                patch_dictionary.update(
                    self._prepare_demix_step(step, **kwargs)
                )

        # Return the patch dictionary that must be applied to the parset.
        return patch_dictionary
Example #32
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles.
        py_parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")

        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        input_data_mapfile = os.path.join(mapfile_dir, "input_data.mapfile")
        self.input_data.save(input_data_mapfile)
        output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile")
        self.output_data.save(output_data_mapfile)

        if len(self.input_data) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug("Processing: %s" %
            ', '.join(str(f) for f in self.input_data))

        # *********************************************************************
        # 2. Create VDS-file and databases. The latter are needed when doing
        #    demixing within DPPP.
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task("vdsmaker", input_data_mapfile)['gvds']

        # Read metadata (start, end times, pointing direction) from GVDS.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create a parameter database that will be used by the NDPPP demixing
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task(
                "setupparmdb", input_data_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'),
                suffix='.dppp.parmdb'
            )['mapfile']
                
        # Create a source database from a user-supplied sky model
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share', 'pipeline', 'skymodels', skymodel + '.skymodel'
            )
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", input_data_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'),
                skymodel=skymodel,
                suffix='.dppp.sourcedb',
                type='blob'
            )['mapfile']


        # *********************************************************************
        # 3. Average and flag data, using NDPPP.

        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            self.run_task("ndppp",
                (input_data_mapfile, output_data_mapfile),
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=
                    py_parset.getStringVector('PreProcessing.demix_always'),
                demix_if_needed=
                    py_parset.getStringVector('PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile
            )

        # *********************************************************************
        # 6. Create feedback file for further processing by the LOFAR framework
        # (MAC)
        # Create a parset-file containing the metadata for MAC/SAS
        with duration(self, "get_metadata"):
            self.run_task("get_metadata", output_data_mapfile,
                parset_file=self.parset_feedback_file,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')),
                product_type="Correlated")

        return 0
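
A standalone sketch of the skymodel path resolution used above; the default lofarroot value is a placeholder and IOError stands in for the pipeline's PipelineException.

import os


def resolve_skymodel(skymodel, lofarroot="/opt/lofar"):  # lofarroot is illustrative
    """Bare names are looked up under share/pipeline/skymodels; absolute
    paths are used as given. Raise if the file does not exist."""
    if not os.path.isabs(skymodel):
        skymodel = os.path.join(lofarroot, "share", "pipeline", "skymodels",
                                skymodel + ".skymodel")
    if not os.path.isfile(skymodel):
        raise IOError("Skymodel %s does not exist" % skymodel)
    return skymodel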
Example #33
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Get input from parset, validate and cast to pipeline 'data types'
        #    Only perform work on existing files
        #    Create needed directories
        # Create a parameter-subset containing only python-control stuff.
        py_parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")

        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        input_correlated_mapfile = os.path.join(mapfile_dir,
                                                "input_correlated.mapfile")
        output_correlated_mapfile = os.path.join(mapfile_dir,
                                                 "output_correlated.mapfile")
        output_instrument_mapfile = os.path.join(mapfile_dir,
                                                 "output_instrument.mapfile")
        self.input_data['correlated'].save(input_correlated_mapfile)
        self.output_data['correlated'].save(output_correlated_mapfile)
        self.output_data['instrument'].save(output_instrument_mapfile)

        if len(self.input_data['correlated']) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug(
            "Processing: %s" %
            ', '.join(str(f) for f in self.input_data['correlated']))

        # *********************************************************************
        # 2. Create database needed for performing work:
        #    Vds, describing data on the nodes
        #    sourcedb, for the skymodel (A-team)
        #    parmdb for outputting solutions
        # Produce a GVDS file describing the data on the compute nodes.
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task("vdsmaker",
                                      input_correlated_mapfile)['gvds']

        # Read metadata (start, end times, pointing direction) from GVDS.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create an empty parmdb for DPPP
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task("setupparmdb",
                                           input_correlated_mapfile,
                                           mapfile=os.path.join(
                                               mapfile_dir,
                                               'dppp.parmdb.mapfile'),
                                           suffix='.dppp.parmdb')['mapfile']

        # Create a sourcedb to be used by the demixing phase of DPPP
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share',
                'pipeline',
                'skymodels',
                skymodel + '.skymodel')
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task("setupsourcedb",
                                             input_correlated_mapfile,
                                             mapfile=os.path.join(
                                                 mapfile_dir,
                                                 'dppp.sourcedb.mapfile'),
                                             skymodel=skymodel,
                                             suffix='.dppp.sourcedb',
                                             type='blob')['mapfile']

        # *********************************************************************
        # 3. Run NDPPP to demix the A-Team sources
        #    TODOW: Do flagging?
        # Create a parameter-subset for DPPP and write it to file.
        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            dppp_mapfile = self.run_task(
                "ndppp",
                input_correlated_mapfile,
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=py_parset.getStringVector(
                    'PreProcessing.demix_always'),
                demix_if_needed=py_parset.getStringVector(
                    'PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile)['mapfile']

        # *********************************************************************
        # 4. Run BBS with a model of the calibrator
        #    Create a parmdb for calibration solutions
        #    Create sourcedb with known calibration solutions
        #    Run bbs with both
        # Create an empty parmdb for BBS
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task("setupparmdb",
                                           dppp_mapfile,
                                           mapfile=os.path.join(
                                               mapfile_dir,
                                               'bbs.parmdb.mapfile'),
                                           suffix='.bbs.parmdb')['mapfile']

        # Create a sourcedb based on sourcedb's input argument "skymodel"
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb",
                input_correlated_mapfile,
                skymodel=os.path.join(
                    self.config.get('DEFAULT', 'lofarroot'), 'share',
                    'pipeline', 'skymodels',
                    py_parset.getString('Calibration.SkyModel') + '.skymodel'),
                mapfile=os.path.join(mapfile_dir, 'bbs.sourcedb.mapfile'),
                suffix='.bbs.sourcedb')['mapfile']

        # Create a parameter-subset for BBS and write it to file.
        bbs_parset = os.path.join(parset_dir, "BBS.parset")
        py_parset.makeSubset('BBS.').writeFile(bbs_parset)

        # Run BBS to calibrate the calibrator source(s).
        with duration(self, "bbs_reducer"):
            bbs_mapfile = self.run_task(
                "bbs_reducer",
                dppp_mapfile,
                parset=bbs_parset,
                instrument_mapfile=parmdb_mapfile,
                sky_mapfile=sourcedb_mapfile)['data_mapfile']

        # *********************************************************************
        # 5. Perform gain outlier correction on the found calibration solutions
        #    Swapping outliers in the gains with the median
        # Export the calibration solutions using gainoutliercorrection and store
        # the results in the files specified in the instrument mapfile.
        export_instrument_model = py_parset.getBool(
            'Calibration.exportCalibrationParameters', False)

        with duration(self, "gainoutliercorrection"):
            self.run_task("gainoutliercorrection",
                          (parmdb_mapfile, output_instrument_mapfile),
                          sigma=1.0,
                          export_instrument_model=export_instrument_model
                          )  # TODO: Parset parameter

        # *********************************************************************
        # 6. Copy corrected MS's to their final output destination.
        with duration(self, "copier"):
            self.run_task("copier",
                          mapfile_source=bbs_mapfile,
                          mapfile_target=output_correlated_mapfile,
                          mapfiles_dir=mapfile_dir,
                          mapfile=output_correlated_mapfile)

        # *********************************************************************
        # 7. Create feedback file for further processing by the LOFAR framework
        #    a. get metadata of the measurement sets
        #    b. get metadata of the instrument models
        #    c. join the two files and write the final feedback file
        correlated_metadata = os.path.join(parset_dir, "correlated.metadata")
        instrument_metadata = os.path.join(parset_dir, "instrument.metadata")
        with duration(self, "get_metadata"):
            self.run_task(
                "get_metadata",
                output_correlated_mapfile,
                parset_file=correlated_metadata,
                parset_prefix=(self.parset.getString('prefix') +
                               self.parset.fullModuleName('DataProducts')),
                product_type="Correlated")

        with duration(self, "get_metadata"):
            self.run_task(
                "get_metadata",
                output_instrument_mapfile,
                parset_file=instrument_metadata,
                parset_prefix=(self.parset.getString('prefix') +
                               self.parset.fullModuleName('DataProducts')),
                product_type="InstrumentModel")

        parset = parameterset(correlated_metadata)
        parset.adoptFile(instrument_metadata)
        parset.writeFile(self.parset_feedback_file)

        return 0
Example #34
# Extract runtime, working, results directories from input parset
runtime_directory = input_parset.getString("ObsSW.Observation.ObservationControl.PythonControl.runtimeDirectory")
working_directory = input_parset.getString("ObsSW.Observation.ObservationControl.PythonControl.workingDirectory")
results_directory = input_parset.getString("ObsSW.Observation.ObservationControl.PythonControl.resultDirectory")

# Set up configuration for later processing stages
config = ConfigParser({
    "job_name": tree_id,
    "cwd": os.getcwd(),
    "start_time": start_time,
})
config.read(config_file)
config.set('DEFAULT', 'runtime_directory', runtime_directory)
config.set('DEFAULT', 'default_working_directory', working_directory)

# Extract input file list from parset
to_process = input_parset.getStringVector('ObsSW.Observation.DataProducts.measurementSets')

# Read config file to establish location of parset directory to use
parset_directory = config.get("layout", "parset_directory")
create_directory(parset_directory)

# For each task (currently only ndppp), extract and write parset
tasks = ConfigParser(config.defaults())
tasks.read(string_to_list(config.get("DEFAULT", "task_files")))
ndppp_parset_location = tasks.get("ndppp", "parset")
input_parset.makeSubset("ObsSW.Observation.ObservationControl.PythonControl.DPPP.").writeFile(ndppp_parset_location)

# Run pipeline & wait for result
subprocess.check_call(['python', pipeline_definition, '-j', tree_id, '-d', '--config', config_file, '--runtime-directory', runtime_directory, '--default-working-directory', working_directory, '--start-time', start_time])
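
A minimal, self-contained illustration of the ConfigParser defaults pattern used in the script above (Python 2 ConfigParser, matching the code); the config text and values are made up.

from ConfigParser import SafeConfigParser
from StringIO import StringIO

cfg_text = """
[layout]
job_directory = %(runtime_directory)s/jobs/%(job_name)s
"""

config = SafeConfigParser({"job_name": "L2012_12345"})   # made-up job name
config.readfp(StringIO(cfg_text))
config.set("DEFAULT", "runtime_directory", "/data/scratch/pipeline")
print(config.get("layout", "job_directory"))
# -> /data/scratch/pipeline/jobs/L2012_12345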
Example #35
    def run(self, input_image, bdsm_parameter_run1_path,
            bdsm_parameter_run2x_path, catalog_output_path, image_output_path,
            sourcedb_target_path, environment, working_directory,
            create_sourcdb_exec):
        """
        :param input_image: image to look for sources in
        :param bdsm_parameter_run1_path: parset with bdsm parameters for the 
               first run
        :param bdsm_parameter_run2x_path: second ron bdsm parameters
        :param catalog_output_path: Path to full list of sources found
        :param image_output_path: Path to fits image with all sources 
               substracted
        :param sourcedb_target_path: Path to store the sourcedb created from 
            containing all the found sources
        :param environment: environment for runwithlog4cplus
        :param working_directory: Working dir
        :param create_sourcdb_exec: Path to create sourcedb executable 
        
        :rtype: self.outputs['source_db'] sourcedb_target_path
        
        """

        #******************************************************************
        # 0. Create the directories used in this recipe
        create_directory(working_directory)

        import lofar.bdsm as bdsm  # @UnresolvedImport
        self.logger.info("Starting imager_source_finding")
        self.environment.update(environment)
        # The default frequency is None (read from the image); save it for
        # later cycles, because the pybdsm output image loses the frequency
        # of the source image.
        frequency = None
        # Outputs of the for loop: the number of iterations and whether any
        # source was found.
        n_itter_sourcefind = None
        sources_found = False
        max_sourcefind_itter = 5  # TODO: maximum iteration count is a magic value
        for idx in range(max_sourcefind_itter):
            # ******************************************************************
            # 1. Select the correct input image
            # The first iteration uses the input image; the second and later
            # iterations use the output of the previous iteration. Iterations
            # after the first use a separate parameter set.
            if idx == 0:
                input_image_local = input_image # input_image_cropped
                image_output_path_local = image_output_path + "_0"
                bdsm_parameter_local = parameterset(bdsm_parameter_run1_path)
            else:
                input_image_local = image_output_path + "_{0}".format(
                                                                str(idx - 1))
                image_output_path_local = image_output_path + "_{0}".format(
                                                                    str(idx))
                bdsm_parameter_local = parameterset(bdsm_parameter_run2x_path)

            # *****************************************************************
            # 2. Parse the parameters and convert to Python types where
            # possible; this is needed for pybdsm
            bdsm_parameters = {}
            for key in bdsm_parameter_local.keys():
                parameter_value = bdsm_parameter_local.getStringVector(key)[0]
                try:
                    parameter_value = eval(parameter_value)
                except:
                    pass  # keep the value as a string
                bdsm_parameters[key] = parameter_value
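            # A more defensive alternative to the eval() above (a sketch, not
            # the pipeline's own behaviour) would be ast.literal_eval, which
            # only accepts Python literals:
            #     import ast
            #     try:
            #         parameter_value = ast.literal_eval(parameter_value)
            #     except (ValueError, SyntaxError):
            #         pass  # keep the raw string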

            # pybdsm needs its filename here, to derive the log location
            bdsm_parameters["filename"] = input_image_local


            # *****************************************************************
            # 3. Start pybdsm
            self.logger.debug(
                "Starting sourcefinder bdsm on {0} using parameters:".format(
                                                        input_image_local))
            self.logger.debug(repr(bdsm_parameters))
            img = bdsm.process_image(bdsm_parameters, frequency = frequency)

            # Always export the catalog 
            img.write_catalog(
                outfile = catalog_output_path + "_{0}".format(str(idx)),
                catalog_type = 'gaul', clobber = True,
                format = "bbs", force_output = True)

            # If no more matching of sources with gaussians is possible
            # (nsrc == 0), break the loop.
            if img.nsrc == 0:
                n_itter_sourcefind = idx
                break

            # We have found at least a single source!
            self.logger.debug("Number of sources found: {0}".format(
                                                                img.nsrc))
            # *****************************************************************
            # 4. Export the image

            self.logger.debug("Wrote list of sources to file at: {0}".format(
                                                            catalog_output_path))
            img.export_image(outfile = image_output_path_local,
                                 img_type = 'gaus_resid', clobber = True,
                                 img_format = "fits")
            self.logger.debug("Wrote fits image with substracted sources"
                                  " at: {0})".format(image_output_path_local))

            # Save the frequency from the image header of the original input
            # file; this information is not written by pybdsm to the exported
            # image.
            frequency = img.frequency


        # If n_itter_sourcefind is not set, the maximum number of iterations
        # was performed
        if n_itter_sourcefind is None:
            n_itter_sourcefind = max_sourcefind_itter

        # ********************************************************************
        # 5. The produced catalogs now need to be combined into a single list.
        # Call with the number of loops and the path to the files; only
        # combine if we found sources.
        self.logger.debug(
                "Writing source list to file: {0}".format(catalog_output_path))
        self._combine_source_lists(n_itter_sourcefind, catalog_output_path)

        # *********************************************************************
        # 6. Convert sourcelist to sourcedb
        self._create_source_db(catalog_output_path, sourcedb_target_path,
            working_directory, create_sourcdb_exec, False)
        # Assign the outputs
        self.outputs["catalog_output_path"] = catalog_output_path
        self.outputs["source_db"] = sourcedb_target_path
        return 0
Example #36
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        self.logger.info("Starting imager pipeline")

        # Define scratch directory to be used by the compute nodes.
        self.scratch_directory = os.path.join(
            self.inputs['working_directory'], self.inputs['job_name'])
        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # Remove prepended parset identifiers; leave only the PythonControl part
        full_parset = self.parset
        self.parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')  # remove this

        # Create directories to store communication and data files

        job_dir = self.config.get("layout", "job_directory")

        self.parset_dir = os.path.join(job_dir, "parsets")
        create_directory(self.parset_dir)
        self.mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(self.mapfile_dir)

        # *********************************************************************
        # (INPUT) Get the input from external sources and create pipeline types
        # Input measurement sets
        input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
        self.input_data.save(input_mapfile)
        # storedata_map(input_mapfile, self.input_data)
        self.logger.debug(
            "Wrote input UV-data mapfile: {0}".format(input_mapfile))

        # Provides location for the scratch directory and concat.ms location
        target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
        self.target_data.save(target_mapfile)
        self.logger.debug(
            "Wrote target mapfile: {0}".format(target_mapfile))

        # images datafiles
        output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile")
        self.output_data.save(output_image_mapfile)
        self.logger.debug(
            "Wrote output sky-image mapfile: {0}".format(output_image_mapfile))

        # TODO: This is a backdoor option to manually add beam tables when
        # these are missing from the provided MS. There is NO use case for
        # users of the pipeline.
        add_beam_tables = self.parset.getBool(
                                    "Imaging.addBeamTables", False)

        # ******************************************************************
        # (1) prepare phase: copy and collect the ms
        concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
            processed_ms_dir = self._prepare_phase(input_mapfile,
                                    target_mapfile, add_beam_tables)

        number_of_major_cycles = self.parset.getInt(
                                    "Imaging.number_of_major_cycles")

        # We start with an empty source_list map. It should contain n_output
        # entries all set to empty strings
        source_list_map_path = os.path.join(self.mapfile_dir,
                                        "initial_sourcelist.mapfile")
        source_list_map = DataMap.load(target_mapfile) # copy the output map
        for item in source_list_map:
            item.file = ""             # set all to empty string
        source_list_map.save(source_list_map_path)

        for idx_loop in range(number_of_major_cycles):
            # *****************************************************************
            # (2) Create dbs and sky model
            parmdbs_path, sourcedb_map_path = self._create_dbs(
                        concat_ms_map_path, timeslice_map_path,
                        source_list_map_path = source_list_map_path,
                        skip_create_dbs = False)

            # *****************************************************************
            # (3)  bbs_imager recipe.
            bbs_output = self._bbs(timeslice_map_path, parmdbs_path,
                        sourcedb_map_path, skip = False)

            # TODO: Extra recipe: concat timeslices using pyrap.concatms
            # (see prepare)

            # *****************************************************************
            # (4) Get awimager parameters from the prepare_parset and inputs
            aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path,
                        idx_loop, sourcedb_map_path,
                        skip = False)

            # *****************************************************************
            # (5) Source finding
            sourcelist_map, found_sourcedb_path = self._source_finding(
                    aw_image_mapfile, idx_loop, skip = False)
            # should the output be a sourcedb? instead of a sourcelist

        # TODO: minbaseline should be a parset value, as maxbaseline is.
        minbaseline = 0

        # *********************************************************************
        # (6) Finalize:
        placed_data_image_map = self._finalize(aw_image_mapfile,
            processed_ms_dir, ms_per_image_map_path, sourcelist_map,
            minbaseline, maxbaseline, target_mapfile, output_image_mapfile,
            found_sourcedb_path)

        # *********************************************************************
        # (7) Get metadata
        # Create a parset with information that is available at the top level
        toplevel_meta_data = parameterset()
        toplevel_meta_data.replace("numberOfMajorCycles", 
                                           str(number_of_major_cycles))

        # Create a parset containing the metadata for MAC/SAS at nodes
        metadata_file = "%s_feedback_SkyImage" % (self.parset_file,)
        self.run_task("get_metadata", placed_data_image_map,
            parset_prefix = (
                full_parset.getString('prefix') +
                full_parset.fullModuleName('DataProducts')
            ),
            product_type = "SkyImage",
            metadata_file = metadata_file)

        self.send_feedback_processing(toplevel_meta_data)
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
Example #37
0
def archive(parset_file, directions, dir_output, full=False, archive_subdata=False,
    archive_state=False, archive_misc=True, archive_images=True,
    archive_inst=False, archive_pipestate=False, archive_models=False,
    archive_plots=True, clobber=False):
    """
    Archives data from a Factor run

    Parameters
    ----------
    parset_file : str
        Filename of Factor parset for run of interest
    directions : list of str
        List of direction names for which to archive the calibrated data
    dir_output : str
        Name of output directory where archived data will be stored
    full : bool, optional
        Make a full archive suitable for resuming?
    archive_subdata : bool, optional
        Archive the subtracted data MS files?
    archive_state : bool, optional
        Archive the state files?
    archive_misc : bool, optional
        Archive miscellaneous files?
    archive_images : bool, optional
        Archive the facet and field images?
    archive_inst : bool, optional
        Archive the instrument tables?
    archive_pipestate : bool, optional
        Archive the pipeline state files?
    archive_models : bool, optional
        Archive the sky models?
    archive_plots : bool, optional
        Archive the selfcal plots?
    clobber : bool, optional
        Clobber existing files in output directory?

    """
    # Read in parset and get directions
    all_directions, parset = load_directions(parset_file)
    if len(all_directions) == 0:
        log.error('No directions found in Factor working directory. Please check '
            'the parset')
        sys.exit(1)
    all_names = [d.name for d in all_directions]
    if len(directions) != 0:
        if directions[0].lower() == 'all':
            directions = all_names
        for dname in directions:
            if dname not in all_names:
                log.warning('Direction {} not found. Skipping it...'.format(dname))

    if full:
        # Archive everything
        archive_subdata = True
        archive_state = True
        archive_misc = True
        archive_images = True
        archive_inst = True
        archive_pipestate = True
        archive_models = True
        archive_plots = True

    working_dir = all_directions[0].working_dir
    if archive_subdata:
        log.info('Archiving subtracted data files...')
        chunks_dir = os.path.join(working_dir, 'chunks')
        copy(chunks_dir, dir_output, clobber)

    if archive_state:
        log.info('Archiving state files...')
        state_dir = os.path.join(working_dir, 'state')
        copy(state_dir, dir_output, clobber)

    if archive_misc:
        log.info('Archiving miscellaneous files...')
        misc_dir = os.path.join(dir_output, 'misc')
        if 'directions_file' in parset['direction_specific']:
            directions_file = parset['direction_specific']['directions_file']
        else:
            directions_file = os.path.join(working_dir, 'factor_directions.txt')
        file_list = [directions_file,
                     parset_file,
                     '{}/factor.log'.format(working_dir),
                     '{}/regions/facets_ds9.reg'.format(working_dir),
                     '{}/regions/calimages_ds9.reg'.format(working_dir)]
        for f in file_list:
            copy(f, misc_dir, clobber)

    if archive_images:
        log.info('Archiving field images...')
        file_list = glob.glob(os.path.join(working_dir, 'results',
            'field*', 'field', '*.fits'))
        if len(file_list) == 0:
            log.warning('No field images found.')
        else:
            for i, f in enumerate(file_list):
                log.info('  Archiving image {0} of {1}...'.format(i+1, len(file_list)))
                subdir = f.split('/')[-3]
                image_dir = os.path.join(dir_output, 'images', 'field', subdir)
                copy(f, image_dir, clobber)

    if archive_models:
        log.info('Archiving direction-independent sky models...')
        band_state_files = glob.glob(os.path.join(working_dir, 'state',
            'Band_*'))
        file_list = []
        band_list = []
        for bf in band_state_files:
            try:
                with open(bf, 'r') as f:
                    b = pickle.load(f)
                    file_list.append(b['skymodel_dirindep'])
                    band_list.append(b['name'])
            except:
                pass
        for i, f in enumerate(file_list):
            skymodel_dir = os.path.join(dir_output, 'chunks', band_list[i])
            log.info('  Copying sky model file {0} of {1}...'.format(i+1, len(file_list)))
            copy(f, skymodel_dir, clobber)

    for d in all_directions:
        if archive_images:
            log.info('Archiving facet images for direction {}...'.format(d.name))
            file_list = glob.glob(os.path.join(working_dir, 'results',
                'facetimage*', d.name, '*full2*image.fits'))
            if len(file_list) == 0:
                log.warning('No facet images found for direction {}.'.format(d.name))
            else:
                for i, f in enumerate(file_list):
                    subdir = f.split('/')[-3]
                    image_dir = os.path.join(dir_output, 'images', d.name, subdir)
                    copy(f, image_dir, clobber)

        if archive_models:
            log.info('Archiving sky models for direction {}...'.format(d.name))
            if hasattr(d, 'sourcedb_new_facet_sources'):
                file_list = check_existing_files(d.sourcedb_new_facet_sources)
            else:
                file_list = []
            if len(file_list) == 0:
                log.warning('No sky models found for direction {}.'.format(d.name))
            else:
                sourcedb_dir = os.path.join(dir_output, 'sky_models', d.name)
                for i, f in enumerate(file_list):
                    log.info('  Copying sky model file {0} of {1}...'.format(i+1, len(file_list)))
                    copy(f, sourcedb_dir, clobber)

        if archive_inst:
            log.info('Archiving instrument tables for direction {}...'.format(d.name))
            if hasattr(d, 'preapply_h5parm_mapfile'):
                file_list = check_existing_files(d.preapply_h5parm_mapfile)
            else:
                file_list = []
            if len(file_list) == 0:
                log.warning('No h5parms found for direction {}.'.format(d.name))
            else:
                inst_table_dir = os.path.join(dir_output, 'h5parms', d.name)
                for i, f in enumerate(file_list):
                    log.info('  Copying h5parm file {0} of {1}...'.format(i+1, len(file_list)))
                    copy(f, inst_table_dir, clobber)

        if archive_plots:
            log.info('Archiving plots for direction {}...'.format(d.name))
            file_list = glob.glob(os.path.join(working_dir, 'results', 'facetselfcal', d.name, '*png'))
            if len(file_list) == 0:
                file_list = glob.glob(os.path.join(working_dir, 'results', 'facetpeel', d.name, '*png'))
            if len(file_list) == 0:
                file_list = glob.glob(os.path.join(working_dir, 'results', 'outlierpeel', d.name, '*png'))
            if len(file_list) == 0:
                log.warning('No plots found for direction {}.'.format(d.name))
            else:
                plot_dir = os.path.join(dir_output, 'plots', d.name)
                for i, f in enumerate(file_list):
                    copy(f, plot_dir, clobber)

        if archive_pipestate:
            log.info('Archiving pipeline state files for direction {}...'.format(d.name))
            file_list = glob.glob(os.path.join(working_dir, 'results', 'facetselfcal', d.name, 'mapfiles', '*'))
            op_name = 'facetselfcal'
            if len(file_list) == 0:
                file_list = glob.glob(os.path.join(working_dir, 'results', 'facetpeel', d.name, 'mapfiles', '*'))
                op_name = 'facetpeel'
            if len(file_list) == 0:
                file_list = glob.glob(os.path.join(working_dir, 'results', 'outlierpeel', d.name, 'mapfiles', '*'))
                op_name = 'outlierpeel'
            if len(file_list) == 0:
                log.warning('No pipeline state files found for direction {}.'.format(d.name))
            else:
                mapfile_dir = os.path.join(dir_output, 'pipeline_state', d.name, op_name)
                for f in file_list:
                    copy(f, mapfile_dir, clobber)

            # Also archive "final_image" mapfile for facetimage (needed for mosaicking)
            file_list = glob.glob(os.path.join(working_dir, 'results',
                'facetimage*', d.name, 'mapfiles', 'final_image.mapfile'))
            if len(file_list) > 0:
                for i, f in enumerate(file_list):
                    subdir = f.split('/')[-4]
                    mapfile_dir = os.path.join(dir_output, 'pipeline_state', d.name, subdir)
                    copy(f, mapfile_dir, clobber)

        if d.name in directions:
            log.info('Archiving calibrated data for direction {}...'.format(d.name))
            if hasattr(d, 'image_data_mapfile'):
                file_list = check_existing_files(d.image_data_mapfile)
            else:
                file_list = []
            if len(file_list) == 0:
                log.warning('No data found for direction {}. Skipping it...'.format(d.name))
                continue

            # Make the output directory
            cal_data_dir = os.path.join(dir_output, 'calibrated_data', d.name)
            create_directory(cal_data_dir)

            # Sort the files into time chunks
            data_mapfile = d.name+'_calibrated_data.mapfile'
            sort_times_into_freqGroups.main(file_list, filename=data_mapfile,
                mapfile_dir=cal_data_dir)

            # Read the new, grouped file lists
            datamap = DataMap.load(os.path.join(cal_data_dir, data_mapfile))

            # Run DPPP to concatenate each time chunk in frequency
            nchunks = len(datamap)
            for i, item in enumerate(datamap):
                log.info('  Concatenating files for time chunk {0} of {1}...'.format(i+1, nchunks))
                outfile = os.path.join(cal_data_dir, '{0}_calibrated_data_chunk{1}.ms'.format(d.name, i))
                if os.path.exists(outfile):
                    if not clobber:
                        log.warning('  Output file for this chunk exists and clobber = False. Skipping it...')
                        continue
                    else:
                        os.system('rm -rf {0}'.format(outfile))
                dppp_concat(item.file, outfile)

            # Clean up
            os.system('rm -f {0}'.format(os.path.join(cal_data_dir, data_mapfile)))
            os.system('rm -f {0}_groups'.format(os.path.join(cal_data_dir, data_mapfile)))

    log.info('Archiving complete.')
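
# A minimal usage sketch (assuming this module's helpers such as
# load_directions, copy and dppp_concat are importable and a Factor run
# exists under the parset's working directory; paths are illustrative only):
#
#     archive('factor.parset', ['all'], '/data/factor_archive',
#             full=False, archive_images=True, clobber=False)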
Example #38
0
    def go(self):
        self.logger.info("Starting BBS run")
        super(new_bbs, self).go()

        #                Check for relevant input parameters in the parset-file
        # ---------------------------------------------------------------------
        self.logger.debug("Reading parset from %s" % self.inputs['parset'])
        self.parset = parameterset(self.inputs['parset'])

        self._set_input('db_host', 'BBDB.Host')
        self._set_input('db_user', 'BBDB.User')
        self._set_input('db_name', 'BBDB.Name')
        self._set_input('db_key', 'BBDB.Key')

        #self.logger.debug("self.inputs = %s" % self.inputs)

        #                                         Clean the blackboard database
        # ---------------------------------------------------------------------
        self.logger.info(
            "Cleaning BBS database for key '%s'" % (self.inputs['db_key'])
        )
        command = ["psql",
                   "-h", self.inputs['db_host'],
                   "-U", self.inputs['db_user'],
                   "-d", self.inputs['db_name'],
                   "-c", "DELETE FROM blackboard.session WHERE key='%s';" %
                         self.inputs['db_key']
                  ]
        self.logger.debug(command)
        if subprocess.call(command) != 0:
            self.logger.warning(
                "Failed to clean BBS database for key '%s'" %
                self.inputs['db_key']
            )

        #                  Create a bbs_map describing the file mapping on disk
        # ---------------------------------------------------------------------
        if not self._make_bbs_map():
            return 1

        # Produce a GVDS file, describing the data that must be processed.
        gvds_file = self.run_task(
            "vdsmaker",
            self.inputs['data_mapfile'],
            gvds=self.inputs['gvds']
        )['gvds']

        #      Construct a parset for BBS GlobalControl by patching the GVDS
        #           file and database information into the supplied template
        # ------------------------------------------------------------------
        self.logger.debug("Building parset for BBS control")
        # Create a location for parsets
        job_directory = self.config.get(
                            "layout", "job_directory")
        parset_directory = os.path.join(job_directory, "parsets")
        create_directory(parset_directory)

        # Patch the parset, copy the result to the target location and remove
        # the temp file
        try:
            bbs_parset = utilities.patch_parset(
                self.parset,
                {
                    'Observation': gvds_file,
                    'BBDB.Key': self.inputs['db_key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
                    #'BBDB.Port': self.inputs['db_name'],
                }
            )
            bbs_parset_path = os.path.join(parset_directory,
                                           "bbs_control.parset")
            shutil.copyfile(bbs_parset, bbs_parset_path)
            self.logger.debug("BBS control parset is %s" % (bbs_parset_path,))

        finally:
            # Always remove the file in the tempdir
            os.remove(bbs_parset)

        try:
            #        When one of our processes fails, we set the killswitch.
            #      Everything else will then come crashing down, rather than
            #                                         hanging about forever.
            # --------------------------------------------------------------
            self.killswitch = threading.Event()
            self.killswitch.clear()
            signal.signal(signal.SIGTERM, self.killswitch.set)
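            # (Illustrative only, not part of this recipe's code: long-running
            # workers typically poll such a flag, e.g.
            #     while not self.killswitch.isSet():
            #         do_some_work()
            # so that a single set() call makes every thread wind down.)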

            #                           GlobalControl runs in its own thread
            # --------------------------------------------------------------
            run_flag = threading.Event()
            run_flag.clear()
            bbs_control = threading.Thread(
                target=self._run_bbs_control,
                args=(bbs_parset_path, run_flag)
            )
            bbs_control.start()
            run_flag.wait()    # Wait for control to start before proceeding

            #      We run BBS KernelControl on each compute node by directly
            #                             invoking the node script using SSH
            #      Note that we use a job_server to send out job details and
            #           collect logging information, so we define a bunch of
            #    ComputeJobs. However, we need more control than the generic
            #     ComputeJob.dispatch method supplies, so we'll control them
            #                                          with our own threads.
            # --------------------------------------------------------------
            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            jobpool = {}
            bbs_kernels = []
            with job_server(self.logger, jobpool, self.error) as(jobhost,
                                                                   jobport):
                self.logger.debug("Job server at %s:%d" % (jobhost, jobport))
                for job_id, details in enumerate(self.bbs_map):
                    host, files = details
                    jobpool[job_id] = ComputeJob(
                        host, command,
                        arguments=[
                            self.inputs['kernel_exec'],
                            files,
                            self.inputs['db_key'],
                            self.inputs['db_name'],
                            self.inputs['db_user'],
                            self.inputs['db_host']
                        ]
                    )
                    bbs_kernels.append(
                        threading.Thread(
                            target=self._run_bbs_kernel,
                            args=(host, command, job_id, jobhost, str(jobport))
                        )
                    )
                self.logger.info("Starting %d threads" % len(bbs_kernels))
                for thread in bbs_kernels:
                    thread.start()
                self.logger.debug("Waiting for all kernels to complete")
                for thread in bbs_kernels:
                    thread.join()

            #         When GlobalControl finishes, our work here is done
            # ----------------------------------------------------------
            self.logger.info("Waiting for GlobalControl thread")
            bbs_control.join()
        finally:
            # The temporary patched parset was already removed above; only
            # clean up here if that removal did not happen.
            if os.path.exists(bbs_parset):
                os.unlink(bbs_parset)

        if self.killswitch.isSet():
            #  If killswitch is set, then one of our processes failed so
            #                                   the whole run is invalid
            # ----------------------------------------------------------
            return 1

        self.outputs['mapfile'] = self.inputs['data_mapfile']
        return 0
Example #39
0
config = ConfigParser({
    "job_name": tree_id,
    "cwd": os.getcwd(),
    "start_time": start_time,
})
config.read(config_file)
config.set('DEFAULT', 'runtime_directory', runtime_directory)
config.set('DEFAULT', 'default_working_directory', working_directory)

# Extract input file list from parset
to_process = input_parset.getStringVector(
    'ObsSW.Observation.DataProducts.measurementSets')

# Read config file to establish location of parset directory to use
parset_directory = config.get("layout", "parset_directory")
create_directory(parset_directory)

# For each task (currently only ndppp), extract and write parset
tasks = ConfigParser(config.defaults())
tasks.read(string_to_list(config.get("DEFAULT", "task_files")))
ndppp_parset_location = tasks.get("ndppp", "parset")
input_parset.makeSubset(
    "ObsSW.Observation.ObservationControl.PythonControl.DPPP.").writeFile(
        ndppp_parset_location)
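
# (For illustration: makeSubset strips the given prefix, so a key such as
#  "ObsSW.Observation.ObservationControl.PythonControl.DPPP.msin" is written
#  to the ndppp parset as just "msin".)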

# Run pipeline & wait for result
subprocess.check_call([
    'python', pipeline_definition, '-j', tree_id, '-d', '--config',
    config_file, '--runtime-directory', runtime_directory,
    '--default-working-directory', working_directory, '--start-time',
    start_time
])
Example #40
0
    def _filter_bad_stations(self, time_slice_path_list, asciistat_executable,
                             statplot_executable, msselect_executable):
        """
        A collection of steps for finding and filtering bad stations:

        1. First a number of statistics with regard to the spread of the data
           is collected using the asciistat_executable.
        2. Secondly these statistics are consumed by the statplot_executable,
           which produces a set of bad stations.
        3. In the final step the bad stations are removed from the dataset
           using msselect.

        REF: http://www.lofar.org/wiki/lib/exe/fetch.php?media=msss:pandeymartinez-week9-v1p2.pdf
        """
        # run asciistat to collect statistics about the ms
        self.logger.info("Filtering bad stations")
        self.logger.debug("Collecting statistical properties of input data")
        asciistat_output = []
        asciistat_proc_group = SubProcessGroup(self.logger)
        for ms in time_slice_path_list:
            output_dir = ms + ".filter_temp"
            create_directory(output_dir)
            asciistat_output.append((ms, output_dir))

            cmd_string = "{0} -i {1} -r {2}".format(asciistat_executable, ms,
                                                    output_dir)
            asciistat_proc_group.run(cmd_string)

        if asciistat_proc_group.wait_for_finish() != None:
            raise Exception("an ASCIIStats run failed!")

        # Determine the stations to remove
        self.logger.debug("Select bad stations depending on collected stats")
        asciiplot_output = []
        asciiplot_proc_group = SubProcessGroup(self.logger)
        for (ms, output_dir) in asciistat_output:
            ms_stats = os.path.join(output_dir,
                                    os.path.split(ms)[1] + ".stats")

            cmd_string = "{0} -i {1} -o {2}".format(statplot_executable,
                                                    ms_stats, ms_stats)
            asciiplot_output.append((ms, ms_stats))
            asciiplot_proc_group.run(cmd_string)

        if asciiplot_proc_group.wait_for_finish() != None:
            raise Exception("an ASCIIplot run failed!")

        # remove the bad stations
        self.logger.debug("Use ms select to remove bad stations")
        msselect_output = {}
        msselect_proc_group = SubProcessGroup(self.logger)
        for ms, ms_stats in asciiplot_output:
            # parse the .tab file containing the bad stations
            station_to_filter = []
            file_pointer = open(ms_stats + ".tab")

            for line in file_pointer.readlines():
                # skip header lines
                if line[0] == "#":
                    continue

                entries = line.split()
                # if the current station is bad (the last entry on the line)
                if entries[-1] == "True":
                    # add the name of the station
                    station_to_filter.append(entries[1])

            # If this measurement set contains no stations to filter, do not
            # run msselect and provide the original ms as output
            if len(station_to_filter) == 0:
                msselect_output[ms] = ms
                continue

            ms_output_path = ms + ".filtered"
            msselect_output[ms] = ms_output_path

            # use msselect to remove the stations from the ms
            msselect_baseline = "!{0}".format(",".join(station_to_filter))
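            # (Illustrative: for bad stations ['CS001HBA0', 'RS106HBA'] this
            # yields baseline=!CS001HBA0,RS106HBA, i.e. keep only baselines
            # that do not involve the flagged stations.)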
            cmd_string = "{0} in={1} out={2} baseline={3} deep={4}".format(
                msselect_executable, ms, ms_output_path, msselect_baseline,
                "False")
            msselect_proc_group.run(cmd_string)

        if msselect_proc_group.wait_for_finish() != None:
            raise Exception("an MSselect run failed!")

        filtered_list_of_ms = []
        # The order of the inputs needs to be preserved when producing the
        # filtered output!
        for input_ms in time_slice_path_list:
            filtered_list_of_ms.append(msselect_output[input_ms])

        return filtered_list_of_ms
Example #41
0
    def run(self, environment, parset, working_dir, processed_ms_dir,
            ndppp_executable, output_measurement_set, time_slices_per_image,
            subbands_per_group, raw_ms_mapfile, asciistat_executable,
            statplot_executable, msselect_executable, rficonsole_executable,
            add_beam_tables):
        """
        Entry point for the node recipe
        """
        self.environment.update(environment)
        with log_time(self.logger):
            input_map = DataMap.load(raw_ms_mapfile)

            #******************************************************************
            # I. Create the directories used in this recipe
            create_directory(processed_ms_dir)

            # Time slice directory: assure an empty directory, because stale
            # data is problematic for dppp
            time_slice_dir = os.path.join(working_dir, _time_slice_dir_name)
            create_directory(time_slice_dir)
            for root, dirs, files in os.walk(time_slice_dir):
                for file_to_remove in files:
                    os.unlink(os.path.join(root, file_to_remove))
                for dir_to_remove in dirs:
                    shutil.rmtree(os.path.join(root, dir_to_remove))
            self.logger.debug("Created directory: {0}".format(time_slice_dir))
            self.logger.debug("and assured it is empty")

            #******************************************************************
            # 1. Copy the input files
            copied_ms_map = self._copy_input_files(processed_ms_dir, input_map)

            #******************************************************************
            # 2. run dppp: collect frequencies into larger group
            time_slices_path_list = \
                self._run_dppp(working_dir, time_slice_dir,
                    time_slices_per_image, copied_ms_map, subbands_per_group,
                    processed_ms_dir, parset, ndppp_executable)

            # If no timeslices were created, bail out with exit status 1
            if len(time_slices_path_list) == 0:
                self.logger.error("No timeslices were created.")
                self.logger.error("Exiting with error state 1")
                return 1

            self.logger.debug(
                "Produced time slices: {0}".format(time_slices_path_list))
            #***********************************************************
            # 3. run rficonsole: flag data points which are corrupted
            self._run_rficonsole(rficonsole_executable, time_slice_dir,
                                 time_slices_path_list)

            #******************************************************************
            # 4. Add imaging columns to each timeslice
            # ndppp_executable fails if not present
            for time_slice_path in time_slices_path_list:
                pt.addImagingColumns(time_slice_path)
                self.logger.debug(
                    "Added imaging columns to time_slice: {0}".format(
                        time_slice_path))

            #*****************************************************************
            # 5. Filter bad stations
            time_slice_filtered_path_list = self._filter_bad_stations(
                time_slices_path_list, asciistat_executable,
                statplot_executable, msselect_executable)

            #*****************************************************************
            # Add beam tables to the measurement sets
            if add_beam_tables:
                self.add_beam_tables(time_slice_filtered_path_list)

            #******************************************************************
            # 6. Perform the (virtual) concatenation of the timeslices
            self._concat_timeslices(time_slice_filtered_path_list,
                                    output_measurement_set)

            #******************************************************************
            # return
            self.outputs["time_slices"] = \
                time_slices_path_list

        return 0
Example #42
0
    def run(self, executable, environment, parset, working_directory,
            output_image, concatenated_measurement_set, sourcedb_path,
             mask_patch_size, autogenerate_parameters, specify_fov, fov):
        """
        :param executable: Path to the awimager executable
        :param environment: environment for catch_segfaults (executable runner)
        :param parset: parameters for the awimager
        :param working_directory: directory to place temporary files in
        :param output_image: location and filename to store the output images;
          the multiple images are appended with type extensions
        :param concatenated_measurement_set: Input measurement set
        :param sourcedb_path: Path to the sourcedb used to create the image
          mask
        :param mask_patch_size: Scaling of the patch around the source in the
          mask
        :param autogenerate_parameters: Turns on the autogeneration of:
           cellsize, npix, wprojplanes, wmax, fov
        :param fov: if autogenerate_parameters is false, calculate the image
           parameters (cellsize, npix, wprojplanes, wmax) relative to this
           fov
        :rtype: self.outputs["image"] The path to the output image
        """
        self.logger.info("Start imager_awimager node run:")
        log4_cplus_name = "imager_awimager"
        self.environment.update(environment)

        with log_time(self.logger):
            # Read the parameters as specified in the parset
            parset_object = get_parset(parset)

            # *************************************************************
            # 1. Calculate awimager parameters that depend on measurement set
            # and the parset

            cell_size, npix, w_max, w_proj_planes = \
                    self._get_imaging_parameters(
                            concatenated_measurement_set,
                            parset,
                            autogenerate_parameters,
                            specify_fov,
                            fov)

            self.logger.info("Using autogenerated parameters; ")
            self.logger.info(
                 "Calculated parameters: cell_size: {0}, npix: {1}".format(
                     cell_size, npix))

            self.logger.info("w_max: {0}, w_proj_planes: {1} ".format(
                        w_max, w_proj_planes))

            # ****************************************************************
            # 2. Get the target image location from the mapfile for the parset.
            # Create the target dir if it does not exist
            image_path_head = os.path.dirname(output_image)
            create_directory(image_path_head)
            self.logger.debug("Created directory to place awimager output"
                              " files: {0}".format(image_path_head))

            # ****************************************************************
            # 3. Create the mask
            mask_file_path = self._create_mask(npix, cell_size, output_image,
                         concatenated_measurement_set, executable,
                         working_directory, log4_cplus_name, sourcedb_path,
                          mask_patch_size, image_path_head)

            # *****************************************************************
            # 4. Update the parset with calculated parameters, and output image
            patch_dictionary = {'uselogger': 'True',  # enables log4cplus log
                               'ms': str(concatenated_measurement_set),
                               'cellsize': str(cell_size),
                               'npix': str(npix),
                               'wmax': str(w_max),
                               'wprojplanes': str(w_proj_planes),
                               'image': str(output_image),
                               'maxsupport': str(npix),
                               # 'mask':str(mask_file_path),  #TODO REINTRODUCE
                               # MASK, excluded to speed up in this debug stage
                               }

            # save the parset at the target dir for the image
            calculated_parset_path = os.path.join(image_path_head,
                                                       "parset.par")

            try:
                temp_parset_filename = patch_parset(parset, patch_dictionary)
                # Copy tmp file to the final location
                shutil.copyfile(temp_parset_filename, calculated_parset_path)
                self.logger.debug("Wrote parset for awimager run: {0}".format(
                                                    calculated_parset_path))
            finally:
                # remove temp file
                os.remove(temp_parset_filename)

            # *****************************************************************
            # 5. Run the awimager with the updated parameterset
            cmd = [executable, calculated_parset_path]
            try:
                with CatchLog4CPlus(working_directory,
                        self.logger.name + "." +
                        os.path.basename(log4_cplus_name),
                        os.path.basename(executable)
                ) as logger:
                    catch_segfaults(cmd, working_directory, self.environment,
                                            logger)

            # Thrown by catch_segfault
            except CalledProcessError, exception:
                self.logger.error(str(exception))
                return 1

            except Exception, exception:
                self.logger.error(str(exception))
                return 1
Example #43
0
    def run(self, awimager_output, ms_per_image, sourcelist, target,
            output_image, minbaseline, maxbaseline, processed_ms_dir,
            fillrootimagegroup_exec, environment, sourcedb, concat_ms, 
            correlated_output_location, msselect_executable):
        """
        :param awimager_output: Path to the casa image produced by awimager
        :param ms_per_image: The X (90) measurement sets scheduled to
            create the image
        :param sourcelist: list of sources found in the image
        :param target: <unused>
        :param minbaseline: Minimum baseline used for the image
        :param maxbaseline: largest/maximum baseline used for the image
        :param processed_ms_dir: The X (90) measurement sets actually used to
            create the image
        :param fillrootimagegroup_exec: Executable used to add image data to
            the hdf5 image

        :rtype: self.outputs['hdf5'] set to "success" to signal node success
        :rtype: self.outputs['image'] path to the produced hdf5 image
        """
        self.environment.update(environment)
        with log_time(self.logger):
            ms_per_image_map = DataMap.load(ms_per_image)

            # *****************************************************************
            # 1. add image info                      
            # Get all the files in the processed measurement dir
            file_list = os.listdir(processed_ms_dir)

            processed_ms_paths = []
            ms_per_image_map.iterator = DataMap.SkipIterator
            for item in ms_per_image_map:
                ms_path = item.file
                processed_ms_paths.append(ms_path)

            # add the information to the image
            try:
                self.logger.debug("Start addImage Info")
                addimg.addImagingInfo(awimager_output, processed_ms_paths,
                    sourcedb, minbaseline, maxbaseline)

            except Exception, error:
                self.logger.warn("addImagingInfo Threw Exception:")
                self.logger.warn(error)
                # Catch raising of already done error: allows for rerunning
                # of the recipe
                if "addImagingInfo already done" in str(error):
                    self.logger.warn("addImagingInfo already done, continue")
                    pass
                else:
                    raise Exception(error) 
                # The majority of the tables are updated correctly

            # ***************************************************************
            # 2. convert to hdf5 image format
            output_directory = None
            pim_image = pim.image(awimager_output)
            try:
                self.logger.info("Saving image in HDF5 Format to: {0}" .format(
                                output_image))
                # Create the output directory
                output_directory = os.path.dirname(output_image)
                create_directory(output_directory)
                # save the image
                pim_image.saveas(output_image, hdf5=True)

            except Exception, error:
                self.logger.error(
                    "Exception raised inside pyrap.images: {0}".format(
                                                                str(error)))
                raise error
Example #44
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles.
        py_parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")

        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        input_data_mapfile = os.path.join(mapfile_dir, "input_data.mapfile")
        self.input_data.save(input_data_mapfile)
        output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile")
        self.output_data.save(output_data_mapfile)

        if len(self.input_data) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug("Processing: %s" %
            ', '.join(str(f) for f in self.input_data))

        # *********************************************************************
        # 2. Create VDS-file and databases. The latter are needed when doing
        #    demixing within DPPP.
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task("vdsmaker", input_data_mapfile)['gvds']

        # Read metadata (start, end times, pointing direction) from GVDS.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create a parameter database that will be used by the NDPPP demixing
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task(
                "setupparmdb", input_data_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'),
                suffix='.dppp.parmdb'
            )['mapfile']
                
        # Create a source database from a user-supplied sky model
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share', 'pipeline', 'skymodels', skymodel + '.skymodel'
            )
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
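        # (For illustration: a bare name such as "mymodel" resolves to
        # <lofarroot>/share/pipeline/skymodels/mymodel.skymodel, while an
        # absolute path is used as-is.)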
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", input_data_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'),
                skymodel=skymodel,
                suffix='.dppp.sourcedb',
                type='blob'
            )['mapfile']


        # *********************************************************************
        # 3. Average and flag data, using NDPPP.

        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            output_data_mapfile = self.run_task("ndppp",
                (input_data_mapfile, output_data_mapfile),
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=
                    py_parset.getStringVector('PreProcessing.demix_always'),
                demix_if_needed=
                    py_parset.getStringVector('PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile
            )['mapfile']

        # *********************************************************************
        # 6. Create feedback file for further processing by the LOFAR framework
        # Create a parset containing the metadata
        metadata_file = "%s_feedback_Correlated" % (self.parset_file,)
        with duration(self, "get_metadata"):
            self.run_task("get_metadata", output_data_mapfile,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')),
                product_type="Correlated",
                metadata_file=metadata_file)

        self.send_feedback_processing(parameterset({'feedback_version': feedback_version}))
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
Example #45
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles
        # Create a parameter-subset containing only python-control stuff.
        py_parset = self.parset.makeSubset(
            'ObsSW.Observation.ObservationControl.PythonControl.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # Create some needed directories
        job_dir = self.config.get("layout", "job_directory")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(mapfile_dir)
        parset_dir = os.path.join(job_dir, "parsets")
        create_directory(parset_dir)

        # *********************************************************************
        # 2. Copy the instrument files to the correct nodes
        # The instrument files are currently located on the wrong nodes.
        # Copy them to the correct nodes and assign the instrument table the
        # now correct data.

        # Copy the instrument files to the correct nodes: failures might
        # happen; update both the instrument map and the data map to contain
        # only successes!
        self._copy_instrument_files(mapfile_dir)

        # Write input- and output data map-files.
        data_mapfile = os.path.join(mapfile_dir, "data.mapfile")
        self.input_data['data'].save(data_mapfile)
        copied_instrument_mapfile = os.path.join(mapfile_dir,
                                                 "copied_instrument.mapfile")
        self.input_data['instrument'].save(copied_instrument_mapfile)
        self.logger.debug("Wrote input data mapfile: %s" % data_mapfile)

        # Save copied files to a new mapfile
        corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile")
        self.output_data['data'].save(corrected_mapfile)
        self.logger.debug("Wrote output corrected data mapfile: %s" %
                          corrected_mapfile)

        # Validate number of copied files, abort on zero files copied
        if len(self.input_data['data']) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug("Processing: %s" %
                          ', '.join(str(f) for f in self.input_data['data']))

        # *********************************************************************
        # 3. Create database needed for performing work:
        #    - GVDS, describing data on the compute nodes
        #    - SourceDB, for skymodel (A-team)
        #    - ParmDB for outputting solutions
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task("vdsmaker", data_mapfile)['gvds']

        # Read metadata (e.g., start- and end-time) from the GVDS file.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create an empty parmdb for DPPP
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task("setupparmdb",
                                           data_mapfile)['mapfile']

        # Create a sourcedb to be used by the demixing phase of DPPP
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share',
                'pipeline',
                'skymodels',
                skymodel + '.skymodel')
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task("setupsourcedb",
                                             data_mapfile,
                                             skymodel=skymodel,
                                             suffix='.dppp.sourcedb',
                                             type='blob')['mapfile']

        # *********************************************************************
        # 4. Run NDPPP to demix the A-Team sources
        # Create a parameter-subset for DPPP and write it to file.
        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            dppp_mapfile = self.run_task(
                "ndppp",
                data_mapfile,
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=py_parset.getStringVector(
                    'PreProcessing.demix_always'),
                demix_if_needed=py_parset.getStringVector(
                    'PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.mapfile'))['mapfile']

        # ********************************************************************
        # 5. Run BBS using the instrument file from the target observation
        # Create an empty sourcedb for BBS
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task("setupsourcedb",
                                             data_mapfile)['mapfile']

        # Create a parameter-subset for BBS and write it to file.
        bbs_parset = os.path.join(parset_dir, "BBS.parset")
        py_parset.makeSubset('BBS.').writeFile(bbs_parset)

        # Run BBS to calibrate the target source(s).
        with duration(self, "bbs_reducer"):
            bbs_mapfile = self.run_task(
                "bbs_reducer",
                dppp_mapfile,
                parset=bbs_parset,
                instrument_mapfile=copied_instrument_mapfile,
                sky_mapfile=sourcedb_mapfile)['data_mapfile']

        # *********************************************************************
        # 6. Copy the MS's to their final output destination.
        # When the copier recipe has run, the map-file named in
        # corrected_mapfile will contain an updated map of output files.
        with duration(self, "copier"):
            self.run_task("copier",
                          mapfile_source=bbs_mapfile,
                          mapfile_target=corrected_mapfile,
                          mapfiles_dir=mapfile_dir,
                          mapfile=corrected_mapfile)

        # *********************************************************************
        # 7. Create feedback for further processing by the LOFAR framework
        metadata_file = "%s_feedback_Correlated" % (self.parset_file, )
        with duration(self, "get_metadata"):
            self.run_task(
                "get_metadata",
                corrected_mapfile,
                parset_prefix=(self.parset.getString('prefix') +
                               self.parset.fullModuleName('DataProducts')),
                product_type="Correlated",
                metadata_file=metadata_file)

        self.send_feedback_processing(parameterset())
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
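The duration context manager wraps every run_task call in these recipes but its definition is not shown here. Below is a minimal sketch of what such a timing helper could look like, assuming it does nothing more than log the wall-clock time of the wrapped task through the pipeline's logger (the real helper may also record the timing for feedback):

import time
from contextlib import contextmanager

@contextmanager
def duration(pipeline, task_name):
    # Hypothetical stand-in for the duration helper used above: time the
    # wrapped block and log how long it took.
    start = time.time()
    try:
        yield
    finally:
        elapsed = time.time() - start
        pipeline.logger.info(
            "Task %s took %.2f seconds" % (task_name, elapsed))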
Example #46
0
    def __init__(self, parset, bands, direction, name=None):
        self.parset = parset.copy()
        self.bands = bands
        self.name = name.lower()
        self.parset['op_name'] = name
        self.direction = direction
        _logging.set_level(self.parset['logging_level'])
        self.log = logging.getLogger('factor:{0}'.format(self.name))
        self.hostname = socket.gethostname()
        self.node_list = parset['cluster_specific']['node_list']

        # Working directory
        self.factor_working_dir = parset['dir_working']

        # Pipeline runtime and working dirs (pipeline makes subdir here with
        # name of direction)
        self.pipeline_runtime_dir = os.path.join(self.factor_working_dir, 'results',
            self.name)
        self.pipeline_working_dir = self.pipeline_runtime_dir
        create_directory(self.pipeline_runtime_dir)

        # Directory that holds the mapfiles
        self.pipeline_mapfile_dir = os.path.join(self.pipeline_runtime_dir,
            self.direction.name, 'mapfiles')
        create_directory(self.pipeline_mapfile_dir)

        # Directory in the runtime dir that holds parset and config files (also
        # the results of the pipeline)
        self.pipeline_parset_dir = os.path.join(self.pipeline_runtime_dir,
            self.direction.name)
        create_directory(self.pipeline_parset_dir)

        # Local scratch directory and corresponding node recipes
        if self.parset['cluster_specific']['dir_local'] is None:
            # Not specified, specify scratch directory in normal work directory
            self.local_scratch_dir = os.path.join(self.pipeline_working_dir,
                self.direction.name)
            self.dppp_nodescript = 'executable_args'
        elif self.parset['cluster_specific']['clusterdesc_file'].lower() == 'pbs':
            # PBS = "system in Hamburg" -> use special NDPPP nodescript
            self.local_scratch_dir = self.parset['cluster_specific']['dir_local']
            self.dppp_nodescript = 'dppp_scratch'
        else:
            # other: use the given scratch directory and the standard nodescript
            self.local_scratch_dir = self.parset['cluster_specific']['dir_local']
            self.dppp_nodescript = 'executable_args'

        # Directory that holds logs in a convenient place
        self.log_dir = os.path.join(self.factor_working_dir, 'logs', self.name)
        create_directory(self.log_dir)

        # Log name used for logs in log_dir
        self.logbasename = os.path.join(self.log_dir, self.direction.name)

        # Below are paths for scripts, etc. in the Factor install directory
        self.factor_root_dir = os.path.split(DIR)[0]
        self.factor_pipeline_dir = os.path.join(self.factor_root_dir, 'pipeline')
        self.factor_script_dir = os.path.join(self.factor_root_dir, 'scripts')
        self.factor_parset_dir = os.path.join(self.factor_root_dir, 'parsets')
        self.factor_skymodel_dir = os.path.join(self.factor_root_dir, 'skymodels')

        # Below are the templates and output paths for the pipeline parset and
        # config files. These may need to be re-defined in the subclasses
        # if the operation has non-standard template names
        self.pipeline_parset_template = '{0}_pipeline.parset'.format(self.name)
        self.pipeline_parset_file = os.path.join(self.pipeline_parset_dir,
            'pipeline.parset')
        self.pipeline_config_template = 'pipeline.cfg'
        self.pipeline_config_file = os.path.join(self.pipeline_parset_dir,
            'pipeline.cfg')

        # Define parameters needed for the pipeline config.
        self.cfg_dict = {'lofarroot': parset['lofarroot'],
                         'pythonpath': parset['lofarpythonpath'],
                         'factorroot': self.factor_root_dir,
                         'pipeline_working_dir': self.pipeline_working_dir,
                         'pipeline_runtime_dir': self.pipeline_runtime_dir,
                         'casa_executable': parset['casa_executable'],
                         'wsclean_executable': parset['wsclean_executable'],
                         'image2fits_executable': parset['image2fits_executable'],
                         'dppp_nodescript': self.dppp_nodescript}

        # Define global parameters needed by all pipeline parsets. Other,
        # pipeline-specific, parameters should be defined in the subclasses by
        # updating this dictionary
        self.parms_dict = {'parset_dir': self.factor_parset_dir,
                           'skymodel_dir': self.factor_skymodel_dir,
                           'mapfile_dir': self.pipeline_mapfile_dir,
                           'pipeline_dir': self.factor_pipeline_dir,
                           'script_dir': self.factor_script_dir,
                           'local_dir': self.local_scratch_dir,
                           'hosts': self.node_list}

        # Update the dictionaries with the attributes of the operation's
        # direction object. Any attributes set in the direction object that are
        # also in the parms_dict will be set to those of the direction object
        # (e.g., 'max_cpus_per_node', which is set in the direction object by
        # factor.cluster.divide_nodes() will override the value set above)
        self.cfg_dict.update(self.direction.__dict__)
        self.parms_dict.update(self.direction.__dict__)

        # Add cluster-related info
        if self.parset['cluster_specific']['clustertype'] == 'local':
            self.cfg_dict['remote'] = '[remote]\n'\
                + 'method = local\n'\
                + 'max_per_node = {0}\n'.format(self.cfg_dict['max_cpus_per_node'])
        elif self.parset['cluster_specific']['clustertype'] == 'juropa_slurm':
            self.cfg_dict['remote'] = '[remote]\n'\
                + 'method = slurm_srun\n'\
                + 'max_per_node = {0}\n'.format(self.cfg_dict['max_cpus_per_node'])
        elif self.parset['cluster_specific']['clustertype'] == 'pbs':
            self.cfg_dict['remote'] = ''
        else:
            self.log.error('Could not determine the nature of your cluster!')
            sys.exit(1)

        # an absolute path in ...['clusterdesc'] will overrule the "working_dir"
        self.cfg_dict['clusterdesc'] = os.path.join(self.factor_working_dir,
            self.parset['cluster_specific']['clusterdesc'])
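create_directory is used throughout these examples without being shown. A minimal sketch, assuming it only ensures the directory (and any missing parents) exists and tolerates the directory already being there:

import errno
import os

def create_directory(path):
    # Hypothetical equivalent of the helper used above.
    try:
        os.makedirs(path)
    except OSError as err:
        # Ignore "already exists"; re-raise anything else.
        if err.errno != errno.EEXIST:
            raise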
Example #47
0
    def go(self):
        """
        Contains functionality of the vdsmaker
        """
        super(vdsmaker, self).go()
        # **********************************************************************
        # 1. Load data from disk create output files
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        data = DataMap.load(args[0])

        # Skip items in `data` that have 'skip' set to True
        data.iterator = DataMap.SkipIterator

        # Create output vds names
        vdsnames = [
            os.path.join(self.inputs['directory'],
                         os.path.basename(item.file) + '.vds') for item in data
        ]

        # *********************************************************************
        # 2. Call vdsmaker
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for inp, vdsfile in zip(data, vdsnames):
            jobs.append(
                ComputeJob(inp.host,
                           command,
                           arguments=[
                               inp.file,
                               self.config.get('cluster', 'clusterdesc'),
                               vdsfile, self.inputs['makevds']
                           ]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        vdsnames = [
            vds for vds, job in zip(vdsnames, jobs)
            if job.results['returncode'] == 0
        ]
        if not vdsnames:
            self.logger.error("All makevds processes failed. Bailing out!")
            return 1

        # *********************************************************************
        # 3. Combine VDS files to produce GDS
        failure = False
        self.logger.info("Combining VDS files")
        executable = self.inputs['combinevds']
        gvds_out = self.inputs['gvds']
        # Create the gvds directory for output files, needed for combine
        create_directory(os.path.dirname(gvds_out))

        try:
            command = [executable, gvds_out] + vdsnames
            combineproc = subprocess.Popen(command,
                                           close_fds=True,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE)
            sout, serr = combineproc.communicate()
            log_process_output(executable, sout, serr, self.logger)
            if combineproc.returncode != 0:
                raise subprocess.CalledProcessError(combineproc.returncode,
                                                    command)
            self.outputs['gvds'] = gvds_out
            self.logger.info("Wrote combined VDS file: %s" % gvds_out)
        except subprocess.CalledProcessError as cpe:
            self.logger.exception("combinevds failed with status %d: %s" %
                                  (cpe.returncode, serr))
            failure = True
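The zip/filter step above keeps only the VDS files whose node job returned 0. The same idea as a standalone helper, with hypothetical job objects that expose a results['returncode'] entry as in the recipe:

def successful_outputs(outputs, jobs):
    # Keep an output file only when its corresponding compute job succeeded.
    return [out for out, job in zip(outputs, jobs)
            if job.results['returncode'] == 0]

# usage, mirroring the recipe above:
# vdsnames = successful_outputs(vdsnames, jobs)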
Example #48
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        self.logger.info("Starting imager pipeline")

        # Define scratch directory to be used by the compute nodes.
        self.scratch_directory = os.path.join(
            self.inputs['working_directory'], self.inputs['job_name'])
        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # remove the prepended parset identifiers, leave only PythonControl
        full_parset = self.parset
        self.parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')  # remove this

        # Create directories to store communication and data files

        job_dir = self.config.get("layout", "job_directory")

        self.parset_dir = os.path.join(job_dir, "parsets")
        create_directory(self.parset_dir)
        self.mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(self.mapfile_dir)

        # *********************************************************************
        # (INPUT) Get the input from external sources and create pipeline types
        # Input measurement sets
        input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
        self.input_data.save(input_mapfile)
        # storedata_map(input_mapfile, self.input_data)
        self.logger.debug(
            "Wrote input UV-data mapfile: {0}".format(input_mapfile))

        # Provides the location of the scratch directory and the concat.ms
        target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
        self.target_data.save(target_mapfile)
        self.logger.debug(
            "Wrote target mapfile: {0}".format(target_mapfile))

        # Image data files
        output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile")
        self.output_data.save(output_image_mapfile)
        self.logger.debug(
            "Wrote output sky-image mapfile: {0}".format(output_image_mapfile))

        # Location of the output measurement set
        output_correlated_mapfile = os.path.join(self.mapfile_dir, 
                                                 "correlated.mapfile")
        self.output_correlated_data.save(output_correlated_mapfile)
        self.logger.debug(
            "Wrote output correlated mapfile: {0}".format(output_correlated_mapfile))

        # Get pipeline parameters from the toplevel recipe
        # TODO: This is a backdoor option to manually add beamtables when these
        # are missing on the provided ms. There is NO use case for users of the
        # pipeline
        add_beam_tables = self.parset.getBool(
                                    "Imaging.addBeamTables", False)


        number_of_major_cycles = self.parset.getInt(
                                    "Imaging.number_of_major_cycles")

        # Almost always a user wants a partial success rather than a failed pipeline
        output_result_of_last_succesfull_cycle = self.parset.getBool(
                            "Imaging.output_on_error", True)


        if number_of_major_cycles < 3:
            self.logger.error(
                "The number of major cycles must be 3 or higher, correct"
                " the key: Imaging.number_of_major_cycles")
            raise PipelineException(
                     "Incorrect number_of_major_cycles in the parset")


        # ******************************************************************
        # (1) prepare phase: copy and collect the ms
        concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
            processed_ms_dir = self._prepare_phase(input_mapfile,
                                    target_mapfile, add_beam_tables)

        # We start with an empty source_list map. It should contain n_output
        # entries all set to empty strings
        source_list_map_path = os.path.join(self.mapfile_dir,
                                        "initial_sourcelist.mapfile")
        source_list_map = DataMap.load(target_mapfile) # copy the output map
        for item in source_list_map:
            item.file = ""             # set all to empty string
        source_list_map.save(source_list_map_path)

        succesfull_cycle_mapfiles_dict = None
        for idx_cycle in range(number_of_major_cycles):
            try:
                # *****************************************************************
                # (2) Create dbs and sky model
                parmdbs_path, sourcedb_map_path = self._create_dbs(
                            concat_ms_map_path, timeslice_map_path, idx_cycle,
                            source_list_map_path = source_list_map_path,
                            skip_create_dbs = False)


                # *****************************************************************
                # (3)  bbs_imager recipe.
                bbs_output = self._bbs(concat_ms_map_path, timeslice_map_path, 
                        parmdbs_path, sourcedb_map_path, idx_cycle, skip = False)

            
                # TODO: Extra recipe: concat timeslices using pyrap.concatms
                # (see prepare) redmine issue #6021
                # Done in imager_bbs.p at the node level after calibration 

                # *****************************************************************
                # (4) Get awimager parameters from the prepare_parset and inputs
                aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path,
                            idx_cycle, sourcedb_map_path, number_of_major_cycles,
                            skip = False)

                # *****************************************************************
                # (5) Source finding
                source_list_map_path, found_sourcedb_path = self._source_finding(
                        aw_image_mapfile, idx_cycle, skip = False)
                # should the output be a sourcedb? instead of a sourcelist

                # save the active mapfiles: locations and content
                # Used to output the last successful cycle on error
                mapfiles_to_save = {'aw_image_mapfile':aw_image_mapfile,
                                    'source_list_map_path':source_list_map_path,
                                    'found_sourcedb_path':found_sourcedb_path,
                                    'concat_ms_map_path':concat_ms_map_path}
                succesfull_cycle_mapfiles_dict = self._save_active_mapfiles(idx_cycle, 
                                      self.mapfile_dir, mapfiles_to_save)

            # On exception there is the option to output the results of the 
            # last cycle without errors
            except KeyboardInterrupt as ex:
                raise ex

            except Exception as ex:
                self.logger.error("Encountered an fatal exception during self"
                                  "calibration. Aborting processing and return"
                                  " the last succesfull cycle results")
                self.logger.error(str(ex))

                # if we are in the first cycle always exit with exception
                if idx_cycle == 0:
                    raise ex

                if not output_result_of_last_succesfull_cycle:
                    raise ex
                
                # restore the mapfile variables
                aw_image_mapfile = succesfull_cycle_mapfiles_dict['aw_image_mapfile']
                source_list_map_path = succesfull_cycle_mapfiles_dict['source_list_map_path']
                found_sourcedb_path = succesfull_cycle_mapfiles_dict['found_sourcedb_path']
                concat_ms_map_path = succesfull_cycle_mapfiles_dict['concat_ms_map_path']

                # set the number_of_major_cycles to the correct number
                number_of_major_cycles = idx_cycle - 1
                max_cycles_reached = False
                break
            else:
                max_cycles_reached = True


        # TODO: minbaseline should be a parset value as is maxbaseline..
        minbaseline = 0

        # *********************************************************************
        # (6) Finalize:
        placed_data_image_map, placed_correlated_map =  \
                                        self._finalize(aw_image_mapfile, 
            processed_ms_dir, ms_per_image_map_path, source_list_map_path,
            minbaseline, maxbaseline, target_mapfile, output_image_mapfile,
            found_sourcedb_path, concat_ms_map_path, output_correlated_mapfile)

        # *********************************************************************
        # (7) Get metadata
        # create a parset with information that is available on the toplevel

        self._get_meta_data(number_of_major_cycles, placed_data_image_map,
                       placed_correlated_map, full_parset, 
                       max_cycles_reached)


        return 0
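The self-calibration loop above checkpoints the active mapfiles after every successful cycle and falls back to the last checkpoint when a later cycle fails. The same control flow in isolation, with hypothetical run_cycle and save_checkpoint callables standing in for the recipe steps:

def run_major_cycles(n_cycles, run_cycle, save_checkpoint,
                     output_on_error=True):
    # Sketch of the checkpoint / fall-back pattern used above.
    checkpoint = None
    for cycle in range(n_cycles):
        try:
            results = run_cycle(cycle)
            checkpoint = save_checkpoint(cycle, results)
        except KeyboardInterrupt:
            raise
        except Exception:
            # Nothing to fall back to, or fall-back disabled: re-raise.
            if cycle == 0 or not output_on_error:
                raise
            # Return the checkpoint of the last successful cycle.
            return checkpoint, cycle - 1
    return checkpoint, n_cycles - 1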
Example #49
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        self.logger.info("Starting imager pipeline")

        # Define scratch directory to be used by the compute nodes.
        self.scratch_directory = os.path.join(self.inputs['working_directory'],
                                              self.inputs['job_name'])
        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # remove the prepended parset identifiers, leave only PythonControl
        full_parset = self.parset
        self.parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')  # remove this

        # Create directories to store communication and data files

        job_dir = self.config.get("layout", "job_directory")

        self.parset_dir = os.path.join(job_dir, "parsets")
        create_directory(self.parset_dir)
        self.mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(self.mapfile_dir)

        # *********************************************************************
        # (INPUT) Get the input from external sources and create pipeline types
        # Input measurement sets
        input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
        self.input_data.save(input_mapfile)
        # storedata_map(input_mapfile, self.input_data)
        self.logger.debug(
            "Wrote input UV-data mapfile: {0}".format(input_mapfile))

        # Provides the location of the scratch directory and the concat.ms
        target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
        self.target_data.save(target_mapfile)
        self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile))

        # Image data files
        output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile")
        self.output_data.save(output_image_mapfile)
        self.logger.debug(
            "Wrote output sky-image mapfile: {0}".format(output_image_mapfile))

        # ******************************************************************
        # (1) prepare phase: copy and collect the ms
        concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
            processed_ms_dir = self._prepare_phase(input_mapfile,
                                    target_mapfile)

        number_of_major_cycles = self.parset.getInt(
            "Imaging.number_of_major_cycles")

        # We start with an empty source_list map. It should contain n_output
        # entries all set to empty strings
        source_list_map_path = os.path.join(self.mapfile_dir,
                                            "initial_sourcelist.mapfile")
        source_list_map = DataMap.load(target_mapfile)  # copy the output map
        for item in source_list_map:
            item.file = ""  # set all to empty string
        source_list_map.save(source_list_map_path)

        for idx_loop in range(number_of_major_cycles):
            # *****************************************************************
            # (2) Create dbs and sky model
            parmdbs_path, sourcedb_map_path = self._create_dbs(
                concat_ms_map_path,
                timeslice_map_path,
                source_list_map_path=source_list_map_path,
                skip_create_dbs=False)

            # *****************************************************************
            # (3)  bbs_imager recipe.
            bbs_output = self._bbs(timeslice_map_path,
                                   parmdbs_path,
                                   sourcedb_map_path,
                                   skip=False)

            # TODO: Extra recipe: concat timeslices using pyrap.concatms
            # (see prepare)

            # *****************************************************************
            # (4) Get awimager parameters from the prepare_parset and inputs
            aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path,
                                                            idx_loop,
                                                            sourcedb_map_path,
                                                            skip=False)

            # *****************************************************************
            # (5) Source finding
            sourcelist_map, found_sourcedb_path = self._source_finding(
                aw_image_mapfile, idx_loop, skip=False)
            # should the output be a sourcedb? instead of a sourcelist

        # TODO: minbaseline should be a parset value as is maxbaseline..
        minbaseline = 0

        # *********************************************************************
        # (6) Finalize:
        placed_data_image_map = self._finalize(
            aw_image_mapfile, processed_ms_dir, ms_per_image_map_path,
            sourcelist_map, minbaseline, maxbaseline, target_mapfile,
            output_image_mapfile, found_sourcedb_path)

        # *********************************************************************
        # (7) Get metadata
        # Create a parset containing the metadata for MAC/SAS
        metadata_file = "%s_feedback_SkyImage" % (self.parset_file, )
        self.run_task(
            "get_metadata",
            placed_data_image_map,
            parset_prefix=(full_parset.getString('prefix') +
                           full_parset.fullModuleName('DataProducts')),
            product_type="SkyImage",
            metadata_file=metadata_file)

        self.send_feedback_processing(parameterset())
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
Example #50
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.

        Note: return 0 on success, 1 on failure.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles.
        #
        # Note that PULP will read many of these fields directly. That makes
        # the following fields, and possibly others, part of the API towards
        # PULP:
        #
        # self.config
        # self.logger
        # self.input_data
        # self.output_data
        # self.parset_feedback_file
        # self.job_dir

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        self.job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(self.job_dir, "parsets")
        mapfile_dir = os.path.join(self.job_dir, "mapfiles")
        
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        # Coherent Stokes
        self.input_CS_mapfile = os.path.join(mapfile_dir, "input_CS_data.mapfile")
        self.input_data['coherent'].save(self.input_CS_mapfile)
        # Incoherent Stokes
        self.input_IS_mapfile = os.path.join(mapfile_dir, "input_IS_data.mapfile")
        self.input_data['incoherent'].save(self.input_IS_mapfile)
        # Output data
        self.output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile")
        self.output_data['data'].save(self.output_data_mapfile)

        if len(self.input_data) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.pulsar_parms = self.parset.makeSubset(self.parset.fullModuleName('Pulsar') + '.')
        pulsar_parset = os.path.join(parset_dir, "Pulsar.parset")
        self.pulsar_parms.writeFile(pulsar_parset)
            
        self.logger.debug("Processing: %s" %
          ', '.join(str(f) for f in self.input_data))
        
        # Rebuilding sys.argv without the options given automatically by framework
        # --auto = automatic run from framework
        # -q = quiet mode, no user interaction
        sys.argv = ['pulp.py', '--auto', '-q']
      
        if (not self.coherentStokesEnabled):
          sys.argv.extend(["--noCS", "--noCV", "--noFE"])
          
        if (not self.incoherentStokesEnabled):
          sys.argv.append("--noIS")       

        # Tell PULP where to write the feedback to
        self.parset_feedback_file =  "%s_feedback" % (self.parset_file,)
       
        # Run the pulsar pipeline
        self.logger.debug("Starting pulp with: " + join(sys.argv))
        p = pulp.pulp(self) # TODO: MUCK self to capture the API

        # NOTE: PULP returns 0 on SUCCESS!!
        if p.go():
          self.logger.error("PULP did not succeed. Bailing out!")
          return 1

        # Read and forward the feedback
        try:
          metadata = parameterset(self.parset_feedback_file)
        except IOError as e:
          self.logger.error("Could not read feedback from %s: %s" %
                            (self.parset_feedback_file, e))
          return 1
Example #51
0
    def run(self, executable, environment, parset, working_directory,
            output_image, concatenated_measurement_set, sourcedb_path,
             mask_patch_size, autogenerate_parameters, specify_fov, fov, 
             major_cycle, nr_cycles, perform_self_cal):
        """
        :param executable: Path to awimager executable
        :param environment: environment for catch_segfaults (executable runner)
        :param parset: parameters for the awimager
        :param working_directory: directory in which to place temporary files
        :param output_image: location and filename to store the output images;
          the multiple images are appended with type extensions
        :param concatenated_measurement_set: Input measurement set
        :param sourcedb_path: Path to the sourcedb used to create the image
          mask
        :param mask_patch_size: Scaling of the patch around the source in the
          mask
        :param autogenerate_parameters: Turns on the autogeneration of:
           cellsize, npix, wprojplanes, wmax, fov
        :param fov: if autogenerate_parameters is False, calculate the image
           parameters (cellsize, npix, wprojplanes, wmax) relative to this
           fov
        :param major_cycle: number of the self-calibration cycle, used to
            determine the imaging parameters: cellsize, npix, wprojplanes,
            wmax, fov
        :param nr_cycles: The requested number of self-cal cycles
        :param perform_self_cal: Bool used to select the self-cal functionality
            or the old semi-automatic functionality
        :rtype: self.outputs["image"] The path to the output image
        """
        self.logger.info("Start selfcal_awimager node run:")
        log4_cplus_name = "selfcal_awimager"
        self.environment.update(environment)

        with log_time(self.logger):
            # Read the parameters as specified in the parset
            parset_object = get_parset(parset)

            # *************************************************************
            # 1. Calculate awimager parameters that depend on measurement set
            # and the parset
            if perform_self_cal:
                # Calculate awimager parameters that depend on measurement set
                # and the parset              
                self.logger.info(
                   "Calculating selfcalibration parameters  ")
                cell_size, npix, w_max, w_proj_planes, \
                   UVmin, UVmax, robust, threshold =\
                        self._get_selfcal_parameters(
                            concatenated_measurement_set,
                            parset, major_cycle, nr_cycles) 

                self._save_selfcal_info(concatenated_measurement_set, 
                                        major_cycle, npix, UVmin, UVmax)

            else:
                self.logger.info(
                   "Calculating parameters.. ( NOT selfcalibration)")
                cell_size, npix, w_max, w_proj_planes = \
                    self._get_imaging_parameters(
                            concatenated_measurement_set,
                            parset,
                            autogenerate_parameters,
                            specify_fov,
                            fov)

            self.logger.info("Using autogenerated parameters; ")
            self.logger.info(
                 "Calculated parameters: cell_size: {0}, npix: {1}".format(
                     cell_size, npix))

            self.logger.info("w_max: {0}, w_proj_planes: {1} ".format(
                        w_max, w_proj_planes))

            # ****************************************************************
            # 2. Get the target image location from the mapfile for the parset.
            # Create target dir if it not exists
            image_path_head = os.path.dirname(output_image)
            create_directory(image_path_head)
            self.logger.debug("Created directory to place awimager output"
                              " files: {0}".format(image_path_head))

            # ****************************************************************
            # 3. Create the mask
            #mask_file_path = self._create_mask(npix, cell_size, output_image,
            #             concatenated_measurement_set, executable,
            #             working_directory, log4_cplus_name, sourcedb_path,
            #              mask_patch_size, image_path_head)
            # *****************************************************************
            # 4. Update the parset with calculated parameters, and output image
            patch_dictionary = {'uselogger': 'True',  # enables log4cplus logging
                               'ms': str(concatenated_measurement_set),
                               'cellsize': str(cell_size),
                               'npix': str(npix),
                               'wmax': str(w_max),
                               'wprojplanes': str(w_proj_planes),
                               'image': str(output_image),
                               'maxsupport': str(npix)
                               # 'mask':str(mask_file_path),  #TODO REINTRODUCE
                               # MASK, excluded to speed up in this debug stage                               
                               }

            # Add some additional keys for the self-calibration method
            if perform_self_cal:
                self_cal_patch_dict = {
                               'weight': 'briggs', 
                               'padding': str(1.18),
                               'niter' : str(1000000), 
                               'operation' : 'mfclark',
                               'timewindow' : '300',
                               'fits' : '',
                               'threshold' : str(threshold),
                               'robust' : str(robust),
                               'UVmin' : str(UVmin), 
                               'UVmax' : str(UVmax),
                               'maxbaseline' : str(10000000),
                               'select' : str("sumsqr(UVW[:2])<1e12"), 
                               }
                patch_dictionary.update(self_cal_patch_dict)

            # save the parset at the target dir for the image
            calculated_parset_path = os.path.join(image_path_head,
                                                       "parset.par")

            try:
                temp_parset_filename = patch_parset(parset, patch_dictionary)
                # Copy tmp file to the final location
                shutil.copyfile(temp_parset_filename, calculated_parset_path)
                self.logger.debug("Wrote parset for awimager run: {0}".format(
                                                    calculated_parset_path))
            finally:
                # remove temp file
                os.remove(temp_parset_filename)

            # *****************************************************************
            # 5. Run the awimager with the parameterset

            cmd = [executable, calculated_parset_path]
            self.logger.debug("Parset used for awimager run:")
            self.logger.debug(cmd)
            try:
                with CatchLog4CPlus(working_directory,
                        self.logger.name + "." +
                        os.path.basename(log4_cplus_name),
                        os.path.basename(executable)
                ) as logger:
                    catch_segfaults(cmd, working_directory, self.environment,
                                            logger, usageStats=self.resourceMonitor)

            # Thrown by catch_segfault
            except CalledProcessError as exception:
                self.logger.error(str(exception))
                return 1

            except Exception as exception:
                self.logger.error(str(exception))
                return 1
Example #52
0
    def run(self, executable, environment, parset, working_directory,
            output_image, concatenated_measurement_set, sourcedb_path,
             mask_patch_size, autogenerate_parameters, specify_fov, fov, 
             major_cycle, nr_cycles, perform_self_cal):
        """
        :param executable: Path to awimager executable
        :param environment: environment for catch_segfaults (executable runner)
        :param parset: parameters for the awimager
        :param working_directory: directory in which to place temporary files
        :param output_image: location and filename to store the output images;
          the multiple images are appended with type extensions
        :param concatenated_measurement_set: Input measurement set
        :param sourcedb_path: Path to the sourcedb used to create the image
          mask
        :param mask_patch_size: Scaling of the patch around the source in the
          mask
        :param autogenerate_parameters: Turns on the autogeneration of:
           cellsize, npix, wprojplanes, wmax, fov
        :param fov: if autogenerate_parameters is False, calculate the image
           parameters (cellsize, npix, wprojplanes, wmax) relative to this
           fov
        :param major_cycle: number of the self-calibration cycle, used to
            determine the imaging parameters: cellsize, npix, wprojplanes,
            wmax, fov
        :param nr_cycles: The requested number of self-cal cycles
        :param perform_self_cal: Bool used to select the self-cal functionality
            or the old semi-automatic functionality
        :rtype: self.outputs["image"] The path to the output image
        """
        self.logger.info("Start selfcal_awimager node run:")
        log4_cplus_name = "selfcal_awimager"
        self.environment.update(environment)

        with log_time(self.logger):
            # Read the parameters as specified in the parset
            parset_object = get_parset(parset)

            # *************************************************************
            # 1. Calculate awimager parameters that depend on measurement set
            # and the parset
            if perform_self_cal:
                # Calculate awimager parameters that depend on measurement set
                # and the parset              
                self.logger.info(
                   "Calculating selfcalibration parameters  ")
                cell_size, npix, w_max, w_proj_planes, \
                   UVmin, UVmax, robust, threshold =\
                        self._get_selfcal_parameters(
                            concatenated_measurement_set,
                            parset, major_cycle, nr_cycles) 

                self._save_selfcal_info(concatenated_measurement_set, 
                                        major_cycle, npix, UVmin, UVmax)

            else:
                self.logger.info(
                   "Calculating parameters.. ( NOT selfcalibration)")
                cell_size, npix, w_max, w_proj_planes = \
                    self._get_imaging_parameters(
                            concatenated_measurement_set,
                            parset,
                            autogenerate_parameters,
                            specify_fov,
                            fov)

            self.logger.info("Using autogenerated parameters; ")
            self.logger.info(
                 "Calculated parameters: cell_size: {0}, npix: {1}".format(
                     cell_size, npix))

            self.logger.info("w_max: {0}, w_proj_planes: {1} ".format(
                        w_max, w_proj_planes))

            # ****************************************************************
            # 2. Get the target image location from the mapfile for the parset.
            # Create target dir if it not exists
            image_path_head = os.path.dirname(output_image)
            create_directory(image_path_head)
            self.logger.debug("Created directory to place awimager output"
                              " files: {0}".format(image_path_head))

            # ****************************************************************
            # 3. Create the mask
            #mask_file_path = self._create_mask(npix, cell_size, output_image,
            #             concatenated_measurement_set, executable,
            #             working_directory, log4_cplus_name, sourcedb_path,
            #              mask_patch_size, image_path_head)
            # *****************************************************************
            # 4. Update the parset with calculated parameters, and output image
            patch_dictionary = {'uselogger': 'True',  # enables log4cplus logging
                               'ms': str(concatenated_measurement_set),
                               'cellsize': str(cell_size),
                               'npix': str(npix),
                               'wmax': str(w_max),
                               'wprojplanes': str(w_proj_planes),
                               'image': str(output_image),
                               'maxsupport': str(npix)
                               # 'mask':str(mask_file_path),  #TODO REINTRODUCE
                               # MASK, excluded to speed up in this debug stage                               
                               }

            # Add some additional keys for the self-calibration method
            if perform_self_cal:
                self_cal_patch_dict = {
                               'weight': 'briggs', 
                               'padding': str(1.18),
                               'niter' : str(1000000), 
                               'operation' : 'mfclark',
                               'timewindow' : '300',
                               'fits' : '',
                               'threshold' : str(threshold),
                               'robust' : str(robust),
                               'UVmin' : str(UVmin), 
                               'UVmax' : str(UVmax),
                               'maxbaseline' : str(10000000),
                               'select' : str("sumsqr(UVW[:2])<1e12"), 
                               }
                patch_dictionary.update(self_cal_patch_dict)

            # save the parset at the target dir for the image
            calculated_parset_path = os.path.join(image_path_head,
                                                       "parset.par")

            try:
                temp_parset_filename = patch_parset(parset, patch_dictionary)
                # Copy tmp file to the final location
                shutil.copyfile(temp_parset_filename, calculated_parset_path)
                self.logger.debug("Wrote parset for awimager run: {0}".format(
                                                    calculated_parset_path))
            finally:
                # remove temp file
                os.remove(temp_parset_filename)

            # *****************************************************************
            # 5. Run the awimager with the parameterset

            cmd = [executable, calculated_parset_path]
            self.logger.debug("Parset used for awimager run:")
            self.logger.debug(cmd)
            try:
                with CatchLog4CPlus(working_directory,
                        self.logger.name + "." +
                        os.path.basename(log4_cplus_name),
                        os.path.basename(executable)
                ) as logger:
                    catch_segfaults(cmd, working_directory, self.environment,
                                            logger, usageStats=self.resourceMonitor)

            # Thrown by catch_segfault
            except CalledProcessError as exception:
                self.logger.error(str(exception))
                return 1

            except Exception as exception:
                self.logger.error(str(exception))
                return 1

        # *********************************************************************
        # 6. Return output
        # Append the static '.restored' suffix: this might change, but
        # probably not. The actual output image always has this extension
        # by default in awimager
        self.outputs["image"] = output_image + ".restored"
        return 0
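Both awimager node recipes build the parset patch in two steps: a base dictionary of calculated imaging parameters, optionally extended with fixed self-calibration settings. A small sketch of that construction as a pure function; the key names are taken from the code above, while the function itself and its signature are hypothetical:

def build_awimager_patch(ms, cell_size, npix, w_max, w_proj_planes,
                         output_image, selfcal_extras=None):
    # Base keys calculated from the measurement set and the parset.
    patch = {
        'uselogger': 'True',
        'ms': str(ms),
        'cellsize': str(cell_size),
        'npix': str(npix),
        'wmax': str(w_max),
        'wprojplanes': str(w_proj_planes),
        'image': str(output_image),
        'maxsupport': str(npix),
    }
    # Extra keys only applied when self-calibration is enabled.
    if selfcal_extras:
        patch.update(selfcal_extras)
    return patch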
Example #53
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles
        # Create a parameter-subset containing only python-control stuff.
        py_parset = self.parset.makeSubset(
            'ObsSW.Observation.ObservationControl.PythonControl.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # Create some needed directories
        job_dir = self.config.get("layout", "job_directory")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(mapfile_dir)
        parset_dir = os.path.join(job_dir, "parsets")
        create_directory(parset_dir)

        # *********************************************************************
        # 2. Copy the instrument files to the correct node
        # The instrument files are currently located on the wrong nodes
        # Copy to correct nodes and assign the instrument table the now
        # correct data

        # Copy the instrument files to the correct nodes: failures might
        # happen; update both the instrument and data maps to contain only
        # successes!
        self._copy_instrument_files(mapfile_dir)

        # Write input- and output data map-files.
        data_mapfile = os.path.join(mapfile_dir, "data.mapfile")
        self.input_data['data'].save(data_mapfile)
        copied_instrument_mapfile = os.path.join(mapfile_dir, "copied_instrument.mapfile")
        self.input_data['instrument'].save(copied_instrument_mapfile)
        self.logger.debug(
            "Wrote input data mapfile: %s" % data_mapfile
        )

        # Save copied files to a new mapfile
        corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile")
        self.output_data['data'].save(corrected_mapfile)
        self.logger.debug(
            "Wrote output corrected data mapfile: %s" % corrected_mapfile
        )

        # Validate number of copied files, abort on zero files copied
        if len(self.input_data['data']) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug("Processing: %s" %
            ', '.join(str(f) for f in self.input_data['data'])
        )

        # *********************************************************************
        # 3. Create database needed for performing work: 
        #    - GVDS, describing data on the compute nodes
        #    - SourceDB, for skymodel (A-team)
        #    - ParmDB for outputting solutions
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task("vdsmaker", data_mapfile)['gvds']

        # Read metadata (e.g., start- and end-time) from the GVDS file.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create an empty parmdb for DPPP
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task("setupparmdb", data_mapfile)['mapfile']

        # Create a sourcedb to be used by the demixing phase of DPPP
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share', 'pipeline', 'skymodels', skymodel + '.skymodel'
            )
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", data_mapfile,
                skymodel=skymodel,
                suffix='.dppp.sourcedb',
                type='blob'
            )['mapfile']

        # *********************************************************************
        # 4. Run NDPPP to demix the A-Team sources
        # Create a parameter-subset for DPPP and write it to file.
        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            dppp_mapfile = self.run_task("ndppp",
                data_mapfile,
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=
                    py_parset.getStringVector('PreProcessing.demix_always'),
                demix_if_needed=
                    py_parset.getStringVector('PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.mapfile')
            )['mapfile']

        # ********************************************************************
        # 5. Run BBS using the instrument file from the target observation
        # Create an empty sourcedb for BBS
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", data_mapfile
            )['mapfile']

        # Create a parameter-subset for BBS and write it to file.
        bbs_parset = os.path.join(parset_dir, "BBS.parset")
        py_parset.makeSubset('BBS.').writeFile(bbs_parset)

        # Run BBS to calibrate the target source(s).
        with duration(self, "bbs_reducer"):
            bbs_mapfile = self.run_task("bbs_reducer",
                dppp_mapfile,
                parset=bbs_parset,
                instrument_mapfile=copied_instrument_mapfile,
                sky_mapfile=sourcedb_mapfile
            )['data_mapfile']

        # *********************************************************************
        # 6. Copy the MS's to their final output destination.
        # When the copier recipe has run, the map-file named in
        # corrected_mapfile will contain an updated map of output files.
        with duration(self, "copier"):
            self.run_task("copier",
                mapfile_source=bbs_mapfile,
                mapfile_target=corrected_mapfile,
                mapfiles_dir=mapfile_dir,
                mapfile=corrected_mapfile
            )

        # *********************************************************************
        # 7. Create feedback file for further processing by the LOFAR framework
        # (MAC)
        # Create a parset-file containing the metadata for MAC/SAS
        with duration(self, "get_metadata"):
            self.run_task("get_metadata", corrected_mapfile,
                parset_file=self.parset_feedback_file,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')
                ),
                product_type="Correlated")

        return 0
Example #54
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.

        Note: return 0 on success, 1 on failure.
        """
        # *********************************************************************
        # 1. Prepare phase, collect data from parset and input mapfiles.
        #
        # Note that PULP will read many of these fields directly. That makes
        # the following fields, and possibly others, part of the API towards
        # PULP:
        #
        # self.config
        # self.logger
        # self.input_data
        # self.output_data
        # self.parset_feedback_file
        # self.job_dir

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        self.job_dir = self.config.get("layout", "job_directory")
        self.globalfs = (self.config.has_option("remote", "globalfs") and
                         self.config.getboolean("remote", "globalfs"))
        parset_dir = os.path.join(self.job_dir, "parsets")
        mapfile_dir = os.path.join(self.job_dir, "mapfiles")
        
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        # Coherent Stokes
        self.input_CS_mapfile = os.path.join(mapfile_dir, "input_CS_data.mapfile")
        self.input_data['coherent'].save(self.input_CS_mapfile)
        # Incoherent Stokes
        self.input_IS_mapfile = os.path.join(mapfile_dir, "input_IS_data.mapfile")
        self.input_data['incoherent'].save(self.input_IS_mapfile)
        # Output data
        self.output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile")
        self.output_data['data'].save(self.output_data_mapfile)

        if len(self.input_data) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.pulsar_parms = self.parset.makeSubset(self.parset.fullModuleName('Pulsar') + '.')
        pulsar_parset = os.path.join(parset_dir, "Pulsar.parset")

        if self.globalfs:
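            # (Assumption: the double-escaped spaces below keep multi-word
            # *_extra_opts values intact when they are passed through the
            # extra shell layers of the SLURM/Docker invocation.)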
            # patch for Pulp in case of DOCKER
            for k in [x for x in self.pulsar_parms.keys()
                      if x.endswith("_extra_opts")]:
                self.pulsar_parms.replace(
                    k, self.pulsar_parms[k].getString().replace(" ", "\\\\ "))

        self.pulsar_parms.writeFile(pulsar_parset)

        self.logger.debug("Processing: %s" %
                          ', '.join(str(f) for f in self.input_data))

        # Rebuilding sys.argv without the options given automatically by framework
        # --auto = automatic run from framework
        # -q = quiet mode, no user interaction
        sys.argv = ['pulp.py', '--auto', '-q']

        if self.globalfs:
            project = self.parset.getString(
                self.parset.fullModuleName('Campaign') + '.name')
            sys.argv.extend([
                '--slurm', '--globalfs', '--docker',
                '--docker-container=lofar-pulp:%s' % os.environ.get("LOFAR_TAG"),
                '--raw=/data/projects/%s' % project])
        else:
            sys.argv.append("--auto")

        if not self.coherentStokesEnabled:
            sys.argv.extend(["--noCS", "--noCV", "--noFE"])

        if not self.incoherentStokesEnabled:
            sys.argv.append("--noIS")

        # Tell PULP where to write the feedback to
        self.parset_feedback_file = "%s_feedback" % (self.parset_file,)

        # Run the pulsar pipeline
        self.logger.debug("Starting pulp with: " + " ".join(sys.argv))
        self.logger.debug("Calling pulp.pulp(self) with self = %s", pprint.pformat(vars(self)))
        p = pulp.pulp(self) # TODO: MUCK self to capture the API

        # NOTE: PULP returns 0 on SUCCESS!!
        if p.go():
            self.logger.error("PULP did not succeed. Bailing out!")
            return 1

        # Read and forward the feedback
        try:
            metadata = parameterset(self.parset_feedback_file)
        except IOError, e:
            self.logger.error("Could not read feedback from %s: %s" %
                              (self.parset_feedback_file, e))
            return 1
Example #55
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        self.logger.info("Starting longbaseline pipeline")

        # Define scratch directory to be used by the compute nodes.
        self.scratch_directory = os.path.join(self.inputs['working_directory'],
                                              self.inputs['job_name'])
        # Get input/output-data products specifications.
        self._get_io_product_specs()

        # remove prepended parset identifiers, leave only the PythonControl part
        full_parset = self.parset
        self.parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')  # remove this

        # Create directories to store communication and data files

        job_dir = self.config.get("layout", "job_directory")

        self.parset_dir = os.path.join(job_dir, "parsets")
        create_directory(self.parset_dir)
        self.mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(self.mapfile_dir)

        # *********************************************************************
        # (INPUT) Get the input from external sources and create pipeline types
        # Input measurement sets
        input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile")
        self.input_data.save(input_mapfile)

        ## ***************************************************************
        #output_mapfile_path = os.path.join(self.mapfile_dir, "output.mapfile")
        #self.output_mapfile.save(output_mapfile_path)

        # storedata_map(input_mapfile, self.input_data)
        self.logger.debug(
            "Wrote input UV-data mapfile: {0}".format(input_mapfile))

        # Provides location for the scratch directory and concat.ms location
        target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile")
        self.target_data.save(target_mapfile)
        self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile))

        # output measurement-set data files
        output_ms_mapfile = os.path.join(self.mapfile_dir, "output.mapfile")
        self.output_data.save(output_ms_mapfile)
        self.logger.debug(
            "Wrote output correlated-data mapfile: {0}".format(
                output_ms_mapfile))

        # TODO: This is a backdoor option to manually add beamtables when these
        # are missing on the provided ms. There is NO use case for users of the
        # pipeline
        add_beam_tables = self.parset.getBool("Imaging.addBeamTables", False)

        # ******************************************************************
        # (1) prepare phase: copy and collect the ms
        concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \
            processed_ms_dir = self._long_baseline(input_mapfile,
                         target_mapfile, add_beam_tables, output_ms_mapfile)

        # *********************************************************************
        # (7) Get metadata
        # create a parset with information that is available on the toplevel
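        # (feedback_version is assumed to be a module-level constant imported
        # from the pipeline framework.)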
        toplevel_meta_data = parameterset(
            {'feedback_version': feedback_version})

        # get some parameters from the long-baseline pipeline parset:
        subbandgroups_per_ms = self.parset.getInt(
            "LongBaseline.subbandgroups_per_ms")
        subbands_per_subbandgroup = self.parset.getInt(
            "LongBaseline.subbands_per_subbandgroup")

        toplevel_meta_data.replace("subbandsPerSubbandGroup",
                                   str(subbands_per_subbandgroup))
        toplevel_meta_data.replace("subbandGroupsPerMS",
                                   str(subbandgroups_per_ms))

        # Create a parset-file containing the metadata for MAC/SAS at nodes
        metadata_file = "%s_feedback_Correlated" % (self.parset_file, )
        self.run_task(
            "get_metadata",
            output_ms_mapfile,
            parset_prefix=(full_parset.getString('prefix') +
                           full_parset.fullModuleName('DataProducts')),
            product_type="Correlated",
            metadata_file=metadata_file)

        self.send_feedback_processing(toplevel_meta_data)
        self.send_feedback_dataproducts(parameterset(metadata_file))

        return 0
Example #56
0
    def pipeline_logic(self):
        """
        Define the individual tasks that comprise the current pipeline.
        This method will be invoked by the base-class's `go()` method.
        """
        # *********************************************************************
        # 1. Get input from parset, validate and cast to pipeline 'data types'
        #    Only perform work on existing files
        #    Create needed directories
        # Create a parameter-subset containing only python-control stuff.
        py_parset = self.parset.makeSubset(
            self.parset.fullModuleName('PythonControl') + '.')

        # Get input/output-data products specifications.
        self._get_io_product_specs()

        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")

        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # Write input- and output data map-files
        input_correlated_mapfile = os.path.join(
            mapfile_dir, "input_correlated.mapfile"
        )
        output_correlated_mapfile = os.path.join(
            mapfile_dir, "output_correlated.mapfile"
        )
        output_instrument_mapfile = os.path.join(
            mapfile_dir, "output_instrument.mapfile"
        )
        self.input_data['correlated'].save(input_correlated_mapfile)
        self.output_data['correlated'].save(output_correlated_mapfile)
        self.output_data['instrument'].save(output_instrument_mapfile)

        if len(self.input_data['correlated']) == 0:
            self.logger.warn("No input data files to process. Bailing out!")
            return 0

        self.logger.debug("Processing: %s" %
            ', '.join(str(f) for f in self.input_data['correlated']))

        # *********************************************************************
        # 2. Create the databases needed for performing the work:
        #    VDS, describing the data on the nodes
        #    sourcedb, for the sky model (A-team)
        #    parmdb, for outputting solutions
        # Produce a GVDS file describing the data on the compute nodes.
        with duration(self, "vdsmaker"):
            gvds_file = self.run_task(
                "vdsmaker", input_correlated_mapfile
            )['gvds']

        # Read metadata (start, end times, pointing direction) from GVDS.
        with duration(self, "vdsreader"):
            vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

        # Create an empty parmdb for DPPP
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task(
                "setupparmdb", input_correlated_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'),
                suffix='.dppp.parmdb'
            )['mapfile']

        # Create a sourcedb to be used by the demixing phase of DPPP
        # The user-supplied sky model can either be a name, in which case the
        # pipeline will search for a file <name>.skymodel in the default search
        # path $LOFARROOT/share/pipeline/skymodels; or a full path.
        # It is an error if the file does not exist.
        skymodel = py_parset.getString('PreProcessing.SkyModel')
        if not os.path.isabs(skymodel):
            skymodel = os.path.join(
                # This should really become os.environ['LOFARROOT']
                self.config.get('DEFAULT', 'lofarroot'),
                'share', 'pipeline', 'skymodels', skymodel + '.skymodel'
            )
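            # (For example, a parset value of 'Ateam_LBA_CC' would resolve to
            # <lofarroot>/share/pipeline/skymodels/Ateam_LBA_CC.skymodel; the
            # name here is illustrative.)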
        if not os.path.isfile(skymodel):
            raise PipelineException("Skymodel %s does not exist" % skymodel)
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", input_correlated_mapfile,
                mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'),
                skymodel=skymodel,
                suffix='.dppp.sourcedb',
                type='blob'
            )['mapfile']

        # *********************************************************************
        # 3. Run NDPPP to demix the A-Team sources
        #    TODO: Do flagging?
        # Create a parameter-subset for DPPP and write it to file.
        ndppp_parset = os.path.join(parset_dir, "NDPPP.parset")
        py_parset.makeSubset('DPPP.').writeFile(ndppp_parset)

        # Run the Default Pre-Processing Pipeline (DPPP);
        with duration(self, "ndppp"):
            dppp_mapfile = self.run_task(
                "ndppp", input_correlated_mapfile,
                data_start_time=vdsinfo['start_time'],
                data_end_time=vdsinfo['end_time'],
                demix_always=
                    py_parset.getStringVector('PreProcessing.demix_always'),
                demix_if_needed=
                    py_parset.getStringVector('PreProcessing.demix_if_needed'),
                parset=ndppp_parset,
                parmdb_mapfile=parmdb_mapfile,
                sourcedb_mapfile=sourcedb_mapfile
            )['mapfile']

        # *********************************************************************
        # 4. Run BBS with a model of the calibrator
        #    Create a parmdb for calibration solutions
        #    Create sourcedb with known calibration solutions
        #    Run bbs with both
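        #    (Note: parmdb_mapfile and sourcedb_mapfile are reassigned below,
        #    replacing the DPPP versions created in step 2.)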
        # Create an empty parmdb for BBS
        with duration(self, "setupparmdb"):
            parmdb_mapfile = self.run_task(
                "setupparmdb", dppp_mapfile,
                mapfile=os.path.join(mapfile_dir, 'bbs.parmdb.mapfile'),
                suffix='.bbs.parmdb'
            )['mapfile']

        # Create a sourcedb based on the "skymodel" input argument of
        # setupsourcedb
        with duration(self, "setupsourcedb"):
            sourcedb_mapfile = self.run_task(
                "setupsourcedb", input_correlated_mapfile,
                skymodel=os.path.join(
                    self.config.get('DEFAULT', 'lofarroot'),
                    'share', 'pipeline', 'skymodels',
                    py_parset.getString('Calibration.SkyModel') +
                        '.skymodel'),
                mapfile=os.path.join(mapfile_dir, 'bbs.sourcedb.mapfile'),
                suffix='.bbs.sourcedb')['mapfile']

        # Create a parameter-subset for BBS and write it to file.
        bbs_parset = os.path.join(parset_dir, "BBS.parset")
        py_parset.makeSubset('BBS.').writeFile(bbs_parset)

        # Run BBS to calibrate the calibrator source(s).
        with duration(self, "bbs_reducer"):
            bbs_mapfile = self.run_task(
                "bbs_reducer", dppp_mapfile,
                parset=bbs_parset,
                instrument_mapfile=parmdb_mapfile,
                sky_mapfile=sourcedb_mapfile
            )['data_mapfile']

        # *********************************************************************
        # 5. Perform gain outlier correction on the found calibration solutions
        #    Swap outliers in the gains with the median
        # Export the calibration solutions using gainoutliercorrection and store
        # the results in the files specified in the instrument mapfile.
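        # (sigma=1.0 below is presumably the clipping threshold used to decide
        # which gain solutions count as outliers; see the TODO about turning
        # it into a parset parameter.)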
        export_instrument_model = py_parset.getBool(
            'Calibration.exportCalibrationParameters', False)

        with duration(self, "gainoutliercorrection"):
            self.run_task("gainoutliercorrection",
                      (parmdb_mapfile, output_instrument_mapfile),
                      sigma=1.0,
                      export_instrument_model=export_instrument_model) # TODO: Parset parameter

        # *********************************************************************
        # 6. Copy corrected MS's to their final output destination.
        with duration(self, "copier"):
            self.run_task("copier",
                mapfile_source=bbs_mapfile,
                mapfile_target=output_correlated_mapfile,
                mapfiles_dir=mapfile_dir,
                mapfile=output_correlated_mapfile
            )

        # *********************************************************************
        # 7. Create feedback file for further processing by the LOFAR framework
        #    a. get metadata of the measurement sets
        #    b. get metadata of the instrument models
        #    c. join the two files and write the final feedback file
        correlated_metadata = os.path.join(parset_dir, "correlated.metadata")
        instrument_metadata = os.path.join(parset_dir, "instrument.metadata")
        with duration(self, "get_metadata"):
            self.run_task("get_metadata", output_correlated_mapfile,
                parset_file=correlated_metadata,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')),
                product_type="Correlated")

        with duration(self, "get_metadata"):
            self.run_task("get_metadata", output_instrument_mapfile,
                parset_file=instrument_metadata,
                parset_prefix=(
                    self.parset.getString('prefix') +
                    self.parset.fullModuleName('DataProducts')),
                product_type="InstrumentModel")

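        # Merge the instrument metadata into the correlated metadata and write
        # the combined result as the final feedback file.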
        parset = parameterset(correlated_metadata)
        parset.adoptFile(instrument_metadata)
        parset.writeFile(self.parset_feedback_file)

        return 0
Example #57
0
    def pipeline_logic(self):
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # Maybe we don't need a subset but just a step list; at the moment
        # only a list of step names is given for the pipeline.steps parameter,
        # e.g. pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # The names will be the prefix for parset subsets.
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')
        pipeline_steps = self.parset.makeSubset(
            self.parset.fullModuleName('steps') + '.')
        # *********************************************************************
        # Forward declaration of variables, just for a better overview of what
        # is in here. Some of this might be removed in upcoming iterations, or
        # new items may be added.
        step_name_list = pipeline_args.getStringVector('steps')
        # construct the step name list if there were pipeline.steps.<subset>
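        # (Illustrative example: with steps=[prep,cal] and steps.cal=[bbs,apply],
        # the expanded list becomes [prep,bbs,apply]; the step names are made up.)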
        for item in pipeline_steps.keys():
            if item in step_name_list:
                loc = step_name_list.index(item)
                step_name_list[loc:loc] = pipeline_steps.getStringVector(item)
                step_name_list.remove(item)

        step_control_dict = {}
        step_parset_files = {}
        step_parset_obj = {}
        activeloop = ['']
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
        # Initial parameters are saved in resultdicts so that recipes have
        # access to them as "step 0". The values are registered twice: under
        # 'input' (which should be considered deprecated) and under self.name,
        # which is the consistent key to use in subpipelines.
        input_dictionary = {
            'parset': parset_file,
            'parsetobj': self.parset,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir}

        resultdicts = {}
        for section in self.config.sections():
            tmp_dict = {}
            for entry in self.config.items(section):
                input_dictionary[entry[0]] = entry[1]
                tmp_dict[entry[0]] = entry[1]
            resultdicts.update({section: copy.deepcopy(tmp_dict)})

        resultdicts.update({'input': input_dictionary})
        resultdicts.update({self.name: input_dictionary})

        if 'pipeline.mapfile' in self.parset.keywords():
            resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile'])
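        # (Later steps can look up these entries, as well as each finished
        # step's own resultdict, when their inputs and command lines are
        # constructed from resultdicts.)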

        # *********************************************************************
        # main loop
        # There is a distinction between recipes and plugins for user scripts.
        # Plugins are not used at the moment and might better be replaced with
        # master recipes.
        while step_name_list:
            stepname = step_name_list.pop(0)
            self.logger.info("Beginning step %s" % (stepname,))
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            adds = None
            if stepname in step_parset_obj:
                adds = self._construct_step_parset(inputdict,
                                             step_parset_obj[stepname],
                                             resultdicts,
                                             step_parset_files[stepname],
                                             stepname)
            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                    inputdict['stepname'] = stepname
                    if adds:
                        inputdict.update(adds)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]


            self._construct_input(inputdict, step, resultdicts)
            # Hack: popping 'type' is necessary because 'kind' was already
            # deleted from the parsets.
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # More hacks: the framework's DictField is not properly
            # implemented, so construct your own dict from the input. Python
            # builtin functions can't handle the string returned by the parset
            # class.
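            # (Illustrative example: "{LOFARROOT: /opt/lofar, OMP_NUM_THREADS: 4}"
            # is parsed into {'LOFARROOT': '/opt/lofar', 'OMP_NUM_THREADS': '4'};
            # values that themselves contain ':' or ',' would not survive this.)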
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict

            # Subpipeline: the goal is to specify a pipeline within a pipeline.
            # Load another existing pipeline parset and add it to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector('pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keywords():
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({os.path.splitext(os.path.basename(typeval))[0]: {
                    'parset': typeval,
                    'mapfile': submapfile,
                }})
                # TODO: take care of plugin paths and everything other than
                # individual steps. Make a pipeline parse method that returns
                # everything needed, maybe as dicts, to combine them into one.

                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keywords():
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in self._keys(subpipeline_parset):
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *********************************************************************
                # The master parset did not handle formatting and comments in
                # the parset; it is only properly formatted after a call to
                # parset.makeSubset, which returns a different object from a
                # different super class :(. This also explains the use of both
                # parset.keys and parset.keys().
                # Take the parset from the subpipeline and add it to the
                # master parset.
                # UPDATE: do not use .keys on the master parset; use
                # .keywords(), then comments are filtered out.
                # *********************************************************************
                # Replace step names with the subpipeline step name to create
                # a unique identifier. Replacement values starting with '!'
                # are taken from the master parset and overwrite the ones in
                # the subpipeline; this only works if the '!' value is already
                # present in the subpipeline.
                for k in self._keys(subpipeline_parset):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!') and not str(k).startswith('pipeline.replace.'):
                        for item in checklist:
                            if item+".output" in str(val):
                                val = str(val).replace(item, stepname + '-' + item)

                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        # remove replacement strings to prevent loading the same key twice
                        if k in self._keys(self.parset):
                            self.parset.remove(k)
                        self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self._keys(self.parset):
                        if ((str(k).startswith('!') and
                             item == str(k).strip("! ")) or
                            (str(k).startswith('pipeline.replace.') and
                             item == str(k)[17:].strip())):
                            self.parset.remove(k)
                            self.parset.add('! ' + item, str(step_parset_obj[stepname][item]))
                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)


            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                if activeloop[0] != stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')

                # break at max iteration or when other step sets break variable
                if counter == step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(
                        typeval,
                        inputargs,
                        **inputdict
                    )

            # plugins
            if kind_of_step == 'plugin':
                recipe_directories = str(
                    self.config.get('DEFAULT', 'recipe_directories'))
                pluginpath = recipe_directories.rstrip(']').lstrip('[').split(',')
                for i, item in enumerate(pluginpath):
                    pluginpath[i] = os.path.join(item, 'plugins')
                if 'pluginpath' in pipeline_args.keys():
                    pluginpath.append(pipeline_args.getString('pluginpath'))
                with duration(self, stepname):
                    resultdict = loader.call_plugin(typeval, pluginpath,
                                                    inputargs,
                                                    **inputdict)
            resultdicts[stepname] = resultdict

            # Breaking the loop step: if the step result contains the
            # loop-break keyword, assign its value to the active loop.
            if activeloop[0] in resultdicts and resultdict is not None and 'break' in resultdict:
                resultdicts[activeloop[0]]['break'] = resultdict['break']
Example #58
0
    def run(self, environment, parset, working_dir, processed_ms_dir,
            ndppp_executable, output_measurement_set, subbandgroups_per_ms,
            subbands_per_subbandgroup, ms_mapfile, asciistat_executable,
            statplot_executable, msselect_executable, rficonsole_executable,
            add_beam_tables, globalfs, final_output_path):
        """
        Entry point for the node recipe
        """
        self.environment.update(environment)
        self.globalfs = globalfs

        with log_time(self.logger):
            input_map = DataMap.load(ms_mapfile)
            #******************************************************************
            # I. Create the directories used in this recipe
            create_directory(processed_ms_dir)
            create_directory(working_dir)

            # Time slice dir: assure an empty directory; stale data
            # is problematic for dppp
            time_slice_dir = os.path.join(working_dir, _time_slice_dir_name)
            create_directory(time_slice_dir)
            for root, dirs, files in os.walk(time_slice_dir):
                for file_to_remove in files:
                    os.unlink(os.path.join(root, file_to_remove))
                for dir_to_remove in dirs:
                    shutil.rmtree(os.path.join(root, dir_to_remove))
            self.logger.debug("Created directory: {0}".format(time_slice_dir))
            self.logger.debug("and assured it is empty")

            #******************************************************************
            # 1. Copy the input files
            processed_ms_map = self._copy_input_files(processed_ms_dir,
                                                      input_map)

            #******************************************************************
            # 2. run dppp: collect frequencies into larger group
            time_slices_path_list = \
                self._run_dppp(working_dir, time_slice_dir,
                    subbandgroups_per_ms, processed_ms_map, subbands_per_subbandgroup,
                    processed_ms_dir, parset, ndppp_executable)

            # If no timeslices were created, bail out with exit status 1
            if len(time_slices_path_list) == 0:
                self.logger.error("No timeslices were created.")
                self.logger.error("Exiting with error state 1")
                return 1

            self.logger.debug(
                "Produced time slices: {0}".format(time_slices_path_list))

            #***********************************************************
            # 3. run rficonsole: flag data points which are corrupted
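            # (Currently disabled via the "if False" guard below; kept here
            # for reference.)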
            if False:
                self._run_rficonsole(rficonsole_executable, time_slice_dir,
                                     time_slices_path_list)

            #******************************************************************
            # 4. Add imaging columns to each timeslice
            # ndppp_executable fails if not present
            for time_slice_path in time_slices_path_list:
                pt.addImagingColumns(time_slice_path)
                self.logger.debug(
                    "Added imaging columns to time_slice: {0}".format(
                        time_slice_path))

            #*****************************************************************
            # 5. Filter bad stations
            #if not(asciistat_executable == "" or
            #     statplot_executable == "" or
            #     msselect_executable == "" or True):
            if False:
                time_slice_filtered_path_list = self._filter_bad_stations(
                    time_slices_path_list, asciistat_executable,
                    statplot_executable, msselect_executable)
            else:
                # use the unfiltered list
                time_slice_filtered_path_list = time_slices_path_list

            #*****************************************************************
            # 6. Add beam tables to the measurement sets
            if add_beam_tables:
                self.add_beam_tables(time_slice_filtered_path_list)

            #******************************************************************
            # 7. Convert polarization
            self._convert_polarization(time_slice_filtered_path_list)

            #******************************************************************
            # 8. Perform the (virtual) concatenation of the timeslices
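            # (A virtual concatenation references the individual timeslices
            # instead of copying their data, which is presumably why step 9
            # makes a deep copy to materialise the final output.)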
            self._concat_timeslices(time_slice_filtered_path_list,
                                    output_measurement_set)

            #*****************************************************************
            # 9. Use table.copy(deep=True) to copy the ms to the correct
            # output location: this creates a new measurement set.
            self._deep_copy_to_output_location(output_measurement_set,
                                               final_output_path)

            # Write the measurement sets that were actually used for the
            # created dataset back to the input mapfile
            processed_ms_map.save(ms_mapfile)

            #******************************************************************
            # return
            self.outputs["time_slices"] = \
                time_slices_path_list

        return 0