Example #1
    def go(self):
        self.logger.info("Starting setupparmdb run")
        super(setupparmdb, self).go()

        # *********************************************************************
        # 1. Create a temporary template parmdb at the master side of the recipe
        self.logger.info("Generating template parmdb")

        # generate a temp dir
        pdbdir = tempfile.mkdtemp(
            dir=self.config.get("layout", "job_directory"),
            suffix=".%s" % (os.path.basename(__file__), ))
        pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

        # Create a template parmdb, using the temp dir as its location
        try:
            parmdbm_process = subprocess.Popen([self.inputs['executable']],
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            sout, serr = parmdbm_process.communicate(template % pdbfile)
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as err:
            self.logger.error("Failed to spawn parmdbm: %s" % str(err))
            return 1
Example #2
def catch_segfaults(cmd, cwd, env, logger, max=1, cleanup=lambda: None):
    """
    Run cmd in cwd with env, sending output to logger.

    If it segfaults, retry up to max times.
    """
    tries = 0
    while tries <= max:
        if tries > 0:
            logger.debug("Retrying...")
        logger.debug("Running: %s" % (' '.join(cmd), ))
        process = spawn_process(cmd, logger, cwd, env)
        sout, serr = process.communicate()
        log_process_output(cmd[0], sout, serr, logger)
        if process.returncode == 0:
            break
        elif process.returncode == -11:
            logger.warn("%s process segfaulted!" % cmd[0])
            cleanup()
            tries += 1
            continue
        else:
            raise subprocess.CalledProcessError(process.returncode, cmd[0])
    if tries > max:
        logger.error("Too many segfaults from %s; aborted" % (cmd[0]))
        raise subprocess.CalledProcessError(process.returncode, cmd[0])
    return process
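
A minimal usage sketch for the helper above, for orientation only: the command line, scratch directory, retry limit, and cleanup callback are illustrative assumptions, not taken from the source, and spawn_process is assumed to accept env=None to inherit the parent environment.

import logging
import shutil
import subprocess

logger = logging.getLogger("example")
scratch_dir = "/tmp/segfault_scratch"        # hypothetical working directory

try:
    # Retry the (hypothetical) command up to two extra times if it segfaults,
    # wiping the scratch directory between attempts.
    process = catch_segfaults(
        ["my_tool", "--input", "data.ms"],   # hypothetical command line
        scratch_dir,
        None,                                # assumed: inherit parent environment
        logger,
        max=2,
        cleanup=lambda: shutil.rmtree(scratch_dir, ignore_errors=True),
    )
    logger.info("Command finished with return code %d", process.returncode)
except subprocess.CalledProcessError as err:
    logger.error("Command failed: %s", err)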
Example #3
    def _create_parmdb(self, parmdb_executable, target_dir_path):
        """
        Create a parmdb at target_dir_path using the supplied executable.
        Does not test for the existence of the target parent directory.
        Returns 1 if the parmdb executable failed, 0 otherwise.
        """
        # Format the template string by inserting the target dir
        formatted_template = _TEMPLATE_PARMDB.format(target_dir_path)
        try:
            # Spawn a subprocess and connect the pipelines
            parmdbm_process = subprocess.Popen(parmdb_executable,
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            # Send formatted template on stdin
            sout, serr = communicate_returning_strings(
                parmdbm_process, input=formatted_template)

            # Log the output
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as oserror:
            self.logger.error("Failed to spawn parmdbm: {0}".format(
                str(oserror)))
            return 1

        return 0
Example #4
    def _create_parmdb(self, parmdb_executable, target_dir_path):
        """
        Create a parmdb at target_dir_path using the supplied executable.
        Does not test for the existence of the target parent directory.
        Returns 1 if the parmdb executable failed, 0 otherwise.
        """
        # Format the template string by inserting the target dir
        formatted_template = _TEMPLATE_PARMDB.format(target_dir_path)
        try:
            # Spawn a subprocess and connect the pipelines
            parmdbm_process = subprocess.Popen(
                parmdb_executable,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            # Send formatted template on stdin
            sout, serr = parmdbm_process.communicate(formatted_template)

            # Log the output
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as oserror:
            self.logger.error("Failed to spawn parmdbm: {0}".format(
                str(oserror)))
            return 1
Example #5
    def go(self):
        self.logger.info("Starting example recipe run")
        super(example, self).go()

        self.logger.info("This is a log message")

        my_process = subprocess.Popen(
            [
                self.inputs['executable']
            ],
            stdout = subprocess.PIPE,
            stderr = subprocess.PIPE
        )
        sout, serr = my_process.communicate()
        self.outputs['stdout'] = sout
        log_process_output(
            self.inputs['executable'],
            sout,
            serr,
            self.logger
        )

        if my_process.returncode == 0:
            return 0
        else:
            self.logger.warn(
                "Return code (%d) is not 0." % my_process.returncode
            )
            return 1
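
Every example on this page routes subprocess output through log_process_output(name, sout, serr, logger), whose implementation is not included in these excerpts. The following is a hypothetical stand-in that matches the call signature used above, assuming the helper simply logs stdout line by line at DEBUG level and stderr at WARNING level.

def log_process_output(process_name, sout, serr, logger):
    # Hypothetical stand-in for the helper used throughout these examples:
    # log each line of stdout at DEBUG and each line of stderr at WARNING,
    # prefixed with the process name so interleaved output stays readable.
    if isinstance(sout, bytes):
        sout = sout.decode("utf-8", errors="replace")
    if isinstance(serr, bytes):
        serr = serr.decode("utf-8", errors="replace")
    for line in (sout or "").splitlines():
        logger.debug("%s stdout: %s", process_name, line)
    for line in (serr or "").splitlines():
        logger.warning("%s stderr: %s", process_name, line)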
Example #6
    def _run_bbs_control(self, bbs_parset, run_flag):
        """
        Run BBS Global Control and wait for it to finish. Return its return
        code.
        """
        self.logger.info("Running BBS GlobalControl")
        working_dir = tempfile.mkdtemp(suffix=".%s" %
                                       (os.path.basename(__file__), ))
        with CatchLog4CPlus(working_dir, self.logger.name + ".GlobalControl",
                            os.path.basename(self.inputs['control_exec'])):
            with utilities.log_time(self.logger):
                try:
                    bbs_control_process = utilities.spawn_process(
                        [self.inputs['control_exec'], bbs_parset, "0"],
                        self.logger,
                        cwd=working_dir,
                        env=self.environment)
                    # _monitor_process() needs a convenient kill() method.
                    bbs_control_process.kill = lambda: os.kill(
                        bbs_control_process.pid, signal.SIGKILL)
                except OSError as e:
                    self.logger.error("Failed to spawn BBS Control (%s)" %
                                      str(e))
                    self.killswitch.set()
                    return 1
                finally:
                    run_flag.set()

            returncode = self._monitor_process(bbs_control_process,
                                               "BBS Control")
            sout, serr = communicate_returning_strings(bbs_control_process)
        shutil.rmtree(working_dir)
        log_process_output(self.inputs['control_exec'], sout, serr,
                           self.logger)
        return returncode
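
Example #6 attaches a kill() method to the spawned process so that _monitor_process (not shown in these excerpts) can terminate it. The sketch below is a hypothetical monitor written under the assumption that it polls the process while watching the recipe's killswitch event; it is not the actual LOFAR implementation.

import time

def monitor_process_sketch(process, name, killswitch, logger, poll_interval=1.0):
    # Hypothetical monitor: wait for the process, but kill it (and report
    # failure) if the shared killswitch event is set in the meantime.
    while process.poll() is None:
        if killswitch.is_set():
            logger.warning("Killswitch set: killing %s" % name)
            process.kill()
            return 1
        time.sleep(poll_interval)
    if process.returncode != 0:
        logger.error("%s exited with return code %d" % (name, process.returncode))
        killswitch.set()
    return process.returncode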
Example #7
def catch_segfaults(cmd, cwd, env, logger, max = 1, cleanup = lambda: None):
    """
    Run cmd in cwd with env, sending output to logger.

    If it segfaults, retry up to max times.
    """
    tries = 0
    while tries <= max:
        if tries > 0:
            logger.debug("Retrying...")
        logger.debug("Running: %s" % (' '.join(cmd),))
        process = spawn_process(cmd, logger, cwd, env)
        sout, serr = process.communicate()
        log_process_output(cmd[0], sout, serr, logger)
        if process.returncode == 0:
            break
        elif process.returncode == -11:
            logger.warn("%s process segfaulted!" % cmd[0])
            cleanup()
            tries += 1
            continue
        else:
            raise subprocess.CalledProcessError(
                process.returncode, cmd[0]
            )
    if tries > max:
        logger.error("Too many segfaults from %s; aborted" % (cmd[0]))
        raise subprocess.CalledProcessError(process.returncode, cmd[0])
    return process
Example #8
 def convert_mwimager_parset(parset):
     try:
         with patched_parset(
             parset,
             {
                 'dataset': dataset,
                 'Images.frequency': frequency,
                 'msDirType': ms_dir_type,
                 'msDirRa': ms_dir_ra,
                 'msDirDec': ms_dir_dec,
                 'restore': restore # cimager bug: non-restored image unusable
             }
         ) as cimager_parset:
             fd, converted_parset = tempfile.mkstemp(
                 dir=self.config.get("layout", "job_directory")
             )
             convert_process = spawn_process(
                 [
                     self.inputs['convert_exec'],
                     cimager_parset,
                     converted_parset
                 ],
                 self.logger
             )
             os.close(fd)
             sout, serr = convert_process.communicate()
             log_process_output(self.inputs['convert_exec'], sout, serr, self.logger)
             if convert_process.returncode != 0:
                 raise subprocess.CalledProcessError(
                     convert_process.returncode, convert_exec
                 )
             return converted_parset
     except OSError as e:
         self.logger.error("Failed to spawn convertimagerparset (%s)" % str(e))
         raise
Example #9
 def convert_mwimager_parset(parset):
     try:
         with patched_parset(
                 parset,
             {
                 'dataset': dataset,
                 'Images.frequency': frequency,
                 'msDirType': ms_dir_type,
                 'msDirRa': ms_dir_ra,
                 'msDirDec': ms_dir_dec,
                 'restore':
                 restore  # cimager bug: non-restored image unusable
             }) as cimager_parset:
             fd, converted_parset = tempfile.mkstemp(
                 dir=self.config.get("layout", "job_directory"))
             convert_process = spawn_process([
                 self.inputs['convert_exec'], cimager_parset,
                 converted_parset
             ], self.logger)
             os.close(fd)
             sout, serr = convert_process.communicate()
             log_process_output(self.inputs['convert_exec'], sout, serr,
                                self.logger)
             if convert_process.returncode != 0:
                 raise subprocess.CalledProcessError(
                     convert_process.returncode, convert_exec)
             return converted_parset
     except OSError as e:
         self.logger.error("Failed to spawn convertimagerparset (%s)" %
                           str(e))
         raise
     except subprocess.CalledProcessError as e:
         self.logger.error(str(e))
         raise
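
Examples #8 and #9 depend on a patched_parset context manager that is not shown here. The sketch below is a hypothetical equivalent, assuming the parset is a plain key=value text file: it writes a temporary copy with the given overrides applied, yields its path, and removes the copy on exit.

import os
import tempfile
from contextlib import contextmanager

@contextmanager
def patched_parset_sketch(parset_path, overrides):
    # Hypothetical stand-in: copy the parset, apply the given key/value
    # overrides, hand the temporary path to the caller, and clean up afterwards.
    fd, patched_path = tempfile.mkstemp(suffix=".parset")
    try:
        with os.fdopen(fd, "w") as patched:
            with open(parset_path) as original:
                for line in original:
                    key = line.split("=", 1)[0].strip()
                    if key not in overrides:
                        patched.write(line)
            for key, value in overrides.items():
                patched.write("%s=%s\n" % (key, value))
        yield patched_path
    finally:
        os.remove(patched_path)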
Example #10
    def _run_bbs_kernel(self, host, command, *arguments):
        """
        Run command with arguments on the specified host using ssh. Return its
        return code.

        The resultant process is monitored for failure; see
        _monitor_process() for details.
        """
        try:
            bbs_kernel_process = run_remote_command(
                self.config,
                self.logger,
                host,
                command,
                self.environment,
                arguments=arguments
            )
        except OSError:
            self.logger.exception("BBS Kernel failed to start")
            self.killswitch.set()
            return 1
        result = self._monitor_process(bbs_kernel_process,
                                       "BBS Kernel on %s" % host)
        sout, serr = bbs_kernel_process.communicate()
        serr = serr.replace("Connection to %s closed.\r\n" % host, "")
        log_process_output("SSH session (BBS kernel)", sout, serr, self.logger)
        return result
Example #11
    def go(self):
        self.logger.info("Starting setupparmdb run")
        super(setupparmdb, self).go()

        # *********************************************************************
        # 1. Create a temporary template parmdb at the master side of the recipe
        self.logger.info("Generating template parmdb")

        # generate a temp dir
        pdbdir = tempfile.mkdtemp(
            dir=self.config.get("layout", "job_directory")
        )
        pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

        # Create a template parmdb, using the temp dir as its location
        try:
            parmdbm_process = subprocess.Popen(
                [self.inputs['executable']],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            sout, serr = parmdbm_process.communicate(template % pdbfile)
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as err:
            self.logger.error("Failed to spawn parmdbm: %s" % str(err))
            return 1
Example #12
    def dispatch(self, logger, config, limiter, id, jobhost, jobport,
                 error, killswitch):
        """
        Dispatch this job to the relevant compute node.

        Note that error is an instance of threading.Event, which will be set
        if the remote job fails for some reason.
        """
        # time the duration of this node
        time_info_start = time.time()
        self.id = id
        limiter[self.host].acquire()
        try:
            if killswitch.isSet():
                logger.debug("Shutdown in progress: not starting remote job")
                self.results['returncode'] = 1
                error.set()
                return 1
            process = run_remote_command(
                config,
                logger,
                self.host,
                self.command,
                {
                    "PATH": os.environ.get('PATH'),
                    "PYTHONPATH": os.environ.get('PYTHONPATH'),
                    "LD_LIBRARY_PATH": os.environ.get('LD_LIBRARY_PATH')
                },
                arguments = [id, jobhost, jobport]
            )
            # Wait for process to finish. In the meantime, if the killswitch
            # is set (by an exception in the main thread), forcibly kill our
            # job off.
            while process.poll() is None:
                if killswitch.isSet():
                    process.kill()
                else:
                    time.sleep(1)
            sout, serr = process.communicate()

            serr = serr.replace("Connection to %s closed.\r\n" % self.host, "")
            log_process_output("Remote command", sout, serr, logger)
        except Exception as e:
            logger.exception("Failed to run remote process %s (%s)" % (self.command, str(e)))
            self.results['returncode'] = 1
            error.set()
            return 1
Example #13
    def dispatch(self, logger, config, limiter, id, jobhost, jobport, error,
                 killswitch):
        """
        Dispatch this job to the relevant compute node.

        Note that error is an instance of threading.Event, which will be set
        if the remote job fails for some reason.
        """
        self.id = id
        limiter[self.host].acquire()
        # Start the timer after we acquire the lock!
        time_info_start = time.time()
        try:
            if killswitch.isSet():
                logger.debug("Shutdown in progress: not starting remote job")
                self.results['returncode'] = 1
                error.set()
                return 1
            process = run_remote_command(
                config,
                logger,
                self.host,
                self.command, {
                    "PATH": os.environ.get('PATH'),
                    "PYTHONPATH": os.environ.get('PYTHONPATH'),
                    "LD_LIBRARY_PATH": os.environ.get('LD_LIBRARY_PATH'),
                    "LOFARROOT": os.environ.get('LOFARROOT'),
                    "QUEUE_PREFIX": os.environ.get('QUEUE_PREFIX', '')
                },
                arguments=[id, jobhost, jobport])
            # Wait for process to finish. In the meantime, if the killswitch
            # is set (by an exception in the main thread), forcibly kill our
            # job off.
            while process.poll() is None:
                if killswitch.isSet():
                    process.kill()
                else:
                    time.sleep(1)
            sout, serr = process.communicate()

            serr = serr.replace("Connection to %s closed.\r\n" % self.host, "")
            log_process_output("Remote command", sout, serr, logger)
        except Exception as e:
            logger.exception("Failed to run remote process %s (%s)" %
                             (self.command, str(e)))
            self.results['returncode'] = 1
            error.set()
            return 1
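
Examples #10, #12 and #13 start their remote jobs via run_remote_command, whose implementation is not part of these excerpts. Below is a heavily simplified, hypothetical equivalent, assuming it wraps an ssh invocation in subprocess.Popen and returns the process object so callers can poll(), kill() and communicate() with it as the dispatch code above does.

import subprocess

def run_remote_command_sketch(config, logger, host, command, environment, arguments=None):
    # Hypothetical simplification: prefix the remote command with the exported
    # environment variables, append the arguments, and run it over ssh.
    # config is accepted only to mirror the call signature used above; unused here.
    arguments = arguments or []
    exports = " ".join(
        "%s=%s" % (key, value) for key, value in environment.items() if value
    )
    remote_command = "%s %s %s" % (exports, command, " ".join(str(a) for a in arguments))
    logger.debug("Running on %s: %s" % (host, remote_command))
    return subprocess.Popen(
        ["ssh", "-T", host, remote_command],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )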
Example #14
    def go(self):
        self.logger.info("Starting example recipe run")
        super(example, self).go()

        self.logger.info("This is a log message")

        my_process = subprocess.Popen([self.inputs['executable']],
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)
        sout, serr = communicate_returning_strings(my_process)
        self.outputs['stdout'] = sout
        log_process_output(self.inputs['executable'], sout, serr, self.logger)

        if my_process.returncode == 0:
            return 0
        else:
            self.logger.warn("Return code (%d) is not 0." %
                             my_process.returncode)
            return 1
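
Examples #3, #6, #14 and #21 call communicate_returning_strings instead of Popen.communicate directly. The helper is not shown in these excerpts; a plausible minimal equivalent, assuming it exists only to decode the byte streams communicate() returns under Python 3, is sketched below.

def communicate_returning_strings(process, input=None):
    # Hypothetical equivalent: run communicate() and decode stdout/stderr so
    # callers can treat them as text regardless of Python version.
    sout, serr = process.communicate(input=input)
    if isinstance(sout, bytes):
        sout = sout.decode("utf-8", errors="replace")
    if isinstance(serr, bytes):
        serr = serr.decode("utf-8", errors="replace")
    return sout, serr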
Example #15
def catch_segfaults(cmd,
                    cwd,
                    env,
                    logger,
                    max=1,
                    cleanup=lambda: None,
                    usageStats=None):
    """
    Run cmd in cwd with env, sending output to logger.

    If it segfaults, retry up to max times.
    """
    tries = 0
    while tries <= max:
        if tries > 0:
            logger.debug("Retrying...")
        logger.debug("Running: %s" % (' '.join(cmd), ))
        process = spawn_process(cmd, logger, cwd, env)
        # Add the created process to the usageStats object
        if usageStats:
            usageStats.addPID(process.pid)

        if 'casa' in cmd[0]:
            import time
            while process.returncode is None:
                process.poll()
                time.sleep(1)
        sout, serr = process.communicate()
        log_process_output(cmd[0], sout, serr, logger)
        if process.returncode == 0:
            break
        elif process.returncode == -11:
            logger.warn("%s process segfaulted!" % cmd[0])
            cleanup()
            tries += 1
            continue
        else:
            raise subprocess.CalledProcessError(process.returncode, cmd[0])
    if tries > max:
        logger.error("Too many segfaults from %s; aborted" % (cmd[0]))
        raise subprocess.CalledProcessError(process.returncode, cmd[0])
    return process
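
Examples #15 and #16 additionally register each child PID with a usageStats object that is not defined in these excerpts. A minimal hypothetical stand-in that satisfies the single addPID() call used above:

class UsageStatsSketch(object):
    # Hypothetical stand-in for the usageStats argument: the retry loop only
    # needs addPID(), called once per spawned process, so resource usage can
    # be sampled elsewhere for the recorded PIDs.
    def __init__(self):
        self.pids = []

    def addPID(self, pid):
        self.pids.append(pid)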
Example #16
def catch_segfaults(cmd, cwd, env, logger, max = 1, cleanup = lambda: None,
                    usageStats = None):
    """
    Run cmd in cwd with env, sending output to logger.

    If it segfaults, retry up to max times.
    """
    tries = 0
    while tries <= max:
        if tries > 0:
            logger.debug("Retrying...")
        logger.debug("Running: %s" % (' '.join(cmd),))
        process = spawn_process(cmd, logger, cwd, env)
        # Add the created process to the usageStats object
        if usageStats:
            usageStats.addPID(process.pid)

        if 'casa' in cmd[0]:
            import time
            while process.returncode is None:
                process.poll()
                time.sleep(1)
        sout, serr = process.communicate()
        log_process_output(cmd[0], sout, serr, logger)
        if process.returncode == 0:
            break
        elif process.returncode == -11:
            logger.warn("%s process segfaulted!" % cmd[0])
            cleanup()
            tries += 1
            continue
        else:
            raise subprocess.CalledProcessError(
                process.returncode, cmd[0]
            )
    if tries > max:
        logger.error("Too many segfaults from %s; aborted" % (cmd[0]))
        raise subprocess.CalledProcessError(process.returncode, cmd[0])
    return process
Example #17
    def run(self, executable, initscript, infile, key, db_name, db_user,
            db_host):
        #                           executable: path to KernelControl executable
        #                           initscript:             path to lofarinit.sh
        #                               infile:    MeasurementSet for processing
        #       key, db_name, db_user, db_host:   database connection parameters
        # ----------------------------------------------------------------------
        with log_time(self.logger):
            if os.path.exists(infile):
                self.logger.info("Processing %s" % (infile))
            else:
                self.logger.error("Dataset %s does not exist" % (infile))
                return 1

            #        Build a configuration parset specifying database parameters
            #                                                     for the kernel
            # ------------------------------------------------------------------
            self.logger.debug("Setting up kernel parset")
            filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
            fd, parset_filename = mkstemp()
            kernel_parset = Parset()
            for key, value in {
                    "ObservationPart.Filesystem": filesystem,
                    "ObservationPart.Path": infile,
                    "BBDB.Key": key,
                    "BBDB.Name": db_name,
                    "BBDB.User": db_user,
                    "BBDB.Host": db_host,
                    "ParmLog": "",
                    "ParmLoglevel": "",
                    "ParmDB.Sky": infile + ".sky",
                    "ParmDB.Instrument": infile + ".instrument"
            }.items():
                kernel_parset.add(key, value)
            kernel_parset.writeFile(parset_filename)
            os.close(fd)
            self.logger.debug("Parset written to %s" % (parset_filename, ))

            #                                                     Run the kernel
            #               Catch & log output from the kernel logger and stdout
            # ------------------------------------------------------------------
            working_dir = mkdtemp()
            env = read_initscript(self.logger, initscript)
            try:
                cmd = [executable, parset_filename, "0"]
                self.logger.debug("Executing BBS kernel")
                with CatchLog4CPlus(
                        working_dir,
                        self.logger.name + "." + os.path.basename(infile),
                        os.path.basename(executable),
                ):
                    bbs_kernel_process = Popen(cmd,
                                               stdout=PIPE,
                                               stderr=PIPE,
                                               cwd=working_dir)
                    sout, serr = bbs_kernel_process.communicate()
                log_process_output("BBS kernel", sout, serr, self.logger)
                if bbs_kernel_process.returncode != 0:
                    raise CalledProcessError(bbs_kernel_process.returncode,
                                             executable)
            except CalledProcessError as e:
                self.logger.error(str(e))
                return 1
            finally:
Example #18
    def go(self):
        """
        Contains functionality of the vdsmaker
        """
        super(vdsmaker, self).go()
        # **********************************************************************
        # 1. Load data from disk and create output file names
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        data = DataMap.load(args[0])

        # Skip items in `data` that have 'skip' set to True
        data.iterator = DataMap.SkipIterator

        # Create output vds names
        vdsnames = [
            os.path.join(self.inputs['directory'],
                         os.path.basename(item.file) + '.vds') for item in data
        ]

        # *********************************************************************
        # 2. Call vdsmaker
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for inp, vdsfile in zip(data, vdsnames):
            jobs.append(
                ComputeJob(inp.host,
                           command,
                           arguments=[
                               inp.file,
                               self.config.get('cluster', 'clusterdesc'),
                               vdsfile, self.inputs['makevds']
                           ]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        vdsnames = [
            vds for vds, job in zip(vdsnames, jobs)
            if job.results['returncode'] == 0
        ]
        if not vdsnames:
            self.logger.error("All makevds processes failed. Bailing out!")
            return 1

        # *********************************************************************
        # 3. Combine VDS files to produce GDS
        failure = False
        self.logger.info("Combining VDS files")
        executable = self.inputs['combinevds']
        gvds_out = self.inputs['gvds']
        # Create the gvds directory for output files, needed for combine
        create_directory(os.path.dirname(gvds_out))

        try:
            command = [executable, gvds_out] + vdsnames
            combineproc = subprocess.Popen(command,
                                           close_fds=True,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE)
            sout, serr = combineproc.communicate()
            log_process_output(executable, sout, serr, self.logger)
            if combineproc.returncode != 0:
                raise subprocess.CalledProcessError(combineproc.returncode,
                                                    command)
            self.outputs['gvds'] = gvds_out
            self.logger.info("Wrote combined VDS file: %s" % gvds_out)
        except subprocess.CalledProcessError as cpe:
            self.logger.exception("combinevds failed with status %d: %s" %
                                  (cpe.returncode, serr))
            failure = True
Example #19
File: bbs.py Project: jjdmol/LOFAR
    def go(self):
        self.logger.info("Starting BBS run")
        super(bbs, self).go()

        #             Generate source and parameter databases for all input data
        # ----------------------------------------------------------------------
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['parmdbm']
        inputs['working_directory'] = self.config.get(
            "DEFAULT", "default_working_directory"
        )
        inputs['mapfile'] = self.task_definitions.get('parmdb','mapfile')
        inputs['suffix'] = ".instrument"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('parmdb', inputs, outputs):
            self.logger.warn("parmdb reports failure")
            return 1
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['makesourcedb']
        inputs['skymodel'] = self.inputs['skymodel']
        inputs['mapfile'] = self.task_definitions.get('sourcedb','mapfile')
        inputs['suffix'] = ".sky"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('sourcedb', inputs, outputs):
            self.logger.warn("sourcedb reports failure")
            return 1

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for BBS")
        vds_file = os.path.join(
            self.config.get("layout", "job_directory"),
            "vds",
            "bbs.gvds"
        )
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = vds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(vds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("BBS GVDS is %s" % (vds_file,))


        #      Iterate over groups of subbands divided up for convenient cluster
        #         processing -- ie, no more than nproc subbands per compute node
        # ----------------------------------------------------------------------
        for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])):
            #               to_process is a list of (host, filename, vds) tuples
            # ------------------------------------------------------------------
            hosts, ms_names, vds_files = map(list, zip(*to_process))

            #             The BBS session database should be cleared for our key
            # ------------------------------------------------------------------
            self.logger.debug(
                "Cleaning BBS database for key %s" % (self.inputs["key"])
            )
            with closing(
                psycopg2.connect(
                    host=self.inputs["db_host"],
                    user=self.inputs["db_user"],
                    database=self.inputs["db_name"]
                )
            ) as db_connection:
                db_connection.set_isolation_level(
                    psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT
                )
                with closing(db_connection.cursor()) as db_cursor:
                    db_cursor.execute(
                        "DELETE FROM blackboard.session WHERE key=%s",
                        (self.inputs["key"],)
                    )

            #     BBS GlobalControl requires a GVDS file describing all the data
            #          to be processed. We assemble that from the separate parts
            #                                         already available on disk.
            # ------------------------------------------------------------------
            self.logger.debug("Building VDS file describing data for BBS run")
            vds_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
            vds_file = os.path.join(vds_dir, "bbs.gvds")
            combineproc = utilities.spawn_process(
                [
                    self.inputs['combinevds'],
                    vds_file,
                ] + vds_files,
                self.logger
            )
            sout, serr = combineproc.communicate()
            log_process_output(self.inputs['combinevds'], sout, serr, self.logger)
            if combineproc.returncode != 0:
                raise subprocess.CalledProcessError(
                    combineproc.returncode, self.inputs['combinevds']
                )

            #      Construct a parset for BBS GlobalControl by patching the GVDS
            #           file and database information into the supplied template
            # ------------------------------------------------------------------
            self.logger.debug("Building parset for BBS control")
            bbs_parset = utilities.patch_parset(
                self.inputs['parset'],
                {
                    'Observation': vds_file,
                    'BBDB.Key': self.inputs['key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
    #                'BBDB.Port': self.inputs['db_name'],
                }
            )
            self.logger.debug("BBS control parset is %s" % (bbs_parset,))

            try:
                #        When one of our processes fails, we set the killswitch.
                #      Everything else will then come crashing down, rather than
                #                                         hanging about forever.
                # --------------------------------------------------------------
                self.killswitch = threading.Event()
                self.killswitch.clear()
                signal.signal(signal.SIGTERM, self.killswitch.set)

                #                           GlobalControl runs in its own thread
                # --------------------------------------------------------------
                run_flag = threading.Event()
                run_flag.clear()
                bbs_control = threading.Thread(
                    target=self._run_bbs_control,
                    args=(bbs_parset, run_flag)
                )
                bbs_control.start()
                run_flag.wait()    # Wait for control to start before proceeding

                #      We run BBS KernelControl on each compute node by directly
                #                             invoking the node script using SSH
                #      Note that we use a job_server to send out job details and
                #           collect logging information, so we define a bunch of
                #    ComputeJobs. However, we need more control than the generic
                #     ComputeJob.dispatch method supplies, so we'll control them
                #                                          with our own threads.
                # --------------------------------------------------------------
                command = "python %s" % (self.__file__.replace('master', 'nodes'))
                env = {
                    "LOFARROOT": utilities.read_initscript(self.logger, self.inputs['initscript'])["LOFARROOT"],
                    "PYTHONPATH": self.config.get('deploy', 'engine_ppath'),
                    "LD_LIBRARY_PATH": self.config.get('deploy', 'engine_lpath')
                }
                jobpool = {}
                bbs_kernels = []
                with job_server(self.logger, jobpool, self.error) as (jobhost, jobport):
                    self.logger.debug("Job server at %s:%d" % (jobhost, jobport))
                    for job_id, details in enumerate(to_process):
                        host, file, vds = details
                        jobpool[job_id] = ComputeJob(
                            host, command,
                            arguments=[
                                self.inputs['kernel_exec'],
                                self.inputs['initscript'],
                                file,
                                self.inputs['key'],
                                self.inputs['db_name'],
                                self.inputs['db_user'],
                                self.inputs['db_host']
                            ]
                        )
                        bbs_kernels.append(
                            threading.Thread(
                                target=self._run_bbs_kernel,
                                args=(host, command, env, job_id,
                                    jobhost, str(jobport)
                                )
                            )
                        )
                    self.logger.info("Starting %d threads" % len(bbs_kernels))
                    [thread.start() for thread in bbs_kernels]
                    self.logger.debug("Waiting for all kernels to complete")
                    [thread.join() for thread in bbs_kernels]


                #         When GlobalControl finishes, our work here is done
                # ----------------------------------------------------------
                self.logger.info("Waiting for GlobalControl thread")
                bbs_control.join()
            finally:
                os.unlink(bbs_parset)
                shutil.rmtree(vds_dir)
                if self.killswitch.isSet():
                    #  If killswitch is set, then one of our processes failed so
                    #                                   the whole run is invalid
                    # ----------------------------------------------------------
                    return 1

        return 0
Example #20
class bbs(BaseRecipe):
    """
    The bbs recipe coordinates running BBS on a group of MeasurementSets. It
    runs both GlobalControl and KernelControl; as yet, SolverControl has not
    been integrated.

    The recipe will also run the sourcedb and parmdb recipes on each of the
    input MeasurementSets.

    **Arguments**

    A mapfile describing the data to be processed.
    """
    inputs = {
        'control_exec':
        ingredient.ExecField('--control-exec',
                             dest="control_exec",
                             help="BBS Control executable"),
        'kernel_exec':
        ingredient.ExecField('--kernel-exec',
                             dest="kernel_exec",
                             help="BBS Kernel executable"),
        'initscript':
        ingredient.FileField('--initscript',
                             dest="initscript",
                             help="Initscript to source (ie, lofarinit.sh)"),
        'parset':
        ingredient.FileField('-p',
                             '--parset',
                             dest="parset",
                             help="BBS configuration parset"),
        'key':
        ingredient.StringField('--key',
                               dest="key",
                               help="Key to identify BBS session"),
        'db_host':
        ingredient.StringField('--db-host',
                               dest="db_host",
                               help="Database host with optional port"),
        'db_user':
        ingredient.StringField('--db-user',
                               dest="db_user",
                               help="Database user"),
        'db_name':
        ingredient.StringField('--db-name',
                               dest="db_name",
                               help="Database name"),
        'makevds':
        ingredient.ExecField('--makevds', help="makevds executable"),
        'combinevds':
        ingredient.ExecField('--combinevds', help="combinevds executable"),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8),
        'makesourcedb':
        ingredient.ExecField('--makesourcedb', help="makesourcedb executable"),
        'parmdbm':
        ingredient.ExecField('--parmdbm', help="parmdbm executable"),
        'skymodel':
        ingredient.FileField('-s',
                             '--skymodel',
                             dest="skymodel",
                             help="Input sky catalogue")
    }

    def go(self):
        self.logger.info("Starting BBS run")
        super(bbs, self).go()

        #             Generate source and parameter databases for all input data
        # ----------------------------------------------------------------------
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['parmdbm']
        inputs['working_directory'] = self.config.get(
            "DEFAULT", "default_working_directory")
        inputs['mapfile'] = self.task_definitions.get('parmdb', 'mapfile')
        inputs['suffix'] = ".instrument"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('parmdb', inputs, outputs):
            self.logger.warn("parmdb reports failure")
            return 1
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['makesourcedb']
        inputs['skymodel'] = self.inputs['skymodel']
        inputs['mapfile'] = self.task_definitions.get('sourcedb', 'mapfile')
        inputs['suffix'] = ".sky"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('sourcedb', inputs, outputs):
            self.logger.warn("sourcedb reports failure")
            return 1

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for BBS")
        vds_file = os.path.join(self.config.get("layout", "job_directory"),
                                "vds", "bbs.gvds")
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = vds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(vds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("BBS GVDS is %s" % (vds_file, ))

        #      Iterate over groups of subbands divided up for convenient cluster
        #         processing -- ie, no more than nproc subbands per compute node
        # ----------------------------------------------------------------------
        for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])):
            #               to_process is a list of (host, filename, vds) tuples
            # ------------------------------------------------------------------
            hosts, ms_names, vds_files = map(list, zip(*to_process))

            #             The BBS session database should be cleared for our key
            # ------------------------------------------------------------------
            self.logger.debug("Cleaning BBS database for key %s" %
                              (self.inputs["key"]))
            with closing(
                    psycopg2.connect(
                        host=self.inputs["db_host"],
                        user=self.inputs["db_user"],
                        database=self.inputs["db_name"])) as db_connection:
                db_connection.set_isolation_level(
                    psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
                with closing(db_connection.cursor()) as db_cursor:
                    db_cursor.execute(
                        "DELETE FROM blackboard.session WHERE key=%s",
                        (self.inputs["key"], ))

            #     BBS GlobalControl requires a GVDS file describing all the data
            #          to be processed. We assemble that from the separate parts
            #                                         already available on disk.
            # ------------------------------------------------------------------
            self.logger.debug("Building VDS file describing data for BBS run")
            vds_dir = tempfile.mkdtemp()
            vds_file = os.path.join(vds_dir, "bbs.gvds")
            combineproc = utilities.spawn_process([
                self.inputs['combinevds'],
                vds_file,
            ] + vds_files, self.logger)
            sout, serr = combineproc.communicate()
            log_process_output(self.inputs['combinevds'], sout, serr,
                               self.logger)
            if combineproc.returncode != 0:
                raise subprocess.CalledProcessError(
                    combineproc.returncode, self.inputs['combinevds'])

            #      Construct a parset for BBS GlobalControl by patching the GVDS
            #           file and database information into the supplied template
            # ------------------------------------------------------------------
            self.logger.debug("Building parset for BBS control")
            bbs_parset = utilities.patch_parset(
                self.inputs['parset'],
                {
                    'Observation': vds_file,
                    'BBDB.Key': self.inputs['key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
                    #                'BBDB.Port': self.inputs['db_name'],
                })
            self.logger.debug("BBS control parset is %s" % (bbs_parset, ))

            try:
                #        When one of our processes fails, we set the killswitch.
                #      Everything else will then come crashing down, rather than
                #                                         hanging about forever.
                # --------------------------------------------------------------
                self.killswitch = threading.Event()
                self.killswitch.clear()
                signal.signal(signal.SIGTERM, self.killswitch.set)

                #                           GlobalControl runs in its own thread
                # --------------------------------------------------------------
                run_flag = threading.Event()
                run_flag.clear()
                bbs_control = threading.Thread(target=self._run_bbs_control,
                                               args=(bbs_parset, run_flag))
                bbs_control.start()
                run_flag.wait()  # Wait for control to start before proceeding

                #      We run BBS KernelControl on each compute node by directly
                #                             invoking the node script using SSH
                #      Note that we use a job_server to send out job details and
                #           collect logging information, so we define a bunch of
                #    ComputeJobs. However, we need more control than the generic
                #     ComputeJob.dispatch method supplies, so we'll control them
                #                                          with our own threads.
                # --------------------------------------------------------------
                command = "python %s" % (self.__file__.replace(
                    'master', 'nodes'))
                env = {
                    "LOFARROOT":
                    utilities.read_initscript(
                        self.logger, self.inputs['initscript'])["LOFARROOT"],
                    "PYTHONPATH":
                    self.config.get('deploy', 'engine_ppath'),
                    "LD_LIBRARY_PATH":
                    self.config.get('deploy', 'engine_lpath')
                }
                jobpool = {}
                bbs_kernels = []
                with job_server(self.logger, jobpool,
                                self.error) as (jobhost, jobport):
                    self.logger.debug("Job server at %s:%d" %
                                      (jobhost, jobport))
                    for job_id, details in enumerate(to_process):
                        host, file, vds = details
                        jobpool[job_id] = ComputeJob(
                            host,
                            command,
                            arguments=[
                                self.inputs['kernel_exec'],
                                self.inputs['initscript'], file,
                                self.inputs['key'], self.inputs['db_name'],
                                self.inputs['db_user'], self.inputs['db_host']
                            ])
                        bbs_kernels.append(
                            threading.Thread(target=self._run_bbs_kernel,
                                             args=(host, command, env, job_id,
                                                   jobhost, str(jobport))))
                    self.logger.info("Starting %d threads" % len(bbs_kernels))
                    [thread.start() for thread in bbs_kernels]
                    self.logger.debug("Waiting for all kernels to complete")
                    [thread.join() for thread in bbs_kernels]

                #         When GlobalControl finishes, our work here is done
                # ----------------------------------------------------------
                self.logger.info("Waiting for GlobalControl thread")
                bbs_control.join()
            finally:
                os.unlink(bbs_parset)
                shutil.rmtree(vds_dir)
                if self.killswitch.isSet():
                    #  If killswitch is set, then one of our processes failed so
                    #                                   the whole run is invalid
                    # ----------------------------------------------------------
                    return 1

        return 0

    def _run_bbs_kernel(self, host, command, env, *arguments):
        """
        Run command with arguments on the specified host using ssh. Return its
        return code.

        The resultant process is monitored for failure; see
        _monitor_process() for details.
        """
        try:
            bbs_kernel_process = run_remote_command(self.config,
                                                    self.logger,
                                                    host,
                                                    command,
                                                    env,
                                                    arguments=arguments)
        except Exception as e:
            self.logger.exception("BBS Kernel failed to start")
            self.killswitch.set()
            return 1
        result = self._monitor_process(bbs_kernel_process,
                                       "BBS Kernel on %s" % host)
        sout, serr = bbs_kernel_process.communicate()
        serr = serr.replace("Connection to %s closed.\r\n" % host, "")
        log_process_output("SSH session (BBS kernel)", sout, serr, self.logger)
        return result
Example #21
    def go(self):
        self.logger.info("Starting setupparmdb run")
        super(setupparmdb, self).go()

        # *********************************************************************
        # 1. Create a temporary template parmdb at the master side of the recipe
        self.logger.info("Generating template parmdb")

        # generate a temp dir
        pdbdir = tempfile.mkdtemp(
            dir=self.config.get("layout", "job_directory"),
            suffix=".%s" % (os.path.basename(__file__), ))
        pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

        # Create a template parmdb, using the temp dir as its location
        try:
            parmdbm_process = subprocess.Popen([self.inputs['executable']],
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            sout, serr = communicate_returning_strings(
                parmdbm_process, input=(template % pdbfile).encode())
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as err:
            self.logger.error("Failed to spawn parmdbm: %s" % str(err))
            return 1

        # *********************************************************************
        # 2. Call node side of recipe with template and possible targets
        #    If output locations are provided as input, they are validated.
        try:
            #                       Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            args = self.inputs['args']
            self.logger.debug("Loading input-data mapfile: %s" % args[0])
            indata = DataMap.load(args[0])
            if len(args) > 1:
                # If an output location is provided, validate the input and output maps
                self.logger.debug("Loading output-data mapfile: %s" % args[1])
                outdata = DataMap.load(args[1])
                if not validate_data_maps(indata, outdata):
                    self.logger.error(
                        "Validation of input/output data mapfiles failed")
                    return 1
                # else the output location is the input location plus the suffix
            else:
                outdata = copy.deepcopy(indata)
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        os.path.basename(item.file) + self.inputs['suffix'])
            #  Call the node side
            command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
            outdata.iterator = DataMap.SkipIterator
            jobs = []
            for outp in outdata:
                jobs.append(
                    ComputeJob(outp.host,
                               command,
                               arguments=[pdbfile, outp.file]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for job, outp in zip(jobs, outdata):
                # A return code of 123456 indicates a failed ssh connection
                if job.results['returncode'] == 123456:
                    self.logger.warning(
                        "ssh connection with {0} failed."
                        "Skipping further work on this task".format(outp.host))
                    self.logger.warning("Error code 123456.")
                    outp.skip = True
                elif job.results['returncode'] != 0:
                    outp.skip = True

        # *********************************************************************
        # 3. validate performance, cleanup of temp files, construct output
        finally:
            self.logger.debug("Removing template parmdb")
            shutil.rmtree(pdbdir, ignore_errors=True)

        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")
        self.logger.debug("Writing parmdb map file: %s" %
                          self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Example #22
    def run(self, imager_exec, vds, parset, resultsdir, start_time, end_time):
        #       imager_exec:                          path to cimager executable
        #               vds:           VDS file describing the data to be imaged
        #            parset:                                imager configuration
        #        resultsdir:                         place resulting images here
        #        start_time:                        )    time range to be imaged
        #          end_time:                        )   in seconds (may be None)
        # ----------------------------------------------------------------------
        with log_time(self.logger):
            self.logger.info("Processing %s" % (vds, ))

            #    Bail out if destination exists (can thus resume a partial run).
            #                                            Should be configurable?
            # ------------------------------------------------------------------
            parset_data = Parset(parset)
            image_names = parset_data.getStringVector("Cimager.Images.Names")
            for image_name in image_names:
                outputfile = os.path.join(resultsdir, image_name + ".restored")
                self.logger.info(outputfile)
                if os.path.exists(outputfile):
                    self.logger.info("Image already exists: aborting.")
                    return 0
            try:
                working_dir = mkdtemp(suffix=".%s" %
                                      (os.path.basename(__file__), ))

                #   If a time range has been specified, copy that section of the
                #                                  input MS and only image that.
                # --------------------------------------------------------------
                query = []
                if start_time:
                    self.logger.debug("Start time is %s" % start_time)
                    start_time = quantity(float(start_time), 's')
                    query.append("TIME > %f" % start_time.get('s').get_value())
                if end_time:
                    self.logger.debug("End time is %s" % end_time)
                    end_time = quantity(float(end_time), 's')
                    query.append("TIME < %f" % end_time.get('s').get_value())
                query = " AND ".join(query)
                if query:
                    #                             Select relevant section of MS.
                    # ----------------------------------------------------------
                    self.logger.debug("Query is %s" % query)
                    output = os.path.join(working_dir, "timeslice.MS")
                    vds_parset = get_parset(vds)
                    t = table(vds_parset.getString("FileName"))
                    t.query(query, name=output)
                    #       Patch updated information into imager configuration.
                    # ----------------------------------------------------------
                    parset = patch_parset(parset, {'Cimager.dataset': output})
                else:
                    self.logger.debug("No time range selected")

                self.logger.debug("Running cimager")
                with CatchLog4CXX(
                        working_dir,
                        self.logger.name + "." + os.path.basename(vds)):
                    cimager_process = Popen([imager_exec, "-inputs", parset],
                                            stdout=PIPE,
                                            stderr=PIPE,
                                            cwd=working_dir)
                    sout, serr = cimager_process.communicate()
                log_process_output("cimager", sout, serr, self.logger)
                if cimager_process.returncode != 0:
                    raise CalledProcessError(cimager_process.returncode,
                                             imager_exec)

                #        Dump the resulting images in the pipeline results area.
                #    I'm not aware of a foolproof way to predict the image names
                #                that will be produced, so we read them from the
                #                      parset and add standard cimager prefixes.
                # --------------------------------------------------------------
                parset_data = Parset(parset)
                image_names = parset_data.getStringVector(
                    "Cimager.Images.Names")
                prefixes = [
                    "image", "psf", "residual", "weights", "sensitivity"
                ]
                self.logger.debug("Copying images to %s" % resultsdir)
                for image_name in image_names:
                    for prefix in prefixes:
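                        # e.g. an image named "image.field0" yields "psf.field0",
                        # "residual.field0", etc. (name purely illustrative)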
                        filename = image_name.replace("image", prefix, 1)
                        shutil.move(os.path.join(working_dir, filename),
                                    os.path.join(resultsdir, filename))
                    if parset_data.getBool('Cimager.restore'):
                        shutil.move(
                            os.path.join(working_dir,
                                         image_name + ".restored"),
                            os.path.join(resultsdir, image_name + ".restored"))
            except CalledProcessError as e:
                self.logger.error(str(e))
                return 1
            finally:
Example #23
0
                    # _monitor_process() needs a convenient kill() method.
                    bbs_control_process.kill = lambda: os.kill(
                        bbs_control_process.pid, signal.SIGKILL)
                except OSError as e:
                    self.logger.error("Failed to spawn BBS Control (%s)" %
                                      str(e))
                    self.killswitch.set()
                    return 1
                finally:
                    run_flag.set()

            returncode = self._monitor_process(bbs_control_process,
                                               "BBS Control")
            sout, serr = bbs_control_process.communicate()
        shutil.rmtree(working_dir)
        log_process_output(self.inputs['control_exec'], sout, serr,
                           self.logger)
        return returncode

    def _monitor_process(self, process, name="Monitored process"):
        """
        Monitor a process for successful exit. If it fails, set the kill
        switch, so everything else gets killed too. If the kill switch is set,
        then kill this process off.

        Name is an optional parameter used only for identification in logs.
        """
        while True:
            try:
                returncode = process.poll()
                if returncode is None:  # Process still running
                    time.sleep(1)
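
A minimal, stand-alone sketch of the poll-and-killswitch pattern the docstring above describes (the monitor() helper, its parameters and the exact log messages are illustrative assumptions, not the project's own implementation):

import time

def monitor(process, killswitch, logger, name="Monitored process"):
    # Poll until the process exits; propagate failures through the killswitch.
    while True:
        returncode = process.poll()
        if returncode is None:            # still running
            if killswitch.isSet():        # something else failed: kill this too
                process.kill()
            time.sleep(1)
        elif returncode == 0:             # clean exit
            logger.info("%s completed successfully" % name)
            return returncode
        else:                             # failure: bring everything else down
            logger.error("%s failed with return code %s" % (name, returncode))
            killswitch.set()
            return returncode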
Example #24
0
    def go(self):
        self.logger.info("Starting BBS run")
        super(bbs, self).go()

        #             Generate source and parameter databases for all input data
        # ----------------------------------------------------------------------
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['parmdbm']
        inputs['working_directory'] = self.config.get(
            "DEFAULT", "default_working_directory")
        inputs['mapfile'] = self.task_definitions.get('parmdb', 'mapfile')
        inputs['suffix'] = ".instrument"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('parmdb', inputs, outputs):
            self.logger.warn("parmdb reports failure")
            return 1
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['makesourcedb']
        inputs['skymodel'] = self.inputs['skymodel']
        inputs['mapfile'] = self.task_definitions.get('sourcedb', 'mapfile')
        inputs['suffix'] = ".sky"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('sourcedb', inputs, outputs):
            self.logger.warn("sourcedb reports failure")
            return 1

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for BBS")
        vds_file = os.path.join(self.config.get("layout", "job_directory"),
                                "vds", "bbs.gvds")
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = vds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(vds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("BBS GVDS is %s" % (vds_file, ))

        #      Iterate over groups of subbands divided up for convenient cluster
        #        processing -- i.e. no more than nproc subbands per compute node
        # ----------------------------------------------------------------------
        for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])):
            #               to_process is a list of (host, filename, vds) tuples
            # ------------------------------------------------------------------
            hosts, ms_names, vds_files = map(list, zip(*to_process))

            #             The BBS session database should be cleared for our key
            # ------------------------------------------------------------------
            self.logger.debug("Cleaning BBS database for key %s" %
                              (self.inputs["key"]))
            with closing(
                    psycopg2.connect(
                        host=self.inputs["db_host"],
                        user=self.inputs["db_user"],
                        database=self.inputs["db_name"])) as db_connection:
                db_connection.set_isolation_level(
                    psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
                with closing(db_connection.cursor()) as db_cursor:
                    db_cursor.execute(
                        "DELETE FROM blackboard.session WHERE key=%s",
                        (self.inputs["key"], ))

            #     BBS GlobalControl requires a GVDS file describing all the data
            #          to be processed. We assemble that from the separate parts
            #                                         already available on disk.
            # ------------------------------------------------------------------
            self.logger.debug("Building VDS file describing data for BBS run")
            vds_dir = tempfile.mkdtemp(suffix=".%s" %
                                       (os.path.basename(__file__), ))
            vds_file = os.path.join(vds_dir, "bbs.gvds")
            combineproc = utilities.spawn_process([
                self.inputs['combinevds'],
                vds_file,
            ] + vds_files, self.logger)
            sout, serr = combineproc.communicate()
            log_process_output(self.inputs['combinevds'], sout, serr,
                               self.logger)
            if combineproc.returncode != 0:
                raise subprocess.CalledProcessError(combineproc.returncode,
                                                    self.inputs['combinevds'])

            #      Construct a parset for BBS GlobalControl by patching the GVDS
            #           file and database information into the supplied template
            # ------------------------------------------------------------------
            self.logger.debug("Building parset for BBS control")
            bbs_parset = utilities.patch_parset(
                self.inputs['parset'],
                {
                    'Observation': vds_file,
                    'BBDB.Key': self.inputs['key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
                    #                'BBDB.Port': self.inputs['db_name'],
                })
            self.logger.debug("BBS control parset is %s" % (bbs_parset, ))

            try:
                #        When one of our processes fails, we set the killswitch.
                #      Everything else will then come crashing down, rather than
                #                                         hanging about forever.
                # --------------------------------------------------------------
                self.killswitch = threading.Event()
                self.killswitch.clear()
                # A signal handler receives (signum, frame), but Event.set()
                # takes no arguments, so wrap the killswitch in a lambda.
                signal.signal(signal.SIGTERM,
                              lambda signum, frame: self.killswitch.set())

                #                           GlobalControl runs in its own thread
                # --------------------------------------------------------------
                run_flag = threading.Event()
                run_flag.clear()
                bbs_control = threading.Thread(target=self._run_bbs_control,
                                               args=(bbs_parset, run_flag))
                bbs_control.start()
                run_flag.wait()  # Wait for control to start before proceeding

                #      We run BBS KernelControl on each compute node by directly
                #                             invoking the node script using SSH
                #      Note that we use a job_server to send out job details and
                #           collect logging information, so we define a bunch of
                #    ComputeJobs. However, we need more control than the generic
                #     ComputeJob.dispatch method supplies, so we'll control them
                #                                          with our own threads.
                # --------------------------------------------------------------
                command = "python %s" % (self.__file__.replace(
                    'master', 'nodes'))
                env = {
                    "LOFARROOT":
                    utilities.read_initscript(
                        self.logger, self.inputs['initscript'])["LOFARROOT"],
                    "PYTHONPATH":
                    self.config.get('deploy', 'engine_ppath'),
                    "LD_LIBRARY_PATH":
                    self.config.get('deploy', 'engine_lpath')
                }
                jobpool = {}
                bbs_kernels = []
                with job_server(self.logger, jobpool,
                                self.error) as (jobhost, jobport):
                    self.logger.debug("Job server at %s:%d" %
                                      (jobhost, jobport))
                    for job_id, details in enumerate(to_process):
                        host, file, vds = details
                        jobpool[job_id] = ComputeJob(
                            host,
                            command,
                            arguments=[
                                self.inputs['kernel_exec'],
                                self.inputs['initscript'], file,
                                self.inputs['key'], self.inputs['db_name'],
                                self.inputs['db_user'], self.inputs['db_host']
                            ])
                        bbs_kernels.append(
                            threading.Thread(target=self._run_bbs_kernel,
                                             args=(host, command, env, job_id,
                                                   jobhost, str(jobport))))
                    self.logger.info("Starting %d threads" % len(bbs_kernels))
                    [thread.start() for thread in bbs_kernels]
                    self.logger.debug("Waiting for all kernels to complete")
                    [thread.join() for thread in bbs_kernels]

                #         When GlobalControl finishes, our work here is done
                # ----------------------------------------------------------
                self.logger.info("Waiting for GlobalControl thread")
                bbs_control.join()
            finally:
                os.unlink(bbs_parset)
                shutil.rmtree(vds_dir)
                if self.killswitch.isSet():
                    #  If killswitch is set, then one of our processes failed so
                    #                                   the whole run is invalid
                    # ----------------------------------------------------------
                    return 1

        return 0
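
The go() above produces its BBS control parset by patching the GVDS file and database settings into a template with utilities.patch_parset, whose implementation is not shown here. A rough sketch of that idea, assuming a plain key=value parset format and that the patched copy is written to a temporary file whose path the caller unlinks afterwards (patch_parset_sketch is an illustrative name):

import os
import tempfile

def patch_parset_sketch(template_path, overrides):
    # Read the key=value template, override or append the given keys, and
    # write the result to a temporary parset whose path is returned.
    values = {}
    with open(template_path) as template:
        for line in template:
            if "=" in line and not line.lstrip().startswith("#"):
                key, value = line.split("=", 1)
                values[key.strip()] = value.strip()
    values.update(overrides)
    fd, patched_path = tempfile.mkstemp(suffix=".parset")
    with os.fdopen(fd, "w") as patched:
        for key, value in sorted(values.items()):
            patched.write("%s=%s\n" % (key, value))
    return patched_path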
Example #25
0
    def run(self, executable, infiles, db_key, db_name, db_user, db_host):
        """
        Deprecated functionality
        """
        # executable : path to KernelControl executable
        # infiles    : tuple of MS, instrument- and sky-model files
        # db_*       : database connection parameters
        # ----------------------------------------------------------------------
        self.logger.debug("executable = %s" % executable)
        self.logger.debug("infiles = %s" % str(infiles))
        self.logger.debug("db_key = %s" % db_key)
        self.logger.debug("db_name = %s" % db_name)
        self.logger.debug("db_user = %s" % db_user)
        self.logger.debug("db_host = %s" % db_host)

        (ms, parmdb_instrument, parmdb_sky) = infiles

        with log_time(self.logger):
            if os.path.exists(ms):
                self.logger.info("Processing %s" % (ms))
            else:
                self.logger.error("Dataset %s does not exist" % (ms))
                return 1

            #        Build a configuration parset specifying database parameters
            #                                                     for the kernel
            # ------------------------------------------------------------------
            self.logger.debug("Setting up BBSKernel parset")
            # Getting the filesystem must be done differently, using the
            # DataProduct keys in the parset provided by the scheduler.
            filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(ms))
            fd, parset_file = mkstemp()
            kernel_parset = parameterset()
            for key, value in {
                    "ObservationPart.Filesystem": filesystem,
                    "ObservationPart.Path": ms,
                    "BBDB.Key": db_key,
                    "BBDB.Name": db_name,
                    "BBDB.User": db_user,
                    "BBDB.Host": db_host,
                    "ParmDB.Sky": parmdb_sky,
                    "ParmDB.Instrument": parmdb_instrument
            }.items():
                kernel_parset.add(key, value)
            kernel_parset.writeFile(parset_file)
            os.close(fd)
            self.logger.debug("BBSKernel parset written to %s" % parset_file)

            #                                                     Run the kernel
            #               Catch & log output from the kernel logger and stdout
            # ------------------------------------------------------------------
            working_dir = mkdtemp(suffix=".%s" %
                                  (os.path.basename(__file__), ))
            try:
                self.logger.info("******** {0}".format(
                    open(parset_file).read()))
                cmd = [executable, parset_file, "0"]
                self.logger.debug("Executing BBS kernel")
                with CatchLog4CPlus(
                        working_dir,
                        self.logger.name + "." + os.path.basename(ms),
                        os.path.basename(executable),
                ):
                    bbs_kernel_process = Popen(cmd,
                                               stdout=PIPE,
                                               stderr=PIPE,
                                               cwd=working_dir)
                    sout, serr = bbs_kernel_process.communicate()
                log_process_output("BBS kernel", sout, serr, self.logger)
                if bbs_kernel_process.returncode != 0:
                    raise CalledProcessError(bbs_kernel_process.returncode,
                                             executable)
            except CalledProcessError as e:
                self.logger.error(str(e))
                return 1
            finally:
                os.unlink(parset_file)
                shutil.rmtree(working_dir)
            return 0
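
The kernel recipe above builds its ObservationPart.Filesystem value as host:mountpoint using get_mountpoint, which is defined elsewhere in the pipeline. A common way to implement such a lookup is to walk up the path until os.path.ismount succeeds (the function name below is illustrative):

import os

def get_mountpoint_sketch(path):
    # Walk upwards from `path` until a filesystem mount point is reached.
    path = os.path.abspath(path)
    while not os.path.ismount(path):
        path = os.path.dirname(path)
    return path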
Example #26
0
    def run(self, imager_exec, vds, parset, resultsdir, start_time, end_time):
        #       imager_exec:                          path to cimager executable
        #               vds:           VDS file describing the data to be imaged
        #            parset:                                imager configuration
        #        resultsdir:                         place resulting images here
        #        start_time:                        )    time range to be imaged
        #          end_time:                        )   in seconds (may be None)
        # ----------------------------------------------------------------------
        with log_time(self.logger):
            self.logger.info("Processing %s" % (vds,))

            #    Bail out if destination exists (can thus resume a partial run).
            #                                            Should be configurable?
            # ------------------------------------------------------------------
            parset_data = Parset(parset)
            image_names = parset_data.getStringVector("Cimager.Images.Names")
            for image_name in image_names:
                outputfile = os.path.join(resultsdir, image_name + ".restored")
                self.logger.info(outputfile)
                if os.path.exists(outputfile):
                    self.logger.info("Image already exists: aborting.")
                    return 0
            try:
                working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))

                #   If a time range has been specified, copy that section of the
                #                                  input MS and only image that.
                # --------------------------------------------------------------
                query = []
                if start_time:
                    self.logger.debug("Start time is %s" % start_time)
                    start_time = quantity(float(start_time), 's')
                    query.append("TIME > %f" % start_time.get('s').get_value())
                if end_time:
                    self.logger.debug("End time is %s" % end_time)
                    end_time = quantity(float(end_time), 's')
                    query.append("TIME < %f" % end_time.get('s').get_value())
                query = " AND ".join(query)
                if query:
                    #                             Select relevant section of MS.
                    # ----------------------------------------------------------
                    self.logger.debug("Query is %s" % query)
                    output = os.path.join(working_dir, "timeslice.MS")
                    vds_parset = get_parset(vds)
                    t = table(vds_parset.getString("FileName"))
                    t.query(query, name=output)
                    #       Patch updated information into imager configuration.
                    # ----------------------------------------------------------
                    parset = patch_parset(parset,
                        {
                            'Cimager.dataset': output
                        }
                    )
                else:
                    self.logger.debug("No time range selected")

                self.logger.debug("Running cimager")
                with CatchLog4CXX(
                    working_dir,
                    self.logger.name + "." + os.path.basename(vds)
                ):
                    cimager_process = Popen(
                        [imager_exec, "-inputs", parset],
                        stdout=PIPE, stderr=PIPE, cwd=working_dir
                    )
                    sout, serr = cimager_process.communicate()
                log_process_output("cimager", sout, serr, self.logger)
                if cimager_process.returncode != 0:
                    raise CalledProcessError(
                        cimager_process.returncode, imager_exec
                    )

                #        Dump the resulting images in the pipeline results area.
                #    I'm not aware of a foolproof way to predict the image names
                #                that will be produced, so we read them from the
                #                      parset and add standard cimager prefixes.
                # --------------------------------------------------------------
                parset_data = Parset(parset)
                image_names = parset_data.getStringVector("Cimager.Images.Names")
                prefixes = [
                    "image", "psf", "residual", "weights", "sensitivity"
                ]
                self.logger.debug("Copying images to %s" % resultsdir)
                for image_name in image_names:
                    for prefix in prefixes:
                        filename = image_name.replace("image", prefix, 1)
                        shutil.move(
                            os.path.join(working_dir, filename),
                            os.path.join(resultsdir, filename)
                        )
                    if parset_data.getBool('Cimager.restore'):
                        shutil.move(
                            os.path.join(working_dir, image_name + ".restored"),
                            os.path.join(resultsdir, image_name + ".restored")
                        )
            except CalledProcessError as e:
                self.logger.error(str(e))
                return 1
            finally:
Example #27
0
File: bbs.py Project: jjdmol/LOFAR
    def run(
        self, executable, initscript, infile, key, db_name, db_user, db_host
    ):
        #                           executable: path to KernelControl executable
        #                           initscript:             path to lofarinit.sh
        #                               infile:    MeasurementSet for processing
        #       key, db_name, db_user, db_host:   database connection parameters
        # ----------------------------------------------------------------------
        with log_time(self.logger):
            if os.path.exists(infile):
                self.logger.info("Processing %s" % (infile))
            else:
                self.logger.error("Dataset %s does not exist" % (infile))
                return 1

            #        Build a configuration parset specifying database parameters
            #                                                     for the kernel
            # ------------------------------------------------------------------
            self.logger.debug("Setting up kernel parset")
            filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
            fd, parset_filename = mkstemp()
            kernel_parset = Parset()
            # Use distinct loop variable names so the db key parameter `key`
            # is not shadowed.
            for parset_key, parset_value in {
                "ObservationPart.Filesystem": filesystem,
                "ObservationPart.Path": infile,
                "BBDB.Key": key,
                "BBDB.Name": db_name,
                "BBDB.User": db_user,
                "BBDB.Host": db_host,
                "ParmLog": "",
                "ParmLoglevel": "",
                "ParmDB.Sky": infile + ".sky",
                "ParmDB.Instrument": infile + ".instrument"
            }.items():
                kernel_parset.add(parset_key, parset_value)
            kernel_parset.writeFile(parset_filename)
            os.close(fd)
            self.logger.debug("Parset written to %s" % (parset_filename,))


            #                                                     Run the kernel
            #               Catch & log output from the kernel logger and stdout
            # ------------------------------------------------------------------
            working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
            env = read_initscript(self.logger, initscript)
            try:
                cmd = [executable, parset_filename, "0"]
                self.logger.debug("Executing BBS kernel")
                with CatchLog4CPlus(
                    working_dir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
                ):
                    bbs_kernel_process = Popen(
                        cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir
                    )
                    sout, serr = bbs_kernel_process.communicate()
                log_process_output("BBS kernel", sout, serr, self.logger)
                if bbs_kernel_process.returncode != 0:
                    raise CalledProcessError(
                        bbs_kernel_process.returncode, executable
                    )
            except CalledProcessError as e:
                self.logger.error(str(e))
                return 1
            finally:
Example #28
0
    def run(self, executable, infiles, db_key, db_name, db_user, db_host):
        """
        Deprecated functionality
        """
        # executable : path to KernelControl executable
        # infiles    : tuple of MS, instrument- and sky-model files
        # db_*       : database connection parameters
        # ----------------------------------------------------------------------
        self.logger.debug("executable = %s" % executable)
        self.logger.debug("infiles = %s" % str(infiles))
        self.logger.debug("db_key = %s" % db_key)
        self.logger.debug("db_name = %s" % db_name)
        self.logger.debug("db_user = %s" % db_user)
        self.logger.debug("db_host = %s" % db_host)

        (ms, parmdb_instrument, parmdb_sky) = infiles

        with log_time(self.logger):
            if os.path.exists(ms):
                self.logger.info("Processing %s" % (ms))
            else:
                self.logger.error("Dataset %s does not exist" % (ms))
                return 1

            #        Build a configuration parset specifying database parameters
            #                                                     for the kernel
            # ------------------------------------------------------------------
            self.logger.debug("Setting up BBSKernel parset")
            # Getting the filesystem must be done differently, using the
            # DataProduct keys in the parset provided by the scheduler.
            filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(ms))
            fd, parset_file = mkstemp()
            kernel_parset = parameterset()
            for key, value in {
                "ObservationPart.Filesystem": filesystem,
                "ObservationPart.Path": ms,
                "BBDB.Key": db_key,
                "BBDB.Name": db_name,
                "BBDB.User": db_user,
                "BBDB.Host": db_host,
                "ParmDB.Sky": parmdb_sky,
                "ParmDB.Instrument": parmdb_instrument
            }.items():
                kernel_parset.add(key, value)
            kernel_parset.writeFile(parset_file)
            os.close(fd)
            self.logger.debug("BBSKernel parset written to %s" % parset_file)

            #                                                     Run the kernel
            #               Catch & log output from the kernel logger and stdout
            # ------------------------------------------------------------------
            working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
            try:
                self.logger.info("******** {0}".format(open(parset_file).read()))
                cmd = [executable, parset_file, "0"]
                self.logger.debug("Executing BBS kernel")
                with CatchLog4CPlus(
                    working_dir,
                    self.logger.name + "." + os.path.basename(ms),
                    os.path.basename(executable),
                ):
                    bbs_kernel_process = Popen(
                        cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir
                    )
                    sout, serr = bbs_kernel_process.communicate()
                log_process_output("BBS kernel", sout, serr, self.logger)
                if bbs_kernel_process.returncode != 0:
                    raise CalledProcessError(
                        bbs_kernel_process.returncode, executable
                    )
            except CalledProcessError as e:
                self.logger.error(str(e))
                return 1
            finally:
Example #29
0
                                    bbs_control_process.pid, signal.SIGKILL)
                except OSError as e:
                    self.logger.error(
                            "Failed to spawn BBS Control (%s)" % str(e))
                    self.killswitch.set()
                    return 1
                finally:
                    run_flag.set()

            returncode = self._monitor_process(
                bbs_control_process, "BBS Control"
            )
            sout, serr = bbs_control_process.communicate()
        shutil.rmtree(working_dir)
        log_process_output(
            self.inputs['control_exec'], sout, serr, self.logger
        )
        return returncode

    def _monitor_process(self, process, name="Monitored process"):
        """
        Monitor a process for successful exit. If it fails, set the kill
        switch, so everything else gets killed too. If the kill switch is set,
        then kill this process off.

        Name is an optional parameter used only for identification in logs.
        """
        while True:
            try:
                returncode = process.poll()
                # Process still running