예제 #1
0
파일: cimager.py 프로젝트: mfkiwl/lofar-1
 def convert_mwimager_parset(parset):
     """
     Convert an mwimager parset into a parset for cimager.

     ``parset`` is patched (via patched_parset) with the dataset, frequency,
     pointing and restore values taken from the enclosing scope. The patched
     parset and the name of a fresh temporary file in the job directory are
     then passed, in that order, to the ``convert_exec`` executable.

     Returns the name of the temporary file. Raises OSError if the converter
     cannot be spawned and subprocess.CalledProcessError if it exits with a
     non-zero status; both are logged before being re-raised.
     """
     try:
         with patched_parset(
                 parset,
                 {
                     'dataset': dataset,
                     'Images.frequency': frequency,
                     'msDirType': ms_dir_type,
                     'msDirRa': ms_dir_ra,
                     'msDirDec': ms_dir_dec,
                     # cimager bug: non-restored image unusable
                     'restore': restore,
                 }) as cimager_parset:
             fd, converted_parset = tempfile.mkstemp(
                 dir=self.config.get("layout", "job_directory"))
             # Only the file name is handed to the converter, so release the
             # descriptor straight away; closing it after the spawn leaked it
             # whenever spawn_process() raised.
             os.close(fd)
             convert_process = spawn_process([
                 self.inputs['convert_exec'], cimager_parset,
                 converted_parset
             ], self.logger)
             sout, serr = convert_process.communicate()
             log_process_output(self.inputs['convert_exec'], sout, serr,
                                self.logger)
             if convert_process.returncode != 0:
                 # Report the executable that was actually run; the bare name
                 # ``convert_exec`` is not bound in this function.
                 raise subprocess.CalledProcessError(
                     convert_process.returncode,
                     self.inputs['convert_exec'])
             return converted_parset
     except OSError as e:
         self.logger.error("Failed to spawn convertimagerparset (%s)" %
                           str(e))
         raise
     except subprocess.CalledProcessError as e:
         self.logger.error(str(e))
         raise
예제 #2
0
파일: cimager.py 프로젝트: jjdmol/LOFAR
 def convert_mwimager_parset(parset):
     """
     Convert an mwimager parset into a parset for cimager.

     ``parset`` is patched (via patched_parset) with the dataset, frequency,
     pointing and restore values taken from the enclosing scope. The patched
     parset and the name of a fresh temporary file in the job directory are
     then passed, in that order, to the ``convert_exec`` executable.

     Returns the name of the temporary file. Raises OSError (logged first)
     if the converter cannot be spawned and subprocess.CalledProcessError
     if it exits with a non-zero status.
     """
     try:
         with patched_parset(
             parset,
             {
                 'dataset': dataset,
                 'Images.frequency': frequency,
                 'msDirType': ms_dir_type,
                 'msDirRa': ms_dir_ra,
                 'msDirDec': ms_dir_dec,
                 'restore': restore # cimager bug: non-restored image unusable
             }
         ) as cimager_parset:
             fd, converted_parset = tempfile.mkstemp(
                 dir=self.config.get("layout", "job_directory")
             )
             # Only the file name is handed to the converter, so release the
             # descriptor straight away; closing it after the spawn leaked it
             # whenever spawn_process() raised.
             os.close(fd)
             convert_process = spawn_process(
                 [
                     self.inputs['convert_exec'],
                     cimager_parset,
                     converted_parset
                 ],
                 self.logger
             )
             sout, serr = convert_process.communicate()
             log_process_output(self.inputs['convert_exec'], sout, serr, self.logger)
             if convert_process.returncode != 0:
                 # Report the executable that was actually run; the bare name
                 # ``convert_exec`` is not bound in this function.
                 raise subprocess.CalledProcessError(
                     convert_process.returncode, self.inputs['convert_exec']
                 )
             return converted_parset
     # "except X as e" is valid on Python 2.6+ and 3; the old comma form is a
     # SyntaxError on Python 3.
     except OSError as e:
         self.logger.error("Failed to spawn convertimagerparset (%s)" % str(e))
         raise
예제 #3
0
def run_via_mpiexec(logger, command, arguments, host):
    """
    Launch ``command`` plus its arguments through mpiexec.

    The stringified arguments are appended to ``command`` separated by single
    spaces, and the result is run by /bin/sh (after ``hostname``) under
    mpiexec. ``host`` is accepted but not used. Returns whatever
    spawn_process() returns, with a kill() attribute attached that sends
    SIGKILL to the spawned process.
    """
    shell_line = " ".join([command] + [str(arg) for arg in arguments])
    mpiexec_argv = ["mpiexec", "-x", "-np=1", "/bin/sh", "-c"]
    mpiexec_argv.append("hostname && " + shell_line)
    child = spawn_process(mpiexec_argv, logger)

    def _kill():
        return os.kill(child.pid, signal.SIGKILL)

    child.kill = _kill
    return child
예제 #4
0
 def _run_bbs_control(self, bbs_parset, run_flag):
     """
     Run BBS Global Control and wait for it to finish. Return its return
     code.

     bbs_parset is passed to the control executable as its first argument.
     run_flag is not referenced in the visible part of this method --
     NOTE(review): the snippet is cut off at the ``finally`` clause, which
     presumably uses it; confirm against the full source.

     Returns 1, after setting self.killswitch, if the executable cannot be
     spawned.
     """
     # The child's environment is read from the configured initscript.
     env = utilities.read_initscript(self.logger, self.inputs['initscript'])
     self.logger.info("Running BBS GlobalControl")
     # Scratch directory used as the child's working directory.
     working_dir = tempfile.mkdtemp(suffix=".%s" %
                                    (os.path.basename(__file__), ))
     with CatchLog4CPlus(working_dir, self.logger.name + ".GlobalControl",
                         os.path.basename(self.inputs['control_exec'])):
         with utilities.log_time(self.logger):
             try:
                 bbs_control_process = utilities.spawn_process(
                     [self.inputs['control_exec'], bbs_parset, "0"],
                     self.logger,
                     cwd=working_dir,
                     env=env)
                 # _monitor_process() needs a convenient kill() method.
                 bbs_control_process.kill = lambda: os.kill(
                     bbs_control_process.pid, signal.SIGKILL)
             except OSError, e:
                 self.logger.error("Failed to spawn BBS Control (%s)" %
                                   str(e))
                 # Flag the failure through the shared killswitch event.
                 self.killswitch.set()
                 return 1
             finally:
예제 #5
0
    def _run_bbs_control(self, bbs_parset, run_flag):
        """
        Run BBS Global Control and wait for it to finish. Return its return
        code.

        bbs_parset -- passed to the control executable as its first argument.
        run_flag   -- object whose set() method is called once the spawn
                      attempt has finished, whether or not it succeeded.

        Returns 1 (after setting self.killswitch) if the executable cannot be
        spawned, otherwise the value returned by self._monitor_process().
        The temporary working directory is removed on every exit path.
        """
        self.logger.info("Running BBS GlobalControl")
        working_dir = tempfile.mkdtemp(suffix=".%s" %
                                       (os.path.basename(__file__), ))
        try:
            with CatchLog4CPlus(working_dir,
                                self.logger.name + ".GlobalControl",
                                os.path.basename(self.inputs['control_exec'])):
                with utilities.log_time(self.logger):
                    try:
                        bbs_control_process = utilities.spawn_process(
                            [self.inputs['control_exec'], bbs_parset, "0"],
                            self.logger,
                            cwd=working_dir,
                            env=self.environment)
                        # _monitor_process() needs a convenient kill() method.
                        bbs_control_process.kill = lambda: os.kill(
                            bbs_control_process.pid, signal.SIGKILL)
                    except OSError as e:
                        self.logger.error("Failed to spawn BBS Control (%s)" %
                                          str(e))
                        self.killswitch.set()
                        return 1
                    finally:
                        # Release whoever waits on run_flag, even on failure.
                        run_flag.set()

                returncode = self._monitor_process(bbs_control_process,
                                                   "BBS Control")
                sout, serr = communicate_returning_strings(bbs_control_process)
        finally:
            # mkdtemp() leaves deletion to the caller. Previously the
            # directory was only removed on the success path, so it leaked on
            # the early "return 1" above and whenever monitoring raised.
            shutil.rmtree(working_dir)
        log_process_output(self.inputs['control_exec'], sout, serr,
                           self.logger)
        return returncode
예제 #6
0
def run_via_mpiexec_cep(logger, command, arguments, host):
    """
    Launch ``command`` plus its arguments on ``host`` through mpiexec.

    PYTHONPATH, LD_LIBRARY_PATH and PATH are named with ``-x`` options and the
    host is selected with ``-H``. The stringified arguments are appended to
    ``command`` separated by single spaces and the result is run by /bin/sh
    after ``hostname``. Returns whatever spawn_process() returns, with a
    kill() attribute attached that sends SIGKILL to the spawned process.
    """
    shell_line = " ".join([command] + [str(arg) for arg in arguments])
    mpiexec_argv = ["mpiexec"]
    for exported in ("PYTHONPATH", "LD_LIBRARY_PATH", "PATH"):
        mpiexec_argv += ["-x", exported]
    mpiexec_argv += ["-H", host, "/bin/sh", "-c", "hostname ; " + shell_line]
    child = spawn_process(mpiexec_argv, logger)

    def _kill():
        return os.kill(child.pid, signal.SIGKILL)

    child.kill = _kill
    return child
예제 #7
0
파일: new_bbs.py 프로젝트: jjdmol/LOFAR
 def _run_bbs_control(self, bbs_parset, run_flag):
     """
     Run BBS Global Control and wait for it to finish. Return its return
     code.

     bbs_parset is passed to the control executable as its first argument.
     run_flag is not referenced in the visible part of this method --
     NOTE(review): the snippet is cut off at the ``finally`` clause, which
     presumably uses it; confirm against the full source.

     Returns 1, after setting self.killswitch, if the executable cannot be
     spawned.
     """
     self.logger.info("Running BBS GlobalControl")
     # Scratch directory used as the child's working directory.
     working_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
     with CatchLog4CPlus(
         working_dir,
         self.logger.name + ".GlobalControl",
         os.path.basename(self.inputs['control_exec'])
     ):
         with utilities.log_time(self.logger):
             try:
                 bbs_control_process = utilities.spawn_process(
                     [
                         self.inputs['control_exec'],
                         bbs_parset,
                         "0"
                     ],
                     self.logger,
                     cwd=working_dir,
                     env=self.environment
                 )
                 # _monitor_process() needs a convenient kill() method.
                 bbs_control_process.kill = lambda : os.kill(
                                 bbs_control_process.pid, signal.SIGKILL)
             except OSError, e:
                 self.logger.error(
                         "Failed to spawn BBS Control (%s)" % str(e))
                 # Flag the failure through the shared killswitch event.
                 self.killswitch.set()
                 return 1
             finally:
예제 #8
0
def run_via_local(logger, command, arguments):
    """
    Run ``command`` plus its arguments on the local machine via /bin/sh.

    The stringified arguments are appended to ``command`` separated by single
    spaces and the result is passed to ``/bin/sh -c``. Returns whatever
    spawn_process() returns, with a kill() attribute attached that sends
    SIGKILL to the spawned process.
    """
    shell_line = " ".join([command] + [str(arg) for arg in arguments])
    child = spawn_process(["/bin/sh", "-c", shell_line], logger)

    def _kill():
        return os.kill(child.pid, signal.SIGKILL)

    child.kill = _kill
    return child
예제 #9
0
def run_via_local(logger, command, arguments):
    """
    Execute ``command`` locally through ``/bin/sh -c``.

    Every argument is converted with str() and appended to ``command`` after
    a single space. The returned object comes from spawn_process() and gains
    a kill() attribute that delivers SIGKILL to the child.
    """
    pieces = [command]
    pieces.extend(str(arg) for arg in arguments)
    shell_argv = ["/bin/sh", "-c", " ".join(pieces)]
    spawned = spawn_process(shell_argv, logger)

    def _sigkill():
        return os.kill(spawned.pid, signal.SIGKILL)

    spawned.kill = _sigkill
    return spawned
예제 #10
0
def run_via_mpiexec(logger, command, arguments, host):
    """
    Execute ``command`` through mpiexec.

    Every argument is converted with str() and appended to ``command`` after
    a single space; /bin/sh then runs ``hostname`` followed by that command
    line under mpiexec. ``host`` is accepted but not used. The returned
    object comes from spawn_process() and gains a kill() attribute that
    delivers SIGKILL to the child.
    """
    pieces = [command]
    pieces.extend(str(arg) for arg in arguments)
    remote_line = "hostname && " + " ".join(pieces)
    spawned = spawn_process(
        ["mpiexec", "-x", "-np=1", "/bin/sh", "-c", remote_line], logger)

    def _sigkill():
        return os.kill(spawned.pid, signal.SIGKILL)

    spawned.kill = _sigkill
    return spawned
예제 #11
0
def run_via_mpiexec_cep(logger, command, arguments, host):
    """
    Execute ``command`` on ``host`` through mpiexec.

    PYTHONPATH, LD_LIBRARY_PATH and PATH are each named with a ``-x`` option
    and ``host`` is given with ``-H``. Every argument is converted with str()
    and appended to ``command`` after a single space; /bin/sh runs
    ``hostname`` and then that command line. The returned object comes from
    spawn_process() and gains a kill() attribute that delivers SIGKILL to the
    child.
    """
    pieces = [command]
    pieces.extend(str(arg) for arg in arguments)
    remote_line = "hostname ; " + " ".join(pieces)
    exported_options = [
        "-x", "PYTHONPATH", "-x", "LD_LIBRARY_PATH", "-x", "PATH"
    ]
    mpiexec_argv = (["mpiexec"] + exported_options +
                    ["-H", host, "/bin/sh", "-c", remote_line])
    spawned = spawn_process(mpiexec_argv, logger)

    def _sigkill():
        return os.kill(spawned.pid, signal.SIGKILL)

    spawned.kill = _sigkill
    return spawned
예제 #12
0
def run_via_custom_cmdline(logger, host, command, environment, arguments,
                           config):
    """
    Dispatch a remote command via a customisable command line

    We return a Popen object pointing at the running executable, to which we add a
    kill method for shutting down the connection if required.

    The command line is taken from the "remote.cmdline" configuration option,
    with the following strings replaced:

      {host}         := host to execute command on
      {command}      := bash command line to be executed
      {uid}          := uid of the calling user

      {docker_image} := docker.image configuration option ("lofar" when the
                        option cannot be read)
      {slurm_job_id} := the SLURM job id to allocate resources in
                        (taken from the SLURM_JOB_ID environment variable)

    The {command} value is built from the ``KEY=VALUE`` pairs of
    ``environment``, followed by ``command`` and the re.escape()d arguments,
    all joined by single spaces.
    """
    commandArray = [
        "%s=%s" % (key, value) for key, value in environment.items()
    ]
    commandArray.append(command)
    # NOTE(review): re.escape() is a regex escaper, not a shell quoter; kept
    # as-is because the exact escaping is visible to the remote command.
    commandArray.extend(re.escape(str(arg)) for arg in arguments)
    commandStr = " ".join(commandArray)

    try:
        image = config.get('docker', 'image')
    except Exception:
        # Fall back to the default image when the option is unavailable. A
        # bare "except:" here would also swallow KeyboardInterrupt and
        # SystemExit.
        image = "lofar"

    # Construct the full command line, except for {command}, as that itself
    # can contain spaces which we don't want to split on.
    full_command_line = config.get('remote', 'cmdline').format(
        uid=os.geteuid(),
        slurm_job_id=os.environ.get("SLURM_JOB_ID"),
        docker_image=image,
        host=host,
        command="{command}").split(' ')

    # Fill in {command} somewhere
    full_command_line = [
        x.format(command=commandStr) for x in full_command_line
    ]

    logger.debug("Dispatching command to %s with custom_cmdline: %s" %
                 (host, full_command_line))

    process = spawn_process(full_command_line, logger)
    process.kill = lambda: os.kill(process.pid, signal.SIGKILL)
    return process
예제 #13
0
def run_via_slurm_srun_cep3(logger, command, arguments, host):
    """
    Execute ``command`` on ``host`` through SLURM's srun.

    Every argument is converted with str() and appended to ``command`` after
    a single space; /bin/sh runs ``hostname`` followed by that command line
    under ``srun -N 1 -n 1 -w host``. The returned object comes from
    spawn_process() and gains a kill() attribute that delivers SIGKILL to the
    child.
    """
    logger.debug("Dispatching command to %s with srun" % host)
    pieces = [command]
    pieces.extend(str(arg) for arg in arguments)
    remote_line = "hostname && " + " ".join(pieces)
    srun_argv = ["srun", "-N 1", "-n 1", "-w", host]
    srun_argv += ["/bin/sh", "-c", remote_line]
    # Historical note: a "--cpu_bind=map_cpu:none" variant and a random
    # start-up delay (a not fully reliable workaround for jobs crashing when
    # too many are started at the same time) were tried here and are
    # currently disabled.
    spawned = spawn_process(srun_argv, logger)

    def _sigkill():
        return os.kill(spawned.pid, signal.SIGKILL)

    spawned.kill = _sigkill
    return spawned
예제 #14
0
def run_via_ssh(logger, host, command, environment, arguments):
    """
    Start ``command`` on ``host`` over SSH.

    The remote shell line consists of the ``KEY=VALUE`` pairs of
    ``environment``, then ``command``, then each argument passed through
    str() and re.escape(), all joined by single spaces and wrapped in double
    quotes. The returned object comes from spawn_process() and carries an
    added kill() attribute that sends SIGKILL to the SSH process.
    """
    logger.debug("Dispatching command to %s with ssh" % host)

    remote_words = []
    for name, setting in environment.items():
        remote_words.append("%s=%s" % (name, setting))
    remote_words.append(command)
    for arg in arguments:
        remote_words.append(re.escape(str(arg)))

    quoted_line = '"' + " ".join(remote_words) + '"'
    ssh_argv = ["ssh", "-n", "-tt", "-x", host, "--", "/bin/sh", "-c",
                quoted_line]
    session = spawn_process(ssh_argv, logger)

    def _kill():
        return os.kill(session.pid, signal.SIGKILL)

    session.kill = _kill
    return session
예제 #15
0
def run_via_ssh(logger, host, command, environment, arguments):
    """
    Run ``command`` on a remote ``host`` through an SSH session.

    ``environment`` entries are rendered as ``KEY=VALUE`` words in front of
    ``command``; the arguments follow, each converted with str() and escaped
    with re.escape(). The words are joined by single spaces, wrapped in
    double quotes and given to ``/bin/sh -c`` on the remote side. Returns the
    object produced by spawn_process(), extended with a kill() attribute that
    sends SIGKILL to the SSH process.
    """
    logger.debug("Dispatching command to %s with ssh" % host)

    assignments = ["%s=%s" % pair for pair in environment.items()]
    escaped_args = [re.escape(str(arg)) for arg in arguments]
    remote_line = " ".join(assignments + [command] + escaped_args)

    ssh_argv = ["ssh", "-n", "-tt", "-x", host, "--", "/bin/sh", "-c"]
    ssh_argv.append('"' + remote_line + '"')
    connection = spawn_process(ssh_argv, logger)

    def _sigkill():
        return os.kill(connection.pid, signal.SIGKILL)

    connection.kill = _sigkill
    return connection
예제 #16
0
def run_via_mpirun(logger, host, command, environment, arguments):
    """
    Dispatch a remote command via mpirun.

    Return a Popen object pointing at the MPI command, to which we add a kill
    method for shutting down the connection if required.

    Each key of ``environment`` is named with a ``-x`` option, and the child
    is started with a copy of the current process environment updated with
    ``environment``. The calling process's own os.environ is left untouched.
    """
    logger.debug("Dispatching command to %s with mpirun" % host)
    mpi_cmd = ["/usr/bin/mpirun", "-host", host]
    for key in environment.keys():
        mpi_cmd.extend(["-x", key])
    mpi_cmd.append("--")
    mpi_cmd.extend(command.split())  # command is split into (python, script)
    mpi_cmd.extend(str(arg) for arg in arguments)
    # Work on a copy: updating os.environ itself would permanently change the
    # environment of this process and of everything it spawns afterwards.
    env = os.environ.copy()
    env.update(environment)
    process = spawn_process(mpi_cmd, logger, env = env)
    # mpirun should be killed with a SIGTERM to enable it to shut down the
    # remote command.
    process.kill = lambda : os.kill(process.pid, signal.SIGTERM)
    return process
예제 #17
0
def run_via_mpirun(logger, host, command, environment, arguments):
    """
    Dispatch a remote command via mpirun.

    Return a Popen object pointing at the MPI command, to which we add a kill
    method for shutting down the connection if required.

    Each key of ``environment`` is named with a ``-x`` option, and the child
    is started with a copy of the current process environment updated with
    ``environment``. The calling process's own os.environ is left untouched.
    """
    logger.debug("Dispatching command to %s with mpirun" % host)
    mpi_cmd = ["/usr/bin/mpirun", "-host", host]
    for key in environment.keys():
        mpi_cmd.extend(["-x", key])
    mpi_cmd.append("--")
    mpi_cmd.extend(command.split())  # command is split into (python, script)
    mpi_cmd.extend(str(arg) for arg in arguments)
    # Work on a copy: updating os.environ itself would permanently change the
    # environment of this process and of everything it spawns afterwards.
    env = os.environ.copy()
    env.update(environment)
    process = spawn_process(mpi_cmd, logger, env=env)
    # mpirun should be killed with a SIGTERM to enable it to shut down the
    # remote command.
    process.kill = lambda: os.kill(process.pid, signal.SIGTERM)
    return process
예제 #18
0
def run_via_mpirun_fionn(logger, host, command, environment, arguments):
    """
    Dispatch a remote command via mpirun.

    Return a Popen object pointing at the MPI command, to which we add a kill
    method for shutting down the connection if required.

    The keys of ``environment`` are passed as one comma-separated
    ``-envlist`` value, and the child is started with a copy of the current
    process environment updated with ``environment``. The calling process's
    own os.environ is left untouched.
    """
    logger.debug("Dispatching command to %s with mpirun" % host)
    mpi_cmd = ["mpirun", "-hosts", host, "-np", "1"]
    envlst = ",".join(str(key) for key in environment)
    mpi_cmd.extend(['-envlist', envlst])
    mpi_cmd.extend(command.split())  # command is split into (python, script)
    mpi_cmd.extend(str(arg) for arg in arguments)
    # Work on a copy: updating os.environ itself would permanently change the
    # environment of this process and of everything it spawns afterwards.
    env = os.environ.copy()
    env.update(environment)
    process = spawn_process(mpi_cmd, logger, env=env)
    # mpirun should be killed with a SIGTERM to enable it to shut down the
    # remote command.
    process.kill = lambda: os.kill(process.pid, signal.SIGTERM)
    return process
예제 #19
0
파일: bbs.py 프로젝트: jjdmol/LOFAR
    def go(self):
        """
        Run BBS over all input data, one group of subbands at a time.

        Steps, as implemented below:

          1. cook the 'parmdb' and 'sourcedb' recipes;
          2. cook the 'vdsmaker' recipe to build a GVDS file for all data;
          3. for every group yielded by gvds_iterator(): clear the BBS
             session database for our key, combine the group's VDS files,
             patch a control parset, then run GlobalControl in its own
             thread alongside one kernel thread per (host, file, vds) entry.

        Returns 0 on success and 1 if a sub-recipe reports failure or the
        killswitch was set while processing a group. Raises
        subprocess.CalledProcessError if combinevds exits with a non-zero
        status.
        """
        self.logger.info("Starting BBS run")
        super(bbs, self).go()

        #             Generate source and parameter databases for all input data
        # ----------------------------------------------------------------------
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['parmdbm']
        inputs['working_directory'] = self.config.get(
            "DEFAULT", "default_working_directory"
        )
        inputs['mapfile'] = self.task_definitions.get('parmdb','mapfile')
        inputs['suffix'] = ".instrument"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('parmdb', inputs, outputs):
            self.logger.warn("parmdb reports failure")
            return 1
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['makesourcedb']
        inputs['skymodel'] = self.inputs['skymodel']
        inputs['mapfile'] = self.task_definitions.get('sourcedb','mapfile')
        inputs['suffix'] = ".sky"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('sourcedb', inputs, outputs):
            self.logger.warn("sourcedb reports failure")
            return 1

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for BBS")
        vds_file = os.path.join(
            self.config.get("layout", "job_directory"),
            "vds",
            "bbs.gvds"
        )
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = vds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(vds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("BBS GVDS is %s" % (vds_file,))


        #      Iterate over groups of subbands divided up for convenient cluster
        #         processing -- ie, no more than nproc subbands per compute node
        # ----------------------------------------------------------------------
        for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])):
            #               to_process is a list of (host, filename, vds) tuples
            # ------------------------------------------------------------------
            hosts, ms_names, vds_files = map(list, zip(*to_process))

            #             The BBS session database should be cleared for our key
            # ------------------------------------------------------------------
            self.logger.debug(
                "Cleaning BBS database for key %s" % (self.inputs["key"])
            )
            with closing(
                psycopg2.connect(
                    host=self.inputs["db_host"],
                    user=self.inputs["db_user"],
                    database=self.inputs["db_name"]
                )
            ) as db_connection:
                db_connection.set_isolation_level(
                    psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT
                )
                with closing(db_connection.cursor()) as db_cursor:
                    db_cursor.execute(
                        "DELETE FROM blackboard.session WHERE key=%s",
                        (self.inputs["key"],)
                    )

            #     BBS GlobalControl requires a GVDS file describing all the data
            #          to be processed. We assemble that from the separate parts
            #                                         already available on disk.
            # ------------------------------------------------------------------
            self.logger.debug("Building VDS file describing data for BBS run")
            vds_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
            vds_file = os.path.join(vds_dir, "bbs.gvds")
            combineproc = utilities.spawn_process(
                [
                    self.inputs['combinevds'],
                    vds_file,
                ] + vds_files,
                self.logger
            )
            sout, serr = combineproc.communicate()
            log_process_output(self.inputs['combinevds'], sout, serr, self.logger)
            if combineproc.returncode != 0:
                #   Report the executable that failed. The local name "command"
                #   is only assigned further down, so using it here raised
                #     UnboundLocalError on the first group and named the wrong
                #                                    program on any later group.
                raise subprocess.CalledProcessError(
                    combineproc.returncode, self.inputs['combinevds']
                )

            #      Construct a parset for BBS GlobalControl by patching the GVDS
            #           file and database information into the supplied template
            # ------------------------------------------------------------------
            self.logger.debug("Building parset for BBS control")
            bbs_parset = utilities.patch_parset(
                self.inputs['parset'],
                {
                    'Observation': vds_file,
                    'BBDB.Key': self.inputs['key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
                    # 'BBDB.Port': self.inputs['db_name'],
                }
            )
            self.logger.debug("BBS control parset is %s" % (bbs_parset,))

            try:
                #        When one of our processes fails, we set the killswitch.
                #      Everything else will then come crashing down, rather than
                #                                         hanging about forever.
                # --------------------------------------------------------------
                self.killswitch = threading.Event()
                self.killswitch.clear()
                #    Signal handlers are called with (signum, frame), which
                #      Event.set() does not accept, so wrap it in a lambda.
                signal.signal(
                    signal.SIGTERM,
                    lambda signum, frame: self.killswitch.set()
                )

                #                           GlobalControl runs in its own thread
                # --------------------------------------------------------------
                run_flag = threading.Event()
                run_flag.clear()
                bbs_control = threading.Thread(
                    target=self._run_bbs_control,
                    args=(bbs_parset, run_flag)
                )
                bbs_control.start()
                run_flag.wait()    # Wait for control to start before proceeding

                #      We run BBS KernelControl on each compute node by directly
                #                             invoking the node script using SSH
                #      Note that we use a job_server to send out job details and
                #           collect logging information, so we define a bunch of
                #    ComputeJobs. However, we need more control than the generic
                #     ComputeJob.dispatch method supplies, so we'll control them
                #                                          with our own threads.
                # --------------------------------------------------------------
                command = "python %s" % (self.__file__.replace('master', 'nodes'))
                env = {
                    "LOFARROOT": utilities.read_initscript(self.logger, self.inputs['initscript'])["LOFARROOT"],
                    "PYTHONPATH": self.config.get('deploy', 'engine_ppath'),
                    "LD_LIBRARY_PATH": self.config.get('deploy', 'engine_lpath')
                }
                jobpool = {}
                bbs_kernels = []
                with job_server(self.logger, jobpool, self.error) as (jobhost, jobport):
                    self.logger.debug("Job server at %s:%d" % (jobhost, jobport))
                    for job_id, details in enumerate(to_process):
                        host, ms_name, vds = details
                        jobpool[job_id] = ComputeJob(
                            host, command,
                            arguments=[
                                self.inputs['kernel_exec'],
                                self.inputs['initscript'],
                                ms_name,
                                self.inputs['key'],
                                self.inputs['db_name'],
                                self.inputs['db_user'],
                                self.inputs['db_host']
                            ]
                        )
                        bbs_kernels.append(
                            threading.Thread(
                                target=self._run_bbs_kernel,
                                args=(host, command, env, job_id,
                                    jobhost, str(jobport)
                                )
                            )
                        )
                    self.logger.info("Starting %d threads" % len(bbs_kernels))
                    for thread in bbs_kernels:
                        thread.start()
                    self.logger.debug("Waiting for all kernels to complete")
                    for thread in bbs_kernels:
                        thread.join()


                #         When GlobalControl finishes, our work here is done
                # ----------------------------------------------------------
                self.logger.info("Waiting for GlobalControl thread")
                bbs_control.join()
            finally:
                os.unlink(bbs_parset)
                shutil.rmtree(vds_dir)
                if self.killswitch.isSet():
                    #  If killswitch is set, then one of our processes failed so
                    #                                   the whole run is invalid
                    #    NOTE: returning from "finally" also discards any
                    #            exception that was propagating out of the try.
                    # ----------------------------------------------------------
                    return 1

        return 0
예제 #20
0
    def go(self):
        self.logger.info("Starting BBS run")
        super(bbs, self).go()

        #             Generate source and parameter databases for all input data
        # ----------------------------------------------------------------------
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['parmdbm']
        inputs['working_directory'] = self.config.get(
            "DEFAULT", "default_working_directory")
        inputs['mapfile'] = self.task_definitions.get('parmdb', 'mapfile')
        inputs['suffix'] = ".instrument"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('parmdb', inputs, outputs):
            self.logger.warn("parmdb reports failure")
            return 1
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['makesourcedb']
        inputs['skymodel'] = self.inputs['skymodel']
        inputs['mapfile'] = self.task_definitions.get('sourcedb', 'mapfile')
        inputs['suffix'] = ".sky"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('sourcedb', inputs, outputs):
            self.logger.warn("sourcedb reports failure")
            return 1

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for BBS")
        vds_file = os.path.join(self.config.get("layout", "job_directory"),
                                "vds", "bbs.gvds")
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = vds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(vds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("BBS GVDS is %s" % (vds_file, ))

        #      Iterate over groups of subbands divided up for convenient cluster
        #          procesing -- ie, no more than nproc subbands per compute node
        # ----------------------------------------------------------------------
        for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])):
            #               to_process is a list of (host, filename, vds) tuples
            # ------------------------------------------------------------------
            hosts, ms_names, vds_files = map(list, zip(*to_process))

            #             The BBS session database should be cleared for our key
            # ------------------------------------------------------------------
            self.logger.debug("Cleaning BBS database for key %s" %
                              (self.inputs["key"]))
            with closing(
                    psycopg2.connect(
                        host=self.inputs["db_host"],
                        user=self.inputs["db_user"],
                        database=self.inputs["db_name"])) as db_connection:
                db_connection.set_isolation_level(
                    psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
                with closing(db_connection.cursor()) as db_cursor:
                    db_cursor.execute(
                        "DELETE FROM blackboard.session WHERE key=%s",
                        (self.inputs["key"], ))

            #     BBS GlobalControl requires a GVDS file describing all the data
            #          to be processed. We assemble that from the separate parts
            #                                         already available on disk.
            # ------------------------------------------------------------------
            self.logger.debug("Building VDS file describing data for BBS run")
            vds_dir = tempfile.mkdtemp(suffix=".%s" %
                                       (os.path.basename(__file__), ))
            vds_file = os.path.join(vds_dir, "bbs.gvds")
            combineproc = utilities.spawn_process([
                self.inputs['combinevds'],
                vds_file,
            ] + vds_files, self.logger)
            sout, serr = combineproc.communicate()
            log_process_output(self.inputs['combinevds'], sout, serr,
                               self.logger)
            if combineproc.returncode != 0:
                raise subprocess.CalledProcessError(combineproc.returncode,
                                                    command)

            #      Construct a parset for BBS GlobalControl by patching the GVDS
            #           file and database information into the supplied template
            # ------------------------------------------------------------------
            self.logger.debug("Building parset for BBS control")
            bbs_parset = utilities.patch_parset(
                self.inputs['parset'],
                {
                    'Observation': vds_file,
                    'BBDB.Key': self.inputs['key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
                    #                'BBDB.Port': self.inputs['db_name'],
                })
            self.logger.debug("BBS control parset is %s" % (bbs_parset, ))

            try:
                #        When one of our processes fails, we set the killswitch.
                #      Everything else will then come crashing down, rather than
                #                                         hanging about forever.
                # --------------------------------------------------------------
                self.killswitch = threading.Event()
                self.killswitch.clear()
                signal.signal(signal.SIGTERM, self.killswitch.set)

                #                           GlobalControl runs in its own thread
                # --------------------------------------------------------------
                run_flag = threading.Event()
                run_flag.clear()
                bbs_control = threading.Thread(target=self._run_bbs_control,
                                               args=(bbs_parset, run_flag))
                bbs_control.start()
                run_flag.wait()  # Wait for control to start before proceeding

                #      We run BBS KernelControl on each compute node by directly
                #                             invoking the node script using SSH
                #      Note that we use a job_server to send out job details and
                #           collect logging information, so we define a bunch of
                #    ComputeJobs. However, we need more control than the generic
                #     ComputeJob.dispatch method supplies, so we'll control them
                #                                          with our own threads.
                # --------------------------------------------------------------
                command = "python %s" % (self.__file__.replace(
                    'master', 'nodes'))
                env = {
                    "LOFARROOT":
                    utilities.read_initscript(
                        self.logger, self.inputs['initscript'])["LOFARROOT"],
                    "PYTHONPATH":
                    self.config.get('deploy', 'engine_ppath'),
                    "LD_LIBRARY_PATH":
                    self.config.get('deploy', 'engine_lpath')
                }
                jobpool = {}
                bbs_kernels = []
                with job_server(self.logger, jobpool,
                                self.error) as (jobhost, jobport):
                    self.logger.debug("Job server at %s:%d" %
                                      (jobhost, jobport))
                    for job_id, details in enumerate(to_process):
                        host, file, vds = details
                        jobpool[job_id] = ComputeJob(
                            host,
                            command,
                            arguments=[
                                self.inputs['kernel_exec'],
                                self.inputs['initscript'], file,
                                self.inputs['key'], self.inputs['db_name'],
                                self.inputs['db_user'], self.inputs['db_host']
                            ])
                        bbs_kernels.append(
                            threading.Thread(target=self._run_bbs_kernel,
                                             args=(host, command, env, job_id,
                                                   jobhost, str(jobport))))
                    self.logger.info("Starting %d threads" % len(bbs_kernels))
                    [thread.start() for thread in bbs_kernels]
                    self.logger.debug("Waiting for all kernels to complete")
                    [thread.join() for thread in bbs_kernels]

                #         When GlobalControl finishes, our work here is done
                # ----------------------------------------------------------
                self.logger.info("Waiting for GlobalControl thread")
                bbs_control.join()
            finally:
                os.unlink(bbs_parset)
                shutil.rmtree(vds_dir)
                if self.killswitch.isSet():
                    #  If killswitch is set, then one of our processes failed so
                    #                                   the whole run is invalid
                    # ----------------------------------------------------------
                    return 1

        return 0