def convert_mwimager_parset(parset):
    try:
        with patched_parset(
            parset,
            {
                'dataset': dataset,
                'Images.frequency': frequency,
                'msDirType': ms_dir_type,
                'msDirRa': ms_dir_ra,
                'msDirDec': ms_dir_dec,
                'restore': restore  # cimager bug: non-restored image unusable
            }
        ) as cimager_parset:
            fd, converted_parset = tempfile.mkstemp(
                dir=self.config.get("layout", "job_directory")
            )
            convert_process = spawn_process(
                [self.inputs['convert_exec'], cimager_parset, converted_parset],
                self.logger
            )
            os.close(fd)
            sout, serr = convert_process.communicate()
            log_process_output(self.inputs['convert_exec'], sout, serr,
                               self.logger)
            if convert_process.returncode != 0:
                # Report the failing executable by name; the bare identifier
                # convert_exec is not defined in this scope.
                raise subprocess.CalledProcessError(
                    convert_process.returncode, self.inputs['convert_exec']
                )
            return converted_parset
    except OSError as e:
        self.logger.error("Failed to spawn convertimagerparset (%s)" % str(e))
        raise
    except subprocess.CalledProcessError as e:
        self.logger.error(str(e))
        raise

def run_via_mpiexec(logger, command, arguments, host):
    for arg in arguments:
        command = command + " " + str(arg)
    commandstring = ["mpiexec", "-x", "-np=1", "/bin/sh", "-c",
                     "hostname && " + command]
    process = spawn_process(commandstring, logger)
    process.kill = lambda: os.kill(process.pid, signal.SIGKILL)
    return process

def _run_bbs_control(self, bbs_parset, run_flag):
    """
    Run BBS Global Control and wait for it to finish.
    Return its return code.
    """
    env = utilities.read_initscript(self.logger, self.inputs['initscript'])
    self.logger.info("Running BBS GlobalControl")
    working_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
    with CatchLog4CPlus(
        working_dir,
        self.logger.name + ".GlobalControl",
        os.path.basename(self.inputs['control_exec'])
    ):
        with utilities.log_time(self.logger):
            try:
                bbs_control_process = utilities.spawn_process(
                    [self.inputs['control_exec'], bbs_parset, "0"],
                    self.logger,
                    cwd=working_dir,
                    env=env
                )
                # _monitor_process() needs a convenient kill() method.
                bbs_control_process.kill = lambda: os.kill(
                    bbs_control_process.pid, signal.SIGKILL)
            except OSError as e:
                self.logger.error("Failed to spawn BBS Control (%s)" % str(e))
                self.killswitch.set()
                return 1
            finally:
                run_flag.set()

        # Tail reconstructed from the complete variant of this method below.
        returncode = self._monitor_process(bbs_control_process, "BBS Control")
        sout, serr = bbs_control_process.communicate()
    shutil.rmtree(working_dir)
    log_process_output(self.inputs['control_exec'], sout, serr, self.logger)
    return returncode

def _run_bbs_control(self, bbs_parset, run_flag):
    """
    Run BBS Global Control and wait for it to finish.
    Return its return code.
    """
    self.logger.info("Running BBS GlobalControl")
    working_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
    with CatchLog4CPlus(
        working_dir,
        self.logger.name + ".GlobalControl",
        os.path.basename(self.inputs['control_exec'])
    ):
        with utilities.log_time(self.logger):
            try:
                bbs_control_process = utilities.spawn_process(
                    [self.inputs['control_exec'], bbs_parset, "0"],
                    self.logger,
                    cwd=working_dir,
                    env=self.environment
                )
                # _monitor_process() needs a convenient kill() method.
                bbs_control_process.kill = lambda: os.kill(
                    bbs_control_process.pid, signal.SIGKILL)
            except OSError as e:
                self.logger.error("Failed to spawn BBS Control (%s)" % str(e))
                self.killswitch.set()
                return 1
            finally:
                run_flag.set()

        returncode = self._monitor_process(bbs_control_process, "BBS Control")
        sout, serr = communicate_returning_strings(bbs_control_process)
    shutil.rmtree(working_dir)
    log_process_output(self.inputs['control_exec'], sout, serr, self.logger)
    return returncode

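# The methods above delegate to self._monitor_process(), which is not shown in
# this section. A minimal sketch of what such a monitor can look like -- an
# illustration under stated assumptions, not the original implementation: poll
# the child until it exits, and kill it if the shared killswitch Event gets
# set by another failing part of the run.
import time

def monitor_process(process, killswitch, poll_interval=1.0):
    """Wait for process, killing it if killswitch is set; return its exit code."""
    while process.poll() is None:
        if killswitch.is_set():
            # The dispatchers attach a suitable kill() method to the Popen.
            process.kill()
            break
        time.sleep(poll_interval)
    return process.wait()
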
def run_via_mpiexec_cep(logger, command, arguments, host):
    for arg in arguments:
        command = command + " " + str(arg)
    commandstring = ["mpiexec", "-x", "PYTHONPATH",
                     "-x", "LD_LIBRARY_PATH",
                     "-x", "PATH",
                     "-H", host,
                     "/bin/sh", "-c", "hostname ; " + command]
    process = spawn_process(commandstring, logger)
    process.kill = lambda: os.kill(process.pid, signal.SIGKILL)
    return process

def run_via_local(logger, command, arguments):
    commandstring = ["/bin/sh", "-c"]
    for arg in arguments:
        command = command + " " + str(arg)
    commandstring.append(command)
    process = spawn_process(commandstring, logger)
    process.kill = lambda: os.kill(process.pid, signal.SIGKILL)
    return process

def run_via_custom_cmdline(logger, host, command, environment, arguments, config):
    """
    Dispatch a remote command via a customisable command line.

    We return a Popen object pointing at the running executable, to which we
    add a kill method for shutting down the connection if required.

    The command line is taken from the "remote.cmdline" configuration option,
    with the following strings replaced:

        {host}         := host to execute command on
        {command}      := bash command line to be executed
        {uid}          := uid of the calling user
        {docker_image} := docker.image configuration option
        {slurm_job_id} := the SLURM job id to allocate resources in
    """
    commandArray = ["%s=%s" % (key, value)
                    for key, value in environment.items()]
    commandArray.append(command)
    commandArray.extend(re.escape(str(arg)) for arg in arguments)
    commandStr = " ".join(commandArray)

    try:
        image = config.get('docker', 'image')
    except:
        image = "lofar"

    # Construct the full command line, except for {command}, as that itself
    # can contain spaces which we don't want to split on.
    full_command_line = config.get('remote', 'cmdline').format(
        uid=os.geteuid(),
        slurm_job_id=os.environ.get("SLURM_JOB_ID"),
        docker_image=image,
        host=host,
        command="{command}"
    ).split(' ')

    # Fill in {command} somewhere
    full_command_line = [x.format(command=commandStr)
                         for x in full_command_line]

    logger.debug("Dispatching command to %s with custom_cmdline: %s" %
                 (host, full_command_line))

    process = spawn_process(full_command_line, logger)
    process.kill = lambda: os.kill(process.pid, signal.SIGKILL)
    return process

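# Illustrative sketch of how run_via_custom_cmdline() expands the
# "remote.cmdline" option in two stages: every placeholder except {command} is
# filled first, the line is split on spaces, and {command} is substituted last
# so that a command containing spaces survives intact. The template string
# below is an assumption for demonstration only, not a shipped configuration.
template = "ssh {host} docker run -u {uid} {docker_image} /bin/sh -c {command}"
stage_one = template.format(
    uid=1000, slurm_job_id=None, docker_image="lofar",
    host="node001", command="{command}"   # keep {command} for the second pass
).split(' ')
full_command_line = [x.format(command="echo hello") for x in stage_one]
print(full_command_line)
# ['ssh', 'node001', 'docker', 'run', '-u', '1000', 'lofar', '/bin/sh', '-c', 'echo hello']
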
def run_via_slurm_srun_cep3(logger, command, arguments, host):
    logger.debug("Dispatching command to %s with srun" % host)
    for arg in arguments:
        command = command + " " + str(arg)
    commandstring = ["srun", "-N 1", "-n 1", "-w", host,
                     "/bin/sh", "-c", "hostname && " + command]
    #commandstring = ["srun", "-N 1", "--cpu_bind=map_cpu:none", "-w", host,
    #                 "/bin/sh", "-c", "hostname && " + command]
    # We have a bug that crashes jobs when too many get started at the same
    # time; temporary, NOT 100% reliable workaround:
    #from random import randint
    #time.sleep(randint(0, 10))
    ##########################
    process = spawn_process(commandstring, logger)
    process.kill = lambda: os.kill(process.pid, signal.SIGKILL)
    return process

def run_via_ssh(logger, host, command, environment, arguments):
    """
    Dispatch a remote command via SSH.

    We return a Popen object pointing at the SSH session, to which we add a
    kill method for shutting down the connection if required.
    """
    logger.debug("Dispatching command to %s with ssh" % host)
    ssh_cmd = ["ssh", "-n", "-tt", "-x", host, "--", "/bin/sh", "-c"]

    commandstring = ["%s=%s" % (key, value)
                     for key, value in environment.items()]
    commandstring.append(command)
    commandstring.extend(re.escape(str(arg)) for arg in arguments)
    ssh_cmd.append('"' + " ".join(commandstring) + '"')

    process = spawn_process(ssh_cmd, logger)
    process.kill = lambda: os.kill(process.pid, signal.SIGKILL)
    return process

def run_via_mpirun(logger, host, command, environment, arguments):
    """
    Dispatch a remote command via mpirun.

    Return a Popen object pointing at the MPI command, to which we add a kill
    method for shutting down the connection if required.
    """
    logger.debug("Dispatching command to %s with mpirun" % host)
    mpi_cmd = ["/usr/bin/mpirun", "-host", host]
    for key in environment.keys():
        mpi_cmd.extend(["-x", key])
    mpi_cmd.append("--")
    mpi_cmd.extend(command.split())  # command is split into (python, script)
    mpi_cmd.extend(str(arg) for arg in arguments)
    env = dict(os.environ)  # copy, so the caller's environment is not mutated
    env.update(environment)
    process = spawn_process(mpi_cmd, logger, env=env)
    # mpirun should be killed with a SIGTERM to enable it to shut down the
    # remote command.
    process.kill = lambda: os.kill(process.pid, signal.SIGTERM)
    return process

def run_via_mpirun_fionn(logger, host, command, environment, arguments):
    """
    Dispatch a remote command via mpirun.

    Return a Popen object pointing at the MPI command, to which we add a kill
    method for shutting down the connection if required.
    """
    logger.debug("Dispatching command to %s with mpirun" % host)
    mpi_cmd = ["mpirun", "-hosts", host, "-np", "1"]
    envlst = ''
    for key in environment.keys():
        envlst = envlst + ',' + str(key)
    # Remember to strip the leading comma.
    mpi_cmd.extend(['-envlist', envlst[1:]])
    mpi_cmd.extend(command.split())  # command is split into (python, script)
    mpi_cmd.extend(str(arg) for arg in arguments)
    # print("MPI command NEW = " + str(mpi_cmd))
    env = dict(os.environ)  # copy, so the caller's environment is not mutated
    env.update(environment)
    process = spawn_process(mpi_cmd, logger, env=env)
    # mpirun should be killed with a SIGTERM to enable it to shut down the
    # remote command.
    process.kill = lambda: os.kill(process.pid, signal.SIGTERM)
    return process

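# A minimal usage sketch for the run_via_* dispatchers above, not part of the
# original code. The import path lofarpipe.support.remotecommand is an
# assumption about where these helpers live; the communicate()/kill() pattern
# mirrors what the dispatchers themselves set up.
import logging

from lofarpipe.support.remotecommand import run_via_local  # assumed location

def example_local_dispatch():
    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("remotecommand.example")
    # Run a trivial command locally; the dispatcher wraps it in /bin/sh -c and
    # attaches a kill() method to the returned Popen object.
    process = run_via_local(logger, "echo", ["hello", "pipeline"])
    try:
        sout, serr = process.communicate()
        logger.info("stdout: %s", sout)
    except KeyboardInterrupt:
        process.kill()
        raise
    return process.returncode
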
def go(self):
    self.logger.info("Starting BBS run")
    super(bbs, self).go()

    # Generate source and parameter databases for all input data
    # ----------------------------------------------------------------------
    inputs = LOFARinput(self.inputs)
    inputs['args'] = self.inputs['args']
    inputs['executable'] = self.inputs['parmdbm']
    inputs['working_directory'] = self.config.get(
        "DEFAULT", "default_working_directory"
    )
    inputs['mapfile'] = self.task_definitions.get('parmdb', 'mapfile')
    inputs['suffix'] = ".instrument"
    outputs = LOFARoutput(self.inputs)
    if self.cook_recipe('parmdb', inputs, outputs):
        self.logger.warn("parmdb reports failure")
        return 1
    inputs['args'] = self.inputs['args']
    inputs['executable'] = self.inputs['makesourcedb']
    inputs['skymodel'] = self.inputs['skymodel']
    inputs['mapfile'] = self.task_definitions.get('sourcedb', 'mapfile')
    inputs['suffix'] = ".sky"
    outputs = LOFARoutput(self.inputs)
    if self.cook_recipe('sourcedb', inputs, outputs):
        self.logger.warn("sourcedb reports failure")
        return 1

    # Build a GVDS file describing all the data to be processed
    # ----------------------------------------------------------------------
    self.logger.debug("Building VDS file describing all data for BBS")
    vds_file = os.path.join(
        self.config.get("layout", "job_directory"), "vds", "bbs.gvds"
    )
    inputs = LOFARinput(self.inputs)
    inputs['args'] = self.inputs['args']
    inputs['gvds'] = vds_file
    inputs['unlink'] = False
    inputs['makevds'] = self.inputs['makevds']
    inputs['combinevds'] = self.inputs['combinevds']
    inputs['nproc'] = self.inputs['nproc']
    inputs['directory'] = os.path.dirname(vds_file)
    outputs = LOFARoutput(self.inputs)
    if self.cook_recipe('vdsmaker', inputs, outputs):
        self.logger.warn("vdsmaker reports failure")
        return 1
    self.logger.debug("BBS GVDS is %s" % (vds_file,))

    # Iterate over groups of subbands divided up for convenient cluster
    # processing -- i.e., no more than nproc subbands per compute node
    # ----------------------------------------------------------------------
    for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])):
        # to_process is a list of (host, filename, vds) tuples
        # ------------------------------------------------------------------
        hosts, ms_names, vds_files = map(list, zip(*to_process))

        # The BBS session database should be cleared for our key
        # ------------------------------------------------------------------
        self.logger.debug(
            "Cleaning BBS database for key %s" % (self.inputs["key"])
        )
        with closing(
            psycopg2.connect(
                host=self.inputs["db_host"],
                user=self.inputs["db_user"],
                database=self.inputs["db_name"]
            )
        ) as db_connection:
            db_connection.set_isolation_level(
                psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT
            )
            with closing(db_connection.cursor()) as db_cursor:
                db_cursor.execute(
                    "DELETE FROM blackboard.session WHERE key=%s",
                    (self.inputs["key"],)
                )

        # BBS GlobalControl requires a GVDS file describing all the data
        # to be processed. We assemble that from the separate parts
        # already available on disk.
        # ------------------------------------------------------------------
        self.logger.debug("Building VDS file describing data for BBS run")
        vds_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
        vds_file = os.path.join(vds_dir, "bbs.gvds")
        combineproc = utilities.spawn_process(
            [
                self.inputs['combinevds'],
                vds_file,
            ] + vds_files,
            self.logger
        )
        sout, serr = combineproc.communicate()
        log_process_output(self.inputs['combinevds'], sout, serr, self.logger)
        if combineproc.returncode != 0:
            # Report the combinevds executable; the name "command" is not
            # defined until later in this method.
            raise subprocess.CalledProcessError(
                combineproc.returncode, self.inputs['combinevds']
            )

        # Construct a parset for BBS GlobalControl by patching the GVDS
        # file and database information into the supplied template
        # ------------------------------------------------------------------
        self.logger.debug("Building parset for BBS control")
        bbs_parset = utilities.patch_parset(
            self.inputs['parset'],
            {
                'Observation': vds_file,
                'BBDB.Key': self.inputs['key'],
                'BBDB.Name': self.inputs['db_name'],
                'BBDB.User': self.inputs['db_user'],
                'BBDB.Host': self.inputs['db_host'],
                # 'BBDB.Port': self.inputs['db_name'],
            }
        )
        self.logger.debug("BBS control parset is %s" % (bbs_parset,))

        try:
            # When one of our processes fails, we set the killswitch.
            # Everything else will then come crashing down, rather than
            # hanging about forever.
            # --------------------------------------------------------------
            self.killswitch = threading.Event()
            self.killswitch.clear()
            # Signal handlers are called with (signum, frame); Event.set()
            # takes no arguments, so wrap it in a lambda.
            signal.signal(
                signal.SIGTERM, lambda signum, frame: self.killswitch.set()
            )

            # GlobalControl runs in its own thread
            # --------------------------------------------------------------
            run_flag = threading.Event()
            run_flag.clear()
            bbs_control = threading.Thread(
                target=self._run_bbs_control,
                args=(bbs_parset, run_flag)
            )
            bbs_control.start()
            run_flag.wait()    # Wait for control to start before proceeding

            # We run BBS KernelControl on each compute node by directly
            # invoking the node script using SSH
            # Note that we use a job_server to send out job details and
            # collect logging information, so we define a bunch of
            # ComputeJobs. However, we need more control than the generic
            # ComputeJob.dispatch method supplies, so we'll control them
            # with our own threads.
            # --------------------------------------------------------------
            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            env = {
                "LOFARROOT": utilities.read_initscript(
                    self.logger, self.inputs['initscript']
                )["LOFARROOT"],
                "PYTHONPATH": self.config.get('deploy', 'engine_ppath'),
                "LD_LIBRARY_PATH": self.config.get('deploy', 'engine_lpath')
            }
            jobpool = {}
            bbs_kernels = []
            with job_server(self.logger, jobpool, self.error) as (jobhost, jobport):
                self.logger.debug("Job server at %s:%d" % (jobhost, jobport))
                for job_id, details in enumerate(to_process):
                    host, file, vds = details
                    jobpool[job_id] = ComputeJob(
                        host, command,
                        arguments=[
                            self.inputs['kernel_exec'],
                            self.inputs['initscript'],
                            file,
                            self.inputs['key'],
                            self.inputs['db_name'],
                            self.inputs['db_user'],
                            self.inputs['db_host']
                        ]
                    )
                    bbs_kernels.append(
                        threading.Thread(
                            target=self._run_bbs_kernel,
                            args=(host, command, env, job_id,
                                  jobhost, str(jobport))
                        )
                    )
                self.logger.info("Starting %d threads" % len(bbs_kernels))
                [thread.start() for thread in bbs_kernels]
                self.logger.debug("Waiting for all kernels to complete")
                [thread.join() for thread in bbs_kernels]

            # When GlobalControl finishes, our work here is done
            # ----------------------------------------------------------
            self.logger.info("Waiting for GlobalControl thread")
            bbs_control.join()
        finally:
            os.unlink(bbs_parset)
            shutil.rmtree(vds_dir)
        if self.killswitch.isSet():
            # If killswitch is set, then one of our processes failed so
            # the whole run is invalid
            # ----------------------------------------------------------
            return 1

    return 0

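# A minimal, self-contained sketch of the killswitch pattern used in go()
# above: a threading.Event shared between the control thread and the worker
# threads, set from a SIGTERM handler so every thread notices failure and
# shuts down. This is an illustration, not LOFAR code; the worker body and
# timings are assumptions.
import signal
import threading
import time

killswitch = threading.Event()

def handle_sigterm(signum, frame):
    # A signal handler receives (signum, frame); Event.set() takes no
    # arguments, so wrap it instead of registering killswitch.set directly.
    killswitch.set()

signal.signal(signal.SIGTERM, handle_sigterm)

def worker(name):
    # Each worker polls the shared Event and stops as soon as it is set,
    # whether by the SIGTERM handler or by another thread reporting failure.
    while not killswitch.wait(timeout=0.5):
        print("%s: still working" % name)

threads = [threading.Thread(target=worker, args=("kernel-%d" % i,)) for i in range(2)]
for t in threads:
    t.start()
time.sleep(2)
killswitch.set()      # simulate a failure elsewhere in the pipeline
for t in threads:
    t.join()
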