def _start_guest_worker(self, worker_id, worker_config, details=None):
    """
    Start a new guest worker process on this node.

    :param worker_id: ID under which the new worker is tracked in ``self._workers``.
    :type worker_id: str

    :param worker_config: The guest process configuration (keys used here:
        ``executable``, and optionally ``arguments``, ``options``).
    :type worker_config: dict

    :param details: Call details of the caller (used for progressive results
        and to exclude the caller from lifecycle events).
    :type details: :class:`autobahn.wamp.types.CallDetails`

    :returns: A Deferred (``worker.ready``) that fires with the guest worker
        startup information (dict) once the worker has started, or errbacks
        if startup fails.

    :raises: :class:`autobahn.wamp.exception.ApplicationError` if a worker
        with ``worker_id`` is already running or the configuration is invalid.
    """
    # prohibit starting a worker twice
    #
    if worker_id in self._workers:
        emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(worker_id)
        self.log.error(emsg)
        raise ApplicationError(u'crossbar.error.worker_already_running', emsg)

    # validate the guest worker configuration against the node personality
    #
    try:
        self.personality.check_guest(self.personality, worker_config)
    except Exception as e:
        raise ApplicationError(u'crossbar.error.invalid_configuration',
                               'invalid guest worker configuration: {}'.format(e))

    options = worker_config.get('options', {})

    # guest process working directory (relative 'workdir' is resolved
    # against the node directory)
    #
    workdir = self._node._cbdir
    if 'workdir' in options:
        workdir = os.path.join(workdir, options['workdir'])
    workdir = os.path.abspath(workdir)

    # guest process executable and command line arguments
    #

    # first try to configure the fully qualified path for the guest
    # executable by joining workdir and configured executable ..
    exe = os.path.abspath(os.path.join(workdir, worker_config['executable']))

    if check_executable(exe):
        self.log.info("Using guest worker executable '{exe}' (executable path taken from configuration)",
                      exe=exe)
    else:
        # try to detect the fully qualified path for the guest
        # executable by doing a "which" on the configured executable name
        exe = which(worker_config['executable'])
        if exe is not None and check_executable(exe):
            self.log.info("Using guest worker executable '{exe}' (executable path detected from environment)",
                          exe=exe)
        else:
            # FIX: error message previously read "could not find and executable"
            emsg = "Could not start worker: could not find an executable for '{}'".format(worker_config['executable'])
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)

    # guest process command line arguments
    #
    args = [exe]
    args.extend(worker_config.get('arguments', []))

    # guest process environment
    #
    worker_env = create_process_env(options)

    # log name of worker
    #
    worker_logname = 'Guest'

    # topic URIs used (later)
    #
    starting_topic = u'{}.on_guest_starting'.format(self._uri_prefix)
    started_topic = u'{}.on_guest_started'.format(self._uri_prefix)

    # add worker tracking instance to the worker map ..
    #
    worker = GuestWorkerProcess(self, worker_id, details.caller, keeplog=options.get('traceback', None))
    self._workers[worker_id] = worker

    # create a (custom) process endpoint
    #
    ep = WorkerProcessEndpoint(self._node._reactor, exe, args, path=workdir, env=worker_env, worker=worker)

    # ready handling
    #
    def on_ready_success(proto):
        # fired when the guest worker signals readiness over WAMP
        self.log.info('{worker_logname} worker "{worker_id}" started',
                      worker_logname=worker_logname, worker_id=worker.id)
        worker.on_worker_started(proto)

        # make sure the worker is cleaned up when the node reactor shuts down
        self._node._reactor.addSystemEventTrigger(
            'before', 'shutdown',
            self._cleanup_worker, self._node._reactor, worker,
        )

        # directory watcher
        #
        if 'watch' in options:
            if HAS_FS_WATCHER:
                # assemble list of watched directories
                watched_dirs = []
                for d in options['watch'].get('directories', []):
                    watched_dirs.append(os.path.abspath(os.path.join(self._node._cbdir, d)))

                worker.watch_timeout = options['watch'].get('timeout', 1)

                # create a filesystem watcher
                worker.watcher = FilesystemWatcher(workdir, watched_dirs=watched_dirs)

                # make sure to stop the watch upon Twisted being shut down
                def on_shutdown():
                    worker.watcher.stop()

                self._node._reactor.addSystemEventTrigger('before', 'shutdown', on_shutdown)

                # this handler will get fired by the watcher upon detecting an FS event
                def on_filesystem_change(fs_event):
                    worker.watcher.stop()
                    proto.signal('TERM')

                    if options['watch'].get('action', None) == 'restart':
                        self.log.info("Filesystem watcher detected change {fs_event} - restarting guest in {watch_timeout} seconds ..",
                                      fs_event=fs_event, watch_timeout=worker.watch_timeout)
                        # Add a timeout large enough (perhaps add a config option later)
                        self._node._reactor.callLater(worker.watch_timeout, self.start_worker, worker_id, worker_config, details)
                        # Shut the worker down, after the restart event is scheduled
                        # FIXME: all workers should have a stop() method ..
                        # -> 'GuestWorkerProcess' object has no attribute 'stop'
                        # worker.stop()
                    else:
                        self.log.info("Filesystem watcher detected change {fs_event} - no action taken!",
                                      fs_event=fs_event)

                # now start watching ..
                worker.watcher.start(on_filesystem_change)
            else:
                self.log.warn("Cannot watch directories for changes - feature not available")

        # assemble guest worker startup information
        #
        started_info = {
            u'id': worker.id,
            u'status': worker.status,
            u'started': utcstr(worker.started),
            u'who': worker.who,
        }

        # all sessions except the caller get the "started" event (the caller
        # receives started_info as the call result instead)
        self.publish(started_topic, started_info, options=PublishOptions(exclude=details.caller))

        return started_info

    def on_ready_error(err):
        # startup failed: drop the tracking entry and surface the error
        del self._workers[worker.id]

        emsg = 'Failed to start guest worker: {}'.format(err.value)
        self.log.error(emsg)
        raise ApplicationError(u"crossbar.error.cannot_start", emsg, ep.getlog())

    worker.ready.addCallbacks(on_ready_success, on_ready_error)

    def on_exit_success(res):
        self.log.info("Guest {worker_id} exited with success", worker_id=worker.id)
        del self._workers[worker.id]

    def on_exit_error(err):
        self.log.error("Guest {worker_id} exited with error {err.value}",
                       worker_id=worker.id, err=err)
        del self._workers[worker.id]

    worker.exit.addCallbacks(on_exit_success, on_exit_error)

    # create a transport factory for talking WAMP to the native worker
    #
    transport_factory = create_guest_worker_client_factory(worker_config, worker.ready, worker.exit)
    transport_factory.noisy = False
    self._workers[worker_id].factory = transport_factory

    # now (immediately before actually forking) signal the starting of the worker
    #
    starting_info = {
        u'id': worker_id,
        u'status': worker.status,
        u'created': utcstr(worker.created),
        u'who': worker.who,
    }

    # the caller gets a progressive result ..
    if details.progress:
        details.progress(starting_info)

    # .. while all others get an event
    self.publish(starting_topic, starting_info, options=PublishOptions(exclude=details.caller))

    # now actually fork the worker ..
    #
    self.log.info('{worker_logname} "{worker_id}" process starting ..',
                  worker_logname=worker_logname, worker_id=worker_id)
    self.log.debug('{worker_logname} "{worker_id}" process using command line "{cli}" ..',
                   worker_logname=worker_logname, worker_id=worker_id, cli=' '.join(args))

    d = ep.connect(transport_factory)

    def on_connect_success(proto):
        # this seems to be called immediately when the child process
        # has been forked. even if it then immediately fails because
        # e.g. the executable doesn't even exist. in other words,
        # I'm not sure under what conditions the deferred will
        # errback - probably only if the forking of a new process fails
        # at OS level due to out of memory conditions or such.
        self.log.debug('{worker_logname} "{worker_id}" connected',
                       worker_logname=worker_logname, worker_id=worker_id)

        # do not comment this: it will lead to on_worker_started being called
        # _before_ on_worker_connected, and we don't need it!
        # worker.on_worker_connected(proto)

    def on_connect_error(err):
        # not sure when this errback is triggered at all .. see above.
        self.log.failure(
            "Internal error: connection to forked guest worker failed ({log_failure.value})",
        )

        # in any case, forward the error ..
        worker.ready.errback(err)

    d.addCallbacks(on_connect_success, on_connect_error)

    return worker.ready
def _start_guest_worker(self, worker_id, worker_config, details=None):
    """
    Start a new guest worker process on this node.

    :param worker_id: ID under which the new worker is tracked in ``self._workers``.
    :type worker_id: str

    :param worker_config: The guest process configuration (keys used here:
        ``executable``, and optionally ``arguments``, ``options``).
    :type worker_config: dict

    :param details: Call details of the caller (used for progressive results
        and to exclude the caller from lifecycle events).
    :type details: :class:`autobahn.wamp.types.CallDetails`

    :returns: A Deferred (``worker.ready``) that fires with the guest worker
        startup information (dict) once the worker has started, or errbacks
        if startup fails.

    :raises: :class:`autobahn.wamp.exception.ApplicationError` if a worker
        with ``worker_id`` is already running or the configuration is invalid.
    """
    # prohibit starting a worker twice
    #
    if worker_id in self._workers:
        emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(worker_id)
        self.log.error(emsg)
        raise ApplicationError(u'crossbar.error.worker_already_running', emsg)

    # validate the guest worker configuration against the node personality
    #
    try:
        self.personality.check_guest(self.personality, worker_config)
    except Exception as e:
        raise ApplicationError(u'crossbar.error.invalid_configuration',
                               'invalid guest worker configuration: {}'.format(e))

    options = worker_config.get('options', {})

    # guest process working directory (relative 'workdir' is resolved
    # against the node directory)
    #
    workdir = self._node._cbdir
    if 'workdir' in options:
        workdir = os.path.join(workdir, options['workdir'])
    workdir = os.path.abspath(workdir)

    # guest process executable and command line arguments
    #

    # first try to configure the fully qualified path for the guest
    # executable by joining workdir and configured executable ..
    exe = os.path.abspath(os.path.join(workdir, worker_config['executable']))

    if check_executable(exe):
        self.log.info("Using guest worker executable '{exe}' (executable path taken from configuration)",
                      exe=exe)
    else:
        # try to detect the fully qualified path for the guest
        # executable by doing a "which" on the configured executable name
        exe = which(worker_config['executable'])
        if exe is not None and check_executable(exe):
            self.log.info("Using guest worker executable '{exe}' (executable path detected from environment)",
                          exe=exe)
        else:
            # FIX: error message previously read "could not find and executable"
            emsg = "Could not start worker: could not find an executable for '{}'".format(worker_config['executable'])
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)

    # guest process command line arguments
    #
    args = [exe]
    args.extend(worker_config.get('arguments', []))

    # guest process environment
    #
    worker_env = create_process_env(options)

    # log name of worker
    #
    worker_logname = 'Guest'

    # topic URIs used (later)
    #
    starting_topic = u'{}.on_guest_starting'.format(self._uri_prefix)
    started_topic = u'{}.on_guest_started'.format(self._uri_prefix)

    # add worker tracking instance to the worker map ..
    #
    worker = GuestWorkerProcess(self, worker_id, details.caller, keeplog=options.get('traceback', None))
    self._workers[worker_id] = worker

    # create a (custom) process endpoint
    #
    ep = WorkerProcessEndpoint(self._node._reactor, exe, args, path=workdir, env=worker_env, worker=worker)

    # ready handling
    #
    def on_ready_success(proto):
        # fired when the guest worker signals readiness over WAMP
        self.log.info('{worker_logname} worker "{worker_id}" started',
                      worker_logname=worker_logname, worker_id=worker.id)
        worker.on_worker_started(proto)

        # make sure the worker is cleaned up when the node reactor shuts down
        self._node._reactor.addSystemEventTrigger(
            'before', 'shutdown',
            self._cleanup_worker, self._node._reactor, worker,
        )

        # directory watcher
        #
        if 'watch' in options:
            if HAS_FS_WATCHER:
                # assemble list of watched directories
                watched_dirs = []
                for d in options['watch'].get('directories', []):
                    watched_dirs.append(os.path.abspath(os.path.join(self._node._cbdir, d)))

                worker.watch_timeout = options['watch'].get('timeout', 1)

                # create a filesystem watcher
                worker.watcher = FilesystemWatcher(workdir, watched_dirs=watched_dirs)

                # make sure to stop the watch upon Twisted being shut down
                def on_shutdown():
                    worker.watcher.stop()

                self._node._reactor.addSystemEventTrigger('before', 'shutdown', on_shutdown)

                # this handler will get fired by the watcher upon detecting an FS event
                def on_filesystem_change(fs_event):
                    worker.watcher.stop()
                    proto.signal('TERM')

                    if options['watch'].get('action', None) == 'restart':
                        self.log.info("Filesystem watcher detected change {fs_event} - restarting guest in {watch_timeout} seconds ..",
                                      fs_event=fs_event, watch_timeout=worker.watch_timeout)
                        # Add a timeout large enough (perhaps add a config option later)
                        self._node._reactor.callLater(worker.watch_timeout, self.start_worker, worker_id, worker_config, details)
                        # Shut the worker down, after the restart event is scheduled
                        # FIXME: all workers should have a stop() method ..
                        # -> 'GuestWorkerProcess' object has no attribute 'stop'
                        # worker.stop()
                    else:
                        self.log.info("Filesystem watcher detected change {fs_event} - no action taken!",
                                      fs_event=fs_event)

                # now start watching ..
                worker.watcher.start(on_filesystem_change)
            else:
                self.log.warn("Cannot watch directories for changes - feature not available")

        # assemble guest worker startup information
        #
        started_info = {
            u'id': worker.id,
            u'status': worker.status,
            u'started': utcstr(worker.started),
            u'who': worker.who,
        }

        # all sessions except the caller get the "started" event (the caller
        # receives started_info as the call result instead)
        self.publish(started_topic, started_info, options=PublishOptions(exclude=details.caller))

        return started_info

    def on_ready_error(err):
        # startup failed: drop the tracking entry and surface the error
        del self._workers[worker.id]

        emsg = 'Failed to start guest worker: {}'.format(err.value)
        self.log.error(emsg)
        raise ApplicationError(u"crossbar.error.cannot_start", emsg, ep.getlog())

    worker.ready.addCallbacks(on_ready_success, on_ready_error)

    def on_exit_success(res):
        self.log.info("Guest {worker_id} exited with success", worker_id=worker.id)
        del self._workers[worker.id]

    def on_exit_error(err):
        self.log.error("Guest {worker_id} exited with error {err.value}",
                       worker_id=worker.id, err=err)
        del self._workers[worker.id]

    worker.exit.addCallbacks(on_exit_success, on_exit_error)

    # create a transport factory for talking WAMP to the native worker
    #
    transport_factory = create_guest_worker_client_factory(worker_config, worker.ready, worker.exit)
    transport_factory.noisy = False
    self._workers[worker_id].factory = transport_factory

    # now (immediately before actually forking) signal the starting of the worker
    #
    starting_info = {
        u'id': worker_id,
        u'status': worker.status,
        u'created': utcstr(worker.created),
        u'who': worker.who,
    }

    # the caller gets a progressive result ..
    if details.progress:
        details.progress(starting_info)

    # .. while all others get an event
    self.publish(starting_topic, starting_info, options=PublishOptions(exclude=details.caller))

    # now actually fork the worker ..
    #
    self.log.info('{worker_logname} "{worker_id}" process starting ..',
                  worker_logname=worker_logname, worker_id=worker_id)
    self.log.debug('{worker_logname} "{worker_id}" process using command line "{cli}" ..',
                   worker_logname=worker_logname, worker_id=worker_id, cli=' '.join(args))

    d = ep.connect(transport_factory)

    def on_connect_success(proto):
        # this seems to be called immediately when the child process
        # has been forked. even if it then immediately fails because
        # e.g. the executable doesn't even exist. in other words,
        # I'm not sure under what conditions the deferred will
        # errback - probably only if the forking of a new process fails
        # at OS level due to out of memory conditions or such.
        self.log.debug('{worker_logname} "{worker_id}" connected',
                       worker_logname=worker_logname, worker_id=worker_id)

        # do not comment this: it will lead to on_worker_started being called
        # _before_ on_worker_connected, and we don't need it!
        # worker.on_worker_connected(proto)

    def on_connect_error(err):
        # not sure when this errback is triggered at all .. see above.
        self.log.failure(
            "Internal error: connection to forked guest worker failed ({log_failure.value})",
        )

        # in any case, forward the error ..
        worker.ready.errback(err)

    d.addCallbacks(on_connect_success, on_connect_error)

    return worker.ready