def _start_native_worker(self, wtype, id, options=None, details=None):

    assert (wtype in ['router', 'container', 'websocket-testee'])

    # prohibit starting a worker twice
    #
    if id in self._workers:
        emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(id)
        self.log.error(emsg)
        raise ApplicationError(u'crossbar.error.worker_already_running', emsg)

    # check worker options
    #
    options = options or {}
    try:
        if wtype == 'router':
            checkconfig.check_router_options(options)
        elif wtype == 'container':
            checkconfig.check_container_options(options)
        elif wtype == 'websocket-testee':
            checkconfig.check_websocket_testee_options(options)
        else:
            raise Exception("logic error")
    except Exception as e:
        emsg = "Could not start native worker: invalid configuration ({})".format(e)
        self.log.error(emsg)
        raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)

    # allow override Python executable from options
    #
    if 'python' in options:
        exe = options['python']

        # the executable must be an absolute path, e.g. /home/oberstet/pypy-2.2.1-linux64/bin/pypy
        #
        if not os.path.isabs(exe):
            emsg = "Invalid worker configuration: python executable '{}' must be an absolute path".format(exe)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)

        # of course the path must exist and actually be executable
        #
        if not (os.path.isfile(exe) and os.access(exe, os.X_OK)):
            emsg = "Invalid worker configuration: python executable '{}' does not exist or isn't an executable".format(exe)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.invalid_configuration', emsg)
    else:
        exe = sys.executable

    # all native workers (routers and containers for now) start from the same script
    #
    filename = os.path.abspath(os.path.join(crossbar.__file__, "..", "worker", "process.py"))

    # assemble command line for forking the worker
    #
    args = [exe, "-u", filename]
    args.extend(["--cbdir", self._node._cbdir])
    args.extend(["--node", str(self._node_id)])
    args.extend(["--worker", str(id)])
    args.extend(["--realm", self._realm])
    args.extend(["--type", wtype])
    args.extend(["--loglevel", _loglevel])

    # allow override worker process title from options
    #
    if options.get('title', None):
        args.extend(['--title', options['title']])

    # forward explicit reactor selection
    #
    if 'reactor' in options and sys.platform in options['reactor']:
        args.extend(['--reactor', options['reactor'][sys.platform]])
    # FIXME
    # elif self._node.options.reactor:
    #     args.extend(['--reactor', self._node.options.reactor])

    # create worker process environment
    #
    worker_env = create_process_env(options)

    # We need to use the same PYTHONPATH we were started with, so we can
    # find the Crossbar we're working with -- it may not be the same as the
    # one on the default path
    worker_env["PYTHONPATH"] = os.pathsep.join(sys.path)

    # log name of worker
    #
    worker_logname = {
        'router': 'Router',
        'container': 'Container',
        'websocket-testee': 'WebSocketTestee'
    }.get(wtype, 'Worker')

    # topic URIs used (later)
    #
    if wtype == 'router':
        starting_topic = 'crossbar.node.{}.on_router_starting'.format(self._node_id)
        started_topic = 'crossbar.node.{}.on_router_started'.format(self._node_id)
    elif wtype == 'container':
        starting_topic = 'crossbar.node.{}.on_container_starting'.format(self._node_id)
        started_topic = 'crossbar.node.{}.on_container_started'.format(self._node_id)
    elif wtype == 'websocket-testee':
        starting_topic = 'crossbar.node.{}.on_websocket_testee_starting'.format(self._node_id)
        started_topic = 'crossbar.node.{}.on_websocket_testee_started'.format(self._node_id)
    else:
        raise Exception("logic error")

    # add worker tracking instance to the worker map ..
    #
    if wtype == 'router':
        worker = RouterWorkerProcess(self, id, details.caller,
                                     keeplog=options.get('traceback', None))
    elif wtype == 'container':
        worker = ContainerWorkerProcess(self, id, details.caller,
                                        keeplog=options.get('traceback', None))
    elif wtype == 'websocket-testee':
        worker = WebSocketTesteeWorkerProcess(self, id, details.caller,
                                              keeplog=options.get('traceback', None))
    else:
        raise Exception("logic error")

    self._workers[id] = worker

    # create a (custom) process endpoint.
    #
    if platform.isWindows():
        childFDs = None  # Use the default Twisted ones
    else:
        # The communication between controller and container workers is
        # using WAMP running over 2 pipes.
        # For controller->container traffic this runs over FD 0 (`stdin`)
        # and for the container->controller traffic, this runs over FD 3.
        #
        # Note: We use FD 3, not FD 1 (`stdout`) or FD 2 (`stderr`) for
        # container->controller traffic, so that components running in the
        # container which happen to write to `stdout` or `stderr` do not
        # interfere with the container-controller communication.
        childFDs = {0: "w", 1: "r", 2: "r", 3: "r"}

    ep = WorkerProcessEndpoint(self._node._reactor, exe, args,
                               env=worker_env, worker=worker,
                               childFDs=childFDs)

    # ready handling
    #
    def on_ready_success(id):
        self.log.info("{worker} with ID '{id}' and PID {pid} started",
                      worker=worker_logname, id=worker.id, pid=worker.pid)

        self._node._reactor.addSystemEventTrigger(
            'before', 'shutdown',
            self._cleanup_worker, self._node._reactor, worker,
        )

        worker.status = 'started'
        worker.started = datetime.utcnow()

        started_info = {
            'id': worker.id,
            'status': worker.status,
            'started': utcstr(worker.started),
            'who': worker.who
        }

        # FIXME: make start of stats printer dependent on log level ..
        worker.log_stats(5.)

        self.publish(started_topic,
                     started_info,
                     options=PublishOptions(exclude=[details.caller]))

        return started_info

    def on_ready_error(err):
        del self._workers[worker.id]
        emsg = 'Failed to start native worker: {}'.format(err.value)
        self.log.error(emsg)
        raise ApplicationError(u"crossbar.error.cannot_start", emsg, worker.getlog())

    worker.ready.addCallbacks(on_ready_success, on_ready_error)

    def on_exit_success(_):
        self.log.info("Node worker {} ended successfully".format(worker.id))
        worker.log_stats(0)
        del self._workers[worker.id]
        return True

    def on_exit_error(err):
        self.log.info("Node worker {} ended with error ({})".format(worker.id, err))
        worker.log_stats(0)
        del self._workers[worker.id]
        return False

    def check_for_shutdown(was_successful):
        shutdown = False

        # automatically shutdown node whenever a worker ended (successfully, or with error)
        #
        if checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT in self._node._node_shutdown_triggers:
            self.log.info("Node worker ended, and trigger '{}' active".format(
                checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT))
            shutdown = True

        # automatically shutdown node when worker ended with error
        #
        if not was_successful and checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT_WITH_ERROR in self._node._node_shutdown_triggers:
            self.log.info("Node worker ended with error, and trigger '{}' active".format(
                checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT_WITH_ERROR))
            shutdown = True

        # automatically shutdown node when no more workers are left
        #
        if len(self._workers) == 0 and checkconfig.NODE_SHUTDOWN_ON_LAST_WORKER_EXIT in self._node._node_shutdown_triggers:
            self.log.info("No more node workers running, and trigger '{}' active".format(
                checkconfig.NODE_SHUTDOWN_ON_LAST_WORKER_EXIT))
            shutdown = True

        # initiate shutdown (but only if we are not already shutting down)
        #
        if shutdown:
            if not self._shutdown_requested:
                self.log.info("Node shutting down ..")
                self.shutdown()
            else:
                # ignore: shutdown already initiated ..
                self.log.info("Node is already shutting down.")
        else:
            self.log.info("Node will continue to run (node shutdown triggers active: {})".format(
                self._node._node_shutdown_triggers))

    d_on_exit = worker.exit.addCallbacks(on_exit_success, on_exit_error)
    d_on_exit.addBoth(check_for_shutdown)

    # create a transport factory for talking WAMP to the native worker
    #
    transport_factory = create_native_worker_client_factory(
        self._node._router_session_factory, worker.ready, worker.exit)
    transport_factory.noisy = False
    self._workers[id].factory = transport_factory

    # now (immediately before actually forking) signal the starting of the worker
    #
    starting_info = {
        'id': id,
        'status': worker.status,
        'created': utcstr(worker.created),
        'who': worker.who
    }

    # the caller gets a progressive result ..
    if details.progress:
        details.progress(starting_info)

    # .. while all others get an event
    self.publish(starting_topic,
                 starting_info,
                 options=PublishOptions(exclude=[details.caller]))

    # now actually fork the worker ..
    #
    self.log.info("Starting {worker} with ID '{id}'...",
                  worker=worker_logname, id=id)
    self.log.debug("{worker} '{id}' command line is '{cmdline}'",
                   worker=worker_logname, id=id, cmdline=' '.join(args))

    d = ep.connect(transport_factory)

    def on_connect_success(proto):
        # this seems to be called immediately when the child process
        # has been forked. even if it then immediately fails because
        # e.g. the executable doesn't even exist. in other words,
        # I'm not sure under what conditions the deferred will errback ..

        pid = proto.transport.pid
        self.log.debug("Native worker process connected with PID {pid}",
                       pid=pid)

        # note the PID of the worker
        worker.pid = pid

        # proto is an instance of NativeWorkerClientProtocol
        worker.proto = proto

        worker.status = 'connected'
        worker.connected = datetime.utcnow()

    def on_connect_error(err):
        # not sure when this errback is triggered at all ..
        self.log.error("Internal error: connection to forked native worker failed ({err})",
                       err=err)

        # in any case, forward the error ..
        worker.ready.errback(err)

    d.addCallbacks(on_connect_success, on_connect_error)

    return worker.ready
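
# A minimal sketch of how WAMP-exposed wrapper procedures might delegate to
# `_start_native_worker`. These wrappers are illustrative assumptions and not
# part of the code shown above; only the delegation pattern is implied by the
# `wtype` values the method accepts.
def start_router(self, id, options=None, details=None):
    """
    Start a new router worker: a native worker process running a WAMP router.
    """
    return self._start_native_worker('router', id, options, details=details)

def start_container(self, id, options=None, details=None):
    """
    Start a new container worker: a native worker process hosting components.
    """
    return self._start_native_worker('container', id, options, details=details)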
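
# `on_ready_success` above registers `self._cleanup_worker` as a reactor
# 'before shutdown' trigger. A minimal sketch of what such a cleanup might
# look like, assuming `worker.proto` carries a Twisted process transport
# (requires: from twisted.internet.error import ProcessExitedAlready); this
# body is an assumption for illustration, not the actual implementation.
@staticmethod
def _cleanup_worker(reactor, worker):
    try:
        # ask the worker process to terminate; it may have exited already
        worker.proto.transport.signalProcess('TERM')
    except ProcessExitedAlready:
        # the worker already ended on its own - nothing to clean up
        pass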