Example #1
0
    def _start_native_worker(self, wtype, id, options=None, details=None):
        """
        Start a native worker process and track it in ``self._workers``.

        The worker is spawned from ``crossbar/worker/process.py`` using either
        ``sys.executable`` or the interpreter given in ``options['python']``.

        :param wtype: Worker type: ``'router'``, ``'container'`` or
            ``'websocket-testee'``.
        :param id: ID under which the worker is tracked; must not already be
            present in ``self._workers``.
        :param options: Worker options (checked with the matching
            ``checkconfig.check_*_options`` function). The keys read here are
            ``python``, ``title``, ``reactor`` and ``traceback``.
        :param details: Call details. ``details.caller`` is handed to the
            worker tracking instance and excluded from the published events,
            and ``details.progress`` (if set) receives the "starting" info.
            May be ``None``, in which case no caller is recorded or excluded
            and no progressive result is sent.

        :returns: ``worker.ready`` (presumably a Twisted Deferred), whose
            callback chain yields the "started" info dict on success or raises
            ``crossbar.error.cannot_start`` on failure.

        :raises ApplicationError: ``crossbar.error.worker_already_running`` if
            the ID is already in use, ``crossbar.error.invalid_configuration``
            if the options or the configured Python executable are invalid.
        """

        assert (wtype in ['router', 'container', 'websocket-testee'])

        # prohibit starting a worker twice
        #
        if id in self._workers:
            emsg = "Could not start worker: a worker with ID '{}' is already running (or starting)".format(
                id)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.worker_already_running',
                                   emsg)

        # check worker options
        #
        options = options or {}
        try:
            if wtype == 'router':
                checkconfig.check_router_options(options)
            elif wtype == 'container':
                checkconfig.check_container_options(options)
            elif wtype == 'websocket-testee':
                checkconfig.check_websocket_testee_options(options)
            else:
                raise Exception("logic error")
        except Exception as e:
            emsg = "Could not start native worker: invalid configuration ({})".format(
                e)
            self.log.error(emsg)
            raise ApplicationError(u'crossbar.error.invalid_configuration',
                                   emsg)

        # ``details`` defaults to None in the signature, so never dereference
        # it blindly: without details there is no caller to record/exclude and
        # nobody to send a progressive result to.
        #
        caller = details.caller if details is not None else None
        progress = details.progress if details is not None else None
        exclude = [caller] if caller is not None else None

        # allow override Python executable from options
        #
        if 'python' in options:
            exe = options['python']

            # the executable must be an absolute path, e.g. /home/oberstet/pypy-2.2.1-linux64/bin/pypy
            #
            if not os.path.isabs(exe):
                emsg = "Invalid worker configuration: python executable '{}' must be an absolute path".format(
                    exe)
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration',
                                       emsg)

            # of course the path must exist and actually be executable
            #
            if not (os.path.isfile(exe) and os.access(exe, os.X_OK)):
                emsg = "Invalid worker configuration: python executable '{}' does not exist or isn't an executable".format(
                    exe)
                self.log.error(emsg)
                raise ApplicationError(u'crossbar.error.invalid_configuration',
                                       emsg)
        else:
            exe = sys.executable

        # all native workers (routers and containers for now) start from the same script
        #
        filename = os.path.abspath(
            os.path.join(crossbar.__file__, "..", "worker", "process.py"))

        # assemble command line for forking the worker
        #
        args = [exe, "-u", filename]
        args.extend(["--cbdir", self._node._cbdir])
        args.extend(["--node", str(self._node_id)])
        args.extend(["--worker", str(id)])
        args.extend(["--realm", self._realm])
        args.extend(["--type", wtype])
        args.extend(["--loglevel", _loglevel])

        # allow override worker process title from options
        #
        if options.get('title', None):
            args.extend(['--title', options['title']])

        # forward explicit reactor selection
        #
        if 'reactor' in options and sys.platform in options['reactor']:
            args.extend(['--reactor', options['reactor'][sys.platform]])
        # FIXME
        # elif self._node.options.reactor:
        #    args.extend(['--reactor', self._node.options.reactor])

        # create worker process environment
        #
        worker_env = create_process_env(options)

        # We need to use the same PYTHONPATH we were started with, so we can
        # find the Crossbar we're working with -- it may not be the same as the
        # one on the default path
        worker_env["PYTHONPATH"] = os.pathsep.join(sys.path)

        # log name of worker
        #
        worker_logname = {
            'router': 'Router',
            'container': 'Container',
            'websocket-testee': 'WebSocketTestee'
        }.get(wtype, 'Worker')

        # topic URIs used (later)
        #
        if wtype == 'router':
            starting_topic = 'crossbar.node.{}.on_router_starting'.format(
                self._node_id)
            started_topic = 'crossbar.node.{}.on_router_started'.format(
                self._node_id)
        elif wtype == 'container':
            starting_topic = 'crossbar.node.{}.on_container_starting'.format(
                self._node_id)
            started_topic = 'crossbar.node.{}.on_container_started'.format(
                self._node_id)
        elif wtype == 'websocket-testee':
            starting_topic = 'crossbar.node.{}.on_websocket_testee_starting'.format(
                self._node_id)
            started_topic = 'crossbar.node.{}.on_websocket_testee_started'.format(
                self._node_id)
        else:
            raise Exception("logic error")

        # add worker tracking instance to the worker map ..
        #
        if wtype == 'router':
            worker = RouterWorkerProcess(self,
                                         id,
                                         caller,
                                         keeplog=options.get(
                                             'traceback', None))
        elif wtype == 'container':
            worker = ContainerWorkerProcess(self,
                                            id,
                                            caller,
                                            keeplog=options.get(
                                                'traceback', None))
        elif wtype == 'websocket-testee':
            worker = WebSocketTesteeWorkerProcess(self,
                                                  id,
                                                  caller,
                                                  keeplog=options.get(
                                                      'traceback', None))
        else:
            raise Exception("logic error")

        self._workers[id] = worker

        def forget_worker():
            # Drop our tracking entry, but only if it still is *this* worker.
            # Both the ready-errback and the exit handlers end up here, so the
            # entry may already be gone (a plain ``del`` would raise KeyError),
            # or the ID may meanwhile be tracking a newer worker.
            if self._workers.get(worker.id) is worker:
                del self._workers[worker.id]

        # create a (custom) process endpoint.
        #
        if platform.isWindows():
            childFDs = None  # Use the default Twisted ones
        else:
            # The communication between controller and container workers is
            # using WAMP running over 2 pipes.
            # For controller->container traffic this runs over FD 0 (`stdin`)
            # and for the container->controller traffic, this runs over FD 3.
            #
            # Note: We use FD 3, not FD 1 (`stdout`) or FD 2 (`stderr`) for
            # container->controller traffic, so that components running in the
            # container which happen to write to `stdout` or `stderr` do not
            # interfere with the container-controller communication.
            childFDs = {0: "w", 1: "r", 2: "r", 3: "r"}

        ep = WorkerProcessEndpoint(self._node._reactor,
                                   exe,
                                   args,
                                   env=worker_env,
                                   worker=worker,
                                   childFDs=childFDs)

        # ready handling
        #
        def on_ready_success(_):
            self.log.info("{worker} with ID '{id}' and PID {pid} started",
                          worker=worker_logname,
                          id=worker.id,
                          pid=worker.pid)

            # make sure the worker gets cleaned up when the reactor shuts down
            self._node._reactor.addSystemEventTrigger(
                'before',
                'shutdown',
                self._cleanup_worker,
                self._node._reactor,
                worker,
            )

            worker.status = 'started'
            worker.started = datetime.utcnow()

            started_info = {
                'id': worker.id,
                'status': worker.status,
                'started': utcstr(worker.started),
                'who': worker.who
            }

            # FIXME: make start of stats printer dependent on log level ..
            worker.log_stats(5.)

            self.publish(started_topic,
                         started_info,
                         options=PublishOptions(exclude=exclude))

            return started_info

        def on_ready_error(err):
            forget_worker()
            emsg = 'Failed to start native worker: {}'.format(err.value)
            self.log.error(emsg)
            raise ApplicationError(u"crossbar.error.cannot_start", emsg,
                                   worker.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        def on_exit_success(_):
            self.log.info("Node worker {} ended successfully".format(
                worker.id))
            worker.log_stats(0)
            forget_worker()
            return True

        def on_exit_error(err):
            self.log.info("Node worker {} ended with error ({})".format(
                worker.id, err))
            worker.log_stats(0)
            forget_worker()
            return False

        def check_for_shutdown(was_successful):
            # This runs via addBoth: it normally receives True/False from the
            # exit handlers above, but would receive a (truthy) failure object
            # if one of them raised. Only a literal True counts as success.
            ended_with_error = was_successful is not True
            shutdown = False

            # automatically shutdown node whenever a worker ended (successfully, or with error)
            #
            if checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT in self._node._node_shutdown_triggers:
                self.log.info(
                    "Node worker ended, and trigger '{}' active".format(
                        checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT))
                shutdown = True

            # automatically shutdown node when worker ended with error
            #
            if ended_with_error and checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT_WITH_ERROR in self._node._node_shutdown_triggers:
                self.log.info(
                    "Node worker ended with error, and trigger '{}' active".
                    format(
                        checkconfig.NODE_SHUTDOWN_ON_WORKER_EXIT_WITH_ERROR))
                shutdown = True

            # automatically shutdown node when no more workers are left
            #
            if len(
                    self._workers
            ) == 0 and checkconfig.NODE_SHUTDOWN_ON_LAST_WORKER_EXIT in self._node._node_shutdown_triggers:
                self.log.info(
                    "No more node workers running, and trigger '{}' active".
                    format(checkconfig.NODE_SHUTDOWN_ON_LAST_WORKER_EXIT))
                shutdown = True

            # initiate shutdown (but only if we are not already shutting down)
            #
            if shutdown:
                if not self._shutdown_requested:
                    self.log.info("Node shutting down ..")
                    self.shutdown()
                else:
                    # ignore: shutdown already initiated ..
                    self.log.info("Node is already shutting down.")
            else:
                self.log.info(
                    "Node will continue to run (node shutdown triggers active: {})"
                    .format(self._node._node_shutdown_triggers))

        d_on_exit = worker.exit.addCallbacks(on_exit_success, on_exit_error)
        d_on_exit.addBoth(check_for_shutdown)

        # create a transport factory for talking WAMP to the native worker
        #
        transport_factory = create_native_worker_client_factory(
            self._node._router_session_factory, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[id].factory = transport_factory

        # now (immediately before actually forking) signal the starting of the worker
        #
        starting_info = {
            'id': id,
            'status': worker.status,
            'created': utcstr(worker.created),
            'who': worker.who
        }

        # the caller gets a progressive result ..
        if progress:
            progress(starting_info)

        # .. while all others get an event
        self.publish(starting_topic,
                     starting_info,
                     options=PublishOptions(exclude=exclude))

        # now actually fork the worker ..
        #
        self.log.info("Starting {worker} with ID '{id}'...",
                      worker=worker_logname,
                      id=id)
        self.log.debug("{worker} '{id}' command line is '{cmdline}'",
                       worker=worker_logname,
                       id=id,
                       cmdline=' '.join(args))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            # this seems to be called immediately when the child process
            # has been forked. even if it then immediately fails because
            # e.g. the executable doesn't even exist. in other words,
            # I'm not sure under what conditions the deferred will errback ..

            pid = proto.transport.pid
            self.log.debug("Native worker process connected with PID {pid}",
                           pid=pid)

            # note the PID of the worker
            worker.pid = pid

            # proto is an instance of NativeWorkerClientProtocol
            worker.proto = proto

            worker.status = 'connected'
            worker.connected = datetime.utcnow()

        def on_connect_error(err):

            # not sure when this errback is triggered at all ..
            self.log.error(
                "Internal error: connection to forked native worker failed ({err})",
                err=err)

            # in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready
Example #2
0
    def _start_native_worker(self, wtype, id, options=None, details=None):
        """
        Start a native worker process and track it in ``self._workers``.

        The worker is spawned from the packaged ``worker/process.py`` script
        using either ``sys.executable`` or the interpreter given in
        ``options['python']``.

        :param wtype: Worker type: ``'router'`` or ``'container'``.
        :param id: ID under which the worker is tracked; must not already be
            present in ``self._workers``.
        :param options: Worker options (checked with the matching
            ``checkconfig.check_*_options`` function). The keys read here are
            ``python``, ``title``, ``debug``, ``reactor`` and ``traceback``.
        :param details: Call details. ``details.authid`` is handed to the
            worker tracking instance, ``details.caller`` is excluded from the
            published events, and ``details.progress`` (if set) receives the
            "starting" info. May be ``None``, in which case none of these are
            used.

        :returns: ``worker.ready`` (presumably a Twisted Deferred), whose
            callback chain yields the "started" info dict on success or raises
            ``crossbar.error.cannot_start`` on failure.

        :raises ApplicationError: ``crossbar.error.worker_already_running`` if
            the ID is already in use, ``crossbar.error.invalid_configuration``
            if the options or the configured Python executable are invalid.
        """

        assert (wtype in ['router', 'container'])

        ## prohibit starting a worker twice
        ##
        if id in self._workers:
            emsg = "ERROR: could not start worker - a worker with ID '{}' is already running (or starting)".format(
                id)
            log.msg(emsg)
            raise ApplicationError('crossbar.error.worker_already_running',
                                   emsg)

        ## check worker options
        ##
        options = options or {}
        try:
            if wtype == 'router':
                checkconfig.check_router_options(options)
            elif wtype == 'container':
                checkconfig.check_container_options(options)
            else:
                raise Exception("logic error")
        except Exception as e:
            emsg = "ERROR: could not start native worker - invalid configuration ({})".format(
                e)
            log.msg(emsg)
            raise ApplicationError('crossbar.error.invalid_configuration',
                                   emsg)

        ## ``details`` defaults to None in the signature, so never dereference
        ## it blindly: without details there is no caller to record/exclude
        ## and nobody to send a progressive result to.
        ##
        authid = details.authid if details is not None else None
        caller = details.caller if details is not None else None
        progress = details.progress if details is not None else None
        exclude = [caller] if caller is not None else None

        ## allow override Python executable from options
        ##
        if 'python' in options:
            exe = options['python']

            ## the executable must be an absolute path, e.g. /home/oberstet/pypy-2.2.1-linux64/bin/pypy
            ##
            if not os.path.isabs(exe):
                emsg = "ERROR: python '{}' from worker options must be an absolute path".format(
                    exe)
                log.msg(emsg)
                raise ApplicationError('crossbar.error.invalid_configuration',
                                       emsg)

            ## of course the path must exist and actually be executable
            ##
            if not (os.path.isfile(exe) and os.access(exe, os.X_OK)):
                emsg = "ERROR: python '{}' from worker options does not exist or isn't an executable".format(
                    exe)
                log.msg(emsg)
                raise ApplicationError('crossbar.error.invalid_configuration',
                                       emsg)
        else:
            exe = sys.executable

        ## all native workers (routers and containers for now) start from the same script
        ##
        filename = pkg_resources.resource_filename('crossbar',
                                                   'worker/process.py')

        ## assemble command line for forking the worker
        ##
        args = [exe, "-u", filename]
        args.extend(["--cbdir", self._node._cbdir])
        args.extend(["--node", str(self._node_id)])
        args.extend(["--worker", str(id)])
        args.extend(["--realm", self._realm])
        args.extend(["--type", wtype])

        ## allow override worker process title from options
        ##
        if options.get('title', None):
            args.extend(['--title', options['title']])

        ## allow overriding debug flag from options
        ##
        if options.get('debug', self.debug):
            args.append('--debug')

        ## forward explicit reactor selection
        ##
        if 'reactor' in options and sys.platform in options['reactor']:
            args.extend(['--reactor', options['reactor'][sys.platform]])
        elif self._node.options.reactor:
            args.extend(['--reactor', self._node.options.reactor])

        ## create worker process environment
        ##
        worker_env = create_process_env(options)

        ## log name of worker
        ##
        worker_logname = {
            'router': 'Router',
            'container': 'Container'
        }.get(wtype, 'Worker')

        ## topic URIs used (later)
        ##
        if wtype == 'router':
            starting_topic = 'crossbar.node.{}.on_router_starting'.format(
                self._node_id)
            started_topic = 'crossbar.node.{}.on_router_started'.format(
                self._node_id)
        elif wtype == 'container':
            starting_topic = 'crossbar.node.{}.on_container_starting'.format(
                self._node_id)
            started_topic = 'crossbar.node.{}.on_container_started'.format(
                self._node_id)
        else:
            raise Exception("logic error")

        ## add worker tracking instance to the worker map ..
        ##
        if wtype == 'router':
            worker = RouterWorkerProcess(self,
                                         id,
                                         authid,
                                         keeplog=options.get(
                                             'traceback', None))
        elif wtype == 'container':
            worker = ContainerWorkerProcess(self,
                                            id,
                                            authid,
                                            keeplog=options.get(
                                                'traceback', None))
        else:
            raise Exception("logic error")

        self._workers[id] = worker

        def forget_worker():
            ## Drop our tracking entry, but only if it still is *this* worker.
            ## Both the ready-errback and the exit handlers end up here, so the
            ## entry may already be gone (a plain ``del`` would raise KeyError),
            ## or the ID may meanwhile be tracking a newer worker.
            if self._workers.get(worker.id) is worker:
                del self._workers[worker.id]

        ## create a (custom) process endpoint
        ##
        ep = WorkerProcessEndpoint(self._node._reactor,
                                   exe,
                                   args,
                                   env=worker_env,
                                   worker=worker)

        ## ready handling
        ##
        def on_ready_success(_):
            log.msg("{} with ID '{}' and PID {} started".format(
                worker_logname, worker.id, worker.pid))

            worker.status = 'started'
            worker.started = datetime.utcnow()

            started_info = {
                'id': worker.id,
                'status': worker.status,
                'started': utcstr(worker.started),
                'who': worker.who
            }

            self.publish(started_topic,
                         started_info,
                         options=PublishOptions(exclude=exclude))

            return started_info

        def on_ready_error(err):
            forget_worker()

            emsg = 'ERROR: failed to start native worker - {}'.format(
                err.value)
            log.msg(emsg)
            raise ApplicationError("crossbar.error.cannot_start", emsg,
                                   worker.getlog())

        worker.ready.addCallbacks(on_ready_success, on_ready_error)

        ## exit handling: whichever way the worker ended, stop tracking it
        ##
        def on_exit_success(res):
            forget_worker()

        def on_exit_error(err):
            forget_worker()

        worker.exit.addCallbacks(on_exit_success, on_exit_error)

        ## create a transport factory for talking WAMP to the native worker
        ##
        transport_factory = create_native_worker_client_factory(
            self._node._router_session_factory, worker.ready, worker.exit)
        transport_factory.noisy = False
        self._workers[id].factory = transport_factory

        ## now (immediately before actually forking) signal the starting of the worker
        ##
        starting_info = {
            'id': id,
            'status': worker.status,
            'created': utcstr(worker.created),
            'who': worker.who
        }

        ## the caller gets a progressive result ..
        if progress:
            progress(starting_info)

        ## .. while all others get an event
        self.publish(starting_topic,
                     starting_info,
                     options=PublishOptions(exclude=exclude))

        ## now actually fork the worker ..
        ##
        if self.debug:
            log.msg(
                "Starting {} with ID '{}' using command line '{}' ..".format(
                    worker_logname, id, ' '.join(args)))
        else:
            log.msg("Starting {} with ID '{}' ..".format(worker_logname, id))

        d = ep.connect(transport_factory)

        def on_connect_success(proto):

            ## this seems to be called immediately when the child process
            ## has been forked. even if it then immediately fails because
            ## e.g. the executable doesn't even exist. in other words,
            ## I'm not sure under what conditions the deferred will errback ..

            pid = proto.transport.pid
            if self.debug:
                log.msg(
                    "Native worker process connected with PID {}".format(pid))

            ## note the PID of the worker
            worker.pid = pid

            ## proto is an instance of NativeWorkerClientProtocol
            worker.proto = proto

            worker.status = 'connected'
            worker.connected = datetime.utcnow()

        def on_connect_error(err):

            ## not sure when this errback is triggered at all ..
            if self.debug:
                log.msg("ERROR: Connecting forked native worker failed - {}".
                        format(err))

            ## in any case, forward the error ..
            worker.ready.errback(err)

        d.addCallbacks(on_connect_success, on_connect_error)

        return worker.ready